1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
30 #include "time-util.h"
35 #define EPOLL_QUEUE_MAX 64
36 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
38 typedef enum EventSourceType {
48 struct sd_event_source {
53 sd_prepare_handler_t prepare;
55 EventSourceType type:4;
60 unsigned pending_index;
61 unsigned prepare_index;
62 unsigned pending_iteration;
63 unsigned prepare_iteration;
67 sd_io_handler_t callback;
74 sd_time_handler_t callback;
75 usec_t next, accuracy;
76 unsigned earliest_index;
77 unsigned latest_index;
80 sd_signal_handler_t callback;
81 struct signalfd_siginfo siginfo;
85 sd_child_handler_t callback;
91 sd_defer_handler_t callback;
94 sd_quit_handler_t callback;
111 /* For both clocks we maintain two priority queues each, one
112 * ordered for the earliest times the events may be
113 * dispatched, and one ordered by the latest times they must
114 * have been dispatched. The range between the top entries in
115 * the two prioqs is the time window we can freely schedule
117 Prioq *monotonic_earliest;
118 Prioq *monotonic_latest;
119 Prioq *realtime_earliest;
120 Prioq *realtime_latest;
122 usec_t realtime_next, monotonic_next;
126 sd_event_source **signal_sources;
128 Hashmap *child_sources;
129 unsigned n_enabled_child_sources;
136 dual_timestamp timestamp;
139 bool quit_requested:1;
140 bool need_process_child:1;
143 static int pending_prioq_compare(const void *a, const void *b) {
144 const sd_event_source *x = a, *y = b;
149 /* Enabled ones first */
150 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
152 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
155 /* Lower priority values first */
156 if (x->priority < y->priority)
158 if (x->priority > y->priority)
161 /* Older entries first */
162 if (x->pending_iteration < y->pending_iteration)
164 if (x->pending_iteration > y->pending_iteration)
167 /* Stability for the rest */
/* Comparator for the "prepare" prioq: sources prepared longest ago
 * sort first so iteration can stop at the first entry already
 * prepared this iteration; then enabled before disabled, then by
 * ascending priority. */
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
        if (x->prepare_iteration > y->prepare_iteration)

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Lower priority values first */
        if (x->priority < y->priority)
        if (x->priority > y->priority)

        /* Stability for the rest */
/* Comparator for the per-clock "earliest" prioq: orders timer sources
 * by the earliest time they may fire (time.next). Disabled and
 * already-pending sources are pushed to the end so the top of the
 * queue is always the next dispatchable timer. */
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
        assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
        if (x->pending && !y->pending)

        /* Order by earliest permissible dispatch time */
        if (x->time.next < y->time.next)
        if (x->time.next > y->time.next)

        /* Stability for the rest */
/* Comparator for the per-clock "latest" prioq: orders timer sources
 * by the latest time they must have fired, i.e. time.next plus the
 * per-source accuracy slack. Disabled and pending sources go last. */
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
               (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
        if (x->pending && !y->pending)

        /* Order by latest permissible dispatch time (next + accuracy) */
        if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
        if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)

        /* Stability for the rest */
/* Comparator for the "quit" prioq: enabled quit handlers before
 * disabled ones, then ascending priority. */
static int quit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_QUIT);
        assert(y->type == SOURCE_QUIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Lower priority values first */
        if (x->priority < y->priority)
        if (x->priority > y->priority)

        /* Stability for the rest */
/* Release all resources owned by an event loop object: the epoll,
 * signalfd and per-clock timerfd file descriptors, all priority
 * queues, the signal source table and the child source map. */
static void event_free(sd_event *e) {

        if (e->epoll_fd >= 0)
                close_nointr_nofail(e->epoll_fd);

        if (e->signal_fd >= 0)
                close_nointr_nofail(e->signal_fd);

        if (e->realtime_fd >= 0)
                close_nointr_nofail(e->realtime_fd);

        if (e->monotonic_fd >= 0)
                close_nointr_nofail(e->monotonic_fd);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->monotonic_earliest);
        prioq_free(e->monotonic_latest);
        prioq_free(e->realtime_earliest);
        prioq_free(e->realtime_latest);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
/* Allocate and initialize a new event loop object. All fds start out
 * as -1 (closed), both clock "next" deadlines as unset ((usec_t) -1),
 * and the creating PID is recorded so a fork can be detected later
 * by event_pid_changed(). */
int sd_event_new(sd_event** ret) {

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);

        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
        e->realtime_next = e->monotonic_next = (usec_t) -1;
        e->original_pid = getpid();

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
/* Take an additional reference on the event loop object. */
sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
/* Drop a reference on the event loop object; presumably frees it via
 * event_free() when the count reaches zero (freeing path not visible
 * here — confirm against full source). */
sd_event* sd_event_unref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
/* Returns true if the calling process is no longer the one that
 * created this event loop. */
static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
/* Remove an I/O source's fd from the loop's epoll set, if it is
 * currently registered. */
static int source_io_unregister(sd_event_source *s) {

        assert(s->type == SOURCE_IO);

        if (!s->io.registered)

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);

        s->io.registered = false;
/* Add or update an I/O source's fd in the loop's epoll set. A
 * oneshot-enabled source gets EPOLLONESHOT so the kernel disarms it
 * after one wakeup; an already-registered source is modified in
 * place (EPOLL_CTL_MOD) rather than re-added. */
static int source_io_register(

        struct epoll_event ev = {};

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        s->io.registered = true;
/* Tear down an event source: deregister it from all per-type data
 * structures (epoll set, timer prioqs, signal mask and table, child
 * map, quit prioq), then from the shared pending/prepare prioqs, and
 * finally drop its reference on the owning event loop. */
static void source_free(sd_event_source *s) {

        source_io_unregister(s);

        case SOURCE_MONOTONIC:
                prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);

        case SOURCE_REALTIME:
                prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);

                if (s->signal.sig > 0) {
                        /* Keep the signal blocked if SIGCHLD is still needed
                         * for active child sources */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                        /* Drop SIGCHLD from the mask only if no explicit
                         * SIGCHLD source still needs it */
                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));

                prioq_remove(s->event->quit, s, &s->quit.prioq_index);

        prioq_remove(s->event->pending, s, &s->pending_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

        sd_event_unref(s->event);
/* Mark a source as pending (b=true: record the current iteration and
 * insert it into the pending prioq) or not pending (b=false: remove
 * it). SOURCE_QUIT sources are never "pending" — they live in their
 * own prioq. */
static int source_set_pending(sd_event_source *s, bool b) {

        assert(s->type != SOURCE_QUIT);

        s->pending_iteration = s->event->iteration;

        r = prioq_put(s->event->pending, s, &s->pending_index);

        assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
/* Allocate a new event source of the given type, taking a reference
 * on the owning loop and initializing its prioq indices to "not in
 * queue" (PRIOQ_IDX_NULL). */
static sd_event_source *source_new(sd_event *e, EventSourceType type) {

        s = new0(sd_event_source, 1);

        s->event = sd_event_ref(e);

        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
/* (sd_event_add_io, tail of signature) Register an I/O event source
 * watching `fd` for the given epoll event mask. The source starts out
 * enabled (SD_EVENT_ON) and is immediately registered with epoll. */
                sd_io_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, SOURCE_IO);

        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
/* Lazily create the timerfd for one clock (monotonic or realtime) and
 * add it to the epoll set, tagging the epoll entry with the source
 * type so wakeups can be routed back. Also derives the per-machine
 * perturbation offset from the boot ID (see sleep_between()). */
static int event_setup_timer_fd(

                EventSourceType type,

        struct epoll_event ev = {};

        if (_likely_(*timer_fd >= 0))

        fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);

        ev.data.ptr = INT_TO_PTR(type);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
                close_nointr_nofail(fd);

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each second, so that events all across
           the system can be coalesced into a single CPU
           wakeup. However, let's take some system-specific randomness
           for this value, so that in a network of systems with synced
           clocks timer events are distributed a bit. Here, we
           calculate a perturbation usec offset from the boot ID. */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;
/* Shared implementation for sd_event_add_monotonic/realtime: ensures
 * the clock's earliest/latest prioqs and timerfd exist, then creates
 * a oneshot timer source and inserts it into both queues. An accuracy
 * of 0 selects DEFAULT_ACCURACY_USEC (250ms). */
static int event_add_time_internal(

                EventSourceType type,

                sd_time_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        *earliest = prioq_new(earliest_time_prioq_compare);

        *latest = prioq_new(latest_time_prioq_compare);

        r = event_setup_timer_fd(e, type, timer_fd, id);

        s = source_new(e, type);

        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(*earliest, s, &s->time.earliest_index);

        r = prioq_put(*latest, s, &s->time.latest_index);
/* Add a CLOCK_MONOTONIC timer source firing at `usec` with the given
 * accuracy slack; thin wrapper around event_add_time_internal(). */
int sd_event_add_monotonic(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
        return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
719 int sd_event_add_realtime(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
720 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
/* (Re)create the signalfd to match the loop's current signal mask and
 * make sure it is in the epoll set; the epoll entry is tagged with
 * SOURCE_SIGNAL for dispatch routing. */
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};

        add_to_epoll = e->signal_fd < 0;

        /* signalfd() with an existing fd updates its mask in place */
        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);

        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
                close_nointr_nofail(e->signal_fd);
/* Register a signal event source for `sig`. At most one source per
 * signal is allowed (tracked in the signal_sources table). The signal
 * is added to the loop's mask; the signalfd is only refreshed when
 * SIGCHLD isn't already watched on behalf of child sources. */
int sd_event_add_signal(

                sd_signal_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
        } else if (e->signal_sources[sig])

        s = source_new(e, SOURCE_SIGNAL);

        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;
        assert_se(sigaddset(&e->sigset, sig) == 0);

        if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
                r = event_update_signal_fd(e);
/* Register a child (waitid) event source for `pid`, keyed in the
 * child_sources hashmap (one source per PID). SIGCHLD is added to the
 * mask; the signalfd is refreshed only if no explicit SIGCHLD source
 * already covers it. need_process_child is set so an already-dead
 * child is noticed on the next loop turn. */
int sd_event_add_child(

                sd_child_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);

        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))

        s = source_new(e, SOURCE_CHILD);

        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);

        e->n_enabled_child_sources ++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);

        e->need_process_child = true;
/* Register a defer source: a oneshot callback that is immediately
 * marked pending, so it runs on the next loop dispatch. */
int sd_event_add_defer(

                sd_defer_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, SOURCE_DEFER);

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
/* Register a quit handler: stored in the dedicated quit prioq (not
 * the pending queue) and dispatched only when the loop shuts down. */
int sd_event_add_quit(

                sd_quit_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->quit = prioq_new(quit_prioq_compare);

        s = source_new(e, SOURCE_QUIT);

        s->quit.callback = callback;
        s->userdata = userdata;
        s->quit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
/* Take an additional reference on an event source. */
sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
/* Drop a reference on an event source; presumably frees it via
 * source_free() when the count reaches zero (freeing path not visible
 * here — confirm against full source). */
sd_event_source* sd_event_source_unref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
/* Return the event loop an event source belongs to. */
sd_event *sd_event_get(sd_event_source *s) {
        assert_return(s, NULL);
/* Query whether a source is currently pending dispatch. Not defined
 * for quit sources (-EDOM), which never enter the pending queue. */
int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_QUIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);
/* Return the file descriptor watched by an I/O source. */
int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
/* Return the epoll event mask an I/O source is configured for. */
int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
/* Change the epoll event mask of an I/O source. If the source is
 * enabled the epoll registration is updated immediately; the mask is
 * stored only after registration succeeds. */
int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.events == events)

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);

        s->io.events = events;
/* Return the triggered epoll events of a pending I/O source; only
 * valid while the source is pending (-ENODATA otherwise). */
int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
/* Return the signal number a signal source watches. */
int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
/* Return the dispatch priority of an event source. */
int sd_event_source_get_priority(sd_event_source *s, int *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);
/* Change the dispatch priority of a source and reshuffle every prioq
 * whose ordering depends on priority (pending, prepare, and quit for
 * quit sources). */
int sd_event_source_set_priority(sd_event_source *s, int priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)

        s->priority = priority;

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_QUIT)
                prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
/* Return the enablement state (SD_EVENT_OFF/ON/ONESHOT) of a source. */
int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);
/* Change the enablement state of a source. Disabling deregisters the
 * per-type backing (epoll entry, timer prioq position, signal mask
 * bit, child-source count); enabling re-establishes it. Since the
 * comparators all order by enablement, the relevant prioqs are
 * reshuffled afterwards. */
int sd_event_source_set_enabled(sd_event_source *s, int m) {

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == m)

        if (m == SD_EVENT_OFF) {

                        r = source_io_unregister(s);

                case SOURCE_MONOTONIC:

                        prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

                case SOURCE_REALTIME:

                        prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

                        /* Unblock the signal only if SIGCHLD isn't still
                         * needed for enabled child sources */
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);

                        prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

                        r = source_io_register(s, m, s->io.events);

                case SOURCE_MONOTONIC:

                        prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

                case SOURCE_REALTIME:

                        prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);

                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);

                        prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

        /* Enablement affects ordering in the shared queues too */
        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
/* Return the absolute trigger time of a timer source. */
int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
/* Change the absolute trigger time of a timer source and reshuffle
 * both of the owning clock's prioqs, since both orderings depend on
 * time.next. */
int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->time.next == usec)

        s->time.next = usec;

        if (s->type == SOURCE_REALTIME) {
                prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
                prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Return the accuracy slack of a timer source. */
int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
/* Change the accuracy slack of a timer source (0 selects the 250ms
 * default). Only the "latest" prioq needs reshuffling — the
 * "earliest" ordering does not involve accuracy. */
int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        usec = DEFAULT_ACCURACY_USEC;

        if (s->time.accuracy == usec)

        s->time.accuracy = usec;

        if (s->type == SOURCE_REALTIME)
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Return the PID a child source is watching. */
int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
/* Install, replace or remove (callback == NULL) a prepare callback
 * for a source. The source is added to / removed from the prepare
 * prioq accordingly; replacing an existing callback is a simple
 * pointer swap with no queue change. */
int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callback) {

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_QUIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)

        if (callback && s->prepare) {
                s->prepare = callback;

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        r = prioq_put(s->event->prepare, s, &s->prepare_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Return the opaque userdata pointer attached to a source. */
void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);
/* Pick the wakeup time inside the permissible window [a, b]. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given second if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 250ms
          step. Otherwise, we pick the last possible time to wake up.
        */

        /* Try the per-second slot aligned to the boot-ID perturbation */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;

        if (_unlikely_(c < USEC_PER_SEC))

        /* Fall back to the matching slot in each 250ms step */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));

        if (_unlikely_(c < USEC_PER_MSEC*250))

        c -= USEC_PER_MSEC*250;
/* Program one clock's timerfd for the next wakeup. If no enabled
 * source exists the timer is cleared (only if it was armed before);
 * otherwise the deadline is chosen by sleep_between() from the
 * earliest queue's head (lower bound) and the latest queue's head
 * plus accuracy (upper bound). */
static int event_arm_timer(

        struct itimerspec its = {};
        sd_event_source *a, *b;

        a = prioq_peek(earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                if (*next == (usec_t) -1)

                /* Disarm: its is zeroed, so it_value == {0,0} */
                r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);

                *next = (usec_t) -1;

        b = prioq_peek(latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);

        assert_se(timer_fd >= 0);

        /* We don't want to disarm here, just mean some time looooong ago. */
        its.it_value.tv_sec = 0;
        its.it_value.tv_nsec = 1;

        timespec_store(&its.it_value, t);

        r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Record the triggered epoll events on an I/O source and mark it
 * pending for dispatch. */
static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {

        assert(s->type == SOURCE_IO);

        s->io.revents = events;

        return source_set_pending(s, true);
/* Drain one expiration counter read from a timerfd and reset the
 * cached next-deadline so the timer gets rearmed. EAGAIN/EINTR on the
 * nonblocking read are tolerated. */
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
                if (errno == EAGAIN || errno == EINTR)

        if (ss != sizeof(x))

        *next = (usec_t) -1;
/* Walk the head of one clock's earliest queue, marking every elapsed
 * source pending and reshuffling it in both queues; stops at the
 * first source that is disabled, already pending, or not yet due. */
static int process_timer(

        s = prioq_peek(earliest);
                s->enabled == SD_EVENT_OFF ||

        r = source_set_pending(s, true);

        prioq_reshuffle(earliest, s, &s->time.earliest_index);
        prioq_reshuffle(latest, s, &s->time.latest_index);
/* Poll each watched child with a non-blocking waitid() and mark
 * sources whose child has state to report as pending. */
static int process_child(sd_event *e) {

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain a
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);

                /* si_pid != 0 means waitid() actually reaped a state change */
                if (s->child.siginfo.si_pid != 0) {
                        r = source_set_pending(s, true);
/* Drain the signalfd: for each received signal, route SIGCHLD to
 * process_child(), and mark the matching signal source (if any)
 * pending with the received siginfo attached. */
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;

        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

                struct signalfd_siginfo si;

                ss = read(e->signal_fd, &si, sizeof(si));
                        if (errno == EAGAIN || errno == EINTR)

                if (ss != sizeof(si))

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
/* Invoke a source's user callback. The pending flag is cleared first
 * (defer/quit sources are never "pending" in the queue sense), a
 * oneshot source is disabled before its callback runs, and the
 * callback variant is selected per source type. */
static int source_dispatch(sd_event_source *s) {

        assert(s->pending || s->type == SOURCE_QUIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
                r = source_set_pending(s, false);

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_MONOTONIC:
                r = s->time.callback(s, s->time.next, s->userdata);

        case SOURCE_REALTIME:
                r = s->time.callback(s, s->time.next, s->userdata);

                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                r = s->defer.callback(s, s->userdata);

                r = s->quit.callback(s, s->userdata);
/* Run prepare callbacks: repeatedly take the prepare-queue head until
 * hitting one already prepared this iteration (the comparator sorts
 * those last) or a disabled source; stamp and reshuffle before
 * calling so the queue ordering stays valid during the callback. */
static int event_prepare(sd_event *e) {

        s = prioq_peek(e->prepare);
        if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)

        s->prepare_iteration = e->iteration;
        r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

        r = s->prepare(s, s->userdata);
/* Dispatch the highest-priority enabled quit handler; if none remain,
 * mark the loop finished. The loop state is QUITTING around the
 * callback and returns to PASSIVE afterwards. */
static int dispatch_quit(sd_event *e) {

        p = prioq_peek(e->quit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;

        e->state = SD_EVENT_QUITTING;

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;
/* Peek the next dispatchable pending source, or NULL if the head of
 * the pending queue is disabled (the comparator sorts disabled
 * sources last, so a disabled head means nothing is dispatchable). */
static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);

        if (p->enabled == SD_EVENT_OFF)
/* Run a single event loop iteration: handle a requested quit, run
 * prepare callbacks, arm both clock timerfds, wait in epoll_wait()
 * (skipped if something is already pending), route each wakeup to the
 * timer/signal/io handler, collect elapsed timers and child state,
 * then dispatch one pending source. `timeout` is in usec, rounded up
 * to whole milliseconds for epoll; (uint64_t) -1 waits forever. */
int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->quit_requested)
                return dispatch_quit(e);

        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);

        /* Something ready already? Then don't block in epoll_wait() */
        if (event_next_pending(e) || e->need_process_child)

        r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);

        r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);

        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
                r = errno == EAGAIN || errno == EINTR ? 0 : -errno;

        dual_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                /* Route each wakeup by the tag stored in the epoll entry */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
                        r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
                        r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

        r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);

        r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);

        if (e->need_process_child) {
                r = process_child(e);

        p = event_next_pending(e);

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;
/* Run the event loop until a quit handler marks it finished; each
 * turn is one sd_event_run() with an infinite timeout. */
int sd_event_loop(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
/* Return the loop's current state (PASSIVE/RUNNING/QUITTING/FINISHED). */
int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
/* Return whether a quit has been requested for this loop. */
int sd_event_get_quit(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->quit_requested;
/* Request that the loop quits; acted upon at the start of the next
 * sd_event_run() iteration, which dispatches the quit handlers. */
int sd_event_request_quit(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->quit_requested = true;
/* Return the realtime timestamp taken when the current/last iteration
 * woke up; -ENODATA before the first wakeup. */
int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.realtime;
1847 int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
1848 assert_return(e, -EINVAL);
1849 assert_return(usec, -EINVAL);
1850 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1851 assert_return(!event_pid_changed(e), -ECHILD);
1853 *usec = e->timestamp.monotonic;