/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/epoll.h>
#include <sys/timerfd.h>

#include "time-util.h"

#define EPOLL_QUEUE_MAX 64
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
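/* The accuracy of a timer source is the slack we may add past its requested
 * time: with the 250ms default above, a timer asked for at time t may be
 * dispatched anywhere in [t, t + 250ms], which is what allows nearby timers
 * to be coalesced into a single wakeup (see the earliest/latest prioqs below). */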
typedef enum EventSourceType {

struct sd_event_source {

        sd_prepare_handler_t prepare;

        EventSourceType type:4;

        unsigned pending_index;
        unsigned prepare_index;
        unsigned pending_iteration;
        unsigned prepare_iteration;

                        sd_io_handler_t callback;

                        sd_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;

                        sd_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;

                        sd_child_handler_t callback;

                        sd_defer_handler_t callback;

                        sd_quit_handler_t callback;
        /* For both clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */
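        /* For example (hypothetical values): a timer with next = 10.0s and
         * accuracy = 2.5s may be dispatched anywhere in [10.0s, 12.5s]; the
         * head of the "earliest" prioq bounds the next wakeup from below,
         * and the head of the "latest" prioq (ordered by next + accuracy)
         * bounds it from above. */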
        Prioq *monotonic_earliest;
        Prioq *monotonic_latest;
        Prioq *realtime_earliest;
        Prioq *realtime_latest;

        usec_t realtime_next, monotonic_next;

        sd_event_source **signal_sources;

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        dual_timestamp timestamp;

        bool quit_requested:1;
        bool need_process_child:1;

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Lower priority values first */
        if (x->priority < y->priority)
        if (x->priority > y->priority)

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
        if (x->pending_iteration > y->pending_iteration)

        /* Stability for the rest */

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
        if (x->prepare_iteration > y->prepare_iteration)

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Lower priority values first */
        if (x->priority < y->priority)
        if (x->priority > y->priority)

        /* Stability for the rest */

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
        assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
        if (x->pending && !y->pending)

        if (x->time.next < y->time.next)
        if (x->time.next > y->time.next)

        /* Stability for the rest */

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
               (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
        if (x->pending && !y->pending)

        if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
        if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)

        /* Stability for the rest */

static int quit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_QUIT);
        assert(y->type == SOURCE_QUIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)

        /* Lower priority values first */
        if (x->priority < y->priority)
        if (x->priority > y->priority)

        /* Stability for the rest */

static void event_free(sd_event *e) {

        if (e->epoll_fd >= 0)
                close_nointr_nofail(e->epoll_fd);

        if (e->signal_fd >= 0)
                close_nointr_nofail(e->signal_fd);

        if (e->realtime_fd >= 0)
                close_nointr_nofail(e->realtime_fd);

        if (e->monotonic_fd >= 0)
                close_nointr_nofail(e->monotonic_fd);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->monotonic_earliest);
        prioq_free(e->monotonic_latest);
        prioq_free(e->realtime_earliest);
        prioq_free(e->realtime_latest);

        free(e->signal_sources);

        hashmap_free(e->child_sources);

int sd_event_new(sd_event** ret) {

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);

        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
        e->realtime_next = e->monotonic_next = (usec_t) -1;
        e->original_pid = getpid();

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {

sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);

sd_event* sd_event_unref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
static int source_io_unregister(sd_event_source *s) {

        assert(s->type == SOURCE_IO);

        if (!s->io.registered)

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);

        s->io.registered = false;

static int source_io_register(

        struct epoll_event ev = {};

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;
        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        s->io.registered = true;
static void source_free(sd_event_source *s) {

                source_io_unregister(s);

        case SOURCE_MONOTONIC:
                prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);

        case SOURCE_REALTIME:
                prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);

                if (s->signal.sig > 0) {
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));

                prioq_remove(s->event->quit, s, &s->quit.prioq_index);

        prioq_remove(s->event->pending, s, &s->pending_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

        sd_event_unref(s->event);

static int source_set_pending(sd_event_source *s, bool b) {

        assert(s->type != SOURCE_QUIT);

        s->pending_iteration = s->event->iteration;

        r = prioq_put(s->event->pending, s, &s->pending_index);

        assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

static sd_event_source *source_new(sd_event *e, EventSourceType type) {

        s = new0(sd_event_source, 1);

        s->event = sd_event_ref(e);
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

                sd_io_handler_t callback,
                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, SOURCE_IO);

        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
static int event_setup_timer_fd(

                EventSourceType type,

        struct epoll_event ev = {};

        if (_likely_(*timer_fd >= 0))

        fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);

        ev.data.ptr = INT_TO_PTR(type);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
                close_nointr_nofail(fd);

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each second, so that events all across
           the system can be coalesced into a single CPU
           wakeup. However, let's take some system-specific randomness
           for this value, so that in a network of systems with synced
           clocks timer events are distributed a bit. Here, we
           calculate a perturbation usec offset from the boot ID. */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;
static int event_add_time_internal(

                EventSourceType type,

                sd_time_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        *earliest = prioq_new(earliest_time_prioq_compare);

        *latest = prioq_new(latest_time_prioq_compare);

        r = event_setup_timer_fd(e, type, timer_fd, id);

        s = source_new(e, type);

        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(*earliest, s, &s->time.earliest_index);

        r = prioq_put(*latest, s, &s->time.latest_index);
int sd_event_add_monotonic(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
        return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);

int sd_event_add_realtime(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
        return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->realtime_latest, usec, accuracy, callback, userdata, ret);
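/* Note that usec is an absolute timestamp on the respective clock, not a
 * relative delay: the timerfd is armed with TFD_TIMER_ABSTIME below. As a
 * purely illustrative example, a caller that wants to fire roughly five
 * seconds from now might pass now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC with
 * an accuracy of USEC_PER_SEC to sd_event_add_monotonic(). */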
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};

        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);

        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
                close_nointr_nofail(e->signal_fd);

int sd_event_add_signal(

                sd_signal_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
        } else if (e->signal_sources[sig])

        s = source_new(e, SOURCE_SIGNAL);

        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;
        assert_se(sigaddset(&e->sigset, sig) == 0);

        if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
                r = event_update_signal_fd(e);

int sd_event_add_child(

                sd_child_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);

        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))

        s = source_new(e, SOURCE_CHILD);

        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);

        e->n_enabled_child_sources++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);

        e->need_process_child = true;

int sd_event_add_defer(

                sd_defer_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, SOURCE_DEFER);

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);

int sd_event_add_quit(

                sd_quit_handler_t callback,

                sd_event_source **ret) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->quit = prioq_new(quit_prioq_compare);

        s = source_new(e, SOURCE_QUIT);

        s->quit.callback = callback;
        s->userdata = userdata;
        s->quit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->quit, s, &s->quit.prioq_index);

sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);

sd_event_source* sd_event_source_unref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);

sd_event *sd_event_get(sd_event_source *s) {
        assert_return(s, NULL);

int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_QUIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;

int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.events == events)

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);

        s->io.events = events;

int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;

int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;

int sd_event_source_get_priority(sd_event_source *s, int *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

int sd_event_source_set_priority(sd_event_source *s, int priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)

        s->priority = priority;

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_QUIT)
                prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

int sd_event_source_set_enabled(sd_event_source *s, int m) {

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == m)

        if (m == SD_EVENT_OFF) {

                        r = source_io_unregister(s);

                case SOURCE_MONOTONIC:
                        prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

                case SOURCE_REALTIME:
                        prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);

                        prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

                        r = source_io_register(s, m, s->io.events);

                case SOURCE_MONOTONIC:
                        prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

                case SOURCE_REALTIME:
                        prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);

                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);

                        prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;

int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->time.next == usec)

        s->time.next = usec;
        if (s->type == SOURCE_REALTIME) {
                prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
        } else {
                prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;

int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        usec = DEFAULT_ACCURACY_USEC;

        if (s->time.accuracy == usec)

        s->time.accuracy = usec;
        if (s->type == SOURCE_REALTIME)
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
        else
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;

int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callback) {

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_QUIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)

        if (callback && s->prepare) {
                s->prepare = callback;

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        r = prioq_put(s->event->prepare, s, &s->prepare_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given second if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 250ms
          step. Otherwise, we pick the last possible time to wake up.
        */
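        /* Worked example (hypothetical values): with perturb = 0.30s, a = 5.10s
           and b = 5.70s, the first pass below computes c = 5.00s + 0.30s = 5.30s,
           which lies inside [a, b], so we wake at 5.30s, the same sub-second
           offset every other loop on this machine prefers. Only if c fell
           outside the window would we retry on a 250ms grid, and failing that
           we simply return b. */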
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;

        if (_unlikely_(c < USEC_PER_SEC))

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));

        if (_unlikely_(c < USEC_PER_MSEC*250))

        c -= USEC_PER_MSEC*250;

static int event_arm_timer(

        struct itimerspec its = {};
        sd_event_source *a, *b;

        a = prioq_peek(earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                if (*next == (usec_t) -1)

                r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);

                *next = (usec_t) -1;

        b = prioq_peek(latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);

        assert_se(timer_fd >= 0);
        /* We don't want to disarm here, just set it to some time long ago. */
        its.it_value.tv_sec = 0;
        its.it_value.tv_nsec = 1;

        timespec_store(&its.it_value, t);

        r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);

static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {

        assert(s->type == SOURCE_IO);

        s->io.revents = events;

        /*
          If this is a oneshot event source, then we added it to the
          epoll with EPOLLONESHOT, hence we know it's not registered
          anymore. We can save a syscall here...
        */

        if (s->enabled == SD_EVENT_ONESHOT)
                s->io.registered = false;

        return source_set_pending(s, true);
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));

        if (errno == EAGAIN || errno == EINTR)

        if (ss != sizeof(x))

        *next = (usec_t) -1;

static int process_timer(

        s = prioq_peek(earliest);

            s->enabled == SD_EVENT_OFF ||

        r = source_set_pending(s, true);

        prioq_reshuffle(earliest, s, &s->time.earliest_index);
        prioq_reshuffle(latest, s, &s->time.latest_index);

static int process_child(sd_event *e) {

        e->need_process_child = false;
        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.
        */
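        /* Note that WNOHANG keeps the waitid() calls below from ever blocking,
           and checking siginfo.si_pid afterwards tells us whether the child
           actually changed state: si_pid stays 0 if nothing was pending. */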
        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);

                if (s->child.siginfo.si_pid != 0) {
                        r = source_set_pending(s, true);

static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;

        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        struct signalfd_siginfo si;

        ss = read(e->signal_fd, &si, sizeof(si));

        if (errno == EAGAIN || errno == EINTR)

        if (ss != sizeof(si))

        s = e->signal_sources[si.ssi_signo];
        if (si.ssi_signo == SIGCHLD) {
                r = process_child(e);

        s->signal.siginfo = si;
        r = source_set_pending(s, true);
static int source_dispatch(sd_event_source *s) {

        assert(s->pending || s->type == SOURCE_QUIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
                r = source_set_pending(s, false);

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_MONOTONIC:
                r = s->time.callback(s, s->time.next, s->userdata);

        case SOURCE_REALTIME:
                r = s->time.callback(s, s->time.next, s->userdata);

                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                r = s->defer.callback(s, s->userdata);

                r = s->quit.callback(s, s->userdata);

static int event_prepare(sd_event *e) {

        s = prioq_peek(e->prepare);
        if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)

        s->prepare_iteration = e->iteration;
        r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

        r = s->prepare(s, s->userdata);

static int dispatch_quit(sd_event *e) {

        p = prioq_peek(e->quit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;

        e->state = SD_EVENT_QUITTING;

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;

static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);

        if (p->enabled == SD_EVENT_OFF)

int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->quit_requested)
                return dispatch_quit(e);

        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);

        if (event_next_pending(e) || e->need_process_child)

        r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);

        r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);

        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));

        r = errno == EAGAIN || errno == EINTR ? 0 : -errno;

        dual_timestamp_get(&e->timestamp);
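        /* Each epoll entry carries the pointer we stored at registration time:
         * the internal fds (the two timerfds and the signalfd) are tagged with
         * INT_TO_PTR(SOURCE_*) sentinels, while anything else is treated as the
         * sd_event_source pointer of an IO source. That is how the loop below
         * tells them apart. */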
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
                        r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
                        r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
        r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);

        r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);

        if (e->need_process_child) {
                r = process_child(e);

        p = event_next_pending(e);

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;

int sd_event_loop(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);

int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

int sd_event_get_quit(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->quit_requested;

int sd_event_request_quit(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->quit_requested = true;

int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.realtime;

int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.monotonic;
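/* Illustrative usage sketch (kept out of the build): roughly how a caller
 * might drive this API, assuming the public declarations from sd-event.h.
 * The names quit_on_sigint() and example_loop() are examples only, and the
 * exact public signatures are taken from how this file dispatches them. */
#if 0
#include <signal.h>
#include <stddef.h>
#include <sys/signalfd.h>

#include "sd-event.h"

static int quit_on_sigint(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Returning after requesting quit lets the loop dispatch any
         * SOURCE_QUIT sources and then finish. */
        return sd_event_request_quit(sd_event_get(s));
}

static int example_loop(void) {
        sd_event *e = NULL;
        sd_event_source *sig_source = NULL;
        sigset_t mask;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* The signal must be blocked so that it is delivered via the
         * signalfd instead of a regular signal handler. */
        sigemptyset(&mask);
        sigaddset(&mask, SIGINT);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        r = sd_event_add_signal(e, SIGINT, quit_on_sigint, NULL, &sig_source);
        if (r < 0)
                goto finish;

        /* Runs sd_event_run() until a quit is requested. */
        r = sd_event_loop(e);

finish:
        sd_event_source_unref(sig_source);
        sd_event_unref(e);
        return r;
}
#endif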