/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/signalfd.h>
#include <sys/wait.h>

#include "sd-id128.h"
#include "macro.h"
#include "prioq.h"
#include "hashmap.h"
#include "util.h"
#include "time-util.h"

#include "sd-event.h"
#define EPOLL_QUEUE_MAX 64
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_MONOTONIC,
        SOURCE_REALTIME,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_QUIT
} EventSourceType;
struct sd_event_source {
        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_prepare_handler_t prepare;

        EventSourceType type:4;
        int enabled:3;
        bool pending:1;

        int priority;
        unsigned pending_index;
        unsigned prepare_index;
        unsigned pending_iteration;
        unsigned prepare_iteration;

        union {
                struct {
                        sd_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                } io;
                struct {
                        sd_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_defer_handler_t callback;
                } defer;
                struct {
                        sd_quit_handler_t callback;
                        unsigned prioq_index;
                } quit;
        };
};
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int signal_fd;
        int realtime_fd;
        int monotonic_fd;

        Prioq *pending;
        Prioq *prepare;

        /* For both clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */
        Prioq *monotonic_earliest;
        Prioq *monotonic_latest;
        Prioq *realtime_earliest;
        Prioq *realtime_latest;

        Prioq *quit;

        usec_t realtime_next, monotonic_next;
        usec_t perturb;

        sigset_t sigset;
        sd_event_source **signal_sources;

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        unsigned iteration;
        dual_timestamp timestamp;
        int state;

        bool quit_requested:1;
        bool need_process_child:1;

        pid_t original_pid;
};
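
/* Worked example of the two-queue window (numbers are hypothetical, not
 * from the original source): take two armed monotonic timers, T1 with
 * next = 10.00s, accuracy = 250ms and T2 with next = 10.20s, accuracy = 1s.
 * monotonic_earliest orders by "next" and peeks T1 (10.00s);
 * monotonic_latest orders by "next + accuracy" and also peeks T1 (10.25s).
 * The loop may therefore wake anywhere in [10.00s, 10.25s]. If it picks
 * 10.25s, T2 (ripe since 10.20s) is dispatched in the same wakeup, which
 * is exactly the coalescing the two queues are meant to enable. */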
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        /* Stability for the rest */
        if (x < y)
                return -1;
        if (x > y)
                return 1;

        return 0;
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Stability for the rest */
        if (x < y)
                return -1;
        if (x > y)
                return 1;

        return 0;
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
        assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        /* Stability for the rest */
        if (x < y)
                return -1;
        if (x > y)
                return 1;

        return 0;
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
               (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
                return -1;
        if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
                return 1;

        /* Stability for the rest */
        if (x < y)
                return -1;
        if (x > y)
                return 1;

        return 0;
}

static int quit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_QUIT);
        assert(y->type == SOURCE_QUIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Stability for the rest */
        if (x < y)
                return -1;
        if (x > y)
                return 1;

        return 0;
}
static void event_free(sd_event *e) {
        assert(e);

        if (e->epoll_fd >= 0)
                close_nointr_nofail(e->epoll_fd);

        if (e->signal_fd >= 0)
                close_nointr_nofail(e->signal_fd);

        if (e->realtime_fd >= 0)
                close_nointr_nofail(e->realtime_fd);

        if (e->monotonic_fd >= 0)
                close_nointr_nofail(e->monotonic_fd);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->monotonic_earliest);
        prioq_free(e->monotonic_latest);
        prioq_free(e->realtime_earliest);
        prioq_free(e->realtime_latest);
        prioq_free(e->quit);

        free(e->signal_sources);

        hashmap_free(e->child_sources);
        free(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
        e->realtime_next = e->monotonic_next = (usec_t) -1;
        e->original_pid = getpid();

        assert_se(sigemptyset(&e->sigset) == 0);

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}

_public_ sd_event* sd_event_unref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
        e->n_ref--;

        if (e->n_ref <= 0)
                event_free(e);

        return NULL;
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}
static int source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (!s->io.registered)
                return 0;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                return -errno;

        s->io.registered = false;
        return 0;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev = {};
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev.events = events;
        ev.data.ptr = s;

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        if (r < 0)
                return -errno;

        s->io.registered = true;
        return 0;
}
static void source_free(sd_event_source *s) {
        assert(s);

        if (s->event) {
                switch (s->type) {

                case SOURCE_IO:
                        if (s->io.fd >= 0)
                                source_io_unregister(s);
                        break;

                case SOURCE_MONOTONIC:
                        prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_REALTIME:
                        prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_SIGNAL:
                        if (s->signal.sig > 0) {
                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

                                if (s->event->signal_sources)
                                        s->event->signal_sources[s->signal.sig] = NULL;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->child.pid > 0) {
                                if (s->enabled != SD_EVENT_OFF) {
                                        assert(s->event->n_enabled_child_sources > 0);
                                        s->event->n_enabled_child_sources--;
                                }

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        }
                        break;

                case SOURCE_QUIT:
                        prioq_remove(s->event->quit, s, &s->quit.prioq_index);
                        break;

                default:
                        break;
                }

                if (s->pending)
                        prioq_remove(s->event->pending, s, &s->pending_index);

                if (s->prepare)
                        prioq_remove(s->event->prepare, s, &s->prepare_index);

                sd_event_unref(s->event);
        }

        free(s);
}
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_QUIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        return 0;
}

static sd_event_source *source_new(sd_event *e, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new0(sd_event_source, 1);
        if (!s)
                return NULL;

        s->n_ref = 1;
        s->event = sd_event_ref(e);
        s->type = type;
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                int fd,
                uint32_t events,
                sd_io_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EINVAL);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        *ret = s;
        return 0;
}
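
/* Example (illustrative sketch, not part of the original file): a typical
 * user of sd_event_add_io() registers a non-blocking fd and reacts to
 * EPOLLIN in the callback. The handler name and fd are hypothetical.
 *
 *     static int on_socket(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *
 *             if (revents & EPOLLIN)
 *                     (void) read(fd, buf, sizeof(buf));
 *
 *             return 0;
 *     }
 *
 *     sd_event_source *source;
 *     r = sd_event_add_io(e, sock_fd, EPOLLIN, on_socket, NULL, &source);
 *
 * The source starts out SD_EVENT_ON; use sd_event_source_set_enabled() to
 * switch it to SD_EVENT_ONESHOT or turn it off. */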
static int event_setup_timer_fd(
                sd_event *e,
                EventSourceType type,
                int *timer_fd,
                clockid_t id) {

        struct epoll_event ev = {};
        sd_id128_t bootid;
        int r, fd;

        assert(e);
        assert(timer_fd);

        if (_likely_(*timer_fd >= 0))
                return 0;

        fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(type);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                close_nointr_nofail(fd);
                return -errno;
        }

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each second, so that events all across
           the system can be coalesced into a single CPU
           wakeup. However, let's take some system-specific randomness
           for this value, so that in a network of systems with synced
           clocks timer events are distributed a bit. Here, we
           calculate a perturbation usec offset from the boot ID. */

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;

        *timer_fd = fd;
        return 0;
}
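
/* Worked example (the concrete numbers are hypothetical, not from the
 * original source): if the boot ID's two halves XOR to a value that is
 * 300 ms modulo USEC_PER_SEC, perturb becomes 300 ms. Every wakeup this
 * loop schedules is then aligned to the x.300s offset within the second,
 * so independent daemons on the same machine tend to wake the CPU
 * together, while machines with different boot IDs spread apart. */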
static int event_add_time_internal(
                sd_event *e,
                EventSourceType type,
                int *timer_fd,
                clockid_t id,
                Prioq **earliest,
                Prioq **latest,
                uint64_t usec,
                uint64_t accuracy,
                sd_time_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!*earliest) {
                *earliest = prioq_new(earliest_time_prioq_compare);
                if (!*earliest)
                        return -ENOMEM;
        }

        if (!*latest) {
                *latest = prioq_new(latest_time_prioq_compare);
                if (!*latest)
                        return -ENOMEM;
        }

        if (*timer_fd < 0) {
                r = event_setup_timer_fd(e, type, timer_fd, id);
                if (r < 0)
                        return r;
        }

        s = source_new(e, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(*earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(*latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        *ret = s;
        return 0;

fail:
        source_free(s);
        return r;
}
_public_ int sd_event_add_monotonic(
                sd_event *e,
                uint64_t usec,
                uint64_t accuracy,
                sd_time_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
}
_public_ int sd_event_add_realtime(
                sd_event *e,
                uint64_t usec,
                uint64_t accuracy,
                sd_time_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->realtime_latest, usec, accuracy, callback, userdata, ret);
}
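
/* Example (sketch, not part of the original file): arm a one-shot monotonic
 * timer 5s from now with 500ms of coalescing slack. now() from time-util.h
 * is assumed; the callback name is hypothetical.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             // usec is the scheduled (not the actual) expiry time
 *             return 0;
 *     }
 *
 *     sd_event_source *t;
 *     r = sd_event_add_monotonic(e, now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC,
 *                                500 * USEC_PER_MSEC, on_timer, NULL, &t);
 *
 * Timer sources default to SD_EVENT_ONESHOT; for a periodic timer, call
 * sd_event_source_set_time() and re-enable from within the callback. */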
static int event_update_signal_fd(sd_event *e) {
        struct epoll_event ev = {};
        bool add_to_epoll;
        int r;

        assert(e);

        add_to_epoll = e->signal_fd < 0;

        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0)
                return -errno;

        e->signal_fd = r;

        if (!add_to_epoll)
                return 0;

        ev.events = EPOLLIN;
        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
        if (r < 0) {
                close_nointr_nofail(e->signal_fd);
                e->signal_fd = -1;
                return -errno;
        }

        return 0;
}
_public_ int sd_event_add_signal(
                sd_event *e,
                int sig,
                sd_signal_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;
        assert_se(sigaddset(&e->sigset, sig) == 0);

        if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        *ret = s;
        return 0;
}
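
/* Example (sketch, not in the original file): handle SIGTERM through the
 * loop. As with any signalfd consumer the signal must be blocked with
 * sigprocmask() first, so that it is delivered via the fd instead of a
 * classic handler; that is the caller's responsibility in this sketch.
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_request_quit(sd_event_get(s));
 *     }
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     sd_event_source *sig;
 *     r = sd_event_add_signal(e, SIGTERM, on_sigterm, NULL, &sig);
 */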
_public_ int sd_event_add_child(
                sd_event *e,
                pid_t pid,
                int options,
                sd_child_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                r = event_update_signal_fd(e);
                if (r < 0) {
                        source_free(s);
                        return r;
                }
        }

        e->need_process_child = true;

        *ret = s;
        return 0;
}
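
/* Example (sketch, not in the original file): watch a forked child for
 * exit. SIGCHLD must be blocked in the process, as with any signalfd
 * consumer; all names are hypothetical.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_pid, si->si_code (CLD_EXITED, ...) and si->si_status
 *             // come from waitid(); the source was created SD_EVENT_ONESHOT.
 *             return 0;
 *     }
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *
 *     sd_event_source *child;
 *     r = sd_event_add_child(e, pid, WEXITED, on_child, NULL, &child);
 */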
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_defer_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0) {
                source_free(s);
                return r;
        }

        *ret = s;
        return 0;
}

_public_ int sd_event_add_quit(
                sd_event *e,
                sd_quit_handler_t callback,
                void *userdata,
                sd_event_source **ret) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(ret, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->quit) {
                e->quit = prioq_new(quit_prioq_compare);
                if (!e->quit)
                        return -ENOMEM;
        }

        s = source_new(e, SOURCE_QUIT);
        if (!s)
                return -ENOMEM;

        s->quit.callback = callback;
        s->userdata = userdata;
        s->quit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
        if (r < 0) {
                source_free(s);
                return r;
        }

        *ret = s;
        return 0;
}
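
/* Example (sketch, not in the original file): defer sources run on the
 * next loop iteration, quit sources run once sd_event_request_quit() has
 * been called; both take the same simple callback shape. Names are
 * hypothetical.
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // one-shot: runs on the next iteration, then is disabled
 *             return 0;
 *     }
 *
 *     static int on_quit(sd_event_source *s, void *userdata) {
 *             // cleanup work before the loop finishes
 *             return 0;
 *     }
 *
 *     sd_event_source *d, *q;
 *     r = sd_event_add_defer(e, on_defer, NULL, &d);
 *     r = sd_event_add_quit(e, on_quit, NULL, &q);
 */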
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0)
                source_free(s);

        return NULL;
}

_public_ sd_event *sd_event_get(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_QUIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.events == events)
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
        assert_return(s, -EINVAL);
        assert_return(priority, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_QUIT)
                prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

        return 0;
}

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_unregister(s);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_MONOTONIC:
                        s->enabled = m;
                        prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_REALTIME:
                        s->enabled = m;
                        prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_DEFER:
                case SOURCE_QUIT:
                        s->enabled = m;
                        if (s->type == SOURCE_QUIT)
                                prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
                        break;
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_MONOTONIC:
                        s->enabled = m;
                        prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_REALTIME:
                        s->enabled = m;
                        prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
                        break;

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
                                event_update_signal_fd(s->event);
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF) {
                                s->event->n_enabled_child_sources++;

                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
                                        event_update_signal_fd(s->event);
                                }
                        }

                        s->enabled = m;
                        break;

                case SOURCE_DEFER:
                case SOURCE_QUIT:
                        s->enabled = m;
                        if (s->type == SOURCE_QUIT)
                                prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
                        break;
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->time.next == usec)
                return 0;

        s->time.next = usec;

        if (s->type == SOURCE_REALTIME) {
                prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
        } else {
                prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
        }

        return 0;
}

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        if (s->time.accuracy == usec)
                return 0;

        s->time.accuracy = usec;

        if (s->type == SOURCE_REALTIME)
                prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
        else
                prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_QUIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;

        if (b <= a + 1)
                return a;

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given second if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 250ms
          step. Otherwise, we pick the last possible time to wake up.
        */

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;

        if (_unlikely_(c < USEC_PER_SEC))
                return b;

        c -= USEC_PER_SEC;

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));

        if (_unlikely_(c < USEC_PER_MSEC*250))
                return b;

        c -= USEC_PER_MSEC*250;

        if (c >= a)
                return c;

        return b;
}
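
/* Worked example (numbers are hypothetical, not from the original source):
 * say the loop may wake any time between a = 10.20s and b = 10.70s, and
 * e->perturb is 300 ms. The per-second candidate is
 * (10s + 0.30s) - 1s = 9.30s, which lies before a and is rejected. The
 * 250ms-grid candidate is 10.50s + (300ms % 250ms) - 250ms = 10.30s, which
 * lies inside [a, b] and is returned. Every loop on this machine thus
 * gravitates to the same boot-ID-derived offsets, so their wakeups
 * coalesce; only if no aligned spot fits do we fall back to b. */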
static int event_arm_timer(
                sd_event *e,
                int timer_fd,
                Prioq *earliest,
                Prioq *latest,
                usec_t *next) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(next);

        a = prioq_peek(earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                if (*next == (usec_t) -1)
                        return 0;

                /* disarm */
                r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                *next = (usec_t) -1;
                return 0;
        }

        b = prioq_peek(latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
        if (*next == t)
                return 0;

        assert_se(timer_fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just set the timer to
                 * some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        *next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        s->io.revents = events;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);
        assert(next);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (errno == EAGAIN || errno == EINTR)
                        return 0;

                return -errno;
        }

        if (ss != sizeof(x))
                return -EIO;

        *next = (usec_t) -1;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                Prioq *earliest,
                Prioq *latest) {

        sd_event_source *s;
        int r;

        assert(e);

        for (;;) {
                s = prioq_peek(earliest);
                if (!s ||
                    s->enabled == SD_EVENT_OFF ||
                    s->time.next > n)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(earliest, s, &s->time.earliest_index);
                prioq_reshuffle(latest, s, &s->time.latest_index);
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(e->signal_sources);

        assert_return(events == EPOLLIN, -EIO);

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t ss;
                sd_event_source *s;

                ss = read(e->signal_fd, &si, sizeof(si));
                if (ss < 0) {
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (ss != sizeof(si))
                        return -EIO;

                read_one = true;

                s = e->signal_sources[si.ssi_signo];
                if (si.ssi_signo == SIGCHLD) {
                        r = process_child(e);
                        if (r < 0)
                                return r;

                        if (!s)
                                continue;
                } else if (!s)
                        return -EIO;

                s->signal.siginfo = si;
                r = source_set_pending(s, true);
                if (r < 0)
                        return r;
        }
}
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_QUIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        sd_event_source_ref(s);

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_MONOTONIC:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_REALTIME:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD:
                r = s->child.callback(s, &s->child.siginfo, s->userdata);
                break;

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_QUIT:
                r = s->quit.callback(s, s->userdata);
                break;
        }

        sd_event_source_unref(s);

        return r;
}
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);
                r = s->prepare(s, s->userdata);
                if (r < 0)
                        return r;
        }

        return 0;
}

static int dispatch_quit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->quit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_QUITTING;

        r = source_dispatch(p);

        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        if (e->quit_requested)
                return dispatch_quit(e);

        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
        if (r < 0)
                goto finish;

        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
                goto finish;
        }

        dual_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
                        r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
                        r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        p = event_next_pending(e);
        if (!p) {
                r = 0;
                goto finish;
        }

        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        goto finish;
        }

        r = 0;

finish:
        sd_event_unref(e);
        return r;
}
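
/* Putting it together (illustrative sketch, not part of the original file):
 * a minimal program that runs the loop until a timer requests quit. All
 * names except the sd_event API itself are hypothetical.
 *
 *     static int on_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_request_quit(sd_event_get(s));
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             sd_event_source *t = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);
 *             if (r < 0)
 *                     return EXIT_FAILURE;
 *
 *             r = sd_event_add_monotonic(e, now(CLOCK_MONOTONIC) + USEC_PER_SEC,
 *                                        0, on_timeout, NULL, &t);
 *             if (r < 0)
 *                     return EXIT_FAILURE;
 *
 *             r = sd_event_loop(e);
 *
 *             sd_event_source_unref(t);
 *             sd_event_unref(e);
 *             return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
 *     }
 *
 * sd_event_run() can be called directly instead of sd_event_loop() when
 * the caller wants to interleave its own work between iterations. */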
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_quit(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->quit_requested;
}

_public_ int sd_event_request_quit(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->quit_requested = true;
        return 0;
}

_public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.realtime;
        return 0;
}

_public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
        assert_return(!event_pid_changed(e), -ECHILD);

        *usec = e->timestamp.monotonic;
        return 0;
}