/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/epoll.h>
#include <sys/timerfd.h>

#include "sd-daemon.h"

#include "alloc-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
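/* Illustrative sketch (not part of the original source): how the WakeupType
 * tag is used. Every object whose address is stored in epoll's data.ptr
 * begins with a WakeupType field, so the dispatcher can recover the object
 * kind from the pointer alone. The name example_dispatch() is hypothetical. */
#if 0
static int example_dispatch(struct epoll_event *ev) {
        /* The first member of any registered object is its WakeupType, so a
         * pointer to the object can be read as a WakeupType*. */
        WakeupType *t = ev->data.ptr;

        switch (*t) {
        case WAKEUP_EVENT_SOURCE:
                /* ev->data.ptr is really an sd_event_source* */
                return 1;
        case WAKEUP_CLOCK_DATA:
                /* ev->data.ptr is really a struct clock_data* */
                return 2;
        case WAKEUP_SIGNAL_DATA:
                /* ev->data.ptr is really a struct signal_data* */
                return 3;
        default:
                return -EINVAL;
        }
}
#endif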
struct sd_event_source {
        sd_event_handler_t prepare;

        EventSourceType type:5;

        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};

struct sd_event {
        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        triple_timestamp timestamp;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static void source_disconnect(sd_event_source *s);

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
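/* Illustrative sketch (not part of the original source): a timer event
 * source may be dispatched anywhere in the window [next, next + accuracy].
 * The earliest prioq orders sources by the lower bound, the latest prioq by
 * the upper bound computed via time_event_source_latest(). The helper name
 * example_dispatch_window() is hypothetical. */
#if 0
static void example_dispatch_window(const sd_event_source *s, usec_t *ret_earliest, usec_t *ret_latest) {
        /* Lower bound: the time the caller asked for. */
        *ret_earliest = s->time.next;

        /* Upper bound: the asked-for time plus the configured accuracy
         * (DEFAULT_ACCURACY_USEC when the caller passed 0). */
        *ret_latest = time_event_source_latest(s);
}
#endif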
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static void event_free(sd_event *e) {
        sd_event_source *s;

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY;

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;
_public_ sd_event* sd_event_ref(sd_event *e) {

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}

_public_ sd_event* sd_event_unref(sd_event *e) {

        assert(e->n_ref >= 1);
        e->n_ref--;

        if (e->n_ref <= 0)
                event_free(e);

        return NULL;
}
static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}

static void source_io_unregister(sd_event_source *s) {

        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev = {};

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev.events = events;
        ev.data.ptr = s;

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        s->io.registered = true;
#if 0 /// UNNEEDED by elogind
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;
        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;
        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;
        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;
        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;
        default:
                return (clockid_t) -1;
        }
}
#endif // 0
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;
        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;
        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;
        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;
        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;
        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;
        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;
        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;
        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;
        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;
        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev = {};
        struct signal_data *d;

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = 0;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);

                d = new0(struct signal_data, 1);

                d->wakeup = WAKEUP_SIGNAL_DATA;
                d->priority = priority;

                r = hashmap_put(e->signal_data, &d->priority, d);
        }

        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                hashmap_remove(e->signal_data, &d->priority);
        }
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, we can get rid of it. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                return;
        }

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        prioq_remove(s->event->pending, s, &s->pending_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;

        event = s->event;

        LIST_REMOVE(sources, event->sources, s);

        sd_event_unref(event);
}
static void source_free(sd_event_source *s) {

        source_disconnect(s);
        free(s->description);
        free(s);
}
static int source_set_pending(sd_event_source *s, bool b) {

        assert(s->type != SOURCE_EXIT);

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {

        s = new0(sd_event_source, 1);

        s->floating = floating;
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        LIST_PREPEND(sources, e->sources, s);

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
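/* Illustrative sketch (not part of the original source): typical use of
 * sd_event_add_io(). The handler name example_io_handler and the use of
 * STDIN_FILENO are hypothetical; a real caller would pass its own fd. */
#if 0
static int example_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        char buf[256];
        ssize_t n;

        /* EPOLLIN was requested below, so the fd should be readable here. */
        n = read(fd, buf, sizeof(buf));
        if (n <= 0)
                return 0;

        return 0;
}

static int example_add_io(sd_event *e) {
        sd_event_source *s = NULL;

        /* Watch stdin for readability; the new source starts out SD_EVENT_ON. */
        return sd_event_add_io(e, &s, STDIN_FILENO, EPOLLIN, example_io_handler, NULL);
}
#endif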
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);

        ev.events = EPOLLIN;
        ev.data.ptr = d;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        r = event_setup_timer_fd(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
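/* Illustrative sketch (not part of the original source): arming a one-shot
 * timer 5 seconds from now on CLOCK_MONOTONIC with the default accuracy.
 * The names example_time_handler and example_add_timer are hypothetical. */
#if 0
static int example_time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        /* One-shot sources are set to SD_EVENT_OFF before dispatch; re-arm
         * here with sd_event_source_set_time() + set_enabled() if desired. */
        return 0;
}

static int example_add_timer(sd_event *e) {
        sd_event_source *s = NULL;
        uint64_t usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
        if (r < 0)
                return r;

        /* accuracy == 0 selects DEFAULT_ACCURACY_USEC (250ms). */
        return sd_event_add_time(e, &s, CLOCK_MONOTONIC, usec + 5 * USEC_PER_SEC, 0, example_time_handler, NULL);
}
#endif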
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));
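/* Illustrative sketch (not part of the original source): a signal must
 * already be blocked in the calling thread, otherwise sd_event_add_signal()
 * refuses it (see the sigismember() check above). The function name
 * example_add_sigterm is hypothetical. */
#if 0
static int example_add_sigterm(sd_event *e) {
        sigset_t ss;

        /* Block SIGTERM first, so it is only delivered via the signalfd. */
        if (sigemptyset(&ss) < 0 ||
            sigaddset(&ss, SIGTERM) < 0 ||
            sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -errno;

        /* A NULL callback selects the default handler, which calls
         * sd_event_exit(); passing ret=NULL makes the source floating. */
        return sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
}
#endif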
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;
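/* Illustrative sketch (not part of the original source): watching a forked
 * child for exit. SIGCHLD must be blocked for the signalfd to see it, just
 * as with sd_event_add_signal(). All names here are hypothetical. */
#if 0
static int example_child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* The child is still a zombie here; it is reaped for good only
         * after this callback returns (see source_dispatch()). */
        return sd_event_exit(sd_event_source_get_event(s), si->si_status);
}

static int example_watch_child(sd_event *e, pid_t pid) {
        return sd_event_add_child(e, NULL, pid, WEXITED, example_child_handler, NULL);
}
#endif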
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
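/* Illustrative sketch (not part of the original source): exit sources run
 * only after sd_event_exit() has been called, in priority order, which makes
 * them a natural place for cleanup work. Names are hypothetical. */
#if 0
static int example_cleanup_handler(sd_event_source *s, void *userdata) {
        /* Runs during SD_EVENT_EXITING, before the loop finishes. */
        return 0;
}

static int example_add_cleanup(sd_event *e) {
        return sd_event_add_exit(e, NULL, example_cleanup_handler, NULL);
}
#endif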
#if 0 /// UNNEEDED by elogind
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
#endif // 0

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                }
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;

        return 0;
}
#endif // 0

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
#endif // 0

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;

        return 0;
}
#endif // 0
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;

        return 0;
}
#endif // 0

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;

        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;

        return 0;
}
#endif // 0

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;

        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;

        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);

        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;

        return 0;
}
#endif // 0
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
#if 0 /// UNNEEDED by elogind
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
#endif // 0

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms steps. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (_unlikely_(c < USEC_PER_MINUTE))
                return b;
        c -= USEC_PER_MINUTE;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (_unlikely_(c < USEC_PER_SEC*10))
                return b;
        c -= USEC_PER_SEC*10;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (_unlikely_(c < USEC_PER_SEC))
                return b;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (_unlikely_(c < USEC_PER_MSEC*250))
                return b;
        c -= USEC_PER_MSEC*250;
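/* Illustrative sketch (not part of the original source): the alignment
 * arithmetic above, spelled out for the minute-granularity step. The name
 * example_minute_candidate() is hypothetical. For instance, with b at 7s
 * past some minute and a perturbation of 11s, the candidate (11s past that
 * minute) overshoots b and is therefore moved back one full minute; the
 * 10s/1s/250ms steps are tried only if the result falls before a. */
#if 0
static usec_t example_minute_candidate(usec_t b, usec_t perturb) {
        usec_t c;

        /* Align to the start of b's minute, then add the boot-ID-derived
         * perturbation so all loops on this machine pick the same spot. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + perturb;

        /* If that overshoots b, step back one full minute (guarding against
         * underflow near the epoch, as the real code does). */
        if (c >= b)
                c = c >= USEC_PER_MINUTE ? c - USEC_PER_MINUTE : b;

        return c;
}
#endif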
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->next == USEC_INFINITY)
                        return 0;

                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, so just set some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {

        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (errno == EAGAIN || errno == EINTR)
                        return 0;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one,
           SIGCHLD might be enqueued later and we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return read_one;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);

                return 1;
        }
}
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s->pending || s->type == SOURCE_EXIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
static int event_prepare(sd_event *e) {
        int r;

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
                }
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;

        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
static int process_watchdog(sd_event *e) {

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        r = event_arm_timer(e, &e->boottime);
        r = event_arm_timer(e, &e->monotonic);
        r = event_arm_timer(e, &e->realtime_alarm);
        r = event_arm_timer(e, &e->boottime_alarm);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
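/* Illustrative sketch (not part of the original source): how the
 * prepare/wait/dispatch triple above can be driven manually, e.g. when
 * embedding this loop in a foreign event loop via sd_event_get_fd(). This is
 * equivalent to one sd_event_run() iteration; the name is hypothetical. */
#if 0
static int example_iterate_once(sd_event *e) {
        int r;

        r = sd_event_prepare(e);        /* 0: nothing pending yet, wait below */
        if (r == 0)
                r = sd_event_wait(e, (uint64_t) -1);
        if (r > 0)
                r = sd_event_dispatch(e);

        return r;
}
#endif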
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
        }

        return r;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
#endif // 0
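/* Illustrative sketch (not part of the original source): a complete minimal
 * consumer of this API, combining the pieces above. All names are
 * hypothetical. */
#if 0
static int example_main(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* Add sources here, e.g. sd_event_add_io() / sd_event_add_time(). */

        /* Iterates sd_event_run() until sd_event_exit() is called, then
         * returns the exit code passed to sd_event_exit(). */
        r = sd_event_loop(e);

        sd_event_unref(e);
        return r;
}
#endif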
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;

        return 0;
}
#endif // 0
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);

        return 0;
}
#endif // 0
_public_ int sd_event_default(sd_event **ret) {
        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        default_event = *ret = e;

        return 0;
}
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
#endif // 0
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0)
                        goto fail;

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
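/* Illustrative sketch (not part of the original source): under a service
 * manager with WatchdogSec= set, sd_watchdog_enabled() yields the period
 * from $WATCHDOG_USEC, and the loop then pings at between 1/2 and 3/4 of
 * that period (see arm_watchdog() above). The name is hypothetical. */
#if 0
static int example_enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;

        /* r == 0: no watchdog was requested in the environment;
         * r > 0: keep-alive pings are now sent automatically. */
        return r;
}
#endif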
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
#endif // 0

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}