1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Lennart Poettering
9 #include <sys/timerfd.h>
12 #include "sd-daemon.h"
16 #include "alloc-util.h"
23 #include "process-util.h"
25 #include "signal-util.h"
26 #include "string-table.h"
27 #include "string-util.h"
28 #include "time-util.h"
/* Default slack granted to timer event sources when the caller passes
 * accuracy == 0 to sd_event_add_time() below. */
31 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* Discriminator for the kind of an sd_event_source. The _MAX/_INVALID pair
 * follows the usual string-table enum convention (see the lookup below).
 * NOTE(review): several enumerators are elided in this view. */
33 typedef enum EventSourceType {
37 SOURCE_TIME_MONOTONIC,
38 SOURCE_TIME_REALTIME_ALARM,
39 SOURCE_TIME_BOOTTIME_ALARM,
46 _SOURCE_EVENT_SOURCE_TYPE_MAX,
47 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
/* Human-readable names for each event source type, indexed by
 * EventSourceType. Used for debug logging, e.g. in source_io_unregister()
 * via event_source_type_to_string(). */
50 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
52 [SOURCE_TIME_REALTIME] = "realtime",
53 [SOURCE_TIME_BOOTTIME] = "boottime", /* was "bootime" — typo fixed for consistency with "boottime-alarm" below */
54 [SOURCE_TIME_MONOTONIC] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
57 [SOURCE_SIGNAL] = "signal",
58 [SOURCE_CHILD] = "child",
59 [SOURCE_DEFER] = "defer",
60 [SOURCE_POST] = "post",
61 [SOURCE_EXIT] = "exit",
62 [SOURCE_WATCHDOG] = "watchdog",
65 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
67 /* All objects we use in epoll events start with this value, so that
68 * we know how to dispatch it */
69 typedef enum WakeupType {
75 _WAKEUP_TYPE_INVALID = -1,
/* True for the five timer source types, letting all clocks share the
 * same prioq/comparator logic below. */
78 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
/* A single registered event source. The per-type payload fields below
 * (the repeated "callback" members) presumably live in an anonymous
 * union keyed off "type" -- the union braces are elided in this view,
 * TODO confirm against the full file. */
80 struct sd_event_source {
87 sd_event_handler_t prepare;
/* 5 bits suffices for all EventSourceType values incl. the -1 sentinel. */
91 EventSourceType type:5;
/* Positions of this source in the loop's "pending"/"prepare" prioqs
 * (PRIOQ_IDX_NULL when not enqueued; see source_new()). */
98 unsigned pending_index;
99 unsigned prepare_index;
/* Iteration counters consulted by pending_prioq_compare() and
 * prepare_prioq_compare() to order entries by age within a priority. */
100 uint64_t pending_iteration;
101 uint64_t prepare_iteration;
103 LIST_FIELDS(sd_event_source, sources);
/* --- per-type payloads --- */
107 sd_event_io_handler_t callback;
/* Timer sources: next wakeup time plus allowed slack, and this source's
 * positions in the clock's earliest/latest prioqs. */
115 sd_event_time_handler_t callback;
116 usec_t next, accuracy;
117 unsigned earliest_index;
118 unsigned latest_index;
121 sd_event_signal_handler_t callback;
122 struct signalfd_siginfo siginfo;
126 sd_event_child_handler_t callback;
132 sd_event_handler_t callback;
135 sd_event_handler_t callback;
/* Exit sources: position in the event loop's "exit" prioq. */
138 sd_event_handler_t callback;
139 unsigned prioq_index;
148 /* For all clocks we maintain two priority queues each, one
149 * ordered for the earliest times the events may be
150 * dispatched, and one ordered by the latest times they must
151 * have been dispatched. The range between the top entries in
152 * the two prioqs is the time window we can freely schedule
165 /* For each priority we maintain one signal fd, so that we
166 * only have to dequeue a single event per priority at a
172 sd_event_source *current;
184 /* timerfd_create() only supports these five clocks so far. We
185 * can add support for more clocks when the kernel learns to
186 * deal with them, too. */
187 struct clock_data realtime;
188 struct clock_data boottime;
189 struct clock_data monotonic;
190 struct clock_data realtime_alarm;
191 struct clock_data boottime_alarm;
195 sd_event_source **signal_sources; /* indexed by signal number */
196 Hashmap *signal_data; /* indexed by priority */
/* Child sources are keyed by PID (see PID_TO_PTR use in sd_event_add_child()). */
198 Hashmap *child_sources;
199 unsigned n_enabled_child_sources;
208 triple_timestamp timestamp;
211 bool exit_requested:1;
212 bool need_process_child:1;
214 bool profile_delays:1;
/* Points at the thread-local default_event slot when this loop is the
 * default one; cleared again in event_free(). */
219 sd_event **default_event_ptr;
221 usec_t watchdog_last, watchdog_period;
225 LIST_HEAD(sd_event_source, sources);
/* Profiling state: 64 logarithmic histogram buckets, one per bit of
 * usec_t (matches the "2^0 ... 2^63 us" log message in sd_event_new()). */
227 usec_t last_run, last_log;
228 unsigned delays[sizeof(usec_t) * 8];
/* Thread-local default event loop, handed out for SD_EVENT_DEFAULT. */
231 static thread_local sd_event *default_event = NULL;
233 static void source_disconnect(sd_event_source *s);
/* Map the SD_EVENT_DEFAULT sentinel to this thread's default loop; any
 * other pointer (including NULL) passes through unchanged. */
235 static sd_event *event_resolve(sd_event *e) {
236 return e == SD_EVENT_DEFAULT ? default_event : e;
/* Ordering for the "pending" prioq: enabled before disabled, then lower
 * priority value first, then older (smaller pending_iteration) first. */
239 static int pending_prioq_compare(const void *a, const void *b) {
240 const sd_event_source *x = a, *y = b;
245 /* Enabled ones first */
246 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
248 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
251 /* Lower priority values first */
252 if (x->priority < y->priority)
254 if (x->priority > y->priority)
257 /* Older entries first */
258 if (x->pending_iteration < y->pending_iteration)
260 if (x->pending_iteration > y->pending_iteration)
/* Ordering for the "prepare" prioq: enabled first, then least recently
 * prepared first (so iteration can stop at the first already-prepared
 * entry), then lower priority value first. */
266 static int prepare_prioq_compare(const void *a, const void *b) {
267 const sd_event_source *x = a, *y = b;
272 /* Enabled ones first */
273 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
275 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
278 /* Move most recently prepared ones last, so that we can stop
279 * preparing as soon as we hit one that has already been
280 * prepared in the current iteration */
281 if (x->prepare_iteration < y->prepare_iteration)
283 if (x->prepare_iteration > y->prepare_iteration)
286 /* Lower priority values first */
287 if (x->priority < y->priority)
289 if (x->priority > y->priority)
/* Ordering for a clock's "earliest" prioq: enabled first, non-pending
 * first, then by the earliest possible dispatch time (time.next). */
295 static int earliest_time_prioq_compare(const void *a, const void *b) {
296 const sd_event_source *x = a, *y = b;
298 assert(EVENT_SOURCE_IS_TIME(x->type));
299 assert(x->type == y->type);
301 /* Enabled ones first */
302 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307 /* Move the pending ones to the end */
308 if (!x->pending && y->pending)
310 if (x->pending && !y->pending)
314 if (x->time.next < y->time.next)
316 if (x->time.next > y->time.next)
/* Latest permissible dispatch time: next + accuracy, saturating at
 * USEC_INFINITY via usec_add(). */
322 static usec_t time_event_source_latest(const sd_event_source *s) {
323 return usec_add(s->time.next, s->time.accuracy);
/* Ordering for a clock's "latest" prioq: same as above, but keyed on the
 * latest permissible dispatch time instead of the earliest. */
326 static int latest_time_prioq_compare(const void *a, const void *b) {
327 const sd_event_source *x = a, *y = b;
329 assert(EVENT_SOURCE_IS_TIME(x->type));
330 assert(x->type == y->type);
332 /* Enabled ones first */
333 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
335 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
338 /* Move the pending ones to the end */
339 if (!x->pending && y->pending)
341 if (x->pending && !y->pending)
345 if (time_event_source_latest(x) < time_event_source_latest(y))
347 if (time_event_source_latest(x) > time_event_source_latest(y))
/* Ordering for the "exit" prioq: enabled first, then lower priority
 * value first. Only SOURCE_EXIT sources live in this queue. */
353 static int exit_prioq_compare(const void *a, const void *b) {
354 const sd_event_source *x = a, *y = b;
356 assert(x->type == SOURCE_EXIT);
357 assert(y->type == SOURCE_EXIT);
359 /* Enabled ones first */
360 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
362 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
365 /* Lower priority values first */
366 if (x->priority < y->priority)
368 if (x->priority > y->priority)
/* Release the prioqs (and, presumably, the timerfd -- elided in this
 * view) belonging to one clock_data. */
374 static void free_clock_data(struct clock_data *d) {
376 assert(d->wakeup == WAKEUP_CLOCK_DATA);
379 prioq_free(d->earliest);
380 prioq_free(d->latest);
/* Tear down an event loop: detach and unref every remaining source,
 * unregister it as the thread default, then close fds and free all
 * per-loop containers. */
383 static void event_free(sd_event *e) {
388 while ((s = e->sources)) {
390 source_disconnect(s);
391 sd_event_source_unref(s);
/* Every source must have been disconnected by the loop above. */
394 assert(e->n_sources == 0);
396 if (e->default_event_ptr)
397 *(e->default_event_ptr) = NULL;
399 safe_close(e->epoll_fd);
400 safe_close(e->watchdog_fd);
402 free_clock_data(&e->realtime);
403 free_clock_data(&e->boottime);
404 free_clock_data(&e->monotonic);
405 free_clock_data(&e->realtime_alarm);
406 free_clock_data(&e->boottime_alarm);
408 prioq_free(e->pending);
409 prioq_free(e->prepare);
412 free(e->signal_sources);
413 hashmap_free(e->signal_data);
415 hashmap_free(e->child_sources);
416 set_free(e->post_sources);
/* Allocate a new event loop object with one reference, an epoll fd, and
 * all per-clock state initialized to "unset". Returns 0 on success or a
 * negative errno-style code. */
420 _public_ int sd_event_new(sd_event** ret) {
424 assert_return(ret, -EINVAL);
426 e = new0(sd_event, 1);
/* -1 marks every fd as "not opened yet"; timerfds are created lazily in
 * event_setup_timer_fd(). */
431 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
432 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
433 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
/* Remember the creating PID so we can detect use across fork() later. */
434 e->original_pid = getpid_cached();
435 e->perturb = USEC_INFINITY;
437 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
441 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
442 if (e->epoll_fd < 0) {
/* Keep the fd out of the stdio range (0..2). */
447 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
449 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
450 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
451 e->profile_delays = true;
/* Take an additional reference on the event loop. */
462 _public_ sd_event* sd_event_ref(sd_event *e) {
467 assert(e->n_ref >= 1);
/* Drop a reference; the loop is destroyed when the count reaches zero
 * (freeing logic elided in this view). */
473 _public_ sd_event* sd_event_unref(sd_event *e) {
478 assert(e->n_ref >= 1);
487 static bool event_pid_changed(sd_event *e) {
490 /* We don't support people creating an event loop and keeping
491 * it around over a fork(). Let's complain. */
493 return e->original_pid != getpid_cached();
/* Remove an IO source's fd from the epoll set, if it was registered.
 * Best-effort: failures are only logged, and nothing is done after a
 * fork since the epoll fd then belongs to the parent. */
496 static void source_io_unregister(sd_event_source *s) {
500 assert(s->type == SOURCE_IO);
502 if (event_pid_changed(s->event))
505 if (!s->io.registered)
508 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
510 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
511 strna(s->description), event_source_type_to_string(s->type));
513 s->io.registered = false;
/* Add or update an IO source's fd in the epoll set. ONESHOT enablement
 * maps to EPOLLONESHOT; uses MOD vs ADD depending on whether the fd is
 * already registered. */
516 static int source_io_register(
521 struct epoll_event ev = {};
525 assert(s->type == SOURCE_IO);
526 assert(enabled != SD_EVENT_OFF);
531 if (enabled == SD_EVENT_ONESHOT)
532 ev.events |= EPOLLONESHOT;
534 if (s->io.registered)
535 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
537 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
541 s->io.registered = true;
/* Map a timer source type to the kernel clock id it is driven by;
 * returns (clockid_t) -1 for non-time types. */
546 static clockid_t event_source_type_to_clock(EventSourceType t) {
550 case SOURCE_TIME_REALTIME:
551 return CLOCK_REALTIME;
553 case SOURCE_TIME_BOOTTIME:
554 return CLOCK_BOOTTIME;
556 case SOURCE_TIME_MONOTONIC:
557 return CLOCK_MONOTONIC;
559 case SOURCE_TIME_REALTIME_ALARM:
560 return CLOCK_REALTIME_ALARM;
562 case SOURCE_TIME_BOOTTIME_ALARM:
563 return CLOCK_BOOTTIME_ALARM;
566 return (clockid_t) -1;
/* Inverse of the above: map a clock id to the matching source type, or
 * _SOURCE_EVENT_SOURCE_TYPE_INVALID for unsupported clocks. */
570 static EventSourceType clock_to_event_source_type(clockid_t clock) {
575 return SOURCE_TIME_REALTIME;
578 return SOURCE_TIME_BOOTTIME;
580 case CLOCK_MONOTONIC:
581 return SOURCE_TIME_MONOTONIC;
583 case CLOCK_REALTIME_ALARM:
584 return SOURCE_TIME_REALTIME_ALARM;
586 case CLOCK_BOOTTIME_ALARM:
587 return SOURCE_TIME_BOOTTIME_ALARM;
590 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
/* Return the per-clock bookkeeping struct inside the event loop for a
 * given timer source type. */
594 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
599 case SOURCE_TIME_REALTIME:
602 case SOURCE_TIME_BOOTTIME:
605 case SOURCE_TIME_MONOTONIC:
606 return &e->monotonic;
608 case SOURCE_TIME_REALTIME_ALARM:
609 return &e->realtime_alarm;
611 case SOURCE_TIME_BOOTTIME_ALARM:
612 return &e->boottime_alarm;
/* Ensure a signal_data object (one signalfd per priority) exists that
 * covers signal "sig": look it up by the priority of the signal's event
 * source (0 if there is none), allocate it if missing, add "sig" to its
 * sigset via signalfd(), and hook the fd into epoll. On failure the
 * partially-built object is closed and removed again. */
619 static int event_make_signal_data(
622 struct signal_data **ret) {
624 struct epoll_event ev = {};
625 struct signal_data *d;
633 if (event_pid_changed(e))
/* Use the priority of the registered source for this signal, if any. */
636 if (e->signal_sources && e->signal_sources[sig])
637 priority = e->signal_sources[sig]->priority;
641 d = hashmap_get(e->signal_data, &priority);
/* Already covered by the existing signalfd for this priority. */
643 if (sigismember(&d->sigset, sig) > 0) {
649 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
653 d = new0(struct signal_data, 1);
657 d->wakeup = WAKEUP_SIGNAL_DATA;
659 d->priority = priority;
661 r = hashmap_put(e->signal_data, &d->priority, d);
671 assert_se(sigaddset(&ss_copy, sig) >= 0);
/* signalfd() with an existing fd updates the mask in place. */
673 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
687 d->fd = fd_move_above_stdio(r);
692 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
/* Error path: roll back the freshly created signal_data. */
705 d->fd = safe_close(d->fd);
706 hashmap_remove(e->signal_data, &d->priority);
713 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
717 /* Turns off the specified signal in the signal data
718 * object. If the signal mask of the object becomes empty that
721 if (sigismember(&d->sigset, sig) == 0)
724 assert_se(sigdelset(&d->sigset, sig) >= 0);
726 if (sigisemptyset(&d->sigset)) {
728 /* If all the mask is all-zero we can get rid of the structure */
729 hashmap_remove(e->signal_data, &d->priority);
/* Otherwise shrink the signalfd's mask to the remaining set. */
737 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
738 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
741 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
742 struct signal_data *d;
743 static const int64_t zero_priority = 0;
747 /* Rechecks if the specified signal is still something we are
748 * interested in. If not, we'll unmask it, and possibly drop
749 * the signalfd for it. */
/* Still needed: SIGCHLD while child sources are enabled... */
751 if (sig == SIGCHLD &&
752 e->n_enabled_child_sources > 0)
/* ...or any signal that still has an enabled signal source. */
755 if (e->signal_sources &&
756 e->signal_sources[sig] &&
757 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
761 * The specified signal might be enabled in three different queues:
763 * 1) the one that belongs to the priority passed (if it is non-NULL)
764 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
765 * 3) the 0 priority (to cover the SIGCHLD case)
767 * Hence, let's remove it from all three here.
771 d = hashmap_get(e->signal_data, priority);
773 event_unmask_signal_data(e, d, sig);
776 if (e->signal_sources && e->signal_sources[sig]) {
777 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
779 event_unmask_signal_data(e, d, sig);
782 d = hashmap_get(e->signal_data, &zero_priority);
784 event_unmask_signal_data(e, d, sig);
/* Detach a source from its event loop: remove it from all type-specific
 * registrations (epoll, clock prioqs, signalfd masks, child hashmap,
 * post set, exit prioq) and from the generic pending/prepare prioqs and
 * the sources list, then drop the loop reference it held. The source
 * object itself stays alive until its own refcount drops. */
787 static void source_disconnect(sd_event_source *s) {
795 assert(s->event->n_sources > 0);
801 source_io_unregister(s);
805 case SOURCE_TIME_REALTIME:
806 case SOURCE_TIME_BOOTTIME:
807 case SOURCE_TIME_MONOTONIC:
808 case SOURCE_TIME_REALTIME_ALARM:
809 case SOURCE_TIME_BOOTTIME_ALARM: {
810 struct clock_data *d;
812 d = event_get_clock_data(s->event, s->type);
815 prioq_remove(d->earliest, s, &s->time.earliest_index);
816 prioq_remove(d->latest, s, &s->time.latest_index);
/* The clock's timerfd must be re-armed now that its queues changed. */
817 d->needs_rearm = true;
822 if (s->signal.sig > 0) {
824 if (s->event->signal_sources)
825 s->event->signal_sources[s->signal.sig] = NULL;
827 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
833 if (s->child.pid > 0) {
834 if (s->enabled != SD_EVENT_OFF) {
835 assert(s->event->n_enabled_child_sources > 0);
836 s->event->n_enabled_child_sources--;
839 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
/* Child sources piggyback on the SIGCHLD signalfd at priority 0. */
840 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
850 set_remove(s->event->post_sources, s);
854 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
858 assert_not_reached("Wut? I shouldn't exist.");
862 prioq_remove(s->event->pending, s, &s->pending_index);
865 prioq_remove(s->event->prepare, s, &s->prepare_index);
869 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
871 LIST_REMOVE(sources, event->sources, s);
875 sd_event_unref(event);
/* Free a source object: disconnect it first, close the fd if this IO
 * source owns it, then release its allocations. */
878 static void source_free(sd_event_source *s) {
881 source_disconnect(s);
883 if (s->type == SOURCE_IO && s->io.owned)
884 safe_close(s->io.fd);
886 free(s->description);
/* Mark a source pending (b=true) or not pending (b=false), inserting it
 * into / removing it from the "pending" prioq and updating the state
 * the change invalidates (clock prioq positions, current signal). */
890 static int source_set_pending(sd_event_source *s, bool b) {
894 assert(s->type != SOURCE_EXIT);
/* Record when it became pending so pending_prioq_compare() can order
 * older entries first. */
902 s->pending_iteration = s->event->iteration;
904 r = prioq_put(s->event->pending, s, &s->pending_index);
910 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
912 if (EVENT_SOURCE_IS_TIME(s->type)) {
913 struct clock_data *d;
915 d = event_get_clock_data(s->event, s->type);
/* Pendingness is a sort key in both clock prioqs; reshuffle and re-arm. */
918 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
919 prioq_reshuffle(d->latest, s, &s->time.latest_index);
920 d->needs_rearm = true;
923 if (s->type == SOURCE_SIGNAL && !b) {
924 struct signal_data *d;
926 d = hashmap_get(s->event->signal_data, &s->priority);
927 if (d && d->current == s)
/* Allocate a new source attached to loop "e". "floating" sources (no
 * caller-held handle) are owned by the loop itself. */
934 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
939 s = new0(sd_event_source, 1);
945 s->floating = floating;
947 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
952 LIST_PREPEND(sources, e->sources, s);
/* Public API: register an IO event source watching "fd" for "events"
 * (epoll flags). The new source starts enabled (SD_EVENT_ON). If "ret"
 * is NULL the source is created floating, owned by the loop. Returns 0
 * or a negative errno-style code. */
958 _public_ int sd_event_add_io(
960 sd_event_source **ret,
963 sd_event_io_handler_t callback,
969 assert_return(e, -EINVAL);
970 assert_return(e = event_resolve(e), -ENOPKG);
971 assert_return(fd >= 0, -EBADF);
972 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
973 assert_return(callback, -EINVAL);
974 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
975 assert_return(!event_pid_changed(e), -ECHILD);
977 s = source_new(e, !ret, SOURCE_IO);
981 s->wakeup = WAKEUP_EVENT_SOURCE;
983 s->io.events = events;
984 s->io.callback = callback;
985 s->userdata = userdata;
986 s->enabled = SD_EVENT_ON;
988 r = source_io_register(s, s->enabled, events);
/* Lazily compute the per-boot wakeup perturbation offset (derived from
 * the boot ID) used to de-synchronize coalesced timers across machines. */
1000 static void initialize_perturb(sd_event *e) {
1001 sd_id128_t bootid = {};
1003 /* When we sleep for longer, we try to realign the wakeup to
1004 the same time within each minute/second/250ms, so that
1005 events all across the system can be coalesced into a single
1006 CPU wakeup. However, let's take some system-specific
1007 randomness for this value, so that in a network of systems
1008 with synced clocks timer events are distributed a
1009 bit. Here, we calculate a perturbation usec offset from the
1012 if (_likely_(e->perturb != USEC_INFINITY))
1015 if (sd_id128_get_boot(&bootid) >= 0)
1016 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
/* Lazily create the timerfd for one clock and register it with epoll.
 * No-op if the fd already exists. */
1019 static int event_setup_timer_fd(
1021 struct clock_data *d,
1024 struct epoll_event ev = {};
1030 if (_likely_(d->fd >= 0))
1033 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1037 fd = fd_move_above_stdio(fd);
1039 ev.events = EPOLLIN;
1042 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
/* Default handler installed when sd_event_add_time() is called with a
 * NULL callback: exits the loop with userdata as the exit code. */
1052 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1055 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
/* Public API: register a one-shot timer source on "clock" firing at
 * "usec" with the given accuracy (0 selects DEFAULT_ACCURACY_USEC). A
 * NULL callback installs time_exit_callback(). Returns 0 or a negative
 * errno-style code. */
1058 _public_ int sd_event_add_time(
1060 sd_event_source **ret,
1064 sd_event_time_handler_t callback,
1067 EventSourceType type;
1069 struct clock_data *d;
1072 assert_return(e, -EINVAL);
1073 assert_return(e = event_resolve(e), -ENOPKG);
1074 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1075 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1076 assert_return(!event_pid_changed(e), -ECHILD);
1078 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1081 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1086 callback = time_exit_callback;
1088 d = event_get_clock_data(e, type);
1091 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1095 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1100 r = event_setup_timer_fd(e, d, clock);
1105 s = source_new(e, !ret, type);
1109 s->time.next = usec;
1110 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1111 s->time.callback = callback;
1112 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1113 s->userdata = userdata;
1114 s->enabled = SD_EVENT_ONESHOT;
1116 d->needs_rearm = true;
1118 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1122 r = prioq_put(d->latest, s, &s->time.latest_index);
/* Default handler for sd_event_add_signal() with a NULL callback: exits
 * the loop with userdata as the exit code. */
1136 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1139 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
/* Public API: register a signal event source for "sig". The signal must
 * already be blocked in the calling thread (checked via pthread_sigmask),
 * and only one source per signal is allowed. The source starts enabled. */
1142 _public_ int sd_event_add_signal(
1144 sd_event_source **ret,
1146 sd_event_signal_handler_t callback,
1150 struct signal_data *d;
1154 assert_return(e, -EINVAL);
1155 assert_return(e = event_resolve(e), -ENOPKG);
1156 assert_return(SIGNAL_VALID(sig), -EINVAL);
1157 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1158 assert_return(!event_pid_changed(e), -ECHILD);
1161 callback = signal_exit_callback;
/* Query (not modify) the current thread's signal mask. */
1163 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1167 if (!sigismember(&ss, sig))
1170 if (!e->signal_sources) {
1171 e->signal_sources = new0(sd_event_source*, _NSIG);
1172 if (!e->signal_sources)
1174 } else if (e->signal_sources[sig])
1177 s = source_new(e, !ret, SOURCE_SIGNAL);
1181 s->signal.sig = sig;
1182 s->signal.callback = callback;
1183 s->userdata = userdata;
1184 s->enabled = SD_EVENT_ON;
1186 e->signal_sources[sig] = s;
1188 r = event_make_signal_data(e, sig, &d);
1194 /* Use the signal name as description for the event source by default */
1195 (void) sd_event_source_set_description(s, signal_to_string(sig));
/* Public API: register a child-process source for "pid" with waitid()
 * options (WEXITED/WSTOPPED/WCONTINUED). One source per PID; delivery
 * rides on the SIGCHLD signalfd. The source starts as ONESHOT. */
1203 _public_ int sd_event_add_child(
1205 sd_event_source **ret,
1208 sd_event_child_handler_t callback,
1214 assert_return(e, -EINVAL);
1215 assert_return(e = event_resolve(e), -ENOPKG);
1216 assert_return(pid > 1, -EINVAL);
1217 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1218 assert_return(options != 0, -EINVAL);
1219 assert_return(callback, -EINVAL);
1220 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1221 assert_return(!event_pid_changed(e), -ECHILD);
1223 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1227 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1230 s = source_new(e, !ret, SOURCE_CHILD);
1235 s->child.options = options;
1236 s->child.callback = callback;
1237 s->userdata = userdata;
1238 s->enabled = SD_EVENT_ONESHOT;
1240 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1246 e->n_enabled_child_sources++;
1248 r = event_make_signal_data(e, SIGCHLD, NULL);
/* Roll back the counter if we could not set up the SIGCHLD signalfd. */
1250 e->n_enabled_child_sources--;
/* The child may already have exited; make the loop check on next run. */
1255 e->need_process_child = true;
/* Public API: register a defer source -- dispatched on the next loop
 * iteration. Created ONESHOT and immediately marked pending. */
1263 _public_ int sd_event_add_defer(
1265 sd_event_source **ret,
1266 sd_event_handler_t callback,
1272 assert_return(e, -EINVAL);
1273 assert_return(e = event_resolve(e), -ENOPKG);
1274 assert_return(callback, -EINVAL);
1275 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1276 assert_return(!event_pid_changed(e), -ECHILD);
1278 s = source_new(e, !ret, SOURCE_DEFER);
1282 s->defer.callback = callback;
1283 s->userdata = userdata;
1284 s->enabled = SD_EVENT_ONESHOT;
1286 r = source_set_pending(s, true);
/* Public API: register a post source, kept in the loop's post_sources
 * set. Starts enabled (SD_EVENT_ON). */
1298 _public_ int sd_event_add_post(
1300 sd_event_source **ret,
1301 sd_event_handler_t callback,
1307 assert_return(e, -EINVAL);
1308 assert_return(e = event_resolve(e), -ENOPKG);
1309 assert_return(callback, -EINVAL);
1310 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1311 assert_return(!event_pid_changed(e), -ECHILD);
1313 r = set_ensure_allocated(&e->post_sources, NULL);
1317 s = source_new(e, !ret, SOURCE_POST);
1321 s->post.callback = callback;
1322 s->userdata = userdata;
1323 s->enabled = SD_EVENT_ON;
1325 r = set_put(e->post_sources, s);
/* Public API: register an exit source, dispatched from the exit prioq
 * when the loop is shutting down. Created ONESHOT. */
1337 _public_ int sd_event_add_exit(
1339 sd_event_source **ret,
1340 sd_event_handler_t callback,
1346 assert_return(e, -EINVAL);
1347 assert_return(e = event_resolve(e), -ENOPKG);
1348 assert_return(callback, -EINVAL);
1349 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1350 assert_return(!event_pid_changed(e), -ECHILD);
1352 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1356 s = source_new(e, !ret, SOURCE_EXIT);
1360 s->exit.callback = callback;
1361 s->userdata = userdata;
1362 s->exit.prioq_index = PRIOQ_IDX_NULL;
1363 s->enabled = SD_EVENT_ONESHOT;
1365 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
/* Take an additional reference on an event source. */
1377 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1382 assert(s->n_ref >= 1);
/* Drop a reference. When the count hits zero while the source is being
 * dispatched, only detach it (so the caller may close the fd at once);
 * the object itself is reclaimed after dispatch completes. */
1388 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1393 assert(s->n_ref >= 1);
1396 if (s->n_ref <= 0) {
1397 /* Here's a special hack: when we are called from a
1398 * dispatch handler we won't free the event source
1399 * immediately, but we will detach the fd from the
1400 * epoll. This way it is safe for the caller to unref
1401 * the event source and immediately close the fd, but
1402 * we still retain a valid event source object after
1405 if (s->dispatching) {
1406 if (s->type == SOURCE_IO)
1407 source_io_unregister(s);
1409 source_disconnect(s);
/* Set (or clear, with NULL) the source's free-form description string. */
1417 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1418 assert_return(s, -EINVAL);
1419 assert_return(!event_pid_changed(s->event), -ECHILD);
1421 return free_and_strdup(&s->description, description);
/* Return the description set above; -ENXIO if none was set. */
1424 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1425 assert_return(s, -EINVAL);
1426 assert_return(description, -EINVAL);
1427 assert_return(s->description, -ENXIO);
1428 assert_return(!event_pid_changed(s->event), -ECHILD);
1430 *description = s->description;
/* Return the event loop this source belongs to. */
1434 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1435 assert_return(s, NULL);
/* Return whether the source is currently queued as pending. Not defined
 * for exit sources (they never use the pending prioq). */
1440 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1441 assert_return(s, -EINVAL);
1442 assert_return(s->type != SOURCE_EXIT, -EDOM);
1443 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1444 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Return the fd an IO source watches. */
1449 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1450 assert_return(s, -EINVAL);
1451 assert_return(s->type == SOURCE_IO, -EDOM);
1452 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Replace the fd an IO source watches: register the new fd first, and
 * only deregister the old one once that succeeded (rolling back on
 * failure), so the source is never left half-registered. */
1457 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1460 assert_return(s, -EINVAL);
1461 assert_return(fd >= 0, -EBADF);
1462 assert_return(s->type == SOURCE_IO, -EDOM);
1463 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Disabled sources are not in epoll; just swap the fd. */
1468 if (s->enabled == SD_EVENT_OFF) {
1470 s->io.registered = false;
1474 saved_fd = s->io.fd;
1475 assert(s->io.registered);
1478 s->io.registered = false;
1480 r = source_io_register(s, s->enabled, s->io.events);
/* Registration of the new fd failed: restore the old state. */
1482 s->io.fd = saved_fd;
1483 s->io.registered = true;
1487 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
/* Return whether the IO source owns (and will close) its fd. */
1493 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1494 assert_return(s, -EINVAL);
1495 assert_return(s->type == SOURCE_IO, -EDOM);
/* Transfer (or revoke) ownership of the fd to the source; an owned fd
 * is closed in source_free(). */
1500 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1501 assert_return(s, -EINVAL);
1502 assert_return(s->type == SOURCE_IO, -EDOM);
/* Return the epoll event mask the IO source watches. */
1508 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1509 assert_return(s, -EINVAL);
1510 assert_return(events, -EINVAL);
1511 assert_return(s->type == SOURCE_IO, -EDOM);
1512 assert_return(!event_pid_changed(s->event), -ECHILD);
1514 *events = s->io.events;
/* Change the epoll event mask; re-registers with epoll if the source is
 * enabled, and clears any stale pendingness. */
1518 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1521 assert_return(s, -EINVAL);
1522 assert_return(s->type == SOURCE_IO, -EDOM);
1523 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1524 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1525 assert_return(!event_pid_changed(s->event), -ECHILD);
1527 /* edge-triggered updates are never skipped, so we can reset edges */
1528 if (s->io.events == events && !(events & EPOLLET))
1531 if (s->enabled != SD_EVENT_OFF) {
1532 r = source_io_register(s, s->enabled, events);
1537 s->io.events = events;
1538 source_set_pending(s, false);
/* Return the triggered epoll events; only valid while pending. */
1543 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1544 assert_return(s, -EINVAL);
1545 assert_return(revents, -EINVAL);
1546 assert_return(s->type == SOURCE_IO, -EDOM);
1547 assert_return(s->pending, -ENODATA);
1548 assert_return(!event_pid_changed(s->event), -ECHILD);
1550 *revents = s->io.revents;
/* Return the signal number a signal source watches. */
1554 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1555 assert_return(s, -EINVAL);
1556 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1557 assert_return(!event_pid_changed(s->event), -ECHILD);
1559 return s->signal.sig;
/* Return the source's dispatch priority (lower value = earlier). */
1562 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1563 assert_return(s, -EINVAL);
1564 assert_return(!event_pid_changed(s->event), -ECHILD);
1566 *priority = s->priority;
/* Change the source's priority. Signal sources must migrate between the
 * per-priority signalfds; afterwards every prioq the source sits in is
 * reshuffled since priority is a sort key in all of them. */
1570 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1573 assert_return(s, -EINVAL);
1574 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1575 assert_return(!event_pid_changed(s->event), -ECHILD);
1577 if (s->priority == priority)
1580 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1581 struct signal_data *old, *d;
1583 /* Move us from the signalfd belonging to the old
1584 * priority to the signalfd of the new priority */
1586 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1588 s->priority = priority;
1590 r = event_make_signal_data(s->event, s->signal.sig, &d);
/* Could not create the new signalfd: restore the old priority. */
1592 s->priority = old->priority;
1596 event_unmask_signal_data(s->event, old, s->signal.sig);
1598 s->priority = priority;
1601 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1604 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1606 if (s->type == SOURCE_EXIT)
1607 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
/* Return the source's enablement state (SD_EVENT_ON/OFF/ONESHOT). */
1612 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1613 assert_return(s, -EINVAL);
1614 assert_return(m, -EINVAL);
1615 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Change a source's enablement state (SD_EVENT_OFF / ON / ONESHOT).
 * Enablement affects epoll registration (IO), clock prioq ordering and
 * timerfd arming (time), signalfd masks (signal), the enabled-children
 * counter and SIGCHLD signalfd (child), and the exit prioq -- each
 * branch below updates the registration matching the source type. */
1621 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1624 assert_return(s, -EINVAL);
1625 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1626 assert_return(!event_pid_changed(s->event), -ECHILD);
1628 /* If we are dead anyway, we are fine with turning off
1629 * sources, but everything else needs to fail. */
1630 if (s->event->state == SD_EVENT_FINISHED)
1631 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1633 if (s->enabled == m)
/* --- disabling --- */
1636 if (m == SD_EVENT_OFF) {
1641 source_io_unregister(s);
1645 case SOURCE_TIME_REALTIME:
1646 case SOURCE_TIME_BOOTTIME:
1647 case SOURCE_TIME_MONOTONIC:
1648 case SOURCE_TIME_REALTIME_ALARM:
1649 case SOURCE_TIME_BOOTTIME_ALARM: {
1650 struct clock_data *d;
1653 d = event_get_clock_data(s->event, s->type);
1656 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1657 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1658 d->needs_rearm = true;
1665 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1671 assert(s->event->n_enabled_child_sources > 0);
1672 s->event->n_enabled_child_sources--;
1674 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1679 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1688 assert_not_reached("Wut? I shouldn't exist.");
/* --- enabling (ON or ONESHOT) --- */
1695 r = source_io_register(s, m, s->io.events);
1702 case SOURCE_TIME_REALTIME:
1703 case SOURCE_TIME_BOOTTIME:
1704 case SOURCE_TIME_MONOTONIC:
1705 case SOURCE_TIME_REALTIME_ALARM:
1706 case SOURCE_TIME_BOOTTIME_ALARM: {
1707 struct clock_data *d;
1710 d = event_get_clock_data(s->event, s->type);
1713 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1714 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1715 d->needs_rearm = true;
1723 r = event_make_signal_data(s->event, s->signal.sig, NULL);
/* Roll back on failure to set up the signalfd. */
1725 s->enabled = SD_EVENT_OFF;
1726 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1734 if (s->enabled == SD_EVENT_OFF)
1735 s->event->n_enabled_child_sources++;
1739 r = event_make_signal_data(s->event, SIGCHLD, NULL);
/* Roll back counter and GC the SIGCHLD signalfd on failure. */
1741 s->enabled = SD_EVENT_OFF;
1742 s->event->n_enabled_child_sources--;
1743 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1751 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1760 assert_not_reached("Wut? I shouldn't exist.");
/* Enablement is a sort key in both generic prioqs. */
1765 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1768 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1773 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1774 assert_return(s, -EINVAL);
1775 assert_return(usec, -EINVAL);
1776 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1777 assert_return(!event_pid_changed(s->event), -ECHILD);
1779 *usec = s->time.next;
1783 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1784 struct clock_data *d;
1786 assert_return(s, -EINVAL);
1787 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1788 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1789 assert_return(!event_pid_changed(s->event), -ECHILD);
1791 s->time.next = usec;
1793 source_set_pending(s, false);
1795 d = event_get_clock_data(s->event, s->type);
1798 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1799 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1800 d->needs_rearm = true;
/* Returns in *usec the accuracy (allowed dispatch slack) of a time event source. */
1805 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1806         assert_return(s, -EINVAL);
1807         assert_return(usec, -EINVAL);
1808         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1809         assert_return(!event_pid_changed(s->event), -ECHILD);
1811         *usec = s->time.accuracy;
/* Sets the accuracy (allowed dispatch slack) of a time event source. An accuracy
 * of 0 is replaced by DEFAULT_ACCURACY_USEC; (uint64_t) -1 is rejected since it
 * is reserved as USEC_INFINITY. Only the "latest" queue depends on accuracy, so
 * only that one is reshuffled. */
1815 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1816         struct clock_data *d;
1818         assert_return(s, -EINVAL);
1819         assert_return(usec != (uint64_t) -1, -EINVAL);
1820         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1821         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1822         assert_return(!event_pid_changed(s->event), -ECHILD);
1825                 usec = DEFAULT_ACCURACY_USEC;
1827         s->time.accuracy = usec;
1829         source_set_pending(s, false);
1831         d = event_get_clock_data(s->event, s->type);
1834         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1835         d->needs_rearm = true;
/* Returns in *clock the clockid_t corresponding to the time source's type. */
1840 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1841         assert_return(s, -EINVAL);
1842         assert_return(clock, -EINVAL);
1843         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1844         assert_return(!event_pid_changed(s->event), -ECHILD);
1846         *clock = event_source_type_to_clock(s->type);
/* Returns in *pid the PID watched by a SOURCE_CHILD event source. */
1850 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1851         assert_return(s, -EINVAL);
1852         assert_return(pid, -EINVAL);
1853         assert_return(s->type == SOURCE_CHILD, -EDOM);
1854         assert_return(!event_pid_changed(s->event), -ECHILD);
1856         *pid = s->child.pid;
/* Installs (or replaces, or removes if NULL) a "prepare" callback invoked before
 * each event loop iteration. A source with a prepare callback lives in the
 * event's prepare priority queue; setting NULL removes it from that queue.
 * Not permitted on SOURCE_EXIT sources. */
1860 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1863         assert_return(s, -EINVAL);
1864         assert_return(s->type != SOURCE_EXIT, -EDOM);
1865         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1866         assert_return(!event_pid_changed(s->event), -ECHILD);
             /* No change? Nothing to do. */
1868         if (s->prepare == callback)
             /* Already enqueued: just swap the callback pointer. */
1871         if (callback && s->prepare) {
1872                 s->prepare = callback;
1876         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1880         s->prepare = callback;
             /* Newly set: insert into the prepare queue; NULL: drop from it. */
1883                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1887                 prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Returns the userdata pointer attached to the event source (NULL on bad input). */
1892 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1893         assert_return(s, NULL);
/* Replaces the userdata pointer of the event source; returns the previous value
 * (per the usual sd-event convention — confirm against full implementation). */
1898 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1901         assert_return(s, NULL);
1904         s->userdata = userdata;
/* Picks a wakeup time in the window [a, b]. To minimize wakeups system-wide,
 * the chosen time is aligned to a per-boot "perturb" offset within successively
 * finer intervals (1min, 10s, 1s, 250ms), falling back to the latest possible
 * time. Returns USEC_INFINITY if a is already infinite. */
1909 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1916         if (a >= USEC_INFINITY)
1917                 return USEC_INFINITY;
             /* Lazily derive e->perturb from the boot ID. */
1922         initialize_perturb(e);
1925         Find a good time to wake up again between times a and b. We
1926         have two goals here:
1928         a) We want to wake up as seldom as possible, hence prefer
1929         later times over earlier times.
1931         b) But if we have to wake up, then let's make sure to
1932         dispatch as much as possible on the entire system.
1934         We implement this by waking up everywhere at the same time
1935         within any given minute if we can, synchronised via the
1936         perturbation value determined from the boot ID. If we can't,
1937         then we try to find the same spot in every 10s, then 1s and
1938         then 250ms step. Otherwise, we pick the last possible time
             /* Try the per-minute alignment slot first. */
1942         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
             /* Guard against unsigned underflow when stepping back a minute. */
1944         if (_unlikely_(c < USEC_PER_MINUTE))
1947                 c -= USEC_PER_MINUTE;
             /* Then the 10s slot... */
1953         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1955         if (_unlikely_(c < USEC_PER_SEC*10))
1958                 c -= USEC_PER_SEC*10;
             /* ...then the 1s slot... */
1964         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1966         if (_unlikely_(c < USEC_PER_SEC))
             /* ...and finally the 250ms slot. */
1975         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1977         if (_unlikely_(c < USEC_PER_MSEC*250))
1980                 c -= USEC_PER_MSEC*250;
/* (Re)arms the timerfd of one clock's data to fire for the earliest enabled
 * time source, choosing the exact wakeup point via sleep_between(). No-op if
 * the clock is not flagged as needing a rearm. If no enabled source remains,
 * the timerfd is disarmed (zeroed itimerspec). */
1989 static int event_arm_timer(
1991                 struct clock_data *d) {
1993         struct itimerspec its = {};
1994         sd_event_source *a, *b;
2001         if (!d->needs_rearm)
2004         d->needs_rearm = false;
             /* Earliest enabled source determines the lower bound of the window. */
2006         a = prioq_peek(d->earliest);
2007         if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
                     /* Nothing to wait for: disarm unless already disarmed. */
2012                 if (d->next == USEC_INFINITY)
2016                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2020                 d->next = USEC_INFINITY;
             /* Latest permissible time comes from the accuracy-ordered queue. */
2024         b = prioq_peek(d->latest);
2025         assert_se(b && b->enabled != SD_EVENT_OFF);
2027         t = sleep_between(e, a->time.next, time_event_source_latest(b));
2031         assert_se(d->fd >= 0);
2034                 /* We don't want to disarm here, just set some time long ago
                      * so the timer fires immediately (0/0 would disarm it). */
2035                 its.it_value.tv_sec = 0;
2036                 its.it_value.tv_nsec = 1;
2038                 timespec_store(&its.it_value, t);
2040         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Records the epoll revents for an I/O source and marks it pending. */
2048 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2051         assert(s->type == SOURCE_IO);
2053         /* If the event source was already pending, we just OR in the
2054          * new revents, otherwise we reset the value. The ORing is
2055          * necessary to handle EPOLLONESHOT events properly where
2056          * readability might happen independently of writability, and
2057          * we need to keep track of both */
2060                 s->io.revents |= revents;
2062                 s->io.revents = revents;
2064         return source_set_pending(s, true);
/* Drains the expiration counter from a timerfd. EAGAIN/EINTR are tolerated;
 * a short read is treated as an error. On success resets *next (the cached
 * armed deadline) to USEC_INFINITY so the next arm call actually rearms. */
2067 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2074         assert_return(events == EPOLLIN, -EIO);
2076         ss = read(fd, &x, sizeof(x));
2078                 if (IN_SET(errno, EAGAIN, EINTR))
2084         if (_unlikely_(ss != sizeof(x)))
2088                 *next = USEC_INFINITY;
/* Marks as pending every enabled time source on this clock whose deadline has
 * been reached, popping them from the front of the "earliest" queue until the
 * head is disabled or not yet due. Flags the clock for rearming. */
2093 static int process_timer(
2096                 struct clock_data *d) {
2105                 s = prioq_peek(d->earliest);
                     /* Stop at the first source that is off or not yet expired. */
2108                     s->enabled == SD_EVENT_OFF ||
2112                 r = source_set_pending(s, true);
                     /* Pending state changes the prioq ordering keys. */
2116                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2117                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2118                 d->needs_rearm = true;
/* Polls (via waitid() with WNOHANG) every watched child source for a state
 * change and marks matching sources pending. Children are deliberately NOT
 * reaped here (WNOWAIT) so the dispatch callback still sees the zombie. */
2124 static int process_child(sd_event *e) {
2131         e->need_process_child = false;
2134         So, this is ugly. We iteratively invoke waitid() with P_PID
2135         + WNOHANG for each PID we wait for, instead of using
2136         P_ALL. This is because we only want to get child
2137         information of very specific child processes, and not all
2138         of them. We might not have processed the SIGCHLD even of a
2139         previous invocation and we don't want to maintain a
2140         unbounded *per-child* event queue, hence we really don't
2141         want anything flushed out of the kernel's queue that we
2142         don't care about. Since this is O(n) this means that if you
2143         have a lot of processes you probably want to handle SIGCHLD
2146         We do not reap the children here (by using WNOWAIT), this
2147         is only done after the event source is dispatched so that
2148         the callback still sees the process as a zombie.
2151         HASHMAP_FOREACH(s, e->child_sources, i) {
2152                 assert(s->type == SOURCE_CHILD);
2157                 if (s->enabled == SD_EVENT_OFF)
                     /* si_pid stays 0 when there is no state change, so clear first. */
2160                 zero(s->child.siginfo);
2161                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2162                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2166                 if (s->child.siginfo.si_pid != 0) {
2167                         bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2169                         if (!zombie && (s->child.options & WEXITED)) {
2170                                 /* If the child isn't dead then let's
2171                                  * immediately remove the state change
2172                                  * from the queue, since there's no
2173                                  * benefit in leaving it queued */
2175                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
2176                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2179                         r = source_set_pending(s, true);
/* Dequeues one signal from a per-priority signalfd and marks the matching
 * signal event source pending. Also schedules a child re-check whenever SIGCHLD
 * is routed through this signalfd, since only one signal is dequeued per
 * priority at a time. */
2188 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2189         bool read_one = false;
2193         assert_return(events == EPOLLIN, -EIO);
2195         /* If there's a signal queued on this priority and SIGCHLD is
2196            on this priority too, then make sure to recheck the
2197            children we watch. This is because we only ever dequeue
2198            the first signal per priority, and if we dequeue one, and
2199            SIGCHLD might be enqueued later we wouldn't know, but we
2200            might have higher priority children we care about hence we
2201            need to check that explicitly. */
2203         if (sigismember(&d->sigset, SIGCHLD))
2204                 e->need_process_child = true;
2206         /* If there's already an event source pending for this
2207          * priority we don't read another */
2212                 struct signalfd_siginfo si;
2214                 sd_event_source *s = NULL;
2216                 n = read(d->fd, &si, sizeof(si));
                     /* EAGAIN/EINTR just mean "nothing (more) to read right now". */
2218                         if (IN_SET(errno, EAGAIN, EINTR))
                     /* signalfd reads are all-or-nothing; a short read is fatal. */
2224                 if (_unlikely_(n != sizeof(si)))
2227                 assert(SIGNAL_VALID(si.ssi_signo));
2231                 if (e->signal_sources)
2232                         s = e->signal_sources[si.ssi_signo];
                     /* Stash the siginfo for the dispatch callback. */
2238                 s->signal.siginfo = si;
2241                 r = source_set_pending(s, true);
/* Invokes the user callback of a pending event source, routed by source type.
 * Clears the pending flag first (except DEFER/EXIT), marks all post sources
 * pending when dispatching a non-post source, downgrades ONESHOT sources to
 * OFF before the callback, and disables the source if its callback fails. */
2249 static int source_dispatch(sd_event_source *s) {
2250         EventSourceType saved_type;
2254         assert(s->pending || s->type == SOURCE_EXIT);
2256         /* Save the event source type, here, so that we still know it after the event callback which might invalidate
2258         saved_type = s->type;
             /* DEFER/EXIT sources stay pending across dispatches. */
2260         if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2261                 r = source_set_pending(s, false);
2266         if (s->type != SOURCE_POST) {
2270                 /* If we execute a non-post source, let's mark all
2271                  * post sources as pending */
2273                 SET_FOREACH(z, s->event->post_sources, i) {
2274                         if (z->enabled == SD_EVENT_OFF)
2277                         r = source_set_pending(z, true);
             /* ONESHOT sources are turned off before their callback runs. */
2283         if (s->enabled == SD_EVENT_ONESHOT) {
2284                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
             /* Guard so the callback may not free the source re-entrantly. */
2289         s->dispatching = true;
2294                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2297         case SOURCE_TIME_REALTIME:
2298         case SOURCE_TIME_BOOTTIME:
2299         case SOURCE_TIME_MONOTONIC:
2300         case SOURCE_TIME_REALTIME_ALARM:
2301         case SOURCE_TIME_BOOTTIME_ALARM:
2302                 r = s->time.callback(s, s->time.next, s->userdata);
2306                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2309         case SOURCE_CHILD: {
2312                 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2314                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2316                 /* Now, reap the PID for good. */
2318                         waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2324                 r = s->defer.callback(s, s->userdata);
2328                 r = s->post.callback(s, s->userdata);
2332                 r = s->exit.callback(s, s->userdata);
2335         case SOURCE_WATCHDOG:
2336         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2337         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2338                 assert_not_reached("Wut? I shouldn't exist.");
2341         s->dispatching = false;
             /* A failing callback disables the source rather than killing the loop. */
2344                 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2345                                 strna(s->description), event_source_type_to_string(saved_type));
2350                 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Runs the prepare callbacks of enabled sources that have not yet been prepared
 * this iteration (the prepare prioq orders not-yet-prepared sources first).
 * A source whose prepare callback fails is disabled. */
2355 static int event_prepare(sd_event *e) {
2363                 s = prioq_peek(e->prepare);
                     /* Head already handled this iteration (or disabled) → done. */
2364                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2367                 s->prepare_iteration = e->iteration;
2368                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2374                 s->dispatching = true;
2375                 r = s->prepare(s, s->userdata);
2376                 s->dispatching = false;
2379                         log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2380                                         strna(s->description), event_source_type_to_string(s->type));
2385                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Dispatches the highest-priority enabled exit source. When none remains, the
 * loop transitions to SD_EVENT_FINISHED; while dispatching, the state is
 * SD_EVENT_EXITING, restored to SD_EVENT_INITIAL afterwards. */
2391 static int dispatch_exit(sd_event *e) {
2397         p = prioq_peek(e->exit);
2398         if (!p || p->enabled == SD_EVENT_OFF) {
2399                 e->state = SD_EVENT_FINISHED;
2405         e->state = SD_EVENT_EXITING;
2407         r = source_dispatch(p);
2409         e->state = SD_EVENT_INITIAL;
/* Returns the highest-priority pending event source that is enabled, or NULL. */
2415 static sd_event_source* event_next_pending(sd_event *e) {
2420         p = prioq_peek(e->pending);
2424         if (p->enabled == SD_EVENT_OFF)
/* Rearms the watchdog timerfd to fire somewhere between 1/2 and 3/4 of the
 * watchdog period after the last ping, picking the exact point via
 * sleep_between() to coalesce wakeups. */
2430 static int arm_watchdog(sd_event *e) {
2431         struct itimerspec its = {};
2436         assert(e->watchdog_fd >= 0);
2438         t = sleep_between(e,
2439                           e->watchdog_last + (e->watchdog_period / 2),
2440                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2442         timespec_store(&its.it_value, t);
2444         /* Make sure we never set the watchdog to 0, which tells the
2445          * kernel to disable it. */
2446         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2447                 its.it_value.tv_nsec = 1;
2449         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Sends a WATCHDOG=1 keep-alive notification (rate-limited to at most once per
 * quarter period) and rearms the watchdog timer. */
2456 static int process_watchdog(sd_event *e) {
2462         /* Don't notify watchdog too often */
2463         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2466         sd_notify(false, "WATCHDOG=1");
2467         e->watchdog_last = e->timestamp.monotonic;
2469         return arm_watchdog(e);
/* First stage of an event loop iteration: runs prepare callbacks, rearms the
 * per-clock timers, and transitions the loop to SD_EVENT_ARMED. Must be called
 * from SD_EVENT_INITIAL state. If something is already pending, it internally
 * advances via sd_event_wait(e, 0) — confirm tail behavior against the full
 * implementation, as this chunk is elided. */
2472 _public_ int sd_event_prepare(sd_event *e) {
2475         assert_return(e, -EINVAL);
2476         assert_return(e = event_resolve(e), -ENOPKG);
2477         assert_return(!event_pid_changed(e), -ECHILD);
2478         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2479         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2481         if (e->exit_requested)
             /* Run prepare callbacks with the state marking us as preparing. */
2486         e->state = SD_EVENT_PREPARING;
2487         r = event_prepare(e);
2488         e->state = SD_EVENT_INITIAL;
             /* Rearm every clock's timerfd as needed. */
2492         r = event_arm_timer(e, &e->realtime);
2496         r = event_arm_timer(e, &e->boottime);
2500         r = event_arm_timer(e, &e->monotonic);
2504         r = event_arm_timer(e, &e->realtime_alarm);
2508         r = event_arm_timer(e, &e->boottime_alarm);
2512         if (event_next_pending(e) || e->need_process_child)
2515         e->state = SD_EVENT_ARMED;
2520         e->state = SD_EVENT_ARMED;
2521         r = sd_event_wait(e, 0);
2523                 e->state = SD_EVENT_ARMED;
/* Second stage of an iteration: epoll_wait()s for events (up to `timeout` usec,
 * (uint64_t) -1 meaning infinite), routes each wakeup by its WakeupType tag,
 * then processes the watchdog, expired timers and child state changes. Ends in
 * SD_EVENT_PENDING if something is dispatchable, otherwise SD_EVENT_INITIAL.
 * Must be called from SD_EVENT_ARMED state. */
2528 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2529         struct epoll_event *ev_queue;
2530         unsigned ev_queue_max;
2533         assert_return(e, -EINVAL);
2534         assert_return(e = event_resolve(e), -ENOPKG);
2535         assert_return(!event_pid_changed(e), -ECHILD);
2536         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2537         assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2539         if (e->exit_requested) {
2540                 e->state = SD_EVENT_PENDING;
             /* Stack-allocated event buffer, one slot per source (at least 1). */
2544         ev_queue_max = MAX(e->n_sources, 1u);
2545         ev_queue = newa(struct epoll_event, ev_queue_max);
             /* Round the usec timeout up to whole milliseconds for epoll. */
2547         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2548                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2550                 if (errno == EINTR) {
2551                         e->state = SD_EVENT_PENDING;
             /* Cache "now" on all clocks once for this iteration. */
2559         triple_timestamp_get(&e->timestamp);
2561         for (i = 0; i < m; i++) {
                     /* The watchdog fd is tagged with a plain int, not a WakeupType. */
2563                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2564                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                     /* All other epoll payloads start with a WakeupType tag. */
2566                         WakeupType *t = ev_queue[i].data.ptr;
2570                         case WAKEUP_EVENT_SOURCE:
2571                                 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2574                         case WAKEUP_CLOCK_DATA: {
2575                                 struct clock_data *d = ev_queue[i].data.ptr;
2576                                 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2580                         case WAKEUP_SIGNAL_DATA:
2581                                 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2585                                 assert_not_reached("Invalid wake-up pointer");
2592         r = process_watchdog(e);
             /* Mark pending any time sources that expired on each clock. */
2596         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2600         r = process_timer(e, e->timestamp.boottime, &e->boottime);
2604         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2608         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2612         r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2616         if (e->need_process_child) {
2617                 r = process_child(e);
2622         if (event_next_pending(e)) {
2623                 e->state = SD_EVENT_PENDING;
2631         e->state = SD_EVENT_INITIAL;
/* Third stage of an iteration: dispatches the next pending event source (or the
 * exit sources if an exit was requested). Must be called from SD_EVENT_PENDING
 * state; returns the loop to SD_EVENT_INITIAL afterwards. */
2636 _public_ int sd_event_dispatch(sd_event *e) {
2640         assert_return(e, -EINVAL);
2641         assert_return(e = event_resolve(e), -ENOPKG);
2642         assert_return(!event_pid_changed(e), -ECHILD);
2643         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2644         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2646         if (e->exit_requested)
2647                 return dispatch_exit(e);
2649         p = event_next_pending(e);
2653                 e->state = SD_EVENT_RUNNING;
2654                 r = source_dispatch(p);
2655                 e->state = SD_EVENT_INITIAL;
2662         e->state = SD_EVENT_INITIAL;
/* Logs the per-bucket iteration-delay histogram (e->delays) as a single
 * space-separated debug line. Only used when delay profiling is enabled. */
2667 static void event_log_delays(sd_event *e) {
2668         char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2672         for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2673                 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2676         log_debug("Event loop iterations: %.*s", o, b);
/* Runs a single event loop iteration: prepare → wait (up to `timeout`) →
 * dispatch. When profiling is enabled (e->profile_delays), records the
 * log2-bucketed delay since the previous run and dumps the histogram every
 * five seconds. Must be called from SD_EVENT_INITIAL state. */
2679 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2682         assert_return(e, -EINVAL);
2683         assert_return(e = event_resolve(e), -ENOPKG);
2684         assert_return(!event_pid_changed(e), -ECHILD);
2685         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2686         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2688         if (e->profile_delays && e->last_run) {
2692                 this_run = now(CLOCK_MONOTONIC);
                     /* Bucket the inter-run delay by its log2. */
2694                 l = u64log2(this_run - e->last_run);
2695                 assert(l < sizeof(e->delays));
                     /* Dump the histogram at most every 5 seconds. */
2698                 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2699                         event_log_delays(e);
2700                         e->last_log = this_run;
2704         r = sd_event_prepare(e);
2706                 /* There was nothing? Then wait... */
2707                 r = sd_event_wait(e, timeout);
2709         if (e->profile_delays)
2710                 e->last_run = now(CLOCK_MONOTONIC);
2713                 /* There's something now, then let's dispatch it */
2714                 r = sd_event_dispatch(e);
/* Runs the event loop until it reaches SD_EVENT_FINISHED, iterating with an
 * infinite per-iteration timeout. */
2724 _public_ int sd_event_loop(sd_event *e) {
2727         assert_return(e, -EINVAL);
2728         assert_return(e = event_resolve(e), -ENOPKG);
2729         assert_return(!event_pid_changed(e), -ECHILD);
2730         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2734         while (e->state != SD_EVENT_FINISHED) {
2735                 r = sd_event_run(e, (uint64_t) -1);
/* Returns the epoll file descriptor backing the event loop, suitable for
 * embedding into an external poll loop. */
2747 _public_ int sd_event_get_fd(sd_event *e) {
2749         assert_return(e, -EINVAL);
2750         assert_return(e = event_resolve(e), -ENOPKG);
2751         assert_return(!event_pid_changed(e), -ECHILD);
/* Returns the loop's current state (SD_EVENT_INITIAL, _ARMED, _PENDING, ...). */
2756 _public_ int sd_event_get_state(sd_event *e) {
2757         assert_return(e, -EINVAL);
2758         assert_return(e = event_resolve(e), -ENOPKG);
2759         assert_return(!event_pid_changed(e), -ECHILD);
/* Returns in *code the exit code passed to sd_event_exit(); fails if no exit
 * has been requested yet. */
2764 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2765         assert_return(e, -EINVAL);
2766         assert_return(e = event_resolve(e), -ENOPKG);
2767         assert_return(code, -EINVAL);
2768         assert_return(!event_pid_changed(e), -ECHILD);
2770         if (!e->exit_requested)
2773         *code = e->exit_code;
/* Requests loop termination with the given exit code; the exit sources will be
 * dispatched on the next iteration before the loop finishes. */
2777 _public_ int sd_event_exit(sd_event *e, int code) {
2778         assert_return(e, -EINVAL);
2779         assert_return(e = event_resolve(e), -ENOPKG);
2780         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2781         assert_return(!event_pid_changed(e), -ECHILD);
2783         e->exit_requested = true;
2784         e->exit_code = code;
/* Returns in *usec the loop's cached timestamp for the given clock (taken once
 * per iteration), falling back to the live clock if the loop never ran. */
2789 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2790         assert_return(e, -EINVAL);
2791         assert_return(e = event_resolve(e), -ENOPKG);
2792         assert_return(usec, -EINVAL);
2793         assert_return(!event_pid_changed(e), -ECHILD);
2795         if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2798         /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
2799          * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
2800          * the purpose of getting the time this doesn't matter. */
2801         if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2804         if (!triple_timestamp_is_set(&e->timestamp)) {
2805                 /* Implicitly fall back to now() if we never ran
2806                  * before and thus have no cached time. */
2811         *usec = triple_timestamp_by_clock(&e->timestamp, clock);
/* Returns the per-thread default event loop, creating it on first use. With a
 * NULL ret it only reports whether a default loop already exists. */
2815 _public_ int sd_event_default(sd_event **ret) {
2820                 return !!default_event;
2822         if (default_event) {
2823                 *ret = sd_event_ref(default_event);
2827         r = sd_event_new(&e);
             /* Remember where to clear the default pointer on destruction. */
2831         e->default_event_ptr = &default_event;
/* Returns in *tid the thread ID the event loop is bound to, if any. */
2839 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2840         assert_return(e, -EINVAL);
2841         assert_return(e = event_resolve(e), -ENOPKG);
2842         assert_return(tid, -EINVAL);
2843         assert_return(!event_pid_changed(e), -ECHILD);
/* Enables or disables service-manager watchdog integration. When enabling and
 * $WATCHDOG_USEC is set (queried via sd_watchdog_enabled()), an immediate
 * WATCHDOG=1 ping is sent, a CLOCK_MONOTONIC timerfd is created, armed and
 * registered with epoll (tagged INT_TO_PTR(SOURCE_WATCHDOG)). When disabling,
 * the fd is removed from epoll and closed. */
2853 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2856         assert_return(e, -EINVAL);
2857         assert_return(e = event_resolve(e), -ENOPKG);
2858         assert_return(!event_pid_changed(e), -ECHILD);
2860         if (e->watchdog == !!b)
2864                 struct epoll_event ev = {};
                     /* false: query only — do not reset the watchdog env var. */
2866                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2870                 /* Issue first ping immediately */
2871                 sd_notify(false, "WATCHDOG=1");
2872                 e->watchdog_last = now(CLOCK_MONOTONIC);
2874                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2875                 if (e->watchdog_fd < 0)
2878                 r = arm_watchdog(e);
2882                 ev.events = EPOLLIN;
2883                 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2885                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
             /* Disable path / error unwind: tear the fd down again. */
2892                 if (e->watchdog_fd >= 0) {
2893                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2894                         e->watchdog_fd = safe_close(e->watchdog_fd);
2902         e->watchdog_fd = safe_close(e->watchdog_fd);
/* Returns whether watchdog integration is currently enabled for this loop. */
2906 _public_ int sd_event_get_watchdog(sd_event *e) {
2907         assert_return(e, -EINVAL);
2908         assert_return(e = event_resolve(e), -ENOPKG);
2909         assert_return(!event_pid_changed(e), -ECHILD);
2914 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2915 assert_return(e, -EINVAL);
2916 assert_return(e = event_resolve(e), -ENOPKG);
2917 assert_return(!event_pid_changed(e), -ECHILD);
2919 *ret = e->iteration;