1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Lennart Poettering
9 #include <sys/timerfd.h>
12 #include "sd-daemon.h"
16 #include "alloc-util.h"
23 #include "process-util.h"
25 #include "signal-util.h"
26 #include "string-table.h"
27 #include "string-util.h"
28 #include "time-util.h"
31 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
33 typedef enum EventSourceType {
37 SOURCE_TIME_MONOTONIC,
38 SOURCE_TIME_REALTIME_ALARM,
39 SOURCE_TIME_BOOTTIME_ALARM,
46 _SOURCE_EVENT_SOURCE_TYPE_MAX,
47 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
50 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
52 [SOURCE_TIME_REALTIME] = "realtime",
53 [SOURCE_TIME_BOOTTIME] = "boottime",
54 [SOURCE_TIME_MONOTONIC] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
57 [SOURCE_SIGNAL] = "signal",
58 [SOURCE_CHILD] = "child",
59 [SOURCE_DEFER] = "defer",
60 [SOURCE_POST] = "post",
61 [SOURCE_EXIT] = "exit",
62 [SOURCE_WATCHDOG] = "watchdog",
65 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
67 /* All objects we use in epoll events start with this value, so that
68 * we know how to dispatch it */
69 typedef enum WakeupType {
75 _WAKEUP_TYPE_INVALID = -1,
78 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
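/* A minimal sketch of how that dispatching works (illustrative, not part of the
 * original source): sd_event_source, struct clock_data and struct signal_data all
 * begin with a WakeupType field, and it is the address of such an object that we
 * store in epoll_event.data.ptr, so a single dereference is enough to tell what
 * kind of object woke us up:
 *
 *     WakeupType *t = ev_queue[i].data.ptr;
 *     if (*t == WAKEUP_EVENT_SOURCE)
 *             r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
 *
 * The full switch over all WakeupType values lives in sd_event_wait() below. */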
80 struct sd_event_source {
87 sd_event_handler_t prepare;
91 EventSourceType type:5;
98 unsigned pending_index;
99 unsigned prepare_index;
100 uint64_t pending_iteration;
101 uint64_t prepare_iteration;
103 LIST_FIELDS(sd_event_source, sources);
107 sd_event_io_handler_t callback;
115 sd_event_time_handler_t callback;
116 usec_t next, accuracy;
117 unsigned earliest_index;
118 unsigned latest_index;
121 sd_event_signal_handler_t callback;
122 struct signalfd_siginfo siginfo;
126 sd_event_child_handler_t callback;
132 sd_event_handler_t callback;
135 sd_event_handler_t callback;
138 sd_event_handler_t callback;
139 unsigned prioq_index;
148 /* For all clocks we maintain two priority queues each, one
149 * ordered by the earliest times the events may be
150 * dispatched, and one ordered by the latest times they must
151 * have been dispatched. The range between the top entries in
152 * the two prioqs is the time window we can freely schedule
165 /* For each priority we maintain one signal fd, so that we
166 * only have to dequeue a single event per priority at a
172 sd_event_source *current;
184 /* timerfd_create() only supports these five clocks so far. We
185 * can add support for more clocks when the kernel learns to
186 * deal with them, too. */
187 struct clock_data realtime;
188 struct clock_data boottime;
189 struct clock_data monotonic;
190 struct clock_data realtime_alarm;
191 struct clock_data boottime_alarm;
195 sd_event_source **signal_sources; /* indexed by signal number */
196 Hashmap *signal_data; /* indexed by priority */
198 Hashmap *child_sources;
199 unsigned n_enabled_child_sources;
208 triple_timestamp timestamp;
211 bool exit_requested:1;
212 bool need_process_child:1;
214 bool profile_delays:1;
219 sd_event **default_event_ptr;
221 usec_t watchdog_last, watchdog_period;
225 LIST_HEAD(sd_event_source, sources);
227 usec_t last_run, last_log;
228 unsigned delays[sizeof(usec_t) * 8];
231 static thread_local sd_event *default_event = NULL;
233 static void source_disconnect(sd_event_source *s);
235 static sd_event *event_resolve(sd_event *e) {
236 return e == SD_EVENT_DEFAULT ? default_event : e;
239 static int pending_prioq_compare(const void *a, const void *b) {
240 const sd_event_source *x = a, *y = b;
245 /* Enabled ones first */
246 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
248 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
251 /* Lower priority values first */
252 if (x->priority < y->priority)
254 if (x->priority > y->priority)
257 /* Older entries first */
258 if (x->pending_iteration < y->pending_iteration)
260 if (x->pending_iteration > y->pending_iteration)
266 static int prepare_prioq_compare(const void *a, const void *b) {
267 const sd_event_source *x = a, *y = b;
272 /* Enabled ones first */
273 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
275 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
278 /* Move most recently prepared ones last, so that we can stop
279 * preparing as soon as we hit one that has already been
280 * prepared in the current iteration */
281 if (x->prepare_iteration < y->prepare_iteration)
283 if (x->prepare_iteration > y->prepare_iteration)
286 /* Lower priority values first */
287 if (x->priority < y->priority)
289 if (x->priority > y->priority)
295 static int earliest_time_prioq_compare(const void *a, const void *b) {
296 const sd_event_source *x = a, *y = b;
298 assert(EVENT_SOURCE_IS_TIME(x->type));
299 assert(x->type == y->type);
301 /* Enabled ones first */
302 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307 /* Move the pending ones to the end */
308 if (!x->pending && y->pending)
310 if (x->pending && !y->pending)
314 if (x->time.next < y->time.next)
316 if (x->time.next > y->time.next)
322 static usec_t time_event_source_latest(const sd_event_source *s) {
323 return usec_add(s->time.next, s->time.accuracy);
326 static int latest_time_prioq_compare(const void *a, const void *b) {
327 const sd_event_source *x = a, *y = b;
329 assert(EVENT_SOURCE_IS_TIME(x->type));
330 assert(x->type == y->type);
332 /* Enabled ones first */
333 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
335 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
338 /* Move the pending ones to the end */
339 if (!x->pending && y->pending)
341 if (x->pending && !y->pending)
345 if (time_event_source_latest(x) < time_event_source_latest(y))
347 if (time_event_source_latest(x) > time_event_source_latest(y))
353 static int exit_prioq_compare(const void *a, const void *b) {
354 const sd_event_source *x = a, *y = b;
356 assert(x->type == SOURCE_EXIT);
357 assert(y->type == SOURCE_EXIT);
359 /* Enabled ones first */
360 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
362 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
365 /* Lower priority values first */
366 if (x->priority < y->priority)
368 if (x->priority > y->priority)
374 static void free_clock_data(struct clock_data *d) {
376 assert(d->wakeup == WAKEUP_CLOCK_DATA);
379 prioq_free(d->earliest);
380 prioq_free(d->latest);
383 static void event_free(sd_event *e) {
388 while ((s = e->sources)) {
390 source_disconnect(s);
391 sd_event_source_unref(s);
394 assert(e->n_sources == 0);
396 if (e->default_event_ptr)
397 *(e->default_event_ptr) = NULL;
399 safe_close(e->epoll_fd);
400 safe_close(e->watchdog_fd);
402 free_clock_data(&e->realtime);
403 free_clock_data(&e->boottime);
404 free_clock_data(&e->monotonic);
405 free_clock_data(&e->realtime_alarm);
406 free_clock_data(&e->boottime_alarm);
408 prioq_free(e->pending);
409 prioq_free(e->prepare);
412 free(e->signal_sources);
413 hashmap_free(e->signal_data);
415 hashmap_free(e->child_sources);
416 set_free(e->post_sources);
420 _public_ int sd_event_new(sd_event** ret) {
424 assert_return(ret, -EINVAL);
426 e = new0(sd_event, 1);
431 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
432 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
433 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
434 e->original_pid = getpid_cached();
435 e->perturb = USEC_INFINITY;
437 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
441 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
442 if (e->epoll_fd < 0) {
447 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
449 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
450 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
451 e->profile_delays = true;
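/* Illustrative note (not part of the original source): this profiling is toggled
 * purely via the environment, e.g. by starting the process with
 * SD_EVENT_PROFILE_DELAYS=1 set; the histogram itself is emitted by
 * event_log_delays(), called from sd_event_run() below roughly every 5s. */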
462 _public_ sd_event* sd_event_ref(sd_event *e) {
467 assert(e->n_ref >= 1);
473 _public_ sd_event* sd_event_unref(sd_event *e) {
478 assert(e->n_ref >= 1);
487 static bool event_pid_changed(sd_event *e) {
490 /* We don't support people creating an event loop and keeping
491 * it around over a fork(). Let's complain. */
493 return e->original_pid != getpid_cached();
496 static void source_io_unregister(sd_event_source *s) {
500 assert(s->type == SOURCE_IO);
502 if (event_pid_changed(s->event))
505 if (!s->io.registered)
508 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
510 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
511 strna(s->description), event_source_type_to_string(s->type));
513 s->io.registered = false;
516 static int source_io_register(
521 struct epoll_event ev;
525 assert(s->type == SOURCE_IO);
526 assert(enabled != SD_EVENT_OFF);
528 ev = (struct epoll_event) {
529 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
533 if (s->io.registered)
534 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
536 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
540 s->io.registered = true;
545 static clockid_t event_source_type_to_clock(EventSourceType t) {
549 case SOURCE_TIME_REALTIME:
550 return CLOCK_REALTIME;
552 case SOURCE_TIME_BOOTTIME:
553 return CLOCK_BOOTTIME;
555 case SOURCE_TIME_MONOTONIC:
556 return CLOCK_MONOTONIC;
558 case SOURCE_TIME_REALTIME_ALARM:
559 return CLOCK_REALTIME_ALARM;
561 case SOURCE_TIME_BOOTTIME_ALARM:
562 return CLOCK_BOOTTIME_ALARM;
565 return (clockid_t) -1;
569 static EventSourceType clock_to_event_source_type(clockid_t clock) {
574 return SOURCE_TIME_REALTIME;
577 return SOURCE_TIME_BOOTTIME;
579 case CLOCK_MONOTONIC:
580 return SOURCE_TIME_MONOTONIC;
582 case CLOCK_REALTIME_ALARM:
583 return SOURCE_TIME_REALTIME_ALARM;
585 case CLOCK_BOOTTIME_ALARM:
586 return SOURCE_TIME_BOOTTIME_ALARM;
589 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
593 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
598 case SOURCE_TIME_REALTIME:
601 case SOURCE_TIME_BOOTTIME:
604 case SOURCE_TIME_MONOTONIC:
605 return &e->monotonic;
607 case SOURCE_TIME_REALTIME_ALARM:
608 return &e->realtime_alarm;
610 case SOURCE_TIME_BOOTTIME_ALARM:
611 return &e->boottime_alarm;
618 static int event_make_signal_data(
621 struct signal_data **ret) {
623 struct epoll_event ev;
624 struct signal_data *d;
632 if (event_pid_changed(e))
635 if (e->signal_sources && e->signal_sources[sig])
636 priority = e->signal_sources[sig]->priority;
638 priority = SD_EVENT_PRIORITY_NORMAL;
640 d = hashmap_get(e->signal_data, &priority);
642 if (sigismember(&d->sigset, sig) > 0) {
648 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
652 d = new0(struct signal_data, 1);
656 d->wakeup = WAKEUP_SIGNAL_DATA;
658 d->priority = priority;
660 r = hashmap_put(e->signal_data, &d->priority, d);
670 assert_se(sigaddset(&ss_copy, sig) >= 0);
672 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
686 d->fd = fd_move_above_stdio(r);
688 ev = (struct epoll_event) {
693 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
706 d->fd = safe_close(d->fd);
707 hashmap_remove(e->signal_data, &d->priority);
714 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
718 /* Turns off the specified signal in the signal data
719 * object. If the signal mask of the object becomes empty that
722 if (sigismember(&d->sigset, sig) == 0)
725 assert_se(sigdelset(&d->sigset, sig) >= 0);
727 if (sigisemptyset(&d->sigset)) {
729 /* If the mask is now all-zero we can get rid of the structure */
730 hashmap_remove(e->signal_data, &d->priority);
738 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
739 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
742 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
743 struct signal_data *d;
744 static const int64_t zero_priority = 0;
748 /* Rechecks if the specified signal is still something we are
749 * interested in. If not, we'll unmask it, and possibly drop
750 * the signalfd for it. */
752 if (sig == SIGCHLD &&
753 e->n_enabled_child_sources > 0)
756 if (e->signal_sources &&
757 e->signal_sources[sig] &&
758 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
762 * The specified signal might be enabled in three different queues:
764 * 1) the one that belongs to the priority passed (if it is non-NULL)
765 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
766 * 3) the 0 priority (to cover the SIGCHLD case)
768 * Hence, let's remove it from all three here.
772 d = hashmap_get(e->signal_data, priority);
774 event_unmask_signal_data(e, d, sig);
777 if (e->signal_sources && e->signal_sources[sig]) {
778 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
780 event_unmask_signal_data(e, d, sig);
783 d = hashmap_get(e->signal_data, &zero_priority);
785 event_unmask_signal_data(e, d, sig);
788 static void source_disconnect(sd_event_source *s) {
796 assert(s->event->n_sources > 0);
802 source_io_unregister(s);
806 case SOURCE_TIME_REALTIME:
807 case SOURCE_TIME_BOOTTIME:
808 case SOURCE_TIME_MONOTONIC:
809 case SOURCE_TIME_REALTIME_ALARM:
810 case SOURCE_TIME_BOOTTIME_ALARM: {
811 struct clock_data *d;
813 d = event_get_clock_data(s->event, s->type);
816 prioq_remove(d->earliest, s, &s->time.earliest_index);
817 prioq_remove(d->latest, s, &s->time.latest_index);
818 d->needs_rearm = true;
823 if (s->signal.sig > 0) {
825 if (s->event->signal_sources)
826 s->event->signal_sources[s->signal.sig] = NULL;
828 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
834 if (s->child.pid > 0) {
835 if (s->enabled != SD_EVENT_OFF) {
836 assert(s->event->n_enabled_child_sources > 0);
837 s->event->n_enabled_child_sources--;
840 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
841 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
851 set_remove(s->event->post_sources, s);
855 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
859 assert_not_reached("Wut? I shouldn't exist.");
863 prioq_remove(s->event->pending, s, &s->pending_index);
866 prioq_remove(s->event->prepare, s, &s->prepare_index);
870 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
872 LIST_REMOVE(sources, event->sources, s);
876 sd_event_unref(event);
879 static void source_free(sd_event_source *s) {
882 source_disconnect(s);
884 if (s->type == SOURCE_IO && s->io.owned)
885 safe_close(s->io.fd);
887 free(s->description);
891 static int source_set_pending(sd_event_source *s, bool b) {
895 assert(s->type != SOURCE_EXIT);
903 s->pending_iteration = s->event->iteration;
905 r = prioq_put(s->event->pending, s, &s->pending_index);
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
916 d = event_get_clock_data(s->event, s->type);
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
935 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
940 s = new0(sd_event_source, 1);
946 s->floating = floating;
948 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
953 LIST_PREPEND(sources, e->sources, s);
959 _public_ int sd_event_add_io(
961 sd_event_source **ret,
964 sd_event_io_handler_t callback,
970 assert_return(e, -EINVAL);
971 assert_return(e = event_resolve(e), -ENOPKG);
972 assert_return(fd >= 0, -EBADF);
973 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
974 assert_return(callback, -EINVAL);
975 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
976 assert_return(!event_pid_changed(e), -ECHILD);
978 s = source_new(e, !ret, SOURCE_IO);
982 s->wakeup = WAKEUP_EVENT_SOURCE;
984 s->io.events = events;
985 s->io.callback = callback;
986 s->userdata = userdata;
987 s->enabled = SD_EVENT_ON;
989 r = source_io_register(s, s->enabled, events);
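/* Illustrative usage sketch (not part of the original source; "my_fd" and
 * "on_readable" are hypothetical names):
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             if (n < 0)
 *                     return errno == EAGAIN ? 0 : -errno;
 *             return 0;
 *     }
 *
 *     sd_event_source *src = NULL;
 *     r = sd_event_add_io(event, &src, my_fd, EPOLLIN, on_readable, NULL);
 *     if (r < 0)
 *             return r;
 *
 * As set up above, IO sources start out enabled with SD_EVENT_ON; passing a NULL
 * "ret" pointer creates a "floating" source (see source_new() above). */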
1001 static void initialize_perturb(sd_event *e) {
1002 sd_id128_t bootid = {};
1004 /* When we sleep for longer, we try to realign the wakeup to
1005 the same time within each minute/second/250ms, so that
1006 events all across the system can be coalesced into a single
1007 CPU wakeup. However, let's take some system-specific
1008 randomness for this value, so that in a network of systems
1009 with synced clocks timer events are distributed a
1010 bit. Here, we calculate a perturbation usec offset from the
1013 if (_likely_(e->perturb != USEC_INFINITY))
1016 if (sd_id128_get_boot(&bootid) >= 0)
1017 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1020 static int event_setup_timer_fd(
1022 struct clock_data *d,
1025 struct epoll_event ev;
1031 if (_likely_(d->fd >= 0))
1034 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1038 fd = fd_move_above_stdio(fd);
1040 ev = (struct epoll_event) {
1045 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1055 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1058 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1061 _public_ int sd_event_add_time(
1063 sd_event_source **ret,
1067 sd_event_time_handler_t callback,
1070 EventSourceType type;
1072 struct clock_data *d;
1075 assert_return(e, -EINVAL);
1076 assert_return(e = event_resolve(e), -ENOPKG);
1077 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1078 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1079 assert_return(!event_pid_changed(e), -ECHILD);
1081 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1084 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1089 callback = time_exit_callback;
1091 d = event_get_clock_data(e, type);
1094 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1098 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1103 r = event_setup_timer_fd(e, d, clock);
1108 s = source_new(e, !ret, type);
1112 s->time.next = usec;
1113 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1114 s->time.callback = callback;
1115 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1116 s->userdata = userdata;
1117 s->enabled = SD_EVENT_ONESHOT;
1119 d->needs_rearm = true;
1121 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1125 r = prioq_put(d->latest, s, &s->time.latest_index);
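/* Illustrative usage sketch (not part of the original source; "on_timer" is a
 * hypothetical handler): arm a timer 5s from now on CLOCK_MONOTONIC with 100ms
 * accuracy:
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(event, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_time(event, NULL, CLOCK_MONOTONIC,
 *                           now_usec + 5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                           on_timer, NULL);
 *     if (r < 0)
 *             return r;
 *
 * An accuracy of 0 selects DEFAULT_ACCURACY_USEC (250ms), and time sources start
 * out as SD_EVENT_ONESHOT, both as set up above. */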
1139 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1142 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1145 _public_ int sd_event_add_signal(
1147 sd_event_source **ret,
1149 sd_event_signal_handler_t callback,
1153 struct signal_data *d;
1157 assert_return(e, -EINVAL);
1158 assert_return(e = event_resolve(e), -ENOPKG);
1159 assert_return(SIGNAL_VALID(sig), -EINVAL);
1160 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1161 assert_return(!event_pid_changed(e), -ECHILD);
1164 callback = signal_exit_callback;
1166 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1170 if (!sigismember(&ss, sig))
1173 if (!e->signal_sources) {
1174 e->signal_sources = new0(sd_event_source*, _NSIG);
1175 if (!e->signal_sources)
1177 } else if (e->signal_sources[sig])
1180 s = source_new(e, !ret, SOURCE_SIGNAL);
1184 s->signal.sig = sig;
1185 s->signal.callback = callback;
1186 s->userdata = userdata;
1187 s->enabled = SD_EVENT_ON;
1189 e->signal_sources[sig] = s;
1191 r = event_make_signal_data(e, sig, &d);
1197 /* Use the signal name as description for the event source by default */
1198 (void) sd_event_source_set_description(s, signal_to_string(sig));
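/* Illustrative usage sketch (not part of the original source): the signal must
 * already be blocked in the calling thread, as checked via pthread_sigmask() and
 * sigismember() above; with a NULL callback, signal_exit_callback() above is
 * used, i.e. the loop exits with PTR_TO_INT(userdata) as exit code:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     int k = pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *     if (k != 0)
 *             return -k;
 *
 *     r = sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
 *     if (r < 0)
 *             return r;
 */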
1206 _public_ int sd_event_add_child(
1208 sd_event_source **ret,
1211 sd_event_child_handler_t callback,
1217 assert_return(e, -EINVAL);
1218 assert_return(e = event_resolve(e), -ENOPKG);
1219 assert_return(pid > 1, -EINVAL);
1220 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1221 assert_return(options != 0, -EINVAL);
1222 assert_return(callback, -EINVAL);
1223 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1224 assert_return(!event_pid_changed(e), -ECHILD);
1226 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1230 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1233 s = source_new(e, !ret, SOURCE_CHILD);
1238 s->child.options = options;
1239 s->child.callback = callback;
1240 s->userdata = userdata;
1241 s->enabled = SD_EVENT_ONESHOT;
1243 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1249 e->n_enabled_child_sources++;
1251 r = event_make_signal_data(e, SIGCHLD, NULL);
1253 e->n_enabled_child_sources--;
1258 e->need_process_child = true;
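/* Illustrative usage sketch (not part of the original source; "child_pid" and
 * "on_child_exit" are hypothetical). As with signal sources, the caller is
 * expected to have SIGCHLD blocked, since the state changes are picked up via the
 * signalfd set up through event_make_signal_data() above:
 *
 *     static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), si->si_status);
 *     }
 *
 *     r = sd_event_add_child(event, NULL, child_pid, WEXITED, on_child_exit, NULL);
 *     if (r < 0)
 *             return r;
 *
 * Child sources start out as SD_EVENT_ONESHOT, and the child is reaped only after
 * the callback has run (see the WNOWAIT handling in process_child() and
 * source_dispatch() below). */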
1266 _public_ int sd_event_add_defer(
1268 sd_event_source **ret,
1269 sd_event_handler_t callback,
1275 assert_return(e, -EINVAL);
1276 assert_return(e = event_resolve(e), -ENOPKG);
1277 assert_return(callback, -EINVAL);
1278 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1279 assert_return(!event_pid_changed(e), -ECHILD);
1281 s = source_new(e, !ret, SOURCE_DEFER);
1285 s->defer.callback = callback;
1286 s->userdata = userdata;
1287 s->enabled = SD_EVENT_ONESHOT;
1289 r = source_set_pending(s, true);
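/* Illustrative usage sketch (not part of the original source; "do_later" is a
 * hypothetical handler): defer sources are marked pending immediately (see
 * source_set_pending() above), so with the default SD_EVENT_ONESHOT they run
 * exactly once, on the next loop iteration:
 *
 *     static int do_later(sd_event_source *s, void *userdata) {
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(event, NULL, do_later, NULL);
 *     if (r < 0)
 *             return r;
 */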
1301 _public_ int sd_event_add_post(
1303 sd_event_source **ret,
1304 sd_event_handler_t callback,
1310 assert_return(e, -EINVAL);
1311 assert_return(e = event_resolve(e), -ENOPKG);
1312 assert_return(callback, -EINVAL);
1313 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1314 assert_return(!event_pid_changed(e), -ECHILD);
1316 r = set_ensure_allocated(&e->post_sources, NULL);
1320 s = source_new(e, !ret, SOURCE_POST);
1324 s->post.callback = callback;
1325 s->userdata = userdata;
1326 s->enabled = SD_EVENT_ON;
1328 r = set_put(e->post_sources, s);
1340 _public_ int sd_event_add_exit(
1342 sd_event_source **ret,
1343 sd_event_handler_t callback,
1349 assert_return(e, -EINVAL);
1350 assert_return(e = event_resolve(e), -ENOPKG);
1351 assert_return(callback, -EINVAL);
1352 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1353 assert_return(!event_pid_changed(e), -ECHILD);
1355 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1359 s = source_new(e, !ret, SOURCE_EXIT);
1363 s->exit.callback = callback;
1364 s->userdata = userdata;
1365 s->exit.prioq_index = PRIOQ_IDX_NULL;
1366 s->enabled = SD_EVENT_ONESHOT;
1368 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
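/* Illustrative usage sketch (not part of the original source; "on_shutdown" is a
 * hypothetical handler): exit sources are only dispatched once sd_event_exit()
 * has been requested (see dispatch_exit() below), which makes them a natural
 * place for cleanup work:
 *
 *     static int on_shutdown(sd_event_source *s, void *userdata) {
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(event, NULL, on_shutdown, NULL);
 *     if (r < 0)
 *             return r;
 */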
1380 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1385 assert(s->n_ref >= 1);
1391 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1396 assert(s->n_ref >= 1);
1399 if (s->n_ref <= 0) {
1400 /* Here's a special hack: when we are called from a
1401 * dispatch handler we won't free the event source
1402 * immediately, but we will detach the fd from the
1403 * epoll. This way it is safe for the caller to unref
1404 * the event source and immediately close the fd, but
1405 * we still retain a valid event source object after
1408 if (s->dispatching) {
1409 if (s->type == SOURCE_IO)
1410 source_io_unregister(s);
1412 source_disconnect(s);
1420 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1421 assert_return(s, -EINVAL);
1422 assert_return(!event_pid_changed(s->event), -ECHILD);
1424 return free_and_strdup(&s->description, description);
1427 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1428 assert_return(s, -EINVAL);
1429 assert_return(description, -EINVAL);
1430 assert_return(s->description, -ENXIO);
1431 assert_return(!event_pid_changed(s->event), -ECHILD);
1433 *description = s->description;
1437 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1438 assert_return(s, NULL);
1443 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1444 assert_return(s, -EINVAL);
1445 assert_return(s->type != SOURCE_EXIT, -EDOM);
1446 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1447 assert_return(!event_pid_changed(s->event), -ECHILD);
1452 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1453 assert_return(s, -EINVAL);
1454 assert_return(s->type == SOURCE_IO, -EDOM);
1455 assert_return(!event_pid_changed(s->event), -ECHILD);
1460 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1463 assert_return(s, -EINVAL);
1464 assert_return(fd >= 0, -EBADF);
1465 assert_return(s->type == SOURCE_IO, -EDOM);
1466 assert_return(!event_pid_changed(s->event), -ECHILD);
1471 if (s->enabled == SD_EVENT_OFF) {
1473 s->io.registered = false;
1477 saved_fd = s->io.fd;
1478 assert(s->io.registered);
1481 s->io.registered = false;
1483 r = source_io_register(s, s->enabled, s->io.events);
1485 s->io.fd = saved_fd;
1486 s->io.registered = true;
1490 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1496 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1497 assert_return(s, -EINVAL);
1498 assert_return(s->type == SOURCE_IO, -EDOM);
1503 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1504 assert_return(s, -EINVAL);
1505 assert_return(s->type == SOURCE_IO, -EDOM);
1511 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1512 assert_return(s, -EINVAL);
1513 assert_return(events, -EINVAL);
1514 assert_return(s->type == SOURCE_IO, -EDOM);
1515 assert_return(!event_pid_changed(s->event), -ECHILD);
1517 *events = s->io.events;
1521 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1524 assert_return(s, -EINVAL);
1525 assert_return(s->type == SOURCE_IO, -EDOM);
1526 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1527 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1528 assert_return(!event_pid_changed(s->event), -ECHILD);
1530 /* edge-triggered updates are never skipped, so we can reset edges */
1531 if (s->io.events == events && !(events & EPOLLET))
1534 if (s->enabled != SD_EVENT_OFF) {
1535 r = source_io_register(s, s->enabled, events);
1540 s->io.events = events;
1541 source_set_pending(s, false);
1546 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1547 assert_return(s, -EINVAL);
1548 assert_return(revents, -EINVAL);
1549 assert_return(s->type == SOURCE_IO, -EDOM);
1550 assert_return(s->pending, -ENODATA);
1551 assert_return(!event_pid_changed(s->event), -ECHILD);
1553 *revents = s->io.revents;
1557 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1558 assert_return(s, -EINVAL);
1559 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1560 assert_return(!event_pid_changed(s->event), -ECHILD);
1562 return s->signal.sig;
1565 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1566 assert_return(s, -EINVAL);
1567 assert_return(!event_pid_changed(s->event), -ECHILD);
1569 *priority = s->priority;
1573 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1576 assert_return(s, -EINVAL);
1577 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1578 assert_return(!event_pid_changed(s->event), -ECHILD);
1580 if (s->priority == priority)
1583 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1584 struct signal_data *old, *d;
1586 /* Move us from the signalfd belonging to the old
1587 * priority to the signalfd of the new priority */
1589 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1591 s->priority = priority;
1593 r = event_make_signal_data(s->event, s->signal.sig, &d);
1595 s->priority = old->priority;
1599 event_unmask_signal_data(s->event, old, s->signal.sig);
1601 s->priority = priority;
1604 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1607 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1609 if (s->type == SOURCE_EXIT)
1610 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1615 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1616 assert_return(s, -EINVAL);
1617 assert_return(m, -EINVAL);
1618 assert_return(!event_pid_changed(s->event), -ECHILD);
1624 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1627 assert_return(s, -EINVAL);
1628 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1629 assert_return(!event_pid_changed(s->event), -ECHILD);
1631 /* If we are dead anyway, we are fine with turning off
1632 * sources, but everything else needs to fail. */
1633 if (s->event->state == SD_EVENT_FINISHED)
1634 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1636 if (s->enabled == m)
1639 if (m == SD_EVENT_OFF) {
1641 /* Unset the pending flag when this event source is disabled */
1642 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1643 r = source_set_pending(s, false);
1651 source_io_unregister(s);
1655 case SOURCE_TIME_REALTIME:
1656 case SOURCE_TIME_BOOTTIME:
1657 case SOURCE_TIME_MONOTONIC:
1658 case SOURCE_TIME_REALTIME_ALARM:
1659 case SOURCE_TIME_BOOTTIME_ALARM: {
1660 struct clock_data *d;
1663 d = event_get_clock_data(s->event, s->type);
1666 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1667 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1668 d->needs_rearm = true;
1675 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1681 assert(s->event->n_enabled_child_sources > 0);
1682 s->event->n_enabled_child_sources--;
1684 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1689 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1698 assert_not_reached("Wut? I shouldn't exist.");
1703 /* Unset the pending flag when this event source is enabled */
1704 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1705 r = source_set_pending(s, false);
1713 r = source_io_register(s, m, s->io.events);
1720 case SOURCE_TIME_REALTIME:
1721 case SOURCE_TIME_BOOTTIME:
1722 case SOURCE_TIME_MONOTONIC:
1723 case SOURCE_TIME_REALTIME_ALARM:
1724 case SOURCE_TIME_BOOTTIME_ALARM: {
1725 struct clock_data *d;
1728 d = event_get_clock_data(s->event, s->type);
1731 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1732 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1733 d->needs_rearm = true;
1741 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1743 s->enabled = SD_EVENT_OFF;
1744 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1752 if (s->enabled == SD_EVENT_OFF)
1753 s->event->n_enabled_child_sources++;
1757 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1759 s->enabled = SD_EVENT_OFF;
1760 s->event->n_enabled_child_sources--;
1761 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1769 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1778 assert_not_reached("Wut? I shouldn't exist.");
1783 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1786 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1791 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1792 assert_return(s, -EINVAL);
1793 assert_return(usec, -EINVAL);
1794 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1795 assert_return(!event_pid_changed(s->event), -ECHILD);
1797 *usec = s->time.next;
1801 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1802 struct clock_data *d;
1804 assert_return(s, -EINVAL);
1805 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1806 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1807 assert_return(!event_pid_changed(s->event), -ECHILD);
1809 s->time.next = usec;
1811 source_set_pending(s, false);
1813 d = event_get_clock_data(s->event, s->type);
1816 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1817 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1818 d->needs_rearm = true;
1823 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1824 assert_return(s, -EINVAL);
1825 assert_return(usec, -EINVAL);
1826 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1827 assert_return(!event_pid_changed(s->event), -ECHILD);
1829 *usec = s->time.accuracy;
1833 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1834 struct clock_data *d;
1836 assert_return(s, -EINVAL);
1837 assert_return(usec != (uint64_t) -1, -EINVAL);
1838 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1839 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1840 assert_return(!event_pid_changed(s->event), -ECHILD);
1843 usec = DEFAULT_ACCURACY_USEC;
1845 s->time.accuracy = usec;
1847 source_set_pending(s, false);
1849 d = event_get_clock_data(s->event, s->type);
1852 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1853 d->needs_rearm = true;
1858 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1859 assert_return(s, -EINVAL);
1860 assert_return(clock, -EINVAL);
1861 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1862 assert_return(!event_pid_changed(s->event), -ECHILD);
1864 *clock = event_source_type_to_clock(s->type);
1868 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1869 assert_return(s, -EINVAL);
1870 assert_return(pid, -EINVAL);
1871 assert_return(s->type == SOURCE_CHILD, -EDOM);
1872 assert_return(!event_pid_changed(s->event), -ECHILD);
1874 *pid = s->child.pid;
1878 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1881 assert_return(s, -EINVAL);
1882 assert_return(s->type != SOURCE_EXIT, -EDOM);
1883 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1884 assert_return(!event_pid_changed(s->event), -ECHILD);
1886 if (s->prepare == callback)
1889 if (callback && s->prepare) {
1890 s->prepare = callback;
1894 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1898 s->prepare = callback;
1901 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1905 prioq_remove(s->event->prepare, s, &s->prepare_index);
1910 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1911 assert_return(s, NULL);
1916 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1919 assert_return(s, NULL);
1922 s->userdata = userdata;
1927 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1934 if (a >= USEC_INFINITY)
1935 return USEC_INFINITY;
1940 initialize_perturb(e);
1943 Find a good time to wake up again between times a and b. We
1944 have two goals here:
1946 a) We want to wake up as seldom as possible, hence prefer
1947 later times over earlier times.
1949 b) But if we have to wake up, then let's make sure to
1950 dispatch as much as possible on the entire system.
1952 We implement this by waking up everywhere at the same time
1953 within any given minute if we can, synchronised via the
1954 perturbation value determined from the boot ID. If we can't,
1955 then we try to find the same spot within every 10s, then every 1s
1956 and finally every 250ms step. Otherwise, we pick the last possible time
1960 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1962 if (_unlikely_(c < USEC_PER_MINUTE))
1965 c -= USEC_PER_MINUTE;
1971 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1973 if (_unlikely_(c < USEC_PER_SEC*10))
1976 c -= USEC_PER_SEC*10;
1982 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1984 if (_unlikely_(c < USEC_PER_SEC))
1993 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1995 if (_unlikely_(c < USEC_PER_MSEC*250))
1998 c -= USEC_PER_MSEC*250;
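/* A worked example of the coalescing above (illustrative numbers, not from the
 * original source): say the per-boot perturbation is 11.5s and we may wake up
 * anywhere in [a, b] = [T+41s, T+43s], with T on a full minute. The
 * minute-granularity candidate T+11.5s falls before a and is rejected; the
 * 10s-granularity candidate (b/10s)*10s + (11.5s % 10s) = T+41.5s lies inside the
 * window and is returned, so loops on the same machine tend to pick the same
 * wake-up instants whenever their windows allow it. */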
2007 static int event_arm_timer(
2009 struct clock_data *d) {
2011 struct itimerspec its = {};
2012 sd_event_source *a, *b;
2019 if (!d->needs_rearm)
2022 d->needs_rearm = false;
2024 a = prioq_peek(d->earliest);
2025 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2030 if (d->next == USEC_INFINITY)
2034 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2038 d->next = USEC_INFINITY;
2042 b = prioq_peek(d->latest);
2043 assert_se(b && b->enabled != SD_EVENT_OFF);
2045 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2049 assert_se(d->fd >= 0);
2052 /* We don't want to disarm here, just set some time looooong ago. */
2053 its.it_value.tv_sec = 0;
2054 its.it_value.tv_nsec = 1;
2056 timespec_store(&its.it_value, t);
2058 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2066 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2069 assert(s->type == SOURCE_IO);
2071 /* If the event source was already pending, we just OR in the
2072 * new revents, otherwise we reset the value. The ORing is
2073 * necessary to handle EPOLLONESHOT events properly where
2074 * readability might happen independently of writability, and
2075 * we need to keep track of both */
2078 s->io.revents |= revents;
2080 s->io.revents = revents;
2082 return source_set_pending(s, true);
2085 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2092 assert_return(events == EPOLLIN, -EIO);
2094 ss = read(fd, &x, sizeof(x));
2096 if (IN_SET(errno, EAGAIN, EINTR))
2102 if (_unlikely_(ss != sizeof(x)))
2106 *next = USEC_INFINITY;
2111 static int process_timer(
2114 struct clock_data *d) {
2123 s = prioq_peek(d->earliest);
2126 s->enabled == SD_EVENT_OFF ||
2130 r = source_set_pending(s, true);
2134 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2135 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2136 d->needs_rearm = true;
2142 static int process_child(sd_event *e) {
2149 e->need_process_child = false;
2152 So, this is ugly. We iteratively invoke waitid() with P_PID
2153 + WNOHANG for each PID we wait for, instead of using
2154 P_ALL. This is because we only want to get child
2155 information of very specific child processes, and not all
2156 of them. We might not have processed the SIGCHLD event of a
2157 previous invocation and we don't want to maintain an
2158 unbounded *per-child* event queue, hence we really don't
2159 want anything flushed out of the kernel's queue that we
2160 don't care about. Since this is O(n) this means that if you
2161 have a lot of processes you probably want to handle SIGCHLD
2164 We do not reap the children here (by using WNOWAIT); this
2165 is only done after the event source is dispatched so that
2166 the callback still sees the process as a zombie.
2169 HASHMAP_FOREACH(s, e->child_sources, i) {
2170 assert(s->type == SOURCE_CHILD);
2175 if (s->enabled == SD_EVENT_OFF)
2178 zero(s->child.siginfo);
2179 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2180 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2184 if (s->child.siginfo.si_pid != 0) {
2185 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2187 if (!zombie && (s->child.options & WEXITED)) {
2188 /* If the child isn't dead then let's
2189 * immediately remove the state change
2190 * from the queue, since there's no
2191 * benefit in leaving it queued */
2193 assert(s->child.options & (WSTOPPED|WCONTINUED));
2194 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2197 r = source_set_pending(s, true);
2206 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2207 bool read_one = false;
2211 assert_return(events == EPOLLIN, -EIO);
2213 /* If there's a signal queued on this priority and SIGCHLD is
2214 on this priority too, then make sure to recheck the
2215 children we watch. This is because we only ever dequeue
2216 the first signal per priority: if we dequeue one and
2217 SIGCHLD is enqueued later, we wouldn't notice, but we
2218 might have higher-priority children we care about, hence
2219 we need to check for them explicitly. */
2221 if (sigismember(&d->sigset, SIGCHLD))
2222 e->need_process_child = true;
2224 /* If there's already an event source pending for this
2225 * priority we don't read another */
2230 struct signalfd_siginfo si;
2232 sd_event_source *s = NULL;
2234 n = read(d->fd, &si, sizeof(si));
2236 if (IN_SET(errno, EAGAIN, EINTR))
2242 if (_unlikely_(n != sizeof(si)))
2245 assert(SIGNAL_VALID(si.ssi_signo));
2249 if (e->signal_sources)
2250 s = e->signal_sources[si.ssi_signo];
2256 s->signal.siginfo = si;
2259 r = source_set_pending(s, true);
2267 static int source_dispatch(sd_event_source *s) {
2268 EventSourceType saved_type;
2272 assert(s->pending || s->type == SOURCE_EXIT);
2274 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2276 saved_type = s->type;
2278 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2279 r = source_set_pending(s, false);
2284 if (s->type != SOURCE_POST) {
2288 /* If we execute a non-post source, let's mark all
2289 * post sources as pending */
2291 SET_FOREACH(z, s->event->post_sources, i) {
2292 if (z->enabled == SD_EVENT_OFF)
2295 r = source_set_pending(z, true);
2301 if (s->enabled == SD_EVENT_ONESHOT) {
2302 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2307 s->dispatching = true;
2312 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2315 case SOURCE_TIME_REALTIME:
2316 case SOURCE_TIME_BOOTTIME:
2317 case SOURCE_TIME_MONOTONIC:
2318 case SOURCE_TIME_REALTIME_ALARM:
2319 case SOURCE_TIME_BOOTTIME_ALARM:
2320 r = s->time.callback(s, s->time.next, s->userdata);
2324 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2327 case SOURCE_CHILD: {
2330 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2332 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2334 /* Now, reap the PID for good. */
2336 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2342 r = s->defer.callback(s, s->userdata);
2346 r = s->post.callback(s, s->userdata);
2350 r = s->exit.callback(s, s->userdata);
2353 case SOURCE_WATCHDOG:
2354 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2355 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2356 assert_not_reached("Wut? I shouldn't exist.");
2359 s->dispatching = false;
2362 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2363 strna(s->description), event_source_type_to_string(saved_type));
2368 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2373 static int event_prepare(sd_event *e) {
2381 s = prioq_peek(e->prepare);
2382 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2385 s->prepare_iteration = e->iteration;
2386 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2392 s->dispatching = true;
2393 r = s->prepare(s, s->userdata);
2394 s->dispatching = false;
2397 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2398 strna(s->description), event_source_type_to_string(s->type));
2403 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2409 static int dispatch_exit(sd_event *e) {
2411 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2416 p = prioq_peek(e->exit);
2417 if (!p || p->enabled == SD_EVENT_OFF) {
2418 e->state = SD_EVENT_FINISHED;
2422 ref = sd_event_ref(e);
2424 e->state = SD_EVENT_EXITING;
2425 r = source_dispatch(p);
2426 e->state = SD_EVENT_INITIAL;
2430 static sd_event_source* event_next_pending(sd_event *e) {
2435 p = prioq_peek(e->pending);
2439 if (p->enabled == SD_EVENT_OFF)
2445 static int arm_watchdog(sd_event *e) {
2446 struct itimerspec its = {};
2451 assert(e->watchdog_fd >= 0);
2453 t = sleep_between(e,
2454 e->watchdog_last + (e->watchdog_period / 2),
2455 e->watchdog_last + (e->watchdog_period * 3 / 4));
2457 timespec_store(&its.it_value, t);
2459 /* Make sure we never set the watchdog to 0, which tells the
2460 * kernel to disable it. */
2461 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2462 its.it_value.tv_nsec = 1;
2464 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2471 static int process_watchdog(sd_event *e) {
2477 /* Don't notify watchdog too often */
2478 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2481 sd_notify(false, "WATCHDOG=1");
2482 e->watchdog_last = e->timestamp.monotonic;
2484 return arm_watchdog(e);
2487 _public_ int sd_event_prepare(sd_event *e) {
2490 assert_return(e, -EINVAL);
2491 assert_return(e = event_resolve(e), -ENOPKG);
2492 assert_return(!event_pid_changed(e), -ECHILD);
2493 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2494 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2496 if (e->exit_requested)
2501 e->state = SD_EVENT_PREPARING;
2502 r = event_prepare(e);
2503 e->state = SD_EVENT_INITIAL;
2507 r = event_arm_timer(e, &e->realtime);
2511 r = event_arm_timer(e, &e->boottime);
2515 r = event_arm_timer(e, &e->monotonic);
2519 r = event_arm_timer(e, &e->realtime_alarm);
2523 r = event_arm_timer(e, &e->boottime_alarm);
2527 if (event_next_pending(e) || e->need_process_child)
2530 e->state = SD_EVENT_ARMED;
2535 e->state = SD_EVENT_ARMED;
2536 r = sd_event_wait(e, 0);
2538 e->state = SD_EVENT_ARMED;
2543 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2544 struct epoll_event *ev_queue;
2545 unsigned ev_queue_max;
2548 assert_return(e, -EINVAL);
2549 assert_return(e = event_resolve(e), -ENOPKG);
2550 assert_return(!event_pid_changed(e), -ECHILD);
2551 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2552 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2554 if (e->exit_requested) {
2555 e->state = SD_EVENT_PENDING;
2559 ev_queue_max = MAX(e->n_sources, 1u);
2560 ev_queue = newa(struct epoll_event, ev_queue_max);
2562 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2563 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2565 if (errno == EINTR) {
2566 e->state = SD_EVENT_PENDING;
2574 triple_timestamp_get(&e->timestamp);
2576 for (i = 0; i < m; i++) {
2578 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2579 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2581 WakeupType *t = ev_queue[i].data.ptr;
2585 case WAKEUP_EVENT_SOURCE:
2586 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2589 case WAKEUP_CLOCK_DATA: {
2590 struct clock_data *d = ev_queue[i].data.ptr;
2591 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2595 case WAKEUP_SIGNAL_DATA:
2596 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2600 assert_not_reached("Invalid wake-up pointer");
2607 r = process_watchdog(e);
2611 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2615 r = process_timer(e, e->timestamp.boottime, &e->boottime);
2619 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2623 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2627 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2631 if (e->need_process_child) {
2632 r = process_child(e);
2637 if (event_next_pending(e)) {
2638 e->state = SD_EVENT_PENDING;
2646 e->state = SD_EVENT_INITIAL;
2651 _public_ int sd_event_dispatch(sd_event *e) {
2655 assert_return(e, -EINVAL);
2656 assert_return(e = event_resolve(e), -ENOPKG);
2657 assert_return(!event_pid_changed(e), -ECHILD);
2658 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2659 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2661 if (e->exit_requested)
2662 return dispatch_exit(e);
2664 p = event_next_pending(e);
2666 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2668 ref = sd_event_ref(e);
2669 e->state = SD_EVENT_RUNNING;
2670 r = source_dispatch(p);
2671 e->state = SD_EVENT_INITIAL;
2675 e->state = SD_EVENT_INITIAL;
2680 static void event_log_delays(sd_event *e) {
2681 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2685 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2686 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2689 log_debug("Event loop iterations: %.*s", o, b);
2692 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2695 assert_return(e, -EINVAL);
2696 assert_return(e = event_resolve(e), -ENOPKG);
2697 assert_return(!event_pid_changed(e), -ECHILD);
2698 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2699 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2701 if (e->profile_delays && e->last_run) {
2705 this_run = now(CLOCK_MONOTONIC);
2707 l = u64log2(this_run - e->last_run);
2708 assert(l < sizeof(e->delays));
2711 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2712 event_log_delays(e);
2713 e->last_log = this_run;
2717 r = sd_event_prepare(e);
2719 /* There was nothing? Then wait... */
2720 r = sd_event_wait(e, timeout);
2722 if (e->profile_delays)
2723 e->last_run = now(CLOCK_MONOTONIC);
2726 /* There's something now, so let's dispatch it */
2727 r = sd_event_dispatch(e);
2737 _public_ int sd_event_loop(sd_event *e) {
2738 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2741 assert_return(e, -EINVAL);
2742 assert_return(e = event_resolve(e), -ENOPKG);
2743 assert_return(!event_pid_changed(e), -ECHILD);
2744 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2746 ref = sd_event_ref(e);
2748 while (e->state != SD_EVENT_FINISHED) {
2749 r = sd_event_run(e, (uint64_t) -1);
2754 return e->exit_code;
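/* Illustrative end-to-end sketch (not part of the original source; error paths
 * trimmed): acquire the per-thread default loop, arm a one-shot timer whose NULL
 * callback falls back to time_exit_callback() above, and run until finished:
 *
 *     int run_for_one_second(void) {
 *             sd_event *e = NULL;
 *             uint64_t now_usec;
 *             int r;
 *
 *             r = sd_event_default(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                   now_usec + USEC_PER_SEC, 0, NULL, NULL);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_loop(e);  // returns the code passed to sd_event_exit()
 *             sd_event_unref(e);
 *             return r;
 *     }
 *
 * sd_event_run() above is the single-iteration variant, combining
 * sd_event_prepare(), sd_event_wait() and sd_event_dispatch() for callers that
 * interleave the loop with other work. */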
2757 _public_ int sd_event_get_fd(sd_event *e) {
2759 assert_return(e, -EINVAL);
2760 assert_return(e = event_resolve(e), -ENOPKG);
2761 assert_return(!event_pid_changed(e), -ECHILD);
2766 _public_ int sd_event_get_state(sd_event *e) {
2767 assert_return(e, -EINVAL);
2768 assert_return(e = event_resolve(e), -ENOPKG);
2769 assert_return(!event_pid_changed(e), -ECHILD);
2774 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2775 assert_return(e, -EINVAL);
2776 assert_return(e = event_resolve(e), -ENOPKG);
2777 assert_return(code, -EINVAL);
2778 assert_return(!event_pid_changed(e), -ECHILD);
2780 if (!e->exit_requested)
2783 *code = e->exit_code;
2787 _public_ int sd_event_exit(sd_event *e, int code) {
2788 assert_return(e, -EINVAL);
2789 assert_return(e = event_resolve(e), -ENOPKG);
2790 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2791 assert_return(!event_pid_changed(e), -ECHILD);
2793 e->exit_requested = true;
2794 e->exit_code = code;
2799 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2800 assert_return(e, -EINVAL);
2801 assert_return(e = event_resolve(e), -ENOPKG);
2802 assert_return(usec, -EINVAL);
2803 assert_return(!event_pid_changed(e), -ECHILD);
2805 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2808 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
2809 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not; for
2810 * the purpose of getting the time this doesn't matter. */
2811 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2814 if (!triple_timestamp_is_set(&e->timestamp)) {
2815 /* Implicitly fall back to now() if we never ran
2816 * before and thus have no cached time. */
2821 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
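/* Illustrative note (not part of the original source): from within an event
 * callback this returns the timestamp taken right after epoll_wait() in
 * sd_event_wait() above, not the current time, so repeated calls within one
 * iteration are cheap and mutually consistent:
 *
 *     uint64_t t;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &t);
 *     if (r < 0)
 *             return r;
 *
 * Only before the first iteration, when no timestamp has been cached yet, does it
 * fall back to now(), as handled above. */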
2825 _public_ int sd_event_default(sd_event **ret) {
2830 return !!default_event;
2832 if (default_event) {
2833 *ret = sd_event_ref(default_event);
2837 r = sd_event_new(&e);
2841 e->default_event_ptr = &default_event;
2849 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2850 assert_return(e, -EINVAL);
2851 assert_return(e = event_resolve(e), -ENOPKG);
2852 assert_return(tid, -EINVAL);
2853 assert_return(!event_pid_changed(e), -ECHILD);
2863 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2866 assert_return(e, -EINVAL);
2867 assert_return(e = event_resolve(e), -ENOPKG);
2868 assert_return(!event_pid_changed(e), -ECHILD);
2870 if (e->watchdog == !!b)
2874 struct epoll_event ev;
2876 r = sd_watchdog_enabled(false, &e->watchdog_period);
2880 /* Issue first ping immediately */
2881 sd_notify(false, "WATCHDOG=1");
2882 e->watchdog_last = now(CLOCK_MONOTONIC);
2884 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2885 if (e->watchdog_fd < 0)
2888 r = arm_watchdog(e);
2892 ev = (struct epoll_event) {
2894 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
2897 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2904 if (e->watchdog_fd >= 0) {
2905 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2906 e->watchdog_fd = safe_close(e->watchdog_fd);
2914 e->watchdog_fd = safe_close(e->watchdog_fd);
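/* Illustrative usage sketch (not part of the original source): a service with
 * WatchdogSec= set in its unit file can let the event loop handle the keep-alive
 * pings by itself:
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;
 *
 * The period is queried from the service manager via sd_watchdog_enabled() above,
 * and WATCHDOG=1 notifications are sent from process_watchdog(), with the timer
 * armed by arm_watchdog() to fire between 1/2 and 3/4 of the period after the
 * last ping. */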
2918 _public_ int sd_event_get_watchdog(sd_event *e) {
2919 assert_return(e, -EINVAL);
2920 assert_return(e = event_resolve(e), -ENOPKG);
2921 assert_return(!event_pid_changed(e), -ECHILD);
2926 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2927 assert_return(e, -EINVAL);
2928 assert_return(e = event_resolve(e), -ENOPKG);
2929 assert_return(!event_pid_changed(e), -ECHILD);
2931 *ret = e->iteration;