1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Lennart Poettering
9 #include <sys/timerfd.h>
12 #include "sd-daemon.h"
16 #include "alloc-util.h"
23 #include "process-util.h"
25 #include "signal-util.h"
26 #include "string-table.h"
27 #include "string-util.h"
28 #include "time-util.h"
31 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* Internal classification of an event source. The _MAX/_INVALID pair follows
 * the project-wide enum convention and backs the string table below.
 * NOTE(review): reconstructed from a truncated span — the list of enumerators
 * before SOURCE_TIME_MONOTONIC was cut off; verify order against upstream. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
50 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
52 [SOURCE_TIME_REALTIME] = "realtime",
53 [SOURCE_TIME_BOOTTIME] = "bootime",
54 [SOURCE_TIME_MONOTONIC] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
57 [SOURCE_SIGNAL] = "signal",
58 [SOURCE_CHILD] = "child",
59 [SOURCE_DEFER] = "defer",
60 [SOURCE_POST] = "post",
61 [SOURCE_EXIT] = "exit",
62 [SOURCE_WATCHDOG] = "watchdog",
65 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
67 /* All objects we use in epoll events start with this value, so that
68 * we know how to dispatch it */
/* Discriminator stored as the first member of every object registered with
 * epoll, so the dispatcher can tell what kind of object woke it up.
 * NOTE(review): reconstructed from a truncated span — enumerator list was
 * elided; verify against upstream. */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_WATCHDOG,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
78 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
80 struct sd_event_source {
87 sd_event_handler_t prepare;
91 EventSourceType type:5;
98 unsigned pending_index;
99 unsigned prepare_index;
100 uint64_t pending_iteration;
101 uint64_t prepare_iteration;
103 LIST_FIELDS(sd_event_source, sources);
107 sd_event_io_handler_t callback;
115 sd_event_time_handler_t callback;
116 usec_t next, accuracy;
117 unsigned earliest_index;
118 unsigned latest_index;
121 sd_event_signal_handler_t callback;
122 struct signalfd_siginfo siginfo;
126 sd_event_child_handler_t callback;
132 sd_event_handler_t callback;
135 sd_event_handler_t callback;
138 sd_event_handler_t callback;
139 unsigned prioq_index;
148 /* For all clocks we maintain two priority queues each, one
149 * ordered for the earliest times the events may be
150 * dispatched, and one ordered by the latest times they must
151 * have been dispatched. The range between the top entries in
152 * the two prioqs is the time window we can freely schedule
165 /* For each priority we maintain one signal fd, so that we
166 * only have to dequeue a single event per priority at a
172 sd_event_source *current;
184 /* timerfd_create() only supports these five clocks so far. We
185 * can add support for more clocks when the kernel learns to
186 * deal with them, too. */
187 struct clock_data realtime;
188 struct clock_data boottime;
189 struct clock_data monotonic;
190 struct clock_data realtime_alarm;
191 struct clock_data boottime_alarm;
195 sd_event_source **signal_sources; /* indexed by signal number */
196 Hashmap *signal_data; /* indexed by priority */
198 Hashmap *child_sources;
199 unsigned n_enabled_child_sources;
208 triple_timestamp timestamp;
211 bool exit_requested:1;
212 bool need_process_child:1;
214 bool profile_delays:1;
219 sd_event **default_event_ptr;
221 usec_t watchdog_last, watchdog_period;
225 LIST_HEAD(sd_event_source, sources);
227 usec_t last_run, last_log;
228 unsigned delays[sizeof(usec_t) * 8];
231 static thread_local sd_event *default_event = NULL;
233 static void source_disconnect(sd_event_source *s);
235 static sd_event *event_resolve(sd_event *e) {
236 return e == SD_EVENT_DEFAULT ? default_event : e;
239 static int pending_prioq_compare(const void *a, const void *b) {
240 const sd_event_source *x = a, *y = b;
245 /* Enabled ones first */
246 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
248 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
251 /* Lower priority values first */
252 if (x->priority < y->priority)
254 if (x->priority > y->priority)
257 /* Older entries first */
258 if (x->pending_iteration < y->pending_iteration)
260 if (x->pending_iteration > y->pending_iteration)
266 static int prepare_prioq_compare(const void *a, const void *b) {
267 const sd_event_source *x = a, *y = b;
272 /* Enabled ones first */
273 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
275 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
278 /* Move most recently prepared ones last, so that we can stop
279 * preparing as soon as we hit one that has already been
280 * prepared in the current iteration */
281 if (x->prepare_iteration < y->prepare_iteration)
283 if (x->prepare_iteration > y->prepare_iteration)
286 /* Lower priority values first */
287 if (x->priority < y->priority)
289 if (x->priority > y->priority)
295 static int earliest_time_prioq_compare(const void *a, const void *b) {
296 const sd_event_source *x = a, *y = b;
298 assert(EVENT_SOURCE_IS_TIME(x->type));
299 assert(x->type == y->type);
301 /* Enabled ones first */
302 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
304 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307 /* Move the pending ones to the end */
308 if (!x->pending && y->pending)
310 if (x->pending && !y->pending)
314 if (x->time.next < y->time.next)
316 if (x->time.next > y->time.next)
322 static usec_t time_event_source_latest(const sd_event_source *s) {
323 return usec_add(s->time.next, s->time.accuracy);
326 static int latest_time_prioq_compare(const void *a, const void *b) {
327 const sd_event_source *x = a, *y = b;
329 assert(EVENT_SOURCE_IS_TIME(x->type));
330 assert(x->type == y->type);
332 /* Enabled ones first */
333 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
335 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
338 /* Move the pending ones to the end */
339 if (!x->pending && y->pending)
341 if (x->pending && !y->pending)
345 if (time_event_source_latest(x) < time_event_source_latest(y))
347 if (time_event_source_latest(x) > time_event_source_latest(y))
353 static int exit_prioq_compare(const void *a, const void *b) {
354 const sd_event_source *x = a, *y = b;
356 assert(x->type == SOURCE_EXIT);
357 assert(y->type == SOURCE_EXIT);
359 /* Enabled ones first */
360 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
362 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
365 /* Lower priority values first */
366 if (x->priority < y->priority)
368 if (x->priority > y->priority)
374 static void free_clock_data(struct clock_data *d) {
376 assert(d->wakeup == WAKEUP_CLOCK_DATA);
379 prioq_free(d->earliest);
380 prioq_free(d->latest);
383 static void event_free(sd_event *e) {
388 while ((s = e->sources)) {
390 source_disconnect(s);
391 sd_event_source_unref(s);
394 assert(e->n_sources == 0);
396 if (e->default_event_ptr)
397 *(e->default_event_ptr) = NULL;
399 safe_close(e->epoll_fd);
400 safe_close(e->watchdog_fd);
402 free_clock_data(&e->realtime);
403 free_clock_data(&e->boottime);
404 free_clock_data(&e->monotonic);
405 free_clock_data(&e->realtime_alarm);
406 free_clock_data(&e->boottime_alarm);
408 prioq_free(e->pending);
409 prioq_free(e->prepare);
412 free(e->signal_sources);
413 hashmap_free(e->signal_data);
415 hashmap_free(e->child_sources);
416 set_free(e->post_sources);
420 _public_ int sd_event_new(sd_event** ret) {
424 assert_return(ret, -EINVAL);
426 e = new0(sd_event, 1);
431 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
432 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
433 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
434 e->original_pid = getpid_cached();
435 e->perturb = USEC_INFINITY;
437 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
441 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
442 if (e->epoll_fd < 0) {
447 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
449 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
450 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
451 e->profile_delays = true;
462 _public_ sd_event* sd_event_ref(sd_event *e) {
467 assert(e->n_ref >= 1);
473 _public_ sd_event* sd_event_unref(sd_event *e) {
478 assert(e->n_ref >= 1);
487 static bool event_pid_changed(sd_event *e) {
490 /* We don't support people creating an event loop and keeping
491 * it around over a fork(). Let's complain. */
493 return e->original_pid != getpid_cached();
496 static void source_io_unregister(sd_event_source *s) {
500 assert(s->type == SOURCE_IO);
502 if (event_pid_changed(s->event))
505 if (!s->io.registered)
508 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
510 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
511 strna(s->description), event_source_type_to_string(s->type));
513 s->io.registered = false;
516 static int source_io_register(
521 struct epoll_event ev;
525 assert(s->type == SOURCE_IO);
526 assert(enabled != SD_EVENT_OFF);
528 ev = (struct epoll_event) {
529 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
533 if (s->io.registered)
534 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
536 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
540 s->io.registered = true;
545 static clockid_t event_source_type_to_clock(EventSourceType t) {
549 case SOURCE_TIME_REALTIME:
550 return CLOCK_REALTIME;
552 case SOURCE_TIME_BOOTTIME:
553 return CLOCK_BOOTTIME;
555 case SOURCE_TIME_MONOTONIC:
556 return CLOCK_MONOTONIC;
558 case SOURCE_TIME_REALTIME_ALARM:
559 return CLOCK_REALTIME_ALARM;
561 case SOURCE_TIME_BOOTTIME_ALARM:
562 return CLOCK_BOOTTIME_ALARM;
565 return (clockid_t) -1;
569 static EventSourceType clock_to_event_source_type(clockid_t clock) {
574 return SOURCE_TIME_REALTIME;
577 return SOURCE_TIME_BOOTTIME;
579 case CLOCK_MONOTONIC:
580 return SOURCE_TIME_MONOTONIC;
582 case CLOCK_REALTIME_ALARM:
583 return SOURCE_TIME_REALTIME_ALARM;
585 case CLOCK_BOOTTIME_ALARM:
586 return SOURCE_TIME_BOOTTIME_ALARM;
589 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
593 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
598 case SOURCE_TIME_REALTIME:
601 case SOURCE_TIME_BOOTTIME:
604 case SOURCE_TIME_MONOTONIC:
605 return &e->monotonic;
607 case SOURCE_TIME_REALTIME_ALARM:
608 return &e->realtime_alarm;
610 case SOURCE_TIME_BOOTTIME_ALARM:
611 return &e->boottime_alarm;
618 static int event_make_signal_data(
621 struct signal_data **ret) {
623 struct epoll_event ev;
624 struct signal_data *d;
632 if (event_pid_changed(e))
635 if (e->signal_sources && e->signal_sources[sig])
636 priority = e->signal_sources[sig]->priority;
638 priority = SD_EVENT_PRIORITY_NORMAL;
640 d = hashmap_get(e->signal_data, &priority);
642 if (sigismember(&d->sigset, sig) > 0) {
648 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
652 d = new0(struct signal_data, 1);
656 d->wakeup = WAKEUP_SIGNAL_DATA;
658 d->priority = priority;
660 r = hashmap_put(e->signal_data, &d->priority, d);
670 assert_se(sigaddset(&ss_copy, sig) >= 0);
672 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
686 d->fd = fd_move_above_stdio(r);
688 ev = (struct epoll_event) {
693 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
706 d->fd = safe_close(d->fd);
707 hashmap_remove(e->signal_data, &d->priority);
714 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
718 /* Turns off the specified signal in the signal data
719 * object. If the signal mask of the object becomes empty that
722 if (sigismember(&d->sigset, sig) == 0)
725 assert_se(sigdelset(&d->sigset, sig) >= 0);
727 if (sigisemptyset(&d->sigset)) {
729 /* If all the mask is all-zero we can get rid of the structure */
730 hashmap_remove(e->signal_data, &d->priority);
738 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
739 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
742 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
743 struct signal_data *d;
744 static const int64_t zero_priority = 0;
748 /* Rechecks if the specified signal is still something we are
749 * interested in. If not, we'll unmask it, and possibly drop
750 * the signalfd for it. */
752 if (sig == SIGCHLD &&
753 e->n_enabled_child_sources > 0)
756 if (e->signal_sources &&
757 e->signal_sources[sig] &&
758 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
762 * The specified signal might be enabled in three different queues:
764 * 1) the one that belongs to the priority passed (if it is non-NULL)
765 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
766 * 3) the 0 priority (to cover the SIGCHLD case)
768 * Hence, let's remove it from all three here.
772 d = hashmap_get(e->signal_data, priority);
774 event_unmask_signal_data(e, d, sig);
777 if (e->signal_sources && e->signal_sources[sig]) {
778 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
780 event_unmask_signal_data(e, d, sig);
783 d = hashmap_get(e->signal_data, &zero_priority);
785 event_unmask_signal_data(e, d, sig);
788 static void source_disconnect(sd_event_source *s) {
796 assert(s->event->n_sources > 0);
802 source_io_unregister(s);
806 case SOURCE_TIME_REALTIME:
807 case SOURCE_TIME_BOOTTIME:
808 case SOURCE_TIME_MONOTONIC:
809 case SOURCE_TIME_REALTIME_ALARM:
810 case SOURCE_TIME_BOOTTIME_ALARM: {
811 struct clock_data *d;
813 d = event_get_clock_data(s->event, s->type);
816 prioq_remove(d->earliest, s, &s->time.earliest_index);
817 prioq_remove(d->latest, s, &s->time.latest_index);
818 d->needs_rearm = true;
823 if (s->signal.sig > 0) {
825 if (s->event->signal_sources)
826 s->event->signal_sources[s->signal.sig] = NULL;
828 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
834 if (s->child.pid > 0) {
835 if (s->enabled != SD_EVENT_OFF) {
836 assert(s->event->n_enabled_child_sources > 0);
837 s->event->n_enabled_child_sources--;
840 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
841 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
851 set_remove(s->event->post_sources, s);
855 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
859 assert_not_reached("Wut? I shouldn't exist.");
863 prioq_remove(s->event->pending, s, &s->pending_index);
866 prioq_remove(s->event->prepare, s, &s->prepare_index);
870 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
872 LIST_REMOVE(sources, event->sources, s);
876 sd_event_unref(event);
879 static void source_free(sd_event_source *s) {
882 source_disconnect(s);
884 if (s->type == SOURCE_IO && s->io.owned)
885 safe_close(s->io.fd);
887 free(s->description);
891 static int source_set_pending(sd_event_source *s, bool b) {
895 assert(s->type != SOURCE_EXIT);
903 s->pending_iteration = s->event->iteration;
905 r = prioq_put(s->event->pending, s, &s->pending_index);
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
916 d = event_get_clock_data(s->event, s->type);
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
935 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
940 s = new0(sd_event_source, 1);
946 s->floating = floating;
948 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
953 LIST_PREPEND(sources, e->sources, s);
959 _public_ int sd_event_add_io(
961 sd_event_source **ret,
964 sd_event_io_handler_t callback,
970 assert_return(e, -EINVAL);
971 assert_return(e = event_resolve(e), -ENOPKG);
972 assert_return(fd >= 0, -EBADF);
973 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
974 assert_return(callback, -EINVAL);
975 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
976 assert_return(!event_pid_changed(e), -ECHILD);
978 s = source_new(e, !ret, SOURCE_IO);
982 s->wakeup = WAKEUP_EVENT_SOURCE;
984 s->io.events = events;
985 s->io.callback = callback;
986 s->userdata = userdata;
987 s->enabled = SD_EVENT_ON;
989 r = source_io_register(s, s->enabled, events);
1001 static void initialize_perturb(sd_event *e) {
1002 sd_id128_t bootid = {};
1004 /* When we sleep for longer, we try to realign the wakeup to
1005 the same time wihtin each minute/second/250ms, so that
1006 events all across the system can be coalesced into a single
1007 CPU wakeup. However, let's take some system-specific
1008 randomness for this value, so that in a network of systems
1009 with synced clocks timer events are distributed a
1010 bit. Here, we calculate a perturbation usec offset from the
1013 if (_likely_(e->perturb != USEC_INFINITY))
1016 if (sd_id128_get_boot(&bootid) >= 0)
1017 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1020 static int event_setup_timer_fd(
1022 struct clock_data *d,
1025 struct epoll_event ev;
1031 if (_likely_(d->fd >= 0))
1034 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1038 fd = fd_move_above_stdio(fd);
1040 ev = (struct epoll_event) {
1045 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1055 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1058 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1061 _public_ int sd_event_add_time(
1063 sd_event_source **ret,
1067 sd_event_time_handler_t callback,
1070 EventSourceType type;
1072 struct clock_data *d;
1075 assert_return(e, -EINVAL);
1076 assert_return(e = event_resolve(e), -ENOPKG);
1077 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1078 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1079 assert_return(!event_pid_changed(e), -ECHILD);
1081 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1084 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1089 callback = time_exit_callback;
1091 d = event_get_clock_data(e, type);
1094 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1098 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1103 r = event_setup_timer_fd(e, d, clock);
1108 s = source_new(e, !ret, type);
1112 s->time.next = usec;
1113 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1114 s->time.callback = callback;
1115 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1116 s->userdata = userdata;
1117 s->enabled = SD_EVENT_ONESHOT;
1119 d->needs_rearm = true;
1121 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1125 r = prioq_put(d->latest, s, &s->time.latest_index);
1139 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1142 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1145 _public_ int sd_event_add_signal(
1147 sd_event_source **ret,
1149 sd_event_signal_handler_t callback,
1153 struct signal_data *d;
1157 assert_return(e, -EINVAL);
1158 assert_return(e = event_resolve(e), -ENOPKG);
1159 assert_return(SIGNAL_VALID(sig), -EINVAL);
1160 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1161 assert_return(!event_pid_changed(e), -ECHILD);
1164 callback = signal_exit_callback;
1166 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1170 if (!sigismember(&ss, sig))
1173 if (!e->signal_sources) {
1174 e->signal_sources = new0(sd_event_source*, _NSIG);
1175 if (!e->signal_sources)
1177 } else if (e->signal_sources[sig])
1180 s = source_new(e, !ret, SOURCE_SIGNAL);
1184 s->signal.sig = sig;
1185 s->signal.callback = callback;
1186 s->userdata = userdata;
1187 s->enabled = SD_EVENT_ON;
1189 e->signal_sources[sig] = s;
1191 r = event_make_signal_data(e, sig, &d);
1197 /* Use the signal name as description for the event source by default */
1198 (void) sd_event_source_set_description(s, signal_to_string(sig));
1206 _public_ int sd_event_add_child(
1208 sd_event_source **ret,
1211 sd_event_child_handler_t callback,
1217 assert_return(e, -EINVAL);
1218 assert_return(e = event_resolve(e), -ENOPKG);
1219 assert_return(pid > 1, -EINVAL);
1220 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1221 assert_return(options != 0, -EINVAL);
1222 assert_return(callback, -EINVAL);
1223 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1224 assert_return(!event_pid_changed(e), -ECHILD);
1226 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1230 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1233 s = source_new(e, !ret, SOURCE_CHILD);
1238 s->child.options = options;
1239 s->child.callback = callback;
1240 s->userdata = userdata;
1241 s->enabled = SD_EVENT_ONESHOT;
1243 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1249 e->n_enabled_child_sources++;
1251 r = event_make_signal_data(e, SIGCHLD, NULL);
1253 e->n_enabled_child_sources--;
1258 e->need_process_child = true;
1266 _public_ int sd_event_add_defer(
1268 sd_event_source **ret,
1269 sd_event_handler_t callback,
1275 assert_return(e, -EINVAL);
1276 assert_return(e = event_resolve(e), -ENOPKG);
1277 assert_return(callback, -EINVAL);
1278 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1279 assert_return(!event_pid_changed(e), -ECHILD);
1281 s = source_new(e, !ret, SOURCE_DEFER);
1285 s->defer.callback = callback;
1286 s->userdata = userdata;
1287 s->enabled = SD_EVENT_ONESHOT;
1289 r = source_set_pending(s, true);
1301 _public_ int sd_event_add_post(
1303 sd_event_source **ret,
1304 sd_event_handler_t callback,
1310 assert_return(e, -EINVAL);
1311 assert_return(e = event_resolve(e), -ENOPKG);
1312 assert_return(callback, -EINVAL);
1313 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1314 assert_return(!event_pid_changed(e), -ECHILD);
1316 r = set_ensure_allocated(&e->post_sources, NULL);
1320 s = source_new(e, !ret, SOURCE_POST);
1324 s->post.callback = callback;
1325 s->userdata = userdata;
1326 s->enabled = SD_EVENT_ON;
1328 r = set_put(e->post_sources, s);
1340 _public_ int sd_event_add_exit(
1342 sd_event_source **ret,
1343 sd_event_handler_t callback,
1349 assert_return(e, -EINVAL);
1350 assert_return(e = event_resolve(e), -ENOPKG);
1351 assert_return(callback, -EINVAL);
1352 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1353 assert_return(!event_pid_changed(e), -ECHILD);
1355 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1359 s = source_new(e, !ret, SOURCE_EXIT);
1363 s->exit.callback = callback;
1364 s->userdata = userdata;
1365 s->exit.prioq_index = PRIOQ_IDX_NULL;
1366 s->enabled = SD_EVENT_ONESHOT;
1368 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1380 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1385 assert(s->n_ref >= 1);
1391 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1396 assert(s->n_ref >= 1);
1399 if (s->n_ref <= 0) {
1400 /* Here's a special hack: when we are called from a
1401 * dispatch handler we won't free the event source
1402 * immediately, but we will detach the fd from the
1403 * epoll. This way it is safe for the caller to unref
1404 * the event source and immediately close the fd, but
1405 * we still retain a valid event source object after
1408 if (s->dispatching) {
1409 if (s->type == SOURCE_IO)
1410 source_io_unregister(s);
1412 source_disconnect(s);
1420 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1421 assert_return(s, -EINVAL);
1422 assert_return(!event_pid_changed(s->event), -ECHILD);
1424 return free_and_strdup(&s->description, description);
1427 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1428 assert_return(s, -EINVAL);
1429 assert_return(description, -EINVAL);
1430 assert_return(s->description, -ENXIO);
1431 assert_return(!event_pid_changed(s->event), -ECHILD);
1433 *description = s->description;
1437 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1438 assert_return(s, NULL);
1443 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1444 assert_return(s, -EINVAL);
1445 assert_return(s->type != SOURCE_EXIT, -EDOM);
1446 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1447 assert_return(!event_pid_changed(s->event), -ECHILD);
1452 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1453 assert_return(s, -EINVAL);
1454 assert_return(s->type == SOURCE_IO, -EDOM);
1455 assert_return(!event_pid_changed(s->event), -ECHILD);
1460 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1463 assert_return(s, -EINVAL);
1464 assert_return(fd >= 0, -EBADF);
1465 assert_return(s->type == SOURCE_IO, -EDOM);
1466 assert_return(!event_pid_changed(s->event), -ECHILD);
1471 if (s->enabled == SD_EVENT_OFF) {
1473 s->io.registered = false;
1477 saved_fd = s->io.fd;
1478 assert(s->io.registered);
1481 s->io.registered = false;
1483 r = source_io_register(s, s->enabled, s->io.events);
1485 s->io.fd = saved_fd;
1486 s->io.registered = true;
1490 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1496 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1497 assert_return(s, -EINVAL);
1498 assert_return(s->type == SOURCE_IO, -EDOM);
1503 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1504 assert_return(s, -EINVAL);
1505 assert_return(s->type == SOURCE_IO, -EDOM);
1511 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1512 assert_return(s, -EINVAL);
1513 assert_return(events, -EINVAL);
1514 assert_return(s->type == SOURCE_IO, -EDOM);
1515 assert_return(!event_pid_changed(s->event), -ECHILD);
1517 *events = s->io.events;
1521 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1524 assert_return(s, -EINVAL);
1525 assert_return(s->type == SOURCE_IO, -EDOM);
1526 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1527 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1528 assert_return(!event_pid_changed(s->event), -ECHILD);
1530 /* edge-triggered updates are never skipped, so we can reset edges */
1531 if (s->io.events == events && !(events & EPOLLET))
1534 r = source_set_pending(s, false);
1538 if (s->enabled != SD_EVENT_OFF) {
1539 r = source_io_register(s, s->enabled, events);
1544 s->io.events = events;
1549 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1550 assert_return(s, -EINVAL);
1551 assert_return(revents, -EINVAL);
1552 assert_return(s->type == SOURCE_IO, -EDOM);
1553 assert_return(s->pending, -ENODATA);
1554 assert_return(!event_pid_changed(s->event), -ECHILD);
1556 *revents = s->io.revents;
1560 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1561 assert_return(s, -EINVAL);
1562 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1563 assert_return(!event_pid_changed(s->event), -ECHILD);
1565 return s->signal.sig;
1568 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1569 assert_return(s, -EINVAL);
1570 assert_return(!event_pid_changed(s->event), -ECHILD);
1572 *priority = s->priority;
1576 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1579 assert_return(s, -EINVAL);
1580 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1581 assert_return(!event_pid_changed(s->event), -ECHILD);
1583 if (s->priority == priority)
1586 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1587 struct signal_data *old, *d;
1589 /* Move us from the signalfd belonging to the old
1590 * priority to the signalfd of the new priority */
1592 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1594 s->priority = priority;
1596 r = event_make_signal_data(s->event, s->signal.sig, &d);
1598 s->priority = old->priority;
1602 event_unmask_signal_data(s->event, old, s->signal.sig);
1604 s->priority = priority;
1607 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1610 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1612 if (s->type == SOURCE_EXIT)
1613 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1618 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1619 assert_return(s, -EINVAL);
1620 assert_return(m, -EINVAL);
1621 assert_return(!event_pid_changed(s->event), -ECHILD);
1627 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1630 assert_return(s, -EINVAL);
1631 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1632 assert_return(!event_pid_changed(s->event), -ECHILD);
1634 /* If we are dead anyway, we are fine with turning off
1635 * sources, but everything else needs to fail. */
1636 if (s->event->state == SD_EVENT_FINISHED)
1637 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1639 if (s->enabled == m)
1642 if (m == SD_EVENT_OFF) {
1644 /* Unset the pending flag when this event source is disabled */
1645 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1646 r = source_set_pending(s, false);
1654 source_io_unregister(s);
1658 case SOURCE_TIME_REALTIME:
1659 case SOURCE_TIME_BOOTTIME:
1660 case SOURCE_TIME_MONOTONIC:
1661 case SOURCE_TIME_REALTIME_ALARM:
1662 case SOURCE_TIME_BOOTTIME_ALARM: {
1663 struct clock_data *d;
1666 d = event_get_clock_data(s->event, s->type);
1669 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1670 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1671 d->needs_rearm = true;
1678 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1684 assert(s->event->n_enabled_child_sources > 0);
1685 s->event->n_enabled_child_sources--;
1687 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1692 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1701 assert_not_reached("Wut? I shouldn't exist.");
1706 /* Unset the pending flag when this event source is enabled */
1707 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1708 r = source_set_pending(s, false);
1716 r = source_io_register(s, m, s->io.events);
1723 case SOURCE_TIME_REALTIME:
1724 case SOURCE_TIME_BOOTTIME:
1725 case SOURCE_TIME_MONOTONIC:
1726 case SOURCE_TIME_REALTIME_ALARM:
1727 case SOURCE_TIME_BOOTTIME_ALARM: {
1728 struct clock_data *d;
1731 d = event_get_clock_data(s->event, s->type);
1734 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1735 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1736 d->needs_rearm = true;
1744 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1746 s->enabled = SD_EVENT_OFF;
1747 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1755 if (s->enabled == SD_EVENT_OFF)
1756 s->event->n_enabled_child_sources++;
1760 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1762 s->enabled = SD_EVENT_OFF;
1763 s->event->n_enabled_child_sources--;
1764 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1772 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1781 assert_not_reached("Wut? I shouldn't exist.");
1786 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1789 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1794 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1795 assert_return(s, -EINVAL);
1796 assert_return(usec, -EINVAL);
1797 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1798 assert_return(!event_pid_changed(s->event), -ECHILD);
1800 *usec = s->time.next;
/* Public setter: updates the source's wake-up time, clears any pending state,
 * reshuffles the source in the clock's earliest/latest priority queues and
 * flags the clock for timerfd rearming. */
1804 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1805         struct clock_data *d;
1808         assert_return(s, -EINVAL);
1809         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1810         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1811         assert_return(!event_pid_changed(s->event), -ECHILD);
        /* Drop the pending flag first: the old deadline may already have fired. */
1813         r = source_set_pending(s, false);
1817         s->time.next = usec;
1819         d = event_get_clock_data(s->event, s->type);
        /* New deadline changes ordering in both queues. */
1822         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1823         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1824         d->needs_rearm = true;
/* Public getter: stores the source's configured accuracy window (usec)
 * into *usec. Valid only for time event sources. */
1829 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1830         assert_return(s, -EINVAL);
1831         assert_return(usec, -EINVAL);
1832         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1833         assert_return(!event_pid_changed(s->event), -ECHILD);
1835         *usec = s->time.accuracy;
/* Public setter for the accuracy window. (uint64_t) -1 is rejected; a zero
 * accuracy is replaced by DEFAULT_ACCURACY_USEC. Only the "latest" queue is
 * reshuffled, since accuracy affects only the latest permissible wake-up. */
1839 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1840         struct clock_data *d;
1843         assert_return(s, -EINVAL);
1844         assert_return(usec != (uint64_t) -1, -EINVAL);
1845         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1846         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1847         assert_return(!event_pid_changed(s->event), -ECHILD);
1849         r = source_set_pending(s, false);
        /* Fall back to the default accuracy when the caller passed 0. */
1854                 usec = DEFAULT_ACCURACY_USEC;
1856         s->time.accuracy = usec;
1858         d = event_get_clock_data(s->event, s->type);
1861         prioq_reshuffle(d->latest, s, &s->time.latest_index);
1862         d->needs_rearm = true;
/* Public getter: maps the source's internal type to the corresponding
 * clockid_t (via event_source_type_to_clock()) and stores it in *clock. */
1867 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1868         assert_return(s, -EINVAL);
1869         assert_return(clock, -EINVAL);
1870         assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1871         assert_return(!event_pid_changed(s->event), -ECHILD);
1873         *clock = event_source_type_to_clock(s->type);
/* Public getter: stores the watched child's PID into *pid.
 * Valid only for SOURCE_CHILD sources (-EDOM otherwise). */
1877 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1878         assert_return(s, -EINVAL);
1879         assert_return(pid, -EINVAL);
1880         assert_return(s->type == SOURCE_CHILD, -EDOM);
1881         assert_return(!event_pid_changed(s->event), -ECHILD);
1883         *pid = s->child.pid;
/* Installs (or replaces/removes) a per-source prepare callback, invoked from
 * event_prepare() before each poll. When a callback is first installed the
 * source is inserted into the event's prepare priority queue; when it is
 * cleared the source is removed again. Not allowed on SOURCE_EXIT sources. */
1887 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1890         assert_return(s, -EINVAL);
1891         assert_return(s->type != SOURCE_EXIT, -EDOM);
1892         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1893         assert_return(!event_pid_changed(s->event), -ECHILD);
        /* No change requested — nothing to do. */
1895         if (s->prepare == callback)
        /* Replacing one callback with another: prioq membership is unchanged. */
1898         if (callback && s->prepare) {
1899                 s->prepare = callback;
1903         r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1907         s->prepare = callback;
        /* Newly installed callback: add the source to the prepare queue. */
1910                 r = prioq_put(s->event->prepare, s, &s->prepare_index);
        /* Callback cleared: drop the source from the prepare queue. */
1914                 prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Public getter: returns the opaque userdata pointer attached to the source,
 * or NULL if s is NULL. */
1919 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1920         assert_return(s, NULL);
/* Public setter: replaces the source's opaque userdata pointer.
 * Returns the previous value (per the sd-event API contract); NULL if s is NULL. */
1925 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1928         assert_return(s, NULL);
1931         s->userdata = userdata;
/* Picks a wake-up time in [a, b], biased towards system-wide coalescing:
 * tries to land on the per-boot "perturb" offset within successively finer
 * periods (1min, 10s, 1s, 250ms), falling back towards b. Returns
 * USEC_INFINITY when a is already infinite (nothing to schedule). */
1936 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1943         if (a >= USEC_INFINITY)
1944                 return USEC_INFINITY;
        /* Lazily derive e->perturb from the boot ID. */
1949         initialize_perturb(e);
1952           Find a good time to wake up again between times a and b. We
1953           have two goals here:
1955           a) We want to wake up as seldom as possible, hence prefer
1956           later times over earlier times.
1958           b) But if we have to wake up, then let's make sure to
1959           dispatch as much as possible on the entire system.
1961           We implement this by waking up everywhere at the same time
1962           within any given minute if we can, synchronised via the
1963           perturbation value determined from the boot ID. If we can't,
1964           then we try to find the same spot in every 10s, then 1s and
1965           then 250ms step. Otherwise, we pick the last possible time
        /* Candidate on the minute grid, shifted by the perturbation value. */
1969         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        /* Guard against unsigned underflow before stepping back one period. */
1971         if (_unlikely_(c < USEC_PER_MINUTE))
1974                 c -= USEC_PER_MINUTE;
        /* Same idea on a 10s grid. */
1980         c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1982         if (_unlikely_(c < USEC_PER_SEC*10))
1985                 c -= USEC_PER_SEC*10;
        /* Same idea on a 1s grid. */
1991         c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1993         if (_unlikely_(c < USEC_PER_SEC))
        /* Finally, a 250ms grid. */
2002         c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2004         if (_unlikely_(c < USEC_PER_MSEC*250))
2007                 c -= USEC_PER_MSEC*250;
/* (Re)programs the timerfd of one clock from its priority queues: picks the
 * earliest pending deadline and the latest acceptable one, and arms the fd to
 * a coalesced point in between (sleep_between()). A no-op unless
 * d->needs_rearm was set. Disarms/parks the fd when nothing is enabled. */
2016 static int event_arm_timer(
2018                 struct clock_data *d) {
2020         struct itimerspec its = {};
2021         sd_event_source *a, *b;
2028         if (!d->needs_rearm)
2031         d->needs_rearm = false;
        /* Earliest enabled deadline; if none, disarm the timerfd. */
2033         a = prioq_peek(d->earliest);
2034         if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
        /* Already disarmed — avoid a redundant timerfd_settime(). */
2039                 if (d->next == USEC_INFINITY)
        /* its is zeroed, so this disarms the timer. */
2043                 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2047                 d->next = USEC_INFINITY;
2051         b = prioq_peek(d->latest);
2052         assert_se(b && b->enabled != SD_EVENT_OFF);
2054         t = sleep_between(e, a->time.next, time_event_source_latest(b));
2058         assert_se(d->fd >= 0);
2061                 /* We don't want to disarm here, just mean some time looooong ago. */
2062                 its.it_value.tv_sec = 0;
2063                 its.it_value.tv_nsec = 1;
2065                 timespec_store(&its.it_value, t);
2067         r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Handles an epoll wake-up for an I/O source: records the revents and marks
 * the source pending for dispatch. */
2075 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2078         assert(s->type == SOURCE_IO);
2080         /* If the event source was already pending, we just OR in the
2081          * new revents, otherwise we reset the value. The ORing is
2082          * necessary to handle EPOLLONESHOT events properly where
2083          * readability might happen independently of writability, and
2084          * we need to keep track of both */
2087                 s->io.revents |= revents;
2089                 s->io.revents = revents;
2091         return source_set_pending(s, true);
/* Drains one expiration counter from a timerfd so it stops polling readable,
 * tolerating EAGAIN/EINTR, and resets *next to USEC_INFINITY so the next
 * event_arm_timer() knows the fd is currently unprogrammed. */
2094 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2101         assert_return(events == EPOLLIN, -EIO);
2103         ss = read(fd, &x, sizeof(x));
        /* Spurious wake-up or interrupted read — not an error. */
2105                 if (IN_SET(errno, EAGAIN, EINTR))
        /* timerfd reads must deliver exactly one uint64_t. */
2111         if (_unlikely_(ss != sizeof(x)))
2115                 *next = USEC_INFINITY;
/* Marks every enabled time source of one clock whose deadline has elapsed as
 * pending, reshuffling the queues as it goes, and flags the clock for
 * rearming. Iterates from the head of the "earliest" queue. */
2120 static int process_timer(
2123                 struct clock_data *d) {
2132                 s = prioq_peek(d->earliest);
        /* Stop once we hit a disabled source or one that hasn't elapsed yet. */
2135                     s->enabled == SD_EVENT_OFF ||
2139                 r = source_set_pending(s, true);
        /* Pending state changes ordering in both queues. */
2143                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2144                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2145                 d->needs_rearm = true;
/* Polls every watched child with waitid(P_PID, WNOHANG) and marks the
 * matching child sources pending. Uses WNOWAIT for WEXITED watches so the
 * zombie survives until the callback ran; non-death state changes are
 * dequeued immediately. O(n) in the number of watched children. */
2151 static int process_child(sd_event *e) {
2158         e->need_process_child = false;
2161           So, this is ugly. We iteratively invoke waitid() with P_PID
2162           + WNOHANG for each PID we wait for, instead of using
2163           P_ALL. This is because we only want to get child
2164           information of very specific child processes, and not all
2165           of them. We might not have processed the SIGCHLD even of a
2166           previous invocation and we don't want to maintain a
2167           unbounded *per-child* event queue, hence we really don't
2168           want anything flushed out of the kernel's queue that we
2169           don't care about. Since this is O(n) this means that if you
2170           have a lot of processes you probably want to handle SIGCHLD
2173           We do not reap the children here (by using WNOWAIT), this
2174           is only done after the event source is dispatched so that
2175           the callback still sees the process as a zombie.
2178         HASHMAP_FOREACH(s, e->child_sources, i) {
2179                 assert(s->type == SOURCE_CHILD);
2184                 if (s->enabled == SD_EVENT_OFF)
        /* Clear the siginfo so si_pid == 0 reliably means "no state change". */
2187                 zero(s->child.siginfo);
2188                 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2189                            WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
        /* si_pid != 0: the child reported a state change. */
2193                 if (s->child.siginfo.si_pid != 0) {
2194                         bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2196                         if (!zombie && (s->child.options & WEXITED)) {
2197                                 /* If the child isn't dead then let's
2198                                  * immediately remove the state change
2199                                  * from the queue, since there's no
2200                                  * benefit in leaving it queued */
2202                                 assert(s->child.options & (WSTOPPED|WCONTINUED));
2203                                 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2206                         r = source_set_pending(s, true);
/* Handles an epoll wake-up on one signalfd (one per priority bucket): reads
 * at most one queued siginfo, routes it to the registered signal source for
 * that signal number, and marks it pending. Also schedules a child re-check
 * whenever SIGCHLD shares this signalfd's sigset. */
2215 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2216         bool read_one = false;
2220         assert_return(events == EPOLLIN, -EIO);
2222         /* If there's a signal queued on this priority and SIGCHLD is
2223            on this priority too, then make sure to recheck the
2224            children we watch. This is because we only ever dequeue
2225            the first signal per priority, and if we dequeue one, and
2226            SIGCHLD might be enqueued later we wouldn't know, but we
2227            might have higher priority children we care about hence we
2228            need to check that explicitly. */
2230         if (sigismember(&d->sigset, SIGCHLD))
2231                 e->need_process_child = true;
2233         /* If there's already an event source pending for this
2234          * priority we don't read another */
2239                 struct signalfd_siginfo si;
2241                 sd_event_source *s = NULL;
2243                 n = read(d->fd, &si, sizeof(si));
        /* Drained, or interrupted — treat as done for now. */
2245                         if (IN_SET(errno, EAGAIN, EINTR))
        /* signalfd reads must deliver a whole struct signalfd_siginfo. */
2251                 if (_unlikely_(n != sizeof(si)))
2254                 assert(SIGNAL_VALID(si.ssi_signo));
        /* Look up the source registered for this signal number, if any. */
2258                 if (e->signal_sources)
2259                         s = e->signal_sources[si.ssi_signo];
        /* Hand the dequeued siginfo to the source and mark it pending. */
2265                 s->signal.siginfo = si;
2268                 r = source_set_pending(s, true);
/* Invokes the user callback of one pending (or exit) source. Clears the
 * pending flag first, propagates POST sources, honours ONESHOT by disabling
 * the source before the callback, and disables the source if the callback
 * returns an error (logging it as debug). */
2276 static int source_dispatch(sd_event_source *s) {
2277         EventSourceType saved_type;
2281         assert(s->pending || s->type == SOURCE_EXIT);
2283         /* Save the event source type, here, so that we still know it after the event callback which might invalidate
2285         saved_type = s->type;
        /* DEFER/EXIT keep their pending handling elsewhere. */
2287         if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2288                 r = source_set_pending(s, false);
2293         if (s->type != SOURCE_POST) {
2297                 /* If we execute a non-post source, let's mark all
2298                  * post sources as pending */
2300                 SET_FOREACH(z, s->event->post_sources, i) {
2301                         if (z->enabled == SD_EVENT_OFF)
2304                         r = source_set_pending(z, true);
        /* ONESHOT: turn the source off before running the callback. */
2310         if (s->enabled == SD_EVENT_ONESHOT) {
2311                 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2316         s->dispatching = true;
        /* Dispatch to the per-type callback signature. */
2321                 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2324         case SOURCE_TIME_REALTIME:
2325         case SOURCE_TIME_BOOTTIME:
2326         case SOURCE_TIME_MONOTONIC:
2327         case SOURCE_TIME_REALTIME_ALARM:
2328         case SOURCE_TIME_BOOTTIME_ALARM:
2329                 r = s->time.callback(s, s->time.next, s->userdata);
2333                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2336         case SOURCE_CHILD: {
2339                 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2341                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2343                 /* Now, reap the PID for good. */
2345                         (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2351                 r = s->defer.callback(s, s->userdata);
2355                 r = s->post.callback(s, s->userdata);
2359                 r = s->exit.callback(s, s->userdata);
2362         case SOURCE_WATCHDOG:
2363         case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2364         case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2365                 assert_not_reached("Wut? I shouldn't exist.");
2368         s->dispatching = false;
        /* Callback failure: log (with the saved type, since the callback may
         * have invalidated s->type) and disable the source. */
2371                 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2372                                 strna(s->description), event_source_type_to_string(saved_type));
2377                 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Runs the prepare callbacks of all enabled sources that haven't been
 * prepared this iteration yet, in priority order. A failing prepare callback
 * disables its source (logged as debug). */
2382 static int event_prepare(sd_event *e) {
2390                 s = prioq_peek(e->prepare);
        /* Stop once the head was already prepared this iteration or is off. */
2391                 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2394                 s->prepare_iteration = e->iteration;
2395                 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2401                 s->dispatching = true;
2402                 r = s->prepare(s, s->userdata);
2403                 s->dispatching = false;
2406                         log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2407                                         strna(s->description), event_source_type_to_string(s->type));
2412                         sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Dispatches the highest-priority enabled exit source. When none is left the
 * loop transitions to SD_EVENT_FINISHED. Holds a loop reference across the
 * callback so the callback may drop the last external ref safely. */
2418 static int dispatch_exit(sd_event *e) {
2420         _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2425         p = prioq_peek(e->exit);
2426         if (!p || p->enabled == SD_EVENT_OFF) {
2427                 e->state = SD_EVENT_FINISHED;
2431         ref = sd_event_ref(e);
2433         e->state = SD_EVENT_EXITING;
2434         r = source_dispatch(p);
2435         e->state = SD_EVENT_INITIAL;
/* Returns the highest-priority pending source that is enabled, or NULL. */
2439 static sd_event_source* event_next_pending(sd_event *e) {
2444         p = prioq_peek(e->pending);
2448         if (p->enabled == SD_EVENT_OFF)
/* Arms the watchdog timerfd to fire between 1/2 and 3/4 of the watchdog
 * period after the last ping, coalesced via sleep_between(). A fully zero
 * itimerspec would disarm the fd, so it is nudged to 1ns in that case. */
2454 static int arm_watchdog(sd_event *e) {
2455         struct itimerspec its = {};
2460         assert(e->watchdog_fd >= 0);
2462         t = sleep_between(e,
2463                           e->watchdog_last + (e->watchdog_period / 2),
2464                           e->watchdog_last + (e->watchdog_period * 3 / 4));
2466         timespec_store(&its.it_value, t);
2468         /* Make sure we never set the watchdog to 0, which tells the
2469          * kernel to disable it. */
2470         if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2471                 its.it_value.tv_nsec = 1;
2473         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Sends a WATCHDOG=1 keep-alive to the service manager (rate-limited to at
 * most once per quarter period) and re-arms the watchdog timer. */
2480 static int process_watchdog(sd_event *e) {
2486         /* Don't notify watchdog too often */
2487         if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2490         sd_notify(false, "WATCHDOG=1");
2491         e->watchdog_last = e->timestamp.monotonic;
2493         return arm_watchdog(e);
/* Public loop step 1: runs prepare callbacks, arms all five clock timerfds,
 * and transitions INITIAL -> ARMED (or straight to PENDING-handling via
 * sd_event_wait(e, 0) when something is already pending). */
2496 _public_ int sd_event_prepare(sd_event *e) {
2499         assert_return(e, -EINVAL);
2500         assert_return(e = event_resolve(e), -ENOPKG);
2501         assert_return(!event_pid_changed(e), -ECHILD);
2502         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2503         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2505         if (e->exit_requested)
2510         e->state = SD_EVENT_PREPARING;
2511         r = event_prepare(e);
2512         e->state = SD_EVENT_INITIAL;
        /* Arm each of the five clocks that has work queued. */
2516         r = event_arm_timer(e, &e->realtime);
2520         r = event_arm_timer(e, &e->boottime);
2524         r = event_arm_timer(e, &e->monotonic);
2528         r = event_arm_timer(e, &e->realtime_alarm);
2532         r = event_arm_timer(e, &e->boottime_alarm);
        /* Something already pending (or children to re-check)? Then poll with
         * a zero timeout right away instead of returning ARMED to the caller. */
2536         if (event_next_pending(e) || e->need_process_child)
2539         e->state = SD_EVENT_ARMED;
2544         e->state = SD_EVENT_ARMED;
2545         r = sd_event_wait(e, 0);
2547                 e->state = SD_EVENT_ARMED;
/* Public loop step 2: epoll_wait()s (timeout in usec, (uint64_t)-1 = block
 * forever), then routes each wake-up by its WakeupType tag — I/O sources,
 * clock timerfds, signalfds, watchdog — processes elapsed timers and child
 * state, and ends in SD_EVENT_PENDING (work to dispatch) or back in INITIAL. */
2552 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2553         struct epoll_event *ev_queue;
2554         unsigned ev_queue_max;
2557         assert_return(e, -EINVAL);
2558         assert_return(e = event_resolve(e), -ENOPKG);
2559         assert_return(!event_pid_changed(e), -ECHILD);
2560         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2561         assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2563         if (e->exit_requested) {
2564                 e->state = SD_EVENT_PENDING;
        /* Stack-allocated event buffer, one slot per source (at least 1). */
2568         ev_queue_max = MAX(e->n_sources, 1u);
2569         ev_queue = newa(struct epoll_event, ev_queue_max);
        /* usec timeout is rounded UP to whole milliseconds for epoll_wait(). */
2571         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2572                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        /* EINTR is not an error: report PENDING with nothing read. */
2574                 if (errno == EINTR) {
2575                         e->state = SD_EVENT_PENDING;
        /* Snapshot the current time on all three clock bases at once. */
2583         triple_timestamp_get(&e->timestamp);
2585         for (i = 0; i < m; i++) {
        /* The watchdog fd is tagged with a plain enum, not a WakeupType. */
2587                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2588                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
        /* All other epoll payloads start with a WakeupType discriminator. */
2590                         WakeupType *t = ev_queue[i].data.ptr;
2594                         case WAKEUP_EVENT_SOURCE:
2595                                 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2598                         case WAKEUP_CLOCK_DATA: {
2599                                 struct clock_data *d = ev_queue[i].data.ptr;
2600                                 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2604                         case WAKEUP_SIGNAL_DATA:
2605                                 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2609                                 assert_not_reached("Invalid wake-up pointer");
2616         r = process_watchdog(e);
        /* Mark elapsed timers on all five clocks as pending. */
2620         r = process_timer(e, e->timestamp.realtime, &e->realtime);
2624         r = process_timer(e, e->timestamp.boottime, &e->boottime);
2628         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2632         r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2636         r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2640         if (e->need_process_child) {
2641                 r = process_child(e);
2646         if (event_next_pending(e)) {
2647                 e->state = SD_EVENT_PENDING;
        /* Nothing became pending: fall back to INITIAL. */
2655         e->state = SD_EVENT_INITIAL;
/* Public loop step 3: dispatches exactly one pending source (or the exit
 * chain when exit was requested). Holds a loop reference across the user
 * callback, then returns the state machine to INITIAL. */
2660 _public_ int sd_event_dispatch(sd_event *e) {
2664         assert_return(e, -EINVAL);
2665         assert_return(e = event_resolve(e), -ENOPKG);
2666         assert_return(!event_pid_changed(e), -ECHILD);
2667         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2668         assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2670         if (e->exit_requested)
2671                 return dispatch_exit(e);
2673         p = event_next_pending(e);
2675                 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        /* Keep the loop alive even if the callback unrefs it. */
2677                 ref = sd_event_ref(e);
2678                 e->state = SD_EVENT_RUNNING;
2679                 r = source_dispatch(p);
2680                 e->state = SD_EVENT_INITIAL;
2684         e->state = SD_EVENT_INITIAL;
/* Formats the per-bucket iteration-delay histogram (e->delays) into a single
 * space-separated line and logs it at debug level. Used by profile_delays. */
2689 static void event_log_delays(sd_event *e) {
2690         char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2694         for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2695                 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2698         log_debug("Event loop iterations: %.*s", o, b);
/* Public: one full loop iteration — prepare, then either wait (nothing
 * pending) or dispatch. When profile_delays is on, buckets the log2 of the
 * inter-iteration delay and logs the histogram every 5 seconds. */
2701 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2704         assert_return(e, -EINVAL);
2705         assert_return(e = event_resolve(e), -ENOPKG);
2706         assert_return(!event_pid_changed(e), -ECHILD);
2707         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2708         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2710         if (e->profile_delays && e->last_run) {
2714                 this_run = now(CLOCK_MONOTONIC);
        /* Histogram bucket = floor(log2(delay since previous run)). */
2716                 l = u64log2(this_run - e->last_run);
2717                 assert(l < sizeof(e->delays));
2720                 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2721                         event_log_delays(e);
2722                         e->last_log = this_run;
2726         r = sd_event_prepare(e);
2728                 /* There was nothing? Then wait... */
2729                 r = sd_event_wait(e, timeout);
2731         if (e->profile_delays)
2732                 e->last_run = now(CLOCK_MONOTONIC);
2735                 /* There's something now, then let's dispatch it */
2736                 r = sd_event_dispatch(e);
/* Public: runs sd_event_run() with an infinite timeout until the loop
 * reaches SD_EVENT_FINISHED, then returns the exit code set via
 * sd_event_exit(). Holds a reference for the duration. */
2746 _public_ int sd_event_loop(sd_event *e) {
2747         _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2750         assert_return(e, -EINVAL);
2751         assert_return(e = event_resolve(e), -ENOPKG);
2752         assert_return(!event_pid_changed(e), -ECHILD);
2753         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2755         ref = sd_event_ref(e);
2757         while (e->state != SD_EVENT_FINISHED) {
2758                 r = sd_event_run(e, (uint64_t) -1);
2763         return e->exit_code;
/* Public: returns the loop's pollable epoll fd (for embedding in an
 * external event loop). */
2766 _public_ int sd_event_get_fd(sd_event *e) {
2768         assert_return(e, -EINVAL);
2769         assert_return(e = event_resolve(e), -ENOPKG);
2770         assert_return(!event_pid_changed(e), -ECHILD);
/* Public: returns the loop's current SD_EVENT_* state machine value. */
2775 _public_ int sd_event_get_state(sd_event *e) {
2776         assert_return(e, -EINVAL);
2777         assert_return(e = event_resolve(e), -ENOPKG);
2778         assert_return(!event_pid_changed(e), -ECHILD);
/* Public: stores the exit code requested via sd_event_exit() into *code.
 * Only meaningful once an exit has actually been requested. */
2783 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2784         assert_return(e, -EINVAL);
2785         assert_return(e = event_resolve(e), -ENOPKG);
2786         assert_return(code, -EINVAL);
2787         assert_return(!event_pid_changed(e), -ECHILD);
2789         if (!e->exit_requested)
2792         *code = e->exit_code;
/* Public: requests loop termination with the given exit code. The actual
 * shutdown (exit sources, FINISHED state) happens on the next iteration. */
2796 _public_ int sd_event_exit(sd_event *e, int code) {
2797         assert_return(e, -EINVAL);
2798         assert_return(e = event_resolve(e), -ENOPKG);
2799         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2800         assert_return(!event_pid_changed(e), -ECHILD);
2802         e->exit_requested = true;
2803         e->exit_code = code;
/* Public: returns the loop's cached timestamp for the given clock (taken
 * once per sd_event_wait() via triple_timestamp_get()), falling back to a
 * fresh now() reading before the first iteration. */
2808 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2809         assert_return(e, -EINVAL);
2810         assert_return(e = event_resolve(e), -ENOPKG);
2811         assert_return(usec, -EINVAL);
2812         assert_return(!event_pid_changed(e), -ECHILD);
2814         if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2817         /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
2818          * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
2819          * the purpose of getting the time this doesn't matter. */
2820         if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2823         if (!triple_timestamp_is_set(&e->timestamp)) {
2824                 /* Implicitly fall back to now() if we never ran
2825                  * before and thus have no cached time. */
2830         *usec = triple_timestamp_by_clock(&e->timestamp, clock);
/* Public: returns the per-thread default event loop, creating it on first
 * use; subsequent calls return an additional reference to the same loop. */
2834 _public_ int sd_event_default(sd_event **ret) {
        /* Without an out-parameter, only report whether a default exists. */
2839                 return !!default_event;
2841         if (default_event) {
2842                 *ret = sd_event_ref(default_event);
2846         r = sd_event_new(&e);
        /* Remember where to clear the default pointer on destruction. */
2850         e->default_event_ptr = &default_event;
/* Public: stores the TID of the thread the loop is attached to into *tid. */
2858 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2859         assert_return(e, -EINVAL);
2860         assert_return(e = event_resolve(e), -ENOPKG);
2861         assert_return(tid, -EINVAL);
2862         assert_return(!event_pid_changed(e), -ECHILD);
/* Public: enables/disables sd_notify() watchdog integration. On enable:
 * reads WATCHDOG_USEC from the environment, pings immediately, creates a
 * CLOCK_MONOTONIC timerfd, arms it, and registers it with epoll under the
 * SOURCE_WATCHDOG tag. On disable: unregisters and closes the fd. */
2872 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2875         assert_return(e, -EINVAL);
2876         assert_return(e = event_resolve(e), -ENOPKG);
2877         assert_return(!event_pid_changed(e), -ECHILD);
        /* Already in the requested state — nothing to do. */
2879         if (e->watchdog == !!b)
2883                 struct epoll_event ev;
        /* Query WATCHDOG_USEC without unsetting it (first arg false). */
2885                 r = sd_watchdog_enabled(false, &e->watchdog_period);
2889                 /* Issue first ping immediately */
2890                 sd_notify(false, "WATCHDOG=1");
2891                 e->watchdog_last = now(CLOCK_MONOTONIC);
2893                 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2894                 if (e->watchdog_fd < 0)
2897                 r = arm_watchdog(e);
2901                 ev = (struct epoll_event) {
        /* Tagged with the plain enum, matching the check in sd_event_wait(). */
2903                         .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
2906                 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
        /* Disable path: tear down the epoll registration and close the fd. */
2913                 if (e->watchdog_fd >= 0) {
2914                         epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2915                         e->watchdog_fd = safe_close(e->watchdog_fd);
        /* Error cleanup: make sure the fd does not leak. */
2923         e->watchdog_fd = safe_close(e->watchdog_fd);
/* Public: returns whether watchdog integration is currently enabled. */
2927 _public_ int sd_event_get_watchdog(sd_event *e) {
2928         assert_return(e, -EINVAL);
2929         assert_return(e = event_resolve(e), -ENOPKG);
2930         assert_return(!event_pid_changed(e), -ECHILD);
/* Public: stores the loop's current iteration counter into *ret. */
2935 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2936         assert_return(e, -EINVAL);
2937         assert_return(e = event_resolve(e), -ENOPKG);
2938         assert_return(!event_pid_changed(e), -ECHILD);
2940         *ret = e->iteration;