/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering
***/

#include <sys/timerfd.h>

#include "sd-daemon.h"

#include "alloc-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

typedef enum EventSourceType {
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        _WAKEUP_TYPE_INVALID = -1,
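
/* Dispatching then only needs that first field: in sd_event_wait() below, the
 * epoll data pointer is cast to WakeupType* and switched on, regardless of
 * whether it actually points to an sd_event_source, a clock_data or a
 * signal_data object. */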

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event_source {
        sd_event_handler_t prepare;

        EventSourceType type:5;

        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

        sd_event_io_handler_t callback;

        sd_event_time_handler_t callback;
        usec_t next, accuracy;
        unsigned earliest_index;
        unsigned latest_index;

        sd_event_signal_handler_t callback;
        struct signalfd_siginfo siginfo;

        sd_event_child_handler_t callback;

        sd_event_handler_t callback;

        sd_event_handler_t callback;

        sd_event_handler_t callback;
        unsigned prioq_index;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */
        sd_event_source *current;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        triple_timestamp timestamp;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static void free_clock_data(struct clock_data *d) {
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static void event_free(sd_event *e) {

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

_public_ int sd_event_new(sd_event** ret) {

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);

        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid_cached();
        e->perturb = USEC_INFINITY;

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

_public_ sd_event* sd_event_ref(sd_event *e) {

        assert(e->n_ref >= 1);

_public_ sd_event* sd_event_unref(sd_event *e) {

        assert(e->n_ref >= 1);

static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {

        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        s->io.registered = true;

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {

        r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);

        d = new0(struct signal_data, 1);

        d->wakeup = WAKEUP_SIGNAL_DATA;
        d->priority = priority;

        r = hashmap_put(e->signal_data, &d->priority, d);

        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);

        d->fd = safe_close(d->fd);
        hashmap_remove(e->signal_data, &d->priority);

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, it is removed. */
        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void source_disconnect(sd_event_source *s) {

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        prioq_remove(s->event->pending, s, &s->pending_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;

        LIST_REMOVE(sources, event->sources, s);

        sd_event_unref(event);

static void source_free(sd_event_source *s) {

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                safe_close(s->io.fd);

        free(s->description);

static int source_set_pending(sd_event_source *s, bool b) {

        assert(s->type != SOURCE_EXIT);

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {

        s = new0(sd_event_source, 1);

        s->floating = floating;

        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        LIST_PREPEND(sources, e->sources, s);

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
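
/* Illustrative sketch (not part of the original file): typical client use of
 * sd_event_add_io(). The fd, handler and names are hypothetical. A negative
 * return from the handler disables the source (see source_dispatch() below). */
static int example_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        char buf[256];
        ssize_t n;

        /* Drain what is readable; with level-triggered EPOLLIN we are
         * simply called again if more data remains. */
        n = read(fd, buf, sizeof(buf));
        if (n < 0 && errno != EAGAIN)
                return -errno;

        return 0;
}

static int example_add_io(sd_event *e, int fd) {
        /* Passing NULL for the source pointer creates a "floating" source
         * owned by the event loop itself (see the !ret argument above). */
        return sd_event_add_io(e, NULL, fd, EPOLLIN, example_io_handler, NULL);
}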

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        struct clock_data *d;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);

        r = event_setup_timer_fd(e, d, clock);

        s = source_new(e, !ret, type);

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);

        r = prioq_put(d->latest, s, &s->time.latest_index);
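
/* Illustrative sketch (not part of the original file): since
 * sd_event_add_time() takes an absolute time, a relative timeout is usually
 * derived from sd_event_now(); the 5s delay and the names are hypothetical. */
static int example_timer_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        log_debug("Timer elapsed at %llu us", (unsigned long long) usec);
        return 0;
}

static int example_add_relative_timer(sd_event *e) {
        uint64_t t;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &t);
        if (r < 0)
                return r;

        /* accuracy=0 selects DEFAULT_ACCURACY_USEC (250ms above), widening
         * the window in which this wakeup may be coalesced with others. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 t + 5 * USEC_PER_SEC, 0,
                                 example_timer_handler, NULL);
}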

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        struct signal_data *d;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));
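
/* Illustrative sketch (not part of the original file): because of the
 * sigismember() check above, the signal has to be blocked before a source
 * can be added for it; the names are hypothetical. */
static int example_add_sigterm(sd_event *e, sd_event_signal_handler_t handler) {
        sigset_t ss;
        int r;

        if (sigemptyset(&ss) < 0 || sigaddset(&ss, SIGTERM) < 0)
                return -errno;

        r = pthread_sigmask(SIG_BLOCK, &ss, NULL);
        if (r != 0)
                return -r; /* pthread_sigmask() returns the error directly */

        return sd_event_add_signal(e, NULL, SIGTERM, handler, NULL);
}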

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;

        e->need_process_child = true;
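
/* Illustrative sketch (not part of the original file): watching a forked
 * child. SIGCHLD must be blocked before the child can possibly exit, so that
 * it is routed to the signalfd machinery above; names are hypothetical. */
static int example_child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* Thanks to WNOWAIT in process_child() the process is still a zombie
         * here; it is reaped for good only after this callback returns. */
        log_debug("Child %i exited with status %i", (int) si->si_pid, si->si_status);
        return 0;
}

static int example_watch_child(sd_event *e, pid_t pid) {
        sigset_t ss;

        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGCHLD) >= 0);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
                return -errno;

        return sd_event_add_child(e, NULL, pid, WEXITED, example_child_handler, NULL);
}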

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);

        s = source_new(e, !ret, SOURCE_POST);

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);

        s = source_new(e, !ret, SOURCE_EXIT);

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
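
/* Illustrative sketch (not part of the original file): exit sources run only
 * once sd_event_exit() has been called, which makes them a natural hook for
 * cleanup work; the handler and its body are hypothetical. */
static int example_exit_handler(sd_event_source *s, void *userdata) {
        log_debug("Event loop exiting, cleaning up");
        return 0;
}

static int example_setup_cleanup(sd_event *e) {
        return sd_event_add_exit(e, NULL, example_exit_handler, NULL);
}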

_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {

        assert(s->n_ref >= 1);

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        assert(s->n_ref >= 1);

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {

        assert_return(s, NULL);

        s->userdata = userdata;

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (_unlikely_(c < USEC_PER_MINUTE))
                return b;
        c -= USEC_PER_MINUTE;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (_unlikely_(c < USEC_PER_SEC*10))
                return b;
        c -= USEC_PER_SEC*10;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (_unlikely_(c < USEC_PER_SEC))
                return b;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (_unlikely_(c < USEC_PER_MSEC*250))
                return b;
        c -= USEC_PER_MSEC*250;
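
/* Worked example (hypothetical numbers): with e->perturb == 13 * USEC_PER_SEC
 * and a window [a, b] wider than a minute, the first candidate is
 *
 *     c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb
 *
 * i.e. second 13 of the minute that b falls into. If that c does not fit
 * into [a, b], the same construction is retried on the 10s, 1s and 250ms
 * grids before giving up and returning b itself. */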

static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just set some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {

        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->enabled == SD_EVENT_OFF ||
                    s->time.next > n)
                        break;

                r = source_set_pending(s, true);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

static int process_child(sd_event *e) {

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;

        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one and
           SIGCHLD is enqueued later we wouldn't know, but we might
           have higher priority children we care about, hence we need
           to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */

        for (;;) {
                struct signalfd_siginfo si;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                s->signal.siginfo = si;

                r = source_set_pending(s, true);

static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;

        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type, here, so that we still know it after the event callback which might invalidate
         * the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        sd_event_source_set_enabled(s, SD_EVENT_OFF);

static int event_prepare(sd_event *e) {

        for (;;) {
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

static int dispatch_exit(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);

        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};

        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);

static int process_watchdog(sd_event *e) {

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

_public_ int sd_event_prepare(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;

        r = event_arm_timer(e, &e->realtime);

        r = event_arm_timer(e, &e->boottime);

        r = event_arm_timer(e, &e->monotonic);

        r = event_arm_timer(e, &e->realtime_alarm);

        r = event_arm_timer(e, &e->boottime_alarm);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (errno == EINTR) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
        }

        r = process_watchdog(e);

        r = process_timer(e, e->timestamp.realtime, &e->realtime);

        r = process_timer(e, e->timestamp.boottime, &e->boottime);

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);

        if (e->need_process_child) {
                r = process_child(e);
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        e->state = SD_EVENT_INITIAL;

_public_ int sd_event_dispatch(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);

_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
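
/* Illustrative sketch (not part of the original file): a minimal client
 * putting the pieces together on the thread-default loop. */
static int example_main_loop(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* ... add I/O, timer, signal, ... sources here ... */

        r = sd_event_loop(e); /* runs until sd_event_exit(), returns the exit code */
        sd_event_unref(e);
        return r;
}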

_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
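
/* Illustrative sketch (not part of the original file): sd_event_get_fd()
 * together with the prepare/wait/dispatch calls above allows embedding the
 * loop into a foreign poll loop; assumes <poll.h>. One iteration might look
 * like this: */
static int example_foreign_loop_iteration(sd_event *e) {
        struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };
        int r;

        r = sd_event_prepare(e);
        if (r < 0)
                return r;
        if (r == 0) {
                /* Nothing pending yet: block in the foreign poller, then
                 * pick up the result without blocking again. */
                if (poll(&p, 1, -1) < 0)
                        return -errno;

                r = sd_event_wait(e, 0);
                if (r <= 0)
                        return r;
        }

        return sd_event_dispatch(e);
}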

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

_public_ int sd_event_default(sd_event **ret) {

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);

        e->default_event_ptr = &default_event;

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

_public_ int sd_event_set_watchdog(sd_event *e, int b) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
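
/* Illustrative sketch (not part of the original file): a service started
 * with WatchdogSec= can delegate keep-alive pings entirely to the loop;
 * without $WATCHDOG_USEC set this is effectively a no-op. */
static int example_enable_watchdog(sd_event *e) {
        /* From here on arm_watchdog()/process_watchdog() above send
         * "WATCHDOG=1" automatically while the loop runs. */
        return sd_event_set_watchdog(e, true);
}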

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}