2 This file is part of systemd.
4 Copyright 2013 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 #include <sys/epoll.h>
21 #include <sys/timerfd.h>
24 #include "sd-daemon.h"
28 #include "alloc-util.h"
35 #include "process-util.h"
37 #include "signal-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "time-util.h"
43 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
45 typedef enum EventSourceType {
49 SOURCE_TIME_MONOTONIC,
50 SOURCE_TIME_REALTIME_ALARM,
51 SOURCE_TIME_BOOTTIME_ALARM,
58 _SOURCE_EVENT_SOURCE_TYPE_MAX,
59 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
62 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
64 [SOURCE_TIME_REALTIME] = "realtime",
65 [SOURCE_TIME_BOOTTIME] = "boottime",
66 [SOURCE_TIME_MONOTONIC] = "monotonic",
67 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
68 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
69 [SOURCE_SIGNAL] = "signal",
70 [SOURCE_CHILD] = "child",
71 [SOURCE_DEFER] = "defer",
72 [SOURCE_POST] = "post",
73 [SOURCE_EXIT] = "exit",
74 [SOURCE_WATCHDOG] = "watchdog",
77 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
79 /* All objects we use in epoll events start with a field of this
80 * type, so that we know how to dispatch them */
81 typedef enum WakeupType {
87 _WAKEUP_TYPE_INVALID = -1,
90 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
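/* A sketch of how this tagging is consumed in sd_event_wait() below:
 * the pointer stored in epoll_event.data.ptr is first dereferenced as
 * a WakeupType and then cast to the matching object, roughly:
 *
 *     WakeupType *t = ev.data.ptr;
 *     switch (*t) {
 *     case WAKEUP_EVENT_SOURCE: (void) process_io(...); break;
 *     case WAKEUP_CLOCK_DATA:   (void) flush_timer(...); break;
 *     case WAKEUP_SIGNAL_DATA:  (void) process_signal(...); break;
 *     }
 */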
92 struct sd_event_source {
99 sd_event_handler_t prepare;
103 EventSourceType type:5;
110 unsigned pending_index;
111 unsigned prepare_index;
112 uint64_t pending_iteration;
113 uint64_t prepare_iteration;
115 LIST_FIELDS(sd_event_source, sources);
119 sd_event_io_handler_t callback;
126 sd_event_time_handler_t callback;
127 usec_t next, accuracy;
128 unsigned earliest_index;
129 unsigned latest_index;
132 sd_event_signal_handler_t callback;
133 struct signalfd_siginfo siginfo;
137 sd_event_child_handler_t callback;
143 sd_event_handler_t callback;
146 sd_event_handler_t callback;
149 sd_event_handler_t callback;
150 unsigned prioq_index;
159 /* For all clocks we maintain two priority queues each, one
160 * ordered by the earliest times the events may be
161 * dispatched, and one ordered by the latest times they must
162 * have been dispatched. The range between the top entries in
163 * the two prioqs is the time window we can freely schedule the wakeup in. */
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a time. */
183 sd_event_source *current;
195 /* timerfd_create() only supports these five clocks so far. We
196 * can add support for more clocks when the kernel learns to
197 * deal with them, too. */
198 struct clock_data realtime;
199 struct clock_data boottime;
200 struct clock_data monotonic;
201 struct clock_data realtime_alarm;
202 struct clock_data boottime_alarm;
206 sd_event_source **signal_sources; /* indexed by signal number */
207 Hashmap *signal_data; /* indexed by priority */
209 Hashmap *child_sources;
210 unsigned n_enabled_child_sources;
219 triple_timestamp timestamp;
222 bool exit_requested:1;
223 bool need_process_child:1;
225 bool profile_delays:1;
230 sd_event **default_event_ptr;
232 usec_t watchdog_last, watchdog_period;
236 LIST_HEAD(sd_event_source, sources);
238 usec_t last_run, last_log;
239 unsigned delays[sizeof(usec_t) * 8];
242 static void source_disconnect(sd_event_source *s);
244 static int pending_prioq_compare(const void *a, const void *b) {
245 const sd_event_source *x = a, *y = b;
250 /* Enabled ones first */
251 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
253 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
256 /* Lower priority values first */
257 if (x->priority < y->priority)
259 if (x->priority > y->priority)
262 /* Older entries first */
263 if (x->pending_iteration < y->pending_iteration)
265 if (x->pending_iteration > y->pending_iteration)
271 static int prepare_prioq_compare(const void *a, const void *b) {
272 const sd_event_source *x = a, *y = b;
277 /* Enabled ones first */
278 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
280 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
283 /* Move most recently prepared ones last, so that we can stop
284 * preparing as soon as we hit one that has already been
285 * prepared in the current iteration */
286 if (x->prepare_iteration < y->prepare_iteration)
288 if (x->prepare_iteration > y->prepare_iteration)
291 /* Lower priority values first */
292 if (x->priority < y->priority)
294 if (x->priority > y->priority)
300 static int earliest_time_prioq_compare(const void *a, const void *b) {
301 const sd_event_source *x = a, *y = b;
303 assert(EVENT_SOURCE_IS_TIME(x->type));
304 assert(x->type == y->type);
306 /* Enabled ones first */
307 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
309 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
312 /* Move the pending ones to the end */
313 if (!x->pending && y->pending)
315 if (x->pending && !y->pending)
319 if (x->time.next < y->time.next)
321 if (x->time.next > y->time.next)
327 static usec_t time_event_source_latest(const sd_event_source *s) {
328 return usec_add(s->time.next, s->time.accuracy);
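/* Example: with the default accuracy of 250ms a timer armed for
 * next=T may fire anywhere in [T, T + 250ms]. The "earliest" prioq is
 * keyed on T, the "latest" prioq on T + accuracy, and
 * event_arm_timer() below picks one wakeup inside that window so that
 * neighbouring timers can be coalesced into a single wakeup. */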
331 static int latest_time_prioq_compare(const void *a, const void *b) {
332 const sd_event_source *x = a, *y = b;
334 assert(EVENT_SOURCE_IS_TIME(x->type));
335 assert(x->type == y->type);
337 /* Enabled ones first */
338 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
340 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
343 /* Move the pending ones to the end */
344 if (!x->pending && y->pending)
346 if (x->pending && !y->pending)
350 if (time_event_source_latest(x) < time_event_source_latest(y))
352 if (time_event_source_latest(x) > time_event_source_latest(y))
358 static int exit_prioq_compare(const void *a, const void *b) {
359 const sd_event_source *x = a, *y = b;
361 assert(x->type == SOURCE_EXIT);
362 assert(y->type == SOURCE_EXIT);
364 /* Enabled ones first */
365 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
367 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
370 /* Lower priority values first */
371 if (x->priority < y->priority)
373 if (x->priority > y->priority)
379 static void free_clock_data(struct clock_data *d) {
381 assert(d->wakeup == WAKEUP_CLOCK_DATA);
384 prioq_free(d->earliest);
385 prioq_free(d->latest);
388 static void event_free(sd_event *e) {
393 while ((s = e->sources)) {
395 source_disconnect(s);
396 sd_event_source_unref(s);
399 assert(e->n_sources == 0);
401 if (e->default_event_ptr)
402 *(e->default_event_ptr) = NULL;
404 safe_close(e->epoll_fd);
405 safe_close(e->watchdog_fd);
407 free_clock_data(&e->realtime);
408 free_clock_data(&e->boottime);
409 free_clock_data(&e->monotonic);
410 free_clock_data(&e->realtime_alarm);
411 free_clock_data(&e->boottime_alarm);
413 prioq_free(e->pending);
414 prioq_free(e->prepare);
417 free(e->signal_sources);
418 hashmap_free(e->signal_data);
420 hashmap_free(e->child_sources);
421 set_free(e->post_sources);
425 _public_ int sd_event_new(sd_event** ret) {
429 assert_return(ret, -EINVAL);
431 e = new0(sd_event, 1);
436 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
437 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
438 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
439 e->original_pid = getpid();
440 e->perturb = USEC_INFINITY;
442 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
446 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
447 if (e->epoll_fd < 0) {
452 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
453 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
454 e->profile_delays = true;
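/* Usage sketch: to inspect the loop latency of a service, set the
 * variable in its environment before starting it, e.g.
 *
 *     SD_EVENT_PROFILE_DELAYS=1 ./my-daemon
 *
 * ("my-daemon" stands for any process built on sd-event.) */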
465 _public_ sd_event* sd_event_ref(sd_event *e) {
470 assert(e->n_ref >= 1);
476 _public_ sd_event* sd_event_unref(sd_event *e) {
481 assert(e->n_ref >= 1);
490 static bool event_pid_changed(sd_event *e) {
493 /* We don't support people creating an event loop and keeping
494 * it around over a fork(). Let's complain. */
496 return e->original_pid != getpid();
499 static void source_io_unregister(sd_event_source *s) {
503 assert(s->type == SOURCE_IO);
505 if (event_pid_changed(s->event))
508 if (!s->io.registered)
511 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
513 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
514 strna(s->description), event_source_type_to_string(s->type));
516 s->io.registered = false;
519 static int source_io_register(
524 struct epoll_event ev = {};
528 assert(s->type == SOURCE_IO);
529 assert(enabled != SD_EVENT_OFF);
534 if (enabled == SD_EVENT_ONESHOT)
535 ev.events |= EPOLLONESHOT;
537 if (s->io.registered)
538 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
540 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
544 s->io.registered = true;
549 static clockid_t event_source_type_to_clock(EventSourceType t) {
553 case SOURCE_TIME_REALTIME:
554 return CLOCK_REALTIME;
556 case SOURCE_TIME_BOOTTIME:
557 return CLOCK_BOOTTIME;
559 case SOURCE_TIME_MONOTONIC:
560 return CLOCK_MONOTONIC;
562 case SOURCE_TIME_REALTIME_ALARM:
563 return CLOCK_REALTIME_ALARM;
565 case SOURCE_TIME_BOOTTIME_ALARM:
566 return CLOCK_BOOTTIME_ALARM;
569 return (clockid_t) -1;
573 static EventSourceType clock_to_event_source_type(clockid_t clock) {
578 return SOURCE_TIME_REALTIME;
581 return SOURCE_TIME_BOOTTIME;
583 case CLOCK_MONOTONIC:
584 return SOURCE_TIME_MONOTONIC;
586 case CLOCK_REALTIME_ALARM:
587 return SOURCE_TIME_REALTIME_ALARM;
589 case CLOCK_BOOTTIME_ALARM:
590 return SOURCE_TIME_BOOTTIME_ALARM;
593 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
597 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
602 case SOURCE_TIME_REALTIME:
605 case SOURCE_TIME_BOOTTIME:
608 case SOURCE_TIME_MONOTONIC:
609 return &e->monotonic;
611 case SOURCE_TIME_REALTIME_ALARM:
612 return &e->realtime_alarm;
614 case SOURCE_TIME_BOOTTIME_ALARM:
615 return &e->boottime_alarm;
622 static int event_make_signal_data(
625 struct signal_data **ret) {
627 struct epoll_event ev = {};
628 struct signal_data *d;
636 if (event_pid_changed(e))
639 if (e->signal_sources && e->signal_sources[sig])
640 priority = e->signal_sources[sig]->priority;
644 d = hashmap_get(e->signal_data, &priority);
646 if (sigismember(&d->sigset, sig) > 0) {
652 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
656 d = new0(struct signal_data, 1);
660 d->wakeup = WAKEUP_SIGNAL_DATA;
662 d->priority = priority;
664 r = hashmap_put(e->signal_data, &d->priority, d);
674 assert_se(sigaddset(&ss_copy, sig) >= 0);
676 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
695 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
708 d->fd = safe_close(d->fd);
709 hashmap_remove(e->signal_data, &d->priority);
716 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
720 /* Turns off the specified signal in the signal data
721 * object. If the signal mask of the object becomes empty that way, the object is dropped entirely. */
724 if (sigismember(&d->sigset, sig) == 0)
727 assert_se(sigdelset(&d->sigset, sig) >= 0);
729 if (sigisemptyset(&d->sigset)) {
731 /* If the mask is now empty we can get rid of the structure */
732 hashmap_remove(e->signal_data, &d->priority);
740 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
741 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
744 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
745 struct signal_data *d;
746 static const int64_t zero_priority = 0;
750 /* Rechecks if the specified signal is still something we are
751 * interested in. If not, we'll unmask it, and possibly drop
752 * the signalfd for it. */
754 if (sig == SIGCHLD &&
755 e->n_enabled_child_sources > 0)
758 if (e->signal_sources &&
759 e->signal_sources[sig] &&
760 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
764 * The specified signal might be enabled in three different queues:
766 * 1) the one that belongs to the priority passed (if it is non-NULL)
767 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
768 * 3) the 0 priority (to cover the SIGCHLD case)
770 * Hence, let's remove it from all three here.
774 d = hashmap_get(e->signal_data, priority);
776 event_unmask_signal_data(e, d, sig);
779 if (e->signal_sources && e->signal_sources[sig]) {
780 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
782 event_unmask_signal_data(e, d, sig);
785 d = hashmap_get(e->signal_data, &zero_priority);
787 event_unmask_signal_data(e, d, sig);
790 static void source_disconnect(sd_event_source *s) {
798 assert(s->event->n_sources > 0);
804 source_io_unregister(s);
808 case SOURCE_TIME_REALTIME:
809 case SOURCE_TIME_BOOTTIME:
810 case SOURCE_TIME_MONOTONIC:
811 case SOURCE_TIME_REALTIME_ALARM:
812 case SOURCE_TIME_BOOTTIME_ALARM: {
813 struct clock_data *d;
815 d = event_get_clock_data(s->event, s->type);
818 prioq_remove(d->earliest, s, &s->time.earliest_index);
819 prioq_remove(d->latest, s, &s->time.latest_index);
820 d->needs_rearm = true;
825 if (s->signal.sig > 0) {
827 if (s->event->signal_sources)
828 s->event->signal_sources[s->signal.sig] = NULL;
830 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
836 if (s->child.pid > 0) {
837 if (s->enabled != SD_EVENT_OFF) {
838 assert(s->event->n_enabled_child_sources > 0);
839 s->event->n_enabled_child_sources--;
842 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
843 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
853 set_remove(s->event->post_sources, s);
857 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
861 assert_not_reached("Wut? I shouldn't exist.");
865 prioq_remove(s->event->pending, s, &s->pending_index);
868 prioq_remove(s->event->prepare, s, &s->prepare_index);
872 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
874 LIST_REMOVE(sources, event->sources, s);
878 sd_event_unref(event);
881 static void source_free(sd_event_source *s) {
884 source_disconnect(s);
885 free(s->description);
889 static int source_set_pending(sd_event_source *s, bool b) {
893 assert(s->type != SOURCE_EXIT);
901 s->pending_iteration = s->event->iteration;
903 r = prioq_put(s->event->pending, s, &s->pending_index);
909 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
911 if (EVENT_SOURCE_IS_TIME(s->type)) {
912 struct clock_data *d;
914 d = event_get_clock_data(s->event, s->type);
917 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
918 prioq_reshuffle(d->latest, s, &s->time.latest_index);
919 d->needs_rearm = true;
922 if (s->type == SOURCE_SIGNAL && !b) {
923 struct signal_data *d;
925 d = hashmap_get(s->event->signal_data, &s->priority);
926 if (d && d->current == s)
933 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
938 s = new0(sd_event_source, 1);
944 s->floating = floating;
946 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
951 LIST_PREPEND(sources, e->sources, s);
957 _public_ int sd_event_add_io(
959 sd_event_source **ret,
962 sd_event_io_handler_t callback,
968 assert_return(e, -EINVAL);
969 assert_return(fd >= 0, -EBADF);
970 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
971 assert_return(callback, -EINVAL);
972 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
973 assert_return(!event_pid_changed(e), -ECHILD);
975 s = source_new(e, !ret, SOURCE_IO);
979 s->wakeup = WAKEUP_EVENT_SOURCE;
981 s->io.events = events;
982 s->io.callback = callback;
983 s->userdata = userdata;
984 s->enabled = SD_EVENT_ON;
986 r = source_io_register(s, s->enabled, events);
998 static void initialize_perturb(sd_event *e) {
999 sd_id128_t bootid = {};
1001 /* When we sleep for longer, we try to realign the wakeup to
1002 the same time within each minute/second/250ms, so that
1003 events all across the system can be coalesced into a single
1004 CPU wakeup. However, let's take some system-specific
1005 randomness for this value, so that in a network of systems
1006 with synced clocks timer events are distributed a
1007 bit. Here, we calculate a perturbation usec offset from the boot ID. */
1010 if (_likely_(e->perturb != USEC_INFINITY))
1013 if (sd_id128_get_boot(&bootid) >= 0)
1014 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
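/* Example with a made-up value: if the XOR of the boot ID halves is
 * 17345678, e->perturb becomes 17345678 % USEC_PER_MINUTE, i.e. a
 * stable ~17.3s offset for this boot, so each machine picks its own
 * fixed slot within every minute. */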
1017 static int event_setup_timer_fd(
1019 struct clock_data *d,
1022 struct epoll_event ev = {};
1028 if (_likely_(d->fd >= 0))
1031 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1035 ev.events = EPOLLIN;
1038 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1048 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1051 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1054 _public_ int sd_event_add_time(
1056 sd_event_source **ret,
1060 sd_event_time_handler_t callback,
1063 EventSourceType type;
1065 struct clock_data *d;
1068 assert_return(e, -EINVAL);
1069 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1070 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1071 assert_return(!event_pid_changed(e), -ECHILD);
1073 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1076 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1081 callback = time_exit_callback;
1083 d = event_get_clock_data(e, type);
1086 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1090 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1095 r = event_setup_timer_fd(e, d, clock);
1100 s = source_new(e, !ret, type);
1104 s->time.next = usec;
1105 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1106 s->time.callback = callback;
1107 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1108 s->userdata = userdata;
1109 s->enabled = SD_EVENT_ONESHOT;
1111 d->needs_rearm = true;
1113 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1117 r = prioq_put(d->latest, s, &s->time.latest_index);
1131 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1134 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1137 _public_ int sd_event_add_signal(
1139 sd_event_source **ret,
1141 sd_event_signal_handler_t callback,
1145 struct signal_data *d;
1149 assert_return(e, -EINVAL);
1150 assert_return(SIGNAL_VALID(sig), -EINVAL);
1151 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1152 assert_return(!event_pid_changed(e), -ECHILD);
1155 callback = signal_exit_callback;
1157 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1161 if (!sigismember(&ss, sig))
1164 if (!e->signal_sources) {
1165 e->signal_sources = new0(sd_event_source*, _NSIG);
1166 if (!e->signal_sources)
1168 } else if (e->signal_sources[sig])
1171 s = source_new(e, !ret, SOURCE_SIGNAL);
1175 s->signal.sig = sig;
1176 s->signal.callback = callback;
1177 s->userdata = userdata;
1178 s->enabled = SD_EVENT_ON;
1180 e->signal_sources[sig] = s;
1182 r = event_make_signal_data(e, sig, &d);
1188 /* Use the signal name as description for the event source by default */
1189 (void) sd_event_source_set_description(s, signal_to_string(sig));
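/* Minimal usage sketch ("on_sigterm" is a hypothetical handler). The
 * signal has to be blocked already, since delivery happens via the
 * signalfd, and the pthread_sigmask() check above enforces this for
 * the calling thread:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *     sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 */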
1197 _public_ int sd_event_add_child(
1199 sd_event_source **ret,
1202 sd_event_child_handler_t callback,
1208 assert_return(e, -EINVAL);
1209 assert_return(pid > 1, -EINVAL);
1210 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1211 assert_return(options != 0, -EINVAL);
1212 assert_return(callback, -EINVAL);
1213 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1214 assert_return(!event_pid_changed(e), -ECHILD);
1216 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1220 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1223 s = source_new(e, !ret, SOURCE_CHILD);
1228 s->child.options = options;
1229 s->child.callback = callback;
1230 s->userdata = userdata;
1231 s->enabled = SD_EVENT_ONESHOT;
1233 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1239 e->n_enabled_child_sources++;
1241 r = event_make_signal_data(e, SIGCHLD, NULL);
1243 e->n_enabled_child_sources--;
1248 e->need_process_child = true;
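/* Usage sketch ("on_child_exit" is a hypothetical handler); delivery
 * relies on the SIGCHLD signalfd set up above, so SIGCHLD is expected
 * to be blocked, ideally before the child is forked:
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS); // child
 *     sd_event_add_child(e, NULL, pid, WEXITED, on_child_exit, NULL);
 */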
1256 _public_ int sd_event_add_defer(
1258 sd_event_source **ret,
1259 sd_event_handler_t callback,
1265 assert_return(e, -EINVAL);
1266 assert_return(callback, -EINVAL);
1267 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1268 assert_return(!event_pid_changed(e), -ECHILD);
1270 s = source_new(e, !ret, SOURCE_DEFER);
1274 s->defer.callback = callback;
1275 s->userdata = userdata;
1276 s->enabled = SD_EVENT_ONESHOT;
1278 r = source_set_pending(s, true);
1290 _public_ int sd_event_add_post(
1292 sd_event_source **ret,
1293 sd_event_handler_t callback,
1299 assert_return(e, -EINVAL);
1300 assert_return(callback, -EINVAL);
1301 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1302 assert_return(!event_pid_changed(e), -ECHILD);
1304 r = set_ensure_allocated(&e->post_sources, NULL);
1308 s = source_new(e, !ret, SOURCE_POST);
1312 s->post.callback = callback;
1313 s->userdata = userdata;
1314 s->enabled = SD_EVENT_ON;
1316 r = set_put(e->post_sources, s);
1328 _public_ int sd_event_add_exit(
1330 sd_event_source **ret,
1331 sd_event_handler_t callback,
1337 assert_return(e, -EINVAL);
1338 assert_return(callback, -EINVAL);
1339 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1340 assert_return(!event_pid_changed(e), -ECHILD);
1342 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1346 s = source_new(e, !ret, SOURCE_EXIT);
1350 s->exit.callback = callback;
1351 s->userdata = userdata;
1352 s->exit.prioq_index = PRIOQ_IDX_NULL;
1353 s->enabled = SD_EVENT_ONESHOT;
1355 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1367 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1372 assert(s->n_ref >= 1);
1378 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1383 assert(s->n_ref >= 1);
1386 if (s->n_ref <= 0) {
1387 /* Here's a special hack: when we are called from a
1388 * dispatch handler we won't free the event source
1389 * immediately, but we will detach the fd from the
1390 * epoll. This way it is safe for the caller to unref
1391 * the event source and immediately close the fd, but
1392 we still retain a valid event source object after the callback. */
1395 if (s->dispatching) {
1396 if (s->type == SOURCE_IO)
1397 source_io_unregister(s);
1399 source_disconnect(s);
1407 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1408 assert_return(s, -EINVAL);
1409 assert_return(!event_pid_changed(s->event), -ECHILD);
1411 return free_and_strdup(&s->description, description);
1414 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1415 assert_return(s, -EINVAL);
1416 assert_return(description, -EINVAL);
1417 assert_return(s->description, -ENXIO);
1418 assert_return(!event_pid_changed(s->event), -ECHILD);
1420 *description = s->description;
1424 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1425 assert_return(s, NULL);
1430 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1431 assert_return(s, -EINVAL);
1432 assert_return(s->type != SOURCE_EXIT, -EDOM);
1433 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1434 assert_return(!event_pid_changed(s->event), -ECHILD);
1439 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1440 assert_return(s, -EINVAL);
1441 assert_return(s->type == SOURCE_IO, -EDOM);
1442 assert_return(!event_pid_changed(s->event), -ECHILD);
1447 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1450 assert_return(s, -EINVAL);
1451 assert_return(fd >= 0, -EBADF);
1452 assert_return(s->type == SOURCE_IO, -EDOM);
1453 assert_return(!event_pid_changed(s->event), -ECHILD);
1458 if (s->enabled == SD_EVENT_OFF) {
1460 s->io.registered = false;
1464 saved_fd = s->io.fd;
1465 assert(s->io.registered);
1468 s->io.registered = false;
1470 r = source_io_register(s, s->enabled, s->io.events);
1472 s->io.fd = saved_fd;
1473 s->io.registered = true;
1477 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1483 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1484 assert_return(s, -EINVAL);
1485 assert_return(events, -EINVAL);
1486 assert_return(s->type == SOURCE_IO, -EDOM);
1487 assert_return(!event_pid_changed(s->event), -ECHILD);
1489 *events = s->io.events;
1493 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1496 assert_return(s, -EINVAL);
1497 assert_return(s->type == SOURCE_IO, -EDOM);
1498 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1499 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1500 assert_return(!event_pid_changed(s->event), -ECHILD);
1502 /* edge-triggered updates are never skipped, so we can reset edges */
1503 if (s->io.events == events && !(events & EPOLLET))
1506 if (s->enabled != SD_EVENT_OFF) {
1507 r = source_io_register(s, s->enabled, events);
1512 s->io.events = events;
1513 source_set_pending(s, false);
1518 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1519 assert_return(s, -EINVAL);
1520 assert_return(revents, -EINVAL);
1521 assert_return(s->type == SOURCE_IO, -EDOM);
1522 assert_return(s->pending, -ENODATA);
1523 assert_return(!event_pid_changed(s->event), -ECHILD);
1525 *revents = s->io.revents;
1529 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1530 assert_return(s, -EINVAL);
1531 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1532 assert_return(!event_pid_changed(s->event), -ECHILD);
1534 return s->signal.sig;
1537 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1538 assert_return(s, -EINVAL);
1539 assert_return(!event_pid_changed(s->event), -ECHILD);
1541 *priority = s->priority;
1545 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1548 assert_return(s, -EINVAL);
1549 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1550 assert_return(!event_pid_changed(s->event), -ECHILD);
1552 if (s->priority == priority)
1555 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1556 struct signal_data *old, *d;
1558 /* Move us from the signalfd belonging to the old
1559 * priority to the signalfd of the new priority */
1561 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1563 s->priority = priority;
1565 r = event_make_signal_data(s->event, s->signal.sig, &d);
1567 s->priority = old->priority;
1571 event_unmask_signal_data(s->event, old, s->signal.sig);
1573 s->priority = priority;
1576 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1579 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1581 if (s->type == SOURCE_EXIT)
1582 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1587 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1588 assert_return(s, -EINVAL);
1589 assert_return(m, -EINVAL);
1590 assert_return(!event_pid_changed(s->event), -ECHILD);
1596 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1599 assert_return(s, -EINVAL);
1600 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1601 assert_return(!event_pid_changed(s->event), -ECHILD);
1603 /* If we are dead anyway, we are fine with turning off
1604 * sources, but everything else needs to fail. */
1605 if (s->event->state == SD_EVENT_FINISHED)
1606 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1608 if (s->enabled == m)
1611 if (m == SD_EVENT_OFF) {
1616 source_io_unregister(s);
1620 case SOURCE_TIME_REALTIME:
1621 case SOURCE_TIME_BOOTTIME:
1622 case SOURCE_TIME_MONOTONIC:
1623 case SOURCE_TIME_REALTIME_ALARM:
1624 case SOURCE_TIME_BOOTTIME_ALARM: {
1625 struct clock_data *d;
1628 d = event_get_clock_data(s->event, s->type);
1631 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1632 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1633 d->needs_rearm = true;
1640 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1646 assert(s->event->n_enabled_child_sources > 0);
1647 s->event->n_enabled_child_sources--;
1649 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1654 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1663 assert_not_reached("Wut? I shouldn't exist.");
1670 r = source_io_register(s, m, s->io.events);
1677 case SOURCE_TIME_REALTIME:
1678 case SOURCE_TIME_BOOTTIME:
1679 case SOURCE_TIME_MONOTONIC:
1680 case SOURCE_TIME_REALTIME_ALARM:
1681 case SOURCE_TIME_BOOTTIME_ALARM: {
1682 struct clock_data *d;
1685 d = event_get_clock_data(s->event, s->type);
1688 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1689 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1690 d->needs_rearm = true;
1698 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1700 s->enabled = SD_EVENT_OFF;
1701 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1709 if (s->enabled == SD_EVENT_OFF)
1710 s->event->n_enabled_child_sources++;
1714 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1716 s->enabled = SD_EVENT_OFF;
1717 s->event->n_enabled_child_sources--;
1718 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1726 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1735 assert_not_reached("Wut? I shouldn't exist.");
1740 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1743 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1748 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1749 assert_return(s, -EINVAL);
1750 assert_return(usec, -EINVAL);
1751 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1752 assert_return(!event_pid_changed(s->event), -ECHILD);
1754 *usec = s->time.next;
1758 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1759 struct clock_data *d;
1761 assert_return(s, -EINVAL);
1762 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1763 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1764 assert_return(!event_pid_changed(s->event), -ECHILD);
1766 s->time.next = usec;
1768 source_set_pending(s, false);
1770 d = event_get_clock_data(s->event, s->type);
1773 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1774 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1775 d->needs_rearm = true;
1780 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1781 assert_return(s, -EINVAL);
1782 assert_return(usec, -EINVAL);
1783 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1784 assert_return(!event_pid_changed(s->event), -ECHILD);
1786 *usec = s->time.accuracy;
1790 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1791 struct clock_data *d;
1793 assert_return(s, -EINVAL);
1794 assert_return(usec != (uint64_t) -1, -EINVAL);
1795 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1796 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1797 assert_return(!event_pid_changed(s->event), -ECHILD);
1800 usec = DEFAULT_ACCURACY_USEC;
1802 s->time.accuracy = usec;
1804 source_set_pending(s, false);
1806 d = event_get_clock_data(s->event, s->type);
1809 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1810 d->needs_rearm = true;
1815 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1816 assert_return(s, -EINVAL);
1817 assert_return(clock, -EINVAL);
1818 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1819 assert_return(!event_pid_changed(s->event), -ECHILD);
1821 *clock = event_source_type_to_clock(s->type);
1825 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1826 assert_return(s, -EINVAL);
1827 assert_return(pid, -EINVAL);
1828 assert_return(s->type == SOURCE_CHILD, -EDOM);
1829 assert_return(!event_pid_changed(s->event), -ECHILD);
1831 *pid = s->child.pid;
1835 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1838 assert_return(s, -EINVAL);
1839 assert_return(s->type != SOURCE_EXIT, -EDOM);
1840 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1841 assert_return(!event_pid_changed(s->event), -ECHILD);
1843 if (s->prepare == callback)
1846 if (callback && s->prepare) {
1847 s->prepare = callback;
1851 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1855 s->prepare = callback;
1858 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1862 prioq_remove(s->event->prepare, s, &s->prepare_index);
1867 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1868 assert_return(s, NULL);
1873 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1876 assert_return(s, NULL);
1879 s->userdata = userdata;
1884 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1891 if (a >= USEC_INFINITY)
1892 return USEC_INFINITY;
1897 initialize_perturb(e);
1900 Find a good time to wake up again between times a and b. We
1901 have two goals here:
1903 a) We want to wake up as seldom as possible, hence prefer
1904 later times over earlier times.
1906 b) But if we have to wake up, then let's make sure to
1907 dispatch as much as possible on the entire system.
1909 We implement this by waking up everywhere at the same time
1910 within any given minute if we can, synchronised via the
1911 perturbation value determined from the boot ID. If we can't,
1912 then we try to find the same spot within every 10s, then every 1s and
1913 then every 250ms step. Otherwise, we pick the last possible time anyway. */
1917 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1919 if (_unlikely_(c < USEC_PER_MINUTE))
1922 c -= USEC_PER_MINUTE;
1928 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1930 if (_unlikely_(c < USEC_PER_SEC*10))
1933 c -= USEC_PER_SEC*10;
1939 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1941 if (_unlikely_(c < USEC_PER_SEC))
1950 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1952 if (_unlikely_(c < USEC_PER_MSEC*250))
1955 c -= USEC_PER_MSEC*250;
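/* Worked example with made-up numbers: a = 100s, b = 125.7s,
 * e->perturb = 17.3s. The minute grid yields 120s + 17.3s = 137.3s,
 * which is past b, so one minute is subtracted, giving 77.3s, which
 * is before a. The 10s grid yields 120s + (17.3s % 10s) = 127.3s,
 * again past b, minus 10s = 117.3s, which lies in [a, b] and is
 * returned: every timer on this machine whose window covers 117.3s
 * wakes up at that very instant. */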
1964 static int event_arm_timer(
1966 struct clock_data *d) {
1968 struct itimerspec its = {};
1969 sd_event_source *a, *b;
1976 if (!d->needs_rearm)
1979 d->needs_rearm = false;
1981 a = prioq_peek(d->earliest);
1982 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
1987 if (d->next == USEC_INFINITY)
1991 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1995 d->next = USEC_INFINITY;
1999 b = prioq_peek(d->latest);
2000 assert_se(b && b->enabled != SD_EVENT_OFF);
2002 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2006 assert_se(d->fd >= 0);
2009 /* We don't want to disarm here, just arm it for some time looooong ago. */
2010 its.it_value.tv_sec = 0;
2011 its.it_value.tv_nsec = 1;
2013 timespec_store(&its.it_value, t);
2015 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2023 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2026 assert(s->type == SOURCE_IO);
2028 /* If the event source was already pending, we just OR in the
2029 * new revents, otherwise we reset the value. The ORing is
2030 * necessary to handle EPOLLONESHOT events properly where
2031 * readability might happen independently of writability, and
2032 * we need to keep track of both */
2035 s->io.revents |= revents;
2037 s->io.revents = revents;
2039 return source_set_pending(s, true);
2042 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2049 assert_return(events == EPOLLIN, -EIO);
2051 ss = read(fd, &x, sizeof(x));
2053 if (errno == EAGAIN || errno == EINTR)
2059 if (_unlikely_(ss != sizeof(x)))
2063 *next = USEC_INFINITY;
2068 static int process_timer(
2071 struct clock_data *d) {
2080 s = prioq_peek(d->earliest);
2083 s->enabled == SD_EVENT_OFF ||
2087 r = source_set_pending(s, true);
2091 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2092 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2093 d->needs_rearm = true;
2099 static int process_child(sd_event *e) {
2106 e->need_process_child = false;
2109 So, this is ugly. We iteratively invoke waitid() with P_PID
2110 + WNOHANG for each PID we wait for, instead of using
2111 P_ALL. This is because we only want to get child
2112 information of very specific child processes, and not all
2113 of them. We might not have processed the SIGCHLD event of a
2114 previous invocation and we don't want to maintain an
2115 unbounded *per-child* event queue, hence we really don't
2116 want anything flushed out of the kernel's queue that we
2117 don't care about. Since this is O(n) this means that if you
2118 have a lot of processes you probably want to handle SIGCHLD yourself.
2121 We do not reap the children here (by using WNOWAIT); that is
2122 only done after the event source is dispatched, so that the
2123 callback still sees the process as a zombie.
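/* (The final reaping of an exited child happens in source_dispatch()
 * below, right after the child callback has run and seen the
 * zombie.) */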
2126 HASHMAP_FOREACH(s, e->child_sources, i) {
2127 assert(s->type == SOURCE_CHILD);
2132 if (s->enabled == SD_EVENT_OFF)
2135 zero(s->child.siginfo);
2136 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2137 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2141 if (s->child.siginfo.si_pid != 0) {
2143 s->child.siginfo.si_code == CLD_EXITED ||
2144 s->child.siginfo.si_code == CLD_KILLED ||
2145 s->child.siginfo.si_code == CLD_DUMPED;
2147 if (!zombie && (s->child.options & WEXITED)) {
2148 /* If the child isn't dead then let's
2149 * immediately remove the state change
2150 * from the queue, since there's no
2151 * benefit in leaving it queued */
2153 assert(s->child.options & (WSTOPPED|WCONTINUED));
2154 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2157 r = source_set_pending(s, true);
2166 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2167 bool read_one = false;
2171 assert_return(events == EPOLLIN, -EIO);
2173 /* If there's a signal queued on this priority and SIGCHLD is
2174 on this priority too, then make sure to recheck the
2175 children we watch. This is because we only ever dequeue
2176 the first signal per priority, and if we dequeue one that
2177 isn't SIGCHLD, a SIGCHLD queued behind it would go unnoticed
2178 even though children we care about might have changed state;
2179 hence we need to check for them explicitly. */
2181 if (sigismember(&d->sigset, SIGCHLD))
2182 e->need_process_child = true;
2184 /* If there's already an event source pending for this
2185 * priority we don't read another */
2190 struct signalfd_siginfo si;
2192 sd_event_source *s = NULL;
2194 n = read(d->fd, &si, sizeof(si));
2196 if (errno == EAGAIN || errno == EINTR)
2202 if (_unlikely_(n != sizeof(si)))
2205 assert(SIGNAL_VALID(si.ssi_signo));
2209 if (e->signal_sources)
2210 s = e->signal_sources[si.ssi_signo];
2216 s->signal.siginfo = si;
2219 r = source_set_pending(s, true);
2227 static int source_dispatch(sd_event_source *s) {
2228 EventSourceType saved_type;
2232 assert(s->pending || s->type == SOURCE_EXIT);
2234 /* Save the event source type here, so that we still know it after the event callback, which might invalidate the source object. */
2236 saved_type = s->type;
2238 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2239 r = source_set_pending(s, false);
2244 if (s->type != SOURCE_POST) {
2248 /* If we execute a non-post source, let's mark all
2249 * post sources as pending */
2251 SET_FOREACH(z, s->event->post_sources, i) {
2252 if (z->enabled == SD_EVENT_OFF)
2255 r = source_set_pending(z, true);
2261 if (s->enabled == SD_EVENT_ONESHOT) {
2262 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2267 s->dispatching = true;
2272 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2275 case SOURCE_TIME_REALTIME:
2276 case SOURCE_TIME_BOOTTIME:
2277 case SOURCE_TIME_MONOTONIC:
2278 case SOURCE_TIME_REALTIME_ALARM:
2279 case SOURCE_TIME_BOOTTIME_ALARM:
2280 r = s->time.callback(s, s->time.next, s->userdata);
2284 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2287 case SOURCE_CHILD: {
2290 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2291 s->child.siginfo.si_code == CLD_KILLED ||
2292 s->child.siginfo.si_code == CLD_DUMPED;
2294 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2296 /* Now, reap the PID for good. */
2298 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2304 r = s->defer.callback(s, s->userdata);
2308 r = s->post.callback(s, s->userdata);
2312 r = s->exit.callback(s, s->userdata);
2315 case SOURCE_WATCHDOG:
2316 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2317 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2318 assert_not_reached("Wut? I shouldn't exist.");
2321 s->dispatching = false;
2324 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2325 strna(s->description), event_source_type_to_string(saved_type));
2330 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2335 static int event_prepare(sd_event *e) {
2343 s = prioq_peek(e->prepare);
2344 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2347 s->prepare_iteration = e->iteration;
2348 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2354 s->dispatching = true;
2355 r = s->prepare(s, s->userdata);
2356 s->dispatching = false;
2359 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2360 strna(s->description), event_source_type_to_string(s->type));
2365 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2371 static int dispatch_exit(sd_event *e) {
2377 p = prioq_peek(e->exit);
2378 if (!p || p->enabled == SD_EVENT_OFF) {
2379 e->state = SD_EVENT_FINISHED;
2385 e->state = SD_EVENT_EXITING;
2387 r = source_dispatch(p);
2389 e->state = SD_EVENT_INITIAL;
2395 static sd_event_source* event_next_pending(sd_event *e) {
2400 p = prioq_peek(e->pending);
2404 if (p->enabled == SD_EVENT_OFF)
2410 static int arm_watchdog(sd_event *e) {
2411 struct itimerspec its = {};
2416 assert(e->watchdog_fd >= 0);
2418 t = sleep_between(e,
2419 e->watchdog_last + (e->watchdog_period / 2),
2420 e->watchdog_last + (e->watchdog_period * 3 / 4));
2422 timespec_store(&its.it_value, t);
2424 /* Make sure we never set the watchdog to 0, which tells the
2425 * kernel to disable it. */
2426 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2427 its.it_value.tv_nsec = 1;
2429 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
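/* Example: with WatchdogSec=30s the service manager passes
 * WATCHDOG_USEC=30000000, so the next "WATCHDOG=1" ping is scheduled
 * into [last + 15s, last + 22.5s]; routing that window through
 * sleep_between() lets the ping coalesce with other timers. */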
2436 static int process_watchdog(sd_event *e) {
2442 /* Don't notify watchdog too often */
2443 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2446 sd_notify(false, "WATCHDOG=1");
2447 e->watchdog_last = e->timestamp.monotonic;
2449 return arm_watchdog(e);
2452 _public_ int sd_event_prepare(sd_event *e) {
2455 assert_return(e, -EINVAL);
2456 assert_return(!event_pid_changed(e), -ECHILD);
2457 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2458 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2460 if (e->exit_requested)
2465 e->state = SD_EVENT_PREPARING;
2466 r = event_prepare(e);
2467 e->state = SD_EVENT_INITIAL;
2471 r = event_arm_timer(e, &e->realtime);
2475 r = event_arm_timer(e, &e->boottime);
2479 r = event_arm_timer(e, &e->monotonic);
2483 r = event_arm_timer(e, &e->realtime_alarm);
2487 r = event_arm_timer(e, &e->boottime_alarm);
2491 if (event_next_pending(e) || e->need_process_child)
2494 e->state = SD_EVENT_ARMED;
2499 e->state = SD_EVENT_ARMED;
2500 r = sd_event_wait(e, 0);
2502 e->state = SD_EVENT_ARMED;
2507 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2508 struct epoll_event *ev_queue;
2509 unsigned ev_queue_max;
2512 assert_return(e, -EINVAL);
2513 assert_return(!event_pid_changed(e), -ECHILD);
2514 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2515 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2517 if (e->exit_requested) {
2518 e->state = SD_EVENT_PENDING;
2522 ev_queue_max = MAX(e->n_sources, 1u);
2523 ev_queue = newa(struct epoll_event, ev_queue_max);
2525 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2526 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2528 if (errno == EINTR) {
2529 e->state = SD_EVENT_PENDING;
2537 triple_timestamp_get(&e->timestamp);
2539 for (i = 0; i < m; i++) {
2541 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2542 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2544 WakeupType *t = ev_queue[i].data.ptr;
2548 case WAKEUP_EVENT_SOURCE:
2549 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2552 case WAKEUP_CLOCK_DATA: {
2553 struct clock_data *d = ev_queue[i].data.ptr;
2554 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2558 case WAKEUP_SIGNAL_DATA:
2559 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2563 assert_not_reached("Invalid wake-up pointer");
2570 r = process_watchdog(e);
2574 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2578 r = process_timer(e, e->timestamp.boottime, &e->boottime);
2582 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2586 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2590 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2594 if (e->need_process_child) {
2595 r = process_child(e);
2600 if (event_next_pending(e)) {
2601 e->state = SD_EVENT_PENDING;
2609 e->state = SD_EVENT_INITIAL;
2614 _public_ int sd_event_dispatch(sd_event *e) {
2618 assert_return(e, -EINVAL);
2619 assert_return(!event_pid_changed(e), -ECHILD);
2620 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2621 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2623 if (e->exit_requested)
2624 return dispatch_exit(e);
2626 p = event_next_pending(e);
2630 e->state = SD_EVENT_RUNNING;
2631 r = source_dispatch(p);
2632 e->state = SD_EVENT_INITIAL;
2639 e->state = SD_EVENT_INITIAL;
2644 static void event_log_delays(sd_event *e) {
2645 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2649 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2650 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2653 log_debug("Event loop iterations: %.*s", o, b);
2656 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2659 assert_return(e, -EINVAL);
2660 assert_return(!event_pid_changed(e), -ECHILD);
2661 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2662 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2664 if (e->profile_delays && e->last_run) {
2668 this_run = now(CLOCK_MONOTONIC);
2670 l = u64log2(this_run - e->last_run);
2671 assert(l < ELEMENTSOF(e->delays));
2674 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2675 event_log_delays(e);
2676 e->last_log = this_run;
2680 r = sd_event_prepare(e);
2682 /* There was nothing? Then wait... */
2683 r = sd_event_wait(e, timeout);
2685 if (e->profile_delays)
2686 e->last_run = now(CLOCK_MONOTONIC);
2689 /* There's something now, so let's dispatch it */
2690 r = sd_event_dispatch(e);
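/* The three calls above can also be driven by hand, which is how
 * sd-event is embedded into foreign event loops; a minimal sketch:
 *
 *     for (;;) {
 *             r = sd_event_prepare(e);
 *             if (r == 0)                        // nothing pending yet
 *                     r = sd_event_wait(e, (uint64_t) -1);
 *             if (r > 0)                         // something is pending
 *                     r = sd_event_dispatch(e);
 *             if (r <= 0)
 *                     break;                     // finished or error
 *     }
 */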
2700 _public_ int sd_event_loop(sd_event *e) {
2703 assert_return(e, -EINVAL);
2704 assert_return(!event_pid_changed(e), -ECHILD);
2705 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2709 while (e->state != SD_EVENT_FINISHED) {
2710 r = sd_event_run(e, (uint64_t) -1);
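/* Typical consumer (handlers and fd are hypothetical): acquire the
 * per-thread default loop, attach sources, then hand over control:
 *
 *     sd_event *e = NULL;
 *     sd_event_default(&e);
 *     sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *     sd_event_add_io(e, NULL, fd, EPOLLIN, on_io, NULL);
 *     sd_event_loop(e);
 *     sd_event_unref(e);
 */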
2722 _public_ int sd_event_get_fd(sd_event *e) {
2724 assert_return(e, -EINVAL);
2725 assert_return(!event_pid_changed(e), -ECHILD);
2730 _public_ int sd_event_get_state(sd_event *e) {
2731 assert_return(e, -EINVAL);
2732 assert_return(!event_pid_changed(e), -ECHILD);
2737 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2738 assert_return(e, -EINVAL);
2739 assert_return(code, -EINVAL);
2740 assert_return(!event_pid_changed(e), -ECHILD);
2742 if (!e->exit_requested)
2745 *code = e->exit_code;
2749 _public_ int sd_event_exit(sd_event *e, int code) {
2750 assert_return(e, -EINVAL);
2751 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2752 assert_return(!event_pid_changed(e), -ECHILD);
2754 e->exit_requested = true;
2755 e->exit_code = code;
2760 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2761 assert_return(e, -EINVAL);
2762 assert_return(usec, -EINVAL);
2763 assert_return(!event_pid_changed(e), -ECHILD);
2765 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2768 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
2769 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
2770 * the purpose of getting the time this doesn't matter. */
2771 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2774 if (!triple_timestamp_is_set(&e->timestamp)) {
2775 /* Implicitly fall back to now() if we never ran
2776 * before and thus have no cached time. */
2781 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
2785 _public_ int sd_event_default(sd_event **ret) {
2787 static thread_local sd_event *default_event = NULL;
2792 return !!default_event;
2794 if (default_event) {
2795 *ret = sd_event_ref(default_event);
2799 r = sd_event_new(&e);
2803 e->default_event_ptr = &default_event;
2811 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2812 assert_return(e, -EINVAL);
2813 assert_return(tid, -EINVAL);
2814 assert_return(!event_pid_changed(e), -ECHILD);
2824 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2827 assert_return(e, -EINVAL);
2828 assert_return(!event_pid_changed(e), -ECHILD);
2830 if (e->watchdog == !!b)
2834 struct epoll_event ev = {};
2836 r = sd_watchdog_enabled(false, &e->watchdog_period);
2840 /* Issue first ping immediately */
2841 sd_notify(false, "WATCHDOG=1");
2842 e->watchdog_last = now(CLOCK_MONOTONIC);
2844 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2845 if (e->watchdog_fd < 0)
2848 r = arm_watchdog(e);
2852 ev.events = EPOLLIN;
2853 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2855 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2862 if (e->watchdog_fd >= 0) {
2863 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2864 e->watchdog_fd = safe_close(e->watchdog_fd);
2872 e->watchdog_fd = safe_close(e->watchdog_fd);
2876 _public_ int sd_event_get_watchdog(sd_event *e) {
2877 assert_return(e, -EINVAL);
2878 assert_return(!event_pid_changed(e), -ECHILD);
2883 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2884 assert_return(e, -EINVAL);
2885 assert_return(!event_pid_changed(e), -ECHILD);
2887 *ret = e->iteration;