2 This file is part of systemd.
4 Copyright 2013 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 #include <sys/epoll.h>
21 #include <sys/timerfd.h>
24 #include "sd-daemon.h"
28 #include "alloc-util.h"
35 #include "process-util.h"
37 #include "signal-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "time-util.h"
43 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* The kind of an event source. NOTE(review): the source was garbled
 * (original line numbers fused into the text, most enumerators dropped);
 * the missing enumerators are reconstructed from the indices used by
 * event_source_type_table[] below — confirm against upstream. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
62 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
64 [SOURCE_TIME_REALTIME] = "realtime",
65 [SOURCE_TIME_BOOTTIME] = "bootime",
66 [SOURCE_TIME_MONOTONIC] = "monotonic",
67 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
68 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
69 [SOURCE_SIGNAL] = "signal",
70 [SOURCE_CHILD] = "child",
71 [SOURCE_DEFER] = "defer",
72 [SOURCE_POST] = "post",
73 [SOURCE_EXIT] = "exit",
74 [SOURCE_WATCHDOG] = "watchdog",
77 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
79 /* All objects we use in epoll events start with this value, so that
80 * we know how to dispatch it */
81 typedef enum WakeupType {
87 _WAKEUP_TYPE_INVALID = -1,
90 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
92 struct sd_event_source {
99 sd_event_handler_t prepare;
103 EventSourceType type:5;
110 unsigned pending_index;
111 unsigned prepare_index;
112 unsigned pending_iteration;
113 unsigned prepare_iteration;
115 LIST_FIELDS(sd_event_source, sources);
119 sd_event_io_handler_t callback;
126 sd_event_time_handler_t callback;
127 usec_t next, accuracy;
128 unsigned earliest_index;
129 unsigned latest_index;
132 sd_event_signal_handler_t callback;
133 struct signalfd_siginfo siginfo;
137 sd_event_child_handler_t callback;
143 sd_event_handler_t callback;
146 sd_event_handler_t callback;
149 sd_event_handler_t callback;
150 unsigned prioq_index;
159 /* For all clocks we maintain two priority queues each, one
160 * ordered for the earliest times the events may be
161 * dispatched, and one ordered by the latest times they must
162 * have been dispatched. The range between the top entries in
163 * the two prioqs is the time window we can freely schedule
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a
183 sd_event_source *current;
195 /* timerfd_create() only supports these five clocks so far. We
196 * can add support for more clocks when the kernel learns to
197 * deal with them, too. */
198 struct clock_data realtime;
199 struct clock_data boottime;
200 struct clock_data monotonic;
201 struct clock_data realtime_alarm;
202 struct clock_data boottime_alarm;
206 sd_event_source **signal_sources; /* indexed by signal number */
207 Hashmap *signal_data; /* indexed by priority */
209 Hashmap *child_sources;
210 unsigned n_enabled_child_sources;
219 dual_timestamp timestamp;
220 usec_t timestamp_boottime;
223 bool exit_requested:1;
224 bool need_process_child:1;
226 bool profile_delays:1;
231 sd_event **default_event_ptr;
233 usec_t watchdog_last, watchdog_period;
237 LIST_HEAD(sd_event_source, sources);
239 usec_t last_run, last_log;
240 unsigned delays[sizeof(usec_t) * 8];
243 static void source_disconnect(sd_event_source *s);
245 static int pending_prioq_compare(const void *a, const void *b) {
246 const sd_event_source *x = a, *y = b;
251 /* Enabled ones first */
252 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
254 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
257 /* Lower priority values first */
258 if (x->priority < y->priority)
260 if (x->priority > y->priority)
263 /* Older entries first */
264 if (x->pending_iteration < y->pending_iteration)
266 if (x->pending_iteration > y->pending_iteration)
272 static int prepare_prioq_compare(const void *a, const void *b) {
273 const sd_event_source *x = a, *y = b;
278 /* Enabled ones first */
279 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
281 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
284 /* Move most recently prepared ones last, so that we can stop
285 * preparing as soon as we hit one that has already been
286 * prepared in the current iteration */
287 if (x->prepare_iteration < y->prepare_iteration)
289 if (x->prepare_iteration > y->prepare_iteration)
292 /* Lower priority values first */
293 if (x->priority < y->priority)
295 if (x->priority > y->priority)
301 static int earliest_time_prioq_compare(const void *a, const void *b) {
302 const sd_event_source *x = a, *y = b;
304 assert(EVENT_SOURCE_IS_TIME(x->type));
305 assert(x->type == y->type);
307 /* Enabled ones first */
308 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
310 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
313 /* Move the pending ones to the end */
314 if (!x->pending && y->pending)
316 if (x->pending && !y->pending)
320 if (x->time.next < y->time.next)
322 if (x->time.next > y->time.next)
328 static usec_t time_event_source_latest(const sd_event_source *s) {
329 return usec_add(s->time.next, s->time.accuracy);
332 static int latest_time_prioq_compare(const void *a, const void *b) {
333 const sd_event_source *x = a, *y = b;
335 assert(EVENT_SOURCE_IS_TIME(x->type));
336 assert(x->type == y->type);
338 /* Enabled ones first */
339 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
341 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
344 /* Move the pending ones to the end */
345 if (!x->pending && y->pending)
347 if (x->pending && !y->pending)
351 if (time_event_source_latest(x) < time_event_source_latest(y))
353 if (time_event_source_latest(x) > time_event_source_latest(y))
359 static int exit_prioq_compare(const void *a, const void *b) {
360 const sd_event_source *x = a, *y = b;
362 assert(x->type == SOURCE_EXIT);
363 assert(y->type == SOURCE_EXIT);
365 /* Enabled ones first */
366 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
368 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
371 /* Lower priority values first */
372 if (x->priority < y->priority)
374 if (x->priority > y->priority)
380 static void free_clock_data(struct clock_data *d) {
382 assert(d->wakeup == WAKEUP_CLOCK_DATA);
385 prioq_free(d->earliest);
386 prioq_free(d->latest);
389 static void event_free(sd_event *e) {
394 while ((s = e->sources)) {
396 source_disconnect(s);
397 sd_event_source_unref(s);
400 assert(e->n_sources == 0);
402 if (e->default_event_ptr)
403 *(e->default_event_ptr) = NULL;
405 safe_close(e->epoll_fd);
406 safe_close(e->watchdog_fd);
408 free_clock_data(&e->realtime);
409 free_clock_data(&e->boottime);
410 free_clock_data(&e->monotonic);
411 free_clock_data(&e->realtime_alarm);
412 free_clock_data(&e->boottime_alarm);
414 prioq_free(e->pending);
415 prioq_free(e->prepare);
418 free(e->signal_sources);
419 hashmap_free(e->signal_data);
421 hashmap_free(e->child_sources);
422 set_free(e->post_sources);
426 _public_ int sd_event_new(sd_event** ret) {
430 assert_return(ret, -EINVAL);
432 e = new0(sd_event, 1);
437 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
438 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
439 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
440 e->original_pid = getpid();
441 e->perturb = USEC_INFINITY;
443 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
447 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
448 if (e->epoll_fd < 0) {
453 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
454 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
455 e->profile_delays = true;
466 _public_ sd_event* sd_event_ref(sd_event *e) {
471 assert(e->n_ref >= 1);
477 _public_ sd_event* sd_event_unref(sd_event *e) {
482 assert(e->n_ref >= 1);
491 static bool event_pid_changed(sd_event *e) {
494 /* We don't support people creating an event loop and keeping
495 * it around over a fork(). Let's complain. */
497 return e->original_pid != getpid();
500 static void source_io_unregister(sd_event_source *s) {
504 assert(s->type == SOURCE_IO);
506 if (event_pid_changed(s->event))
509 if (!s->io.registered)
512 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
514 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
515 strna(s->description), event_source_type_to_string(s->type));
517 s->io.registered = false;
520 static int source_io_register(
525 struct epoll_event ev = {};
529 assert(s->type == SOURCE_IO);
530 assert(enabled != SD_EVENT_OFF);
535 if (enabled == SD_EVENT_ONESHOT)
536 ev.events |= EPOLLONESHOT;
538 if (s->io.registered)
539 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
541 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
545 s->io.registered = true;
550 #if 0 /// UNNEEDED by elogind
551 static clockid_t event_source_type_to_clock(EventSourceType t) {
555 case SOURCE_TIME_REALTIME:
556 return CLOCK_REALTIME;
558 case SOURCE_TIME_BOOTTIME:
559 return CLOCK_BOOTTIME;
561 case SOURCE_TIME_MONOTONIC:
562 return CLOCK_MONOTONIC;
564 case SOURCE_TIME_REALTIME_ALARM:
565 return CLOCK_REALTIME_ALARM;
567 case SOURCE_TIME_BOOTTIME_ALARM:
568 return CLOCK_BOOTTIME_ALARM;
571 return (clockid_t) -1;
576 static EventSourceType clock_to_event_source_type(clockid_t clock) {
581 return SOURCE_TIME_REALTIME;
584 return SOURCE_TIME_BOOTTIME;
586 case CLOCK_MONOTONIC:
587 return SOURCE_TIME_MONOTONIC;
589 case CLOCK_REALTIME_ALARM:
590 return SOURCE_TIME_REALTIME_ALARM;
592 case CLOCK_BOOTTIME_ALARM:
593 return SOURCE_TIME_BOOTTIME_ALARM;
596 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
600 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
605 case SOURCE_TIME_REALTIME:
608 case SOURCE_TIME_BOOTTIME:
611 case SOURCE_TIME_MONOTONIC:
612 return &e->monotonic;
614 case SOURCE_TIME_REALTIME_ALARM:
615 return &e->realtime_alarm;
617 case SOURCE_TIME_BOOTTIME_ALARM:
618 return &e->boottime_alarm;
625 static int event_make_signal_data(
628 struct signal_data **ret) {
630 struct epoll_event ev = {};
631 struct signal_data *d;
639 if (event_pid_changed(e))
642 if (e->signal_sources && e->signal_sources[sig])
643 priority = e->signal_sources[sig]->priority;
647 d = hashmap_get(e->signal_data, &priority);
649 if (sigismember(&d->sigset, sig) > 0) {
655 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
659 d = new0(struct signal_data, 1);
663 d->wakeup = WAKEUP_SIGNAL_DATA;
665 d->priority = priority;
667 r = hashmap_put(e->signal_data, &d->priority, d);
677 assert_se(sigaddset(&ss_copy, sig) >= 0);
679 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
698 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
711 d->fd = safe_close(d->fd);
712 hashmap_remove(e->signal_data, &d->priority);
719 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
723 /* Turns off the specified signal in the signal data
724 * object. If the signal mask of the object becomes empty that
727 if (sigismember(&d->sigset, sig) == 0)
730 assert_se(sigdelset(&d->sigset, sig) >= 0);
732 if (sigisemptyset(&d->sigset)) {
734 /* If all the mask is all-zero we can get rid of the structure */
735 hashmap_remove(e->signal_data, &d->priority);
744 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
745 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
748 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
749 struct signal_data *d;
750 static const int64_t zero_priority = 0;
754 /* Rechecks if the specified signal is still something we are
755 * interested in. If not, we'll unmask it, and possibly drop
756 * the signalfd for it. */
758 if (sig == SIGCHLD &&
759 e->n_enabled_child_sources > 0)
762 if (e->signal_sources &&
763 e->signal_sources[sig] &&
764 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
768 * The specified signal might be enabled in three different queues:
770 * 1) the one that belongs to the priority passed (if it is non-NULL)
771 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
772 * 3) the 0 priority (to cover the SIGCHLD case)
774 * Hence, let's remove it from all three here.
778 d = hashmap_get(e->signal_data, priority);
780 event_unmask_signal_data(e, d, sig);
783 if (e->signal_sources && e->signal_sources[sig]) {
784 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
786 event_unmask_signal_data(e, d, sig);
789 d = hashmap_get(e->signal_data, &zero_priority);
791 event_unmask_signal_data(e, d, sig);
794 static void source_disconnect(sd_event_source *s) {
802 assert(s->event->n_sources > 0);
808 source_io_unregister(s);
812 case SOURCE_TIME_REALTIME:
813 case SOURCE_TIME_BOOTTIME:
814 case SOURCE_TIME_MONOTONIC:
815 case SOURCE_TIME_REALTIME_ALARM:
816 case SOURCE_TIME_BOOTTIME_ALARM: {
817 struct clock_data *d;
819 d = event_get_clock_data(s->event, s->type);
822 prioq_remove(d->earliest, s, &s->time.earliest_index);
823 prioq_remove(d->latest, s, &s->time.latest_index);
824 d->needs_rearm = true;
829 if (s->signal.sig > 0) {
831 if (s->event->signal_sources)
832 s->event->signal_sources[s->signal.sig] = NULL;
834 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
840 if (s->child.pid > 0) {
841 if (s->enabled != SD_EVENT_OFF) {
842 assert(s->event->n_enabled_child_sources > 0);
843 s->event->n_enabled_child_sources--;
846 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
847 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
857 set_remove(s->event->post_sources, s);
861 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
865 assert_not_reached("Wut? I shouldn't exist.");
869 prioq_remove(s->event->pending, s, &s->pending_index);
872 prioq_remove(s->event->prepare, s, &s->prepare_index);
876 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
878 LIST_REMOVE(sources, event->sources, s);
882 sd_event_unref(event);
885 static void source_free(sd_event_source *s) {
888 source_disconnect(s);
889 free(s->description);
893 static int source_set_pending(sd_event_source *s, bool b) {
897 assert(s->type != SOURCE_EXIT);
905 s->pending_iteration = s->event->iteration;
907 r = prioq_put(s->event->pending, s, &s->pending_index);
913 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
915 if (EVENT_SOURCE_IS_TIME(s->type)) {
916 struct clock_data *d;
918 d = event_get_clock_data(s->event, s->type);
921 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
922 prioq_reshuffle(d->latest, s, &s->time.latest_index);
923 d->needs_rearm = true;
926 if (s->type == SOURCE_SIGNAL && !b) {
927 struct signal_data *d;
929 d = hashmap_get(s->event->signal_data, &s->priority);
930 if (d && d->current == s)
937 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
942 s = new0(sd_event_source, 1);
948 s->floating = floating;
950 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
955 LIST_PREPEND(sources, e->sources, s);
961 _public_ int sd_event_add_io(
963 sd_event_source **ret,
966 sd_event_io_handler_t callback,
972 assert_return(e, -EINVAL);
973 assert_return(fd >= 0, -EBADF);
974 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
975 assert_return(callback, -EINVAL);
976 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
977 assert_return(!event_pid_changed(e), -ECHILD);
979 s = source_new(e, !ret, SOURCE_IO);
983 s->wakeup = WAKEUP_EVENT_SOURCE;
985 s->io.events = events;
986 s->io.callback = callback;
987 s->userdata = userdata;
988 s->enabled = SD_EVENT_ON;
990 r = source_io_register(s, s->enabled, events);
1002 static void initialize_perturb(sd_event *e) {
1003 sd_id128_t bootid = {};
1005 /* When we sleep for longer, we try to realign the wakeup to
1006 the same time wihtin each minute/second/250ms, so that
1007 events all across the system can be coalesced into a single
1008 CPU wakeup. However, let's take some system-specific
1009 randomness for this value, so that in a network of systems
1010 with synced clocks timer events are distributed a
1011 bit. Here, we calculate a perturbation usec offset from the
1014 if (_likely_(e->perturb != USEC_INFINITY))
1017 if (sd_id128_get_boot(&bootid) >= 0)
1018 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1021 static int event_setup_timer_fd(
1023 struct clock_data *d,
1026 struct epoll_event ev = {};
1032 if (_likely_(d->fd >= 0))
1035 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1039 ev.events = EPOLLIN;
1042 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1052 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1055 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1058 _public_ int sd_event_add_time(
1060 sd_event_source **ret,
1064 sd_event_time_handler_t callback,
1067 EventSourceType type;
1069 struct clock_data *d;
1072 assert_return(e, -EINVAL);
1073 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1074 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1075 assert_return(!event_pid_changed(e), -ECHILD);
1078 callback = time_exit_callback;
1080 type = clock_to_event_source_type(clock);
1081 assert_return(type >= 0, -EOPNOTSUPP);
1083 d = event_get_clock_data(e, type);
1086 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1090 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1095 r = event_setup_timer_fd(e, d, clock);
1100 s = source_new(e, !ret, type);
1104 s->time.next = usec;
1105 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1106 s->time.callback = callback;
1107 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1108 s->userdata = userdata;
1109 s->enabled = SD_EVENT_ONESHOT;
1111 d->needs_rearm = true;
1113 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1117 r = prioq_put(d->latest, s, &s->time.latest_index);
1131 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1134 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1137 _public_ int sd_event_add_signal(
1139 sd_event_source **ret,
1141 sd_event_signal_handler_t callback,
1145 struct signal_data *d;
1149 assert_return(e, -EINVAL);
1150 assert_return(SIGNAL_VALID(sig), -EINVAL);
1151 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1152 assert_return(!event_pid_changed(e), -ECHILD);
1155 callback = signal_exit_callback;
1157 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1161 if (!sigismember(&ss, sig))
1164 if (!e->signal_sources) {
1165 e->signal_sources = new0(sd_event_source*, _NSIG);
1166 if (!e->signal_sources)
1168 } else if (e->signal_sources[sig])
1171 s = source_new(e, !ret, SOURCE_SIGNAL);
1175 s->signal.sig = sig;
1176 s->signal.callback = callback;
1177 s->userdata = userdata;
1178 s->enabled = SD_EVENT_ON;
1180 e->signal_sources[sig] = s;
1182 r = event_make_signal_data(e, sig, &d);
1188 /* Use the signal name as description for the event source by default */
1189 (void) sd_event_source_set_description(s, signal_to_string(sig));
1197 #if 0 /// UNNEEDED by elogind
1198 _public_ int sd_event_add_child(
1200 sd_event_source **ret,
1203 sd_event_child_handler_t callback,
1209 assert_return(e, -EINVAL);
1210 assert_return(pid > 1, -EINVAL);
1211 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1212 assert_return(options != 0, -EINVAL);
1213 assert_return(callback, -EINVAL);
1214 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1215 assert_return(!event_pid_changed(e), -ECHILD);
1217 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1221 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1224 s = source_new(e, !ret, SOURCE_CHILD);
1229 s->child.options = options;
1230 s->child.callback = callback;
1231 s->userdata = userdata;
1232 s->enabled = SD_EVENT_ONESHOT;
1234 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1240 e->n_enabled_child_sources ++;
1242 r = event_make_signal_data(e, SIGCHLD, NULL);
1244 e->n_enabled_child_sources--;
1249 e->need_process_child = true;
1257 _public_ int sd_event_add_defer(
1259 sd_event_source **ret,
1260 sd_event_handler_t callback,
1266 assert_return(e, -EINVAL);
1267 assert_return(callback, -EINVAL);
1268 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1269 assert_return(!event_pid_changed(e), -ECHILD);
1271 s = source_new(e, !ret, SOURCE_DEFER);
1275 s->defer.callback = callback;
1276 s->userdata = userdata;
1277 s->enabled = SD_EVENT_ONESHOT;
1279 r = source_set_pending(s, true);
1292 _public_ int sd_event_add_post(
1294 sd_event_source **ret,
1295 sd_event_handler_t callback,
1301 assert_return(e, -EINVAL);
1302 assert_return(callback, -EINVAL);
1303 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1304 assert_return(!event_pid_changed(e), -ECHILD);
1306 r = set_ensure_allocated(&e->post_sources, NULL);
1310 s = source_new(e, !ret, SOURCE_POST);
1314 s->post.callback = callback;
1315 s->userdata = userdata;
1316 s->enabled = SD_EVENT_ON;
1318 r = set_put(e->post_sources, s);
1330 _public_ int sd_event_add_exit(
1332 sd_event_source **ret,
1333 sd_event_handler_t callback,
1339 assert_return(e, -EINVAL);
1340 assert_return(callback, -EINVAL);
1341 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1342 assert_return(!event_pid_changed(e), -ECHILD);
1344 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1348 s = source_new(e, !ret, SOURCE_EXIT);
1352 s->exit.callback = callback;
1353 s->userdata = userdata;
1354 s->exit.prioq_index = PRIOQ_IDX_NULL;
1355 s->enabled = SD_EVENT_ONESHOT;
1357 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1369 #if 0 /// UNNEEDED by elogind
1370 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1375 assert(s->n_ref >= 1);
1382 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1387 assert(s->n_ref >= 1);
1390 if (s->n_ref <= 0) {
1391 /* Here's a special hack: when we are called from a
1392 * dispatch handler we won't free the event source
1393 * immediately, but we will detach the fd from the
1394 * epoll. This way it is safe for the caller to unref
1395 * the event source and immediately close the fd, but
1396 * we still retain a valid event source object after
1399 if (s->dispatching) {
1400 if (s->type == SOURCE_IO)
1401 source_io_unregister(s);
1403 source_disconnect(s);
1411 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1412 assert_return(s, -EINVAL);
1413 assert_return(!event_pid_changed(s->event), -ECHILD);
1415 return free_and_strdup(&s->description, description);
1418 #if 0 /// UNNEEDED by elogind
1419 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1420 assert_return(s, -EINVAL);
1421 assert_return(description, -EINVAL);
1422 assert_return(s->description, -ENXIO);
1423 assert_return(!event_pid_changed(s->event), -ECHILD);
1425 *description = s->description;
1430 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1431 assert_return(s, NULL);
1436 #if 0 /// UNNEEDED by elogind
1437 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1438 assert_return(s, -EINVAL);
1439 assert_return(s->type != SOURCE_EXIT, -EDOM);
1440 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1441 assert_return(!event_pid_changed(s->event), -ECHILD);
1446 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1447 assert_return(s, -EINVAL);
1448 assert_return(s->type == SOURCE_IO, -EDOM);
1449 assert_return(!event_pid_changed(s->event), -ECHILD);
1455 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1458 assert_return(s, -EINVAL);
1459 assert_return(fd >= 0, -EBADF);
1460 assert_return(s->type == SOURCE_IO, -EDOM);
1461 assert_return(!event_pid_changed(s->event), -ECHILD);
1466 if (s->enabled == SD_EVENT_OFF) {
1468 s->io.registered = false;
1472 saved_fd = s->io.fd;
1473 assert(s->io.registered);
1476 s->io.registered = false;
1478 r = source_io_register(s, s->enabled, s->io.events);
1480 s->io.fd = saved_fd;
1481 s->io.registered = true;
1485 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1491 #if 0 /// UNNEEDED by elogind
1492 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1493 assert_return(s, -EINVAL);
1494 assert_return(events, -EINVAL);
1495 assert_return(s->type == SOURCE_IO, -EDOM);
1496 assert_return(!event_pid_changed(s->event), -ECHILD);
1498 *events = s->io.events;
1503 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1506 assert_return(s, -EINVAL);
1507 assert_return(s->type == SOURCE_IO, -EDOM);
1508 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1509 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1510 assert_return(!event_pid_changed(s->event), -ECHILD);
1512 /* edge-triggered updates are never skipped, so we can reset edges */
1513 if (s->io.events == events && !(events & EPOLLET))
1516 if (s->enabled != SD_EVENT_OFF) {
1517 r = source_io_register(s, s->enabled, events);
1522 s->io.events = events;
1523 source_set_pending(s, false);
1528 #if 0 /// UNNEEDED by elogind
1529 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1530 assert_return(s, -EINVAL);
1531 assert_return(revents, -EINVAL);
1532 assert_return(s->type == SOURCE_IO, -EDOM);
1533 assert_return(s->pending, -ENODATA);
1534 assert_return(!event_pid_changed(s->event), -ECHILD);
1536 *revents = s->io.revents;
1540 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1541 assert_return(s, -EINVAL);
1542 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1543 assert_return(!event_pid_changed(s->event), -ECHILD);
1545 return s->signal.sig;
1548 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1549 assert_return(s, -EINVAL);
1550 assert_return(!event_pid_changed(s->event), -ECHILD);
1556 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1559 assert_return(s, -EINVAL);
1560 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1561 assert_return(!event_pid_changed(s->event), -ECHILD);
1563 if (s->priority == priority)
1566 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1567 struct signal_data *old, *d;
1569 /* Move us from the signalfd belonging to the old
1570 * priority to the signalfd of the new priority */
1572 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1574 s->priority = priority;
1576 r = event_make_signal_data(s->event, s->signal.sig, &d);
1578 s->priority = old->priority;
1582 event_unmask_signal_data(s->event, old, s->signal.sig);
1584 s->priority = priority;
1587 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1590 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1592 if (s->type == SOURCE_EXIT)
1593 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1598 #if 0 /// UNNEEDED by elogind
1599 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1600 assert_return(s, -EINVAL);
1601 assert_return(m, -EINVAL);
1602 assert_return(!event_pid_changed(s->event), -ECHILD);
1609 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1612 assert_return(s, -EINVAL);
1613 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1614 assert_return(!event_pid_changed(s->event), -ECHILD);
1616 /* If we are dead anyway, we are fine with turning off
1617 * sources, but everything else needs to fail. */
1618 if (s->event->state == SD_EVENT_FINISHED)
1619 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1621 if (s->enabled == m)
1624 if (m == SD_EVENT_OFF) {
1629 source_io_unregister(s);
1633 case SOURCE_TIME_REALTIME:
1634 case SOURCE_TIME_BOOTTIME:
1635 case SOURCE_TIME_MONOTONIC:
1636 case SOURCE_TIME_REALTIME_ALARM:
1637 case SOURCE_TIME_BOOTTIME_ALARM: {
1638 struct clock_data *d;
1641 d = event_get_clock_data(s->event, s->type);
1644 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1645 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1646 d->needs_rearm = true;
1653 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1659 assert(s->event->n_enabled_child_sources > 0);
1660 s->event->n_enabled_child_sources--;
1662 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1667 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1676 assert_not_reached("Wut? I shouldn't exist.");
1683 r = source_io_register(s, m, s->io.events);
1690 case SOURCE_TIME_REALTIME:
1691 case SOURCE_TIME_BOOTTIME:
1692 case SOURCE_TIME_MONOTONIC:
1693 case SOURCE_TIME_REALTIME_ALARM:
1694 case SOURCE_TIME_BOOTTIME_ALARM: {
1695 struct clock_data *d;
1698 d = event_get_clock_data(s->event, s->type);
1701 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1702 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1703 d->needs_rearm = true;
1711 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1713 s->enabled = SD_EVENT_OFF;
1714 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1722 if (s->enabled == SD_EVENT_OFF)
1723 s->event->n_enabled_child_sources++;
1727 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1729 s->enabled = SD_EVENT_OFF;
1730 s->event->n_enabled_child_sources--;
1731 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1739 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1748 assert_not_reached("Wut? I shouldn't exist.");
1753 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1756 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1761 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1762 assert_return(s, -EINVAL);
1763 assert_return(usec, -EINVAL);
1764 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1765 assert_return(!event_pid_changed(s->event), -ECHILD);
1767 *usec = s->time.next;
1771 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1772 struct clock_data *d;
1774 assert_return(s, -EINVAL);
1775 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1776 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1777 assert_return(!event_pid_changed(s->event), -ECHILD);
1779 s->time.next = usec;
1781 source_set_pending(s, false);
1783 d = event_get_clock_data(s->event, s->type);
1786 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1787 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1788 d->needs_rearm = true;
1793 #if 0 /// UNNEEDED by elogind
1794 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1795 assert_return(s, -EINVAL);
1796 assert_return(usec, -EINVAL);
1797 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1798 assert_return(!event_pid_changed(s->event), -ECHILD);
1800 *usec = s->time.accuracy;
1804 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1805 struct clock_data *d;
1807 assert_return(s, -EINVAL);
1808 assert_return(usec != (uint64_t) -1, -EINVAL);
1809 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1810 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1811 assert_return(!event_pid_changed(s->event), -ECHILD);
1814 usec = DEFAULT_ACCURACY_USEC;
1816 s->time.accuracy = usec;
1818 source_set_pending(s, false);
1820 d = event_get_clock_data(s->event, s->type);
1823 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1824 d->needs_rearm = true;
1829 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1830 assert_return(s, -EINVAL);
1831 assert_return(clock, -EINVAL);
1832 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1833 assert_return(!event_pid_changed(s->event), -ECHILD);
1835 *clock = event_source_type_to_clock(s->type);
1839 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1840 assert_return(s, -EINVAL);
1841 assert_return(pid, -EINVAL);
1842 assert_return(s->type == SOURCE_CHILD, -EDOM);
1843 assert_return(!event_pid_changed(s->event), -ECHILD);
1845 *pid = s->child.pid;
1850 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1853 assert_return(s, -EINVAL);
1854 assert_return(s->type != SOURCE_EXIT, -EDOM);
1855 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1856 assert_return(!event_pid_changed(s->event), -ECHILD);
1858 if (s->prepare == callback)
1861 if (callback && s->prepare) {
1862 s->prepare = callback;
1866 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1870 s->prepare = callback;
1873 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1877 prioq_remove(s->event->prepare, s, &s->prepare_index);
1882 #if 0 /// UNNEEDED by elogind
1883 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1884 assert_return(s, NULL);
1889 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1892 assert_return(s, NULL);
1895 s->userdata = userdata;
/* Pick a wake-up time within the window [a, b] (µs).
 * Wake-ups are coalesced system-wide via a per-boot "perturb" offset
 * (initialize_perturb()), trying minute, 10 s, 1 s and 250 ms granularity in
 * turn; each step snaps b down to the window boundary plus the perturbation.
 * NOTE(review): several lines (declaration of `c`, the `c >= a` success
 * checks between steps, and the final fallback return) appear elided from
 * this extraction — confirm against the full source. */
1901 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
/* An unreachable lower bound means "never wake up". */
1908 if (a >= USEC_INFINITY)
1909 return USEC_INFINITY;
1914 initialize_perturb(e);
1917 Find a good time to wake up again between times a and b. We
1918 have two goals here:
1920 a) We want to wake up as seldom as possible, hence prefer
1921 later times over earlier times.
1923 b) But if we have to wake up, then let's make sure to
1924 dispatch as much as possible on the entire system.
1926 We implement this by waking up everywhere at the same time
1927 within any given minute if we can, synchronised via the
1928 perturbation value determined from the boot ID. If we can't,
1929 then we try to find the same spot in every 10s, then 1s and
1930 then 250ms step. Otherwise, we pick the last possible time
/* Step 1: same spot within each minute. */
1934 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
/* Guard against underflow before stepping one window back. */
1936 if (_unlikely_(c < USEC_PER_MINUTE))
1939 c -= USEC_PER_MINUTE;
/* Step 2: same spot within each 10 s window. */
1945 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1947 if (_unlikely_(c < USEC_PER_SEC*10))
1950 c -= USEC_PER_SEC*10;
/* Step 3: same spot within each second. */
1956 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1958 if (_unlikely_(c < USEC_PER_SEC))
/* Step 4: same spot within each 250 ms window. */
1967 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1969 if (_unlikely_(c < USEC_PER_MSEC*250))
1972 c -= USEC_PER_MSEC*250;
/* Program the timerfd of one clock (struct clock_data) from its priority
 * queues: the earliest queue gives the soonest required elapse, the latest
 * queue the last admissible one; sleep_between() picks the coalesced spot.
 * Skips work entirely unless d->needs_rearm was set. */
1981 static int event_arm_timer(
1983 struct clock_data *d) {
1985 struct itimerspec its = {};
1986 sd_event_source *a, *b;
1993 if (!d->needs_rearm)
1996 d->needs_rearm = false;
/* No armed time source on this clock: disarm the timerfd (zeroed
 * itimerspec) unless it is already disarmed. */
1998 a = prioq_peek(d->earliest);
1999 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2004 if (d->next == USEC_INFINITY)
2008 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2012 d->next = USEC_INFINITY;
2016 b = prioq_peek(d->latest);
2017 assert_se(b && b->enabled != SD_EVENT_OFF);
2019 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2023 assert_se(d->fd >= 0);
/* We don't want to disarm here, just mean some time looooong ago. */
2027 its.it_value.tv_sec = 0;
2028 its.it_value.tv_nsec = 1;
2030 timespec_store(&its.it_value, t);
2032 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2040 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2043 assert(s->type == SOURCE_IO);
2045 /* If the event source was already pending, we just OR in the
2046 * new revents, otherwise we reset the value. The ORing is
2047 * necessary to handle EPOLLONESHOT events properly where
2048 * readability might happen independently of writability, and
2049 * we need to keep track of both */
2052 s->io.revents |= revents;
2054 s->io.revents = revents;
2056 return source_set_pending(s, true);
2059 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2066 assert_return(events == EPOLLIN, -EIO);
2068 ss = read(fd, &x, sizeof(x));
2070 if (errno == EAGAIN || errno == EINTR)
2076 if (_unlikely_(ss != sizeof(x)))
2080 *next = USEC_INFINITY;
/* Mark every time event source on one clock whose elapse time has been
 * reached (relative to timestamp `n`, elided from view here) as pending, and
 * reshuffle it in both queues so the clock can be rearmed afterwards.
 * NOTE(review): the surrounding for(;;) loop and the elapse comparison are
 * elided from this extraction. */
2085 static int process_timer(
2088 struct clock_data *d) {
2097 s = prioq_peek(d->earliest);
/* Stop conditions (partially elided): no source, disabled, or not yet due. */
2100 s->enabled == SD_EVENT_OFF ||
2104 r = source_set_pending(s, true);
2108 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2109 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2110 d->needs_rearm = true;
/* Poll every watched child via waitid(P_PID, ..., WNOHANG) and mark the
 * matching child sources pending. See the long in-body comment for why
 * P_PID-per-child is used instead of P_ALL, and why WNOWAIT keeps the
 * zombie around until the callback has run. */
2116 static int process_child(sd_event *e) {
2123 e->need_process_child = false;
2126 So, this is ugly. We iteratively invoke waitid() with P_PID
2127 + WNOHANG for each PID we wait for, instead of using
2128 P_ALL. This is because we only want to get child
2129 information of very specific child processes, and not all
2130 of them. We might not have processed the SIGCHLD even of a
2131 previous invocation and we don't want to maintain a
2132 unbounded *per-child* event queue, hence we really don't
2133 want anything flushed out of the kernel's queue that we
2134 don't care about. Since this is O(n) this means that if you
2135 have a lot of processes you probably want to handle SIGCHLD
2138 We do not reap the children here (by using WNOWAIT), this
2139 is only done after the event source is dispatched so that
2140 the callback still sees the process as a zombie.
2143 HASHMAP_FOREACH(s, e->child_sources, i) {
2144 assert(s->type == SOURCE_CHILD);
/* Skip sources that are already pending or disabled (pending check
 * elided from view). */
2149 if (s->enabled == SD_EVENT_OFF)
/* WNOWAIT only when the watcher cares about WEXITED, so the zombie
 * survives until dispatch; otherwise state changes may be consumed. */
2152 zero(s->child.siginfo);
2153 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2154 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
/* si_pid != 0 means waitid() actually delivered a state change. */
2158 if (s->child.siginfo.si_pid != 0) {
2160 s->child.siginfo.si_code == CLD_EXITED ||
2161 s->child.siginfo.si_code == CLD_KILLED ||
2162 s->child.siginfo.si_code == CLD_DUMPED;
2164 if (!zombie && (s->child.options & WEXITED)) {
2165 /* If the child isn't dead then let's
2166 * immediately remove the state change
2167 * from the queue, since there's no
2168 * benefit in leaving it queued */
2170 assert(s->child.options & (WSTOPPED|WCONTINUED));
2171 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2174 r = source_set_pending(s, true);
/* Dequeue at most one signal from the signalfd of one signal_data (one per
 * priority) and mark the matching signal source pending. Reading only one
 * signal per priority preserves dispatch ordering across priorities.
 * NOTE(review): the read loop (`for (;;)`) and the "already pending" early
 * return are elided from this extraction. */
2183 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2184 bool read_one = false;
2188 assert_return(events == EPOLLIN, -EIO);
2190 /* If there's a signal queued on this priority and SIGCHLD is
2191 on this priority too, then make sure to recheck the
2192 children we watch. This is because we only ever dequeue
2193 the first signal per priority, and if we dequeue one, and
2194 SIGCHLD might be enqueued later we wouldn't know, but we
2195 might have higher priority children we care about hence we
2196 need to check that explicitly. */
2198 if (sigismember(&d->sigset, SIGCHLD))
2199 e->need_process_child = true;
2201 /* If there's already an event source pending for this
2202 * priority we don't read another */
2207 struct signalfd_siginfo si;
2209 sd_event_source *s = NULL;
/* Non-blocking read of one siginfo record from the signalfd. */
2211 n = read(d->fd, &si, sizeof(si));
2213 if (errno == EAGAIN || errno == EINTR)
/* Short read would indicate kernel API breakage. */
2219 if (_unlikely_(n != sizeof(si)))
2222 assert(SIGNAL_VALID(si.ssi_signo));
2226 if (e->signal_sources)
2227 s = e->signal_sources[si.ssi_signo];
/* Stash the siginfo for the callback, then flag the source pending. */
2233 s->signal.siginfo = si;
2236 r = source_set_pending(s, true);
/* Invoke the user callback of one pending event source, switching on its
 * type. Handles the bookkeeping around the call: clearing pending state,
 * propagating to post sources, oneshot auto-disable, child reaping, and
 * disabling the source if its callback returns an error. */
2244 static int source_dispatch(sd_event_source *s) {
2248 assert(s->pending || s->type == SOURCE_EXIT);
/* Defer/exit sources stay pending between dispatches by design. */
2250 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2251 r = source_set_pending(s, false);
2256 if (s->type != SOURCE_POST) {
2260 /* If we execute a non-post source, let's mark all
2261 * post sources as pending */
2263 SET_FOREACH(z, s->event->post_sources, i) {
2264 if (z->enabled == SD_EVENT_OFF)
2267 r = source_set_pending(z, true);
/* ONESHOT sources are disabled before their callback runs. */
2273 if (s->enabled == SD_EVENT_ONESHOT) {
2274 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2279 s->dispatching = true;
/* Per-type callback invocation (switch skeleton partially elided). */
2284 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2287 case SOURCE_TIME_REALTIME:
2288 case SOURCE_TIME_BOOTTIME:
2289 case SOURCE_TIME_MONOTONIC:
2290 case SOURCE_TIME_REALTIME_ALARM:
2291 case SOURCE_TIME_BOOTTIME_ALARM:
2292 r = s->time.callback(s, s->time.next, s->userdata);
2296 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2299 case SOURCE_CHILD: {
2302 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2303 s->child.siginfo.si_code == CLD_KILLED ||
2304 s->child.siginfo.si_code == CLD_DUMPED;
2306 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2308 /* Now, reap the PID for good. */
2310 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2316 r = s->defer.callback(s, s->userdata);
2320 r = s->post.callback(s, s->userdata);
2324 r = s->exit.callback(s, s->userdata);
/* These types never reach dispatch. */
2327 case SOURCE_WATCHDOG:
2328 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2329 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2330 assert_not_reached("Wut? I shouldn't exist.");
2333 s->dispatching = false;
/* Callback errors disable the source rather than kill the loop. */
2336 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2337 strna(s->description), event_source_type_to_string(s->type));
2342 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Run the prepare callbacks of all queued sources exactly once per loop
 * iteration (tracked via prepare_iteration). A callback returning an error
 * disables its source. NOTE(review): the enclosing for(;;) loop is elided
 * from this extraction. */
2347 static int event_prepare(sd_event *e) {
/* Stop once we hit a source already prepared this iteration (the prepare
 * prioq orders un-prepared sources first) or a disabled one. */
2355 s = prioq_peek(e->prepare);
2356 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2359 s->prepare_iteration = e->iteration;
2360 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2366 s->dispatching = true;
2367 r = s->prepare(s, s->userdata);
2368 s->dispatching = false;
2371 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2372 strna(s->description), event_source_type_to_string(s->type));
2377 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2383 static int dispatch_exit(sd_event *e) {
2389 p = prioq_peek(e->exit);
2390 if (!p || p->enabled == SD_EVENT_OFF) {
2391 e->state = SD_EVENT_FINISHED;
2397 e->state = SD_EVENT_EXITING;
2399 r = source_dispatch(p);
2401 e->state = SD_EVENT_INITIAL;
2407 static sd_event_source* event_next_pending(sd_event *e) {
2412 p = prioq_peek(e->pending);
2416 if (p->enabled == SD_EVENT_OFF)
2422 static int arm_watchdog(sd_event *e) {
2423 struct itimerspec its = {};
2428 assert(e->watchdog_fd >= 0);
2430 t = sleep_between(e,
2431 e->watchdog_last + (e->watchdog_period / 2),
2432 e->watchdog_last + (e->watchdog_period * 3 / 4));
2434 timespec_store(&its.it_value, t);
2436 /* Make sure we never set the watchdog to 0, which tells the
2437 * kernel to disable it. */
2438 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2439 its.it_value.tv_nsec = 1;
2441 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2448 static int process_watchdog(sd_event *e) {
2454 /* Don't notify watchdog too often */
2455 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2458 sd_notify(false, "WATCHDOG=1");
2459 e->watchdog_last = e->timestamp.monotonic;
2461 return arm_watchdog(e);
/* First stage of a loop iteration: run prepare callbacks, rearm all five
 * clock timerfds, and decide whether anything is already dispatchable.
 * Transitions INITIAL -> ARMED (or, if something is pending, runs a
 * zero-timeout sd_event_wait() to collect it immediately).
 * NOTE(review): error-return checks after each call and the `pending:` label
 * are elided from this extraction. */
2464 _public_ int sd_event_prepare(sd_event *e) {
2467 assert_return(e, -EINVAL);
2468 assert_return(!event_pid_changed(e), -ECHILD);
2469 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2470 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
/* Exit was requested: skip straight to dispatching exit sources. */
2472 if (e->exit_requested)
2477 e->state = SD_EVENT_PREPARING;
2478 r = event_prepare(e);
2479 e->state = SD_EVENT_INITIAL;
/* Rearm every clock's timerfd after prepare may have changed sources. */
2483 r = event_arm_timer(e, &e->realtime);
2487 r = event_arm_timer(e, &e->boottime);
2491 r = event_arm_timer(e, &e->monotonic);
2495 r = event_arm_timer(e, &e->realtime_alarm);
2499 r = event_arm_timer(e, &e->boottime_alarm);
2503 if (event_next_pending(e) || e->need_process_child)
2506 e->state = SD_EVENT_ARMED;
/* Pending path: poll the kernel with a zero timeout so freshly armed fds
 * are collected before reporting. */
2511 e->state = SD_EVENT_ARMED;
2512 r = sd_event_wait(e, 0);
2514 e->state = SD_EVENT_ARMED;
/* Second stage of a loop iteration: block in epoll_wait() up to `timeout`
 * µs ((uint64_t) -1 = forever), then translate kernel events into pending
 * event sources via the WakeupType tag stored in each epoll data pointer.
 * Transitions ARMED -> PENDING (something dispatchable) or -> INITIAL.
 * NOTE(review): error checks, the switch skeleton, and the `finish:` path
 * are elided from this extraction. */
2519 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2520 struct epoll_event *ev_queue;
2521 unsigned ev_queue_max;
2524 assert_return(e, -EINVAL);
2525 assert_return(!event_pid_changed(e), -ECHILD);
2526 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2527 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2529 if (e->exit_requested) {
2530 e->state = SD_EVENT_PENDING;
/* Stack-allocated event buffer sized to the number of sources (min 1). */
2534 ev_queue_max = MAX(e->n_sources, 1u);
2535 ev_queue = newa(struct epoll_event, ev_queue_max);
/* Timeout converted µs -> ms, rounding up so we never wake early. */
2537 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2538 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
/* EINTR is treated as a successful (empty) wake-up. */
2540 if (errno == EINTR) {
2541 e->state = SD_EVENT_PENDING;
/* Snapshot all clocks once so every source sees one coherent "now". */
2549 dual_timestamp_get(&e->timestamp);
2550 e->timestamp_boottime = now(clock_boottime_or_monotonic());
2552 for (i = 0; i < m; i++) {
/* The watchdog fd is tagged with a plain enum, not a WakeupType ptr. */
2554 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2555 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
/* All other registrations begin with a WakeupType discriminator. */
2557 WakeupType *t = ev_queue[i].data.ptr;
2561 case WAKEUP_EVENT_SOURCE:
2562 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2565 case WAKEUP_CLOCK_DATA: {
2566 struct clock_data *d = ev_queue[i].data.ptr;
2567 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2571 case WAKEUP_SIGNAL_DATA:
2572 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2576 assert_not_reached("Invalid wake-up pointer");
2583 r = process_watchdog(e);
/* Elapse any due timers on all five clocks (alarm clocks share the
 * realtime/boottime timestamps). */
2587 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2591 r = process_timer(e, e->timestamp_boottime, &e->boottime);
2595 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2599 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2603 r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2607 if (e->need_process_child) {
2608 r = process_child(e);
2613 if (event_next_pending(e)) {
2614 e->state = SD_EVENT_PENDING;
2622 e->state = SD_EVENT_INITIAL;
2627 _public_ int sd_event_dispatch(sd_event *e) {
2631 assert_return(e, -EINVAL);
2632 assert_return(!event_pid_changed(e), -ECHILD);
2633 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2634 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2636 if (e->exit_requested)
2637 return dispatch_exit(e);
2639 p = event_next_pending(e);
2643 e->state = SD_EVENT_RUNNING;
2644 r = source_dispatch(p);
2645 e->state = SD_EVENT_INITIAL;
2652 e->state = SD_EVENT_INITIAL;
/* Emit the iteration-delay histogram (e->delays) as one debug log line.
 * The buffer is sized so every bucket fits as "%u " (DECIMAL_STR_MAX per
 * entry), so snprintf cannot truncate here.
 * NOTE(review): the declarations of `i`/`o` and the per-bucket reset
 * (e->delays[i] = 0) appear elided from this extraction. */
2657 static void event_log_delays(sd_event *e) {
2658 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2662 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2663 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2666 log_debug("Event loop iterations: %.*s", o, b);
/* Run one full loop iteration: prepare, wait (up to `timeout` µs), dispatch.
 * When profiling is enabled (e->profile_delays) a log2 histogram of the time
 * between iterations is accumulated and logged every 5 s.
 * NOTE(review): error checks, the histogram increment, and the final return
 * are elided from this extraction. */
2669 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2672 assert_return(e, -EINVAL);
2673 assert_return(!event_pid_changed(e), -ECHILD);
2674 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2675 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2677 if (e->profile_delays && e->last_run) {
2681 this_run = now(CLOCK_MONOTONIC);
/* Bucket index = floor(log2(inter-iteration delay in µs)). */
2683 l = u64log2(this_run - e->last_run);
2684 assert(l < sizeof(e->delays));
2687 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2688 event_log_delays(e);
2689 e->last_log = this_run;
2693 r = sd_event_prepare(e);
2695 /* There was nothing? Then wait... */
2696 r = sd_event_wait(e, timeout);
2698 if (e->profile_delays)
2699 e->last_run = now(CLOCK_MONOTONIC);
2702 /* There's something now, then let's dispatch it */
2703 r = sd_event_dispatch(e);
2713 #if 0 /// UNNEEDED by elogind
2714 _public_ int sd_event_loop(sd_event *e) {
2717 assert_return(e, -EINVAL);
2718 assert_return(!event_pid_changed(e), -ECHILD);
2719 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2723 while (e->state != SD_EVENT_FINISHED) {
2724 r = sd_event_run(e, (uint64_t) -1);
2736 _public_ int sd_event_get_fd(sd_event *e) {
2738 assert_return(e, -EINVAL);
2739 assert_return(!event_pid_changed(e), -ECHILD);
2745 _public_ int sd_event_get_state(sd_event *e) {
2746 assert_return(e, -EINVAL);
2747 assert_return(!event_pid_changed(e), -ECHILD);
2752 #if 0 /// UNNEEDED by elogind
2753 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2754 assert_return(e, -EINVAL);
2755 assert_return(code, -EINVAL);
2756 assert_return(!event_pid_changed(e), -ECHILD);
2758 if (!e->exit_requested)
2761 *code = e->exit_code;
2766 _public_ int sd_event_exit(sd_event *e, int code) {
2767 assert_return(e, -EINVAL);
2768 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2769 assert_return(!event_pid_changed(e), -ECHILD);
2771 e->exit_requested = true;
2772 e->exit_code = code;
2777 #if 0 /// UNNEEDED by elogind
/* Return the timestamp cached at the start of the current/last iteration for
 * the requested clock, so all callbacks in one iteration see the same "now".
 * NOTE(review): the CLOCK_REALTIME/MONOTONIC/BOOTTIME entries of the IN_SET
 * list, the fall-back now() call, and the switch skeleton are elided from
 * this extraction. */
2778 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2779 assert_return(e, -EINVAL);
2780 assert_return(usec, -EINVAL);
2781 assert_return(!event_pid_changed(e), -ECHILD);
2782 assert_return(IN_SET(clock,
2784 CLOCK_REALTIME_ALARM,
2787 CLOCK_BOOTTIME_ALARM), -EOPNOTSUPP);
2789 if (!dual_timestamp_is_set(&e->timestamp)) {
2790 /* Implicitly fall back to now() if we never ran
2791 * before and thus have no cached time. */
/* Alarm clocks share the cached timestamp of their base clock. */
2798 case CLOCK_REALTIME:
2799 case CLOCK_REALTIME_ALARM:
2800 *usec = e->timestamp.realtime;
2803 case CLOCK_MONOTONIC:
2804 *usec = e->timestamp.monotonic;
2807 case CLOCK_BOOTTIME:
2808 case CLOCK_BOOTTIME_ALARM:
2809 *usec = e->timestamp_boottime;
2813 assert_not_reached("Unknown clock?");
/* Return (creating on first use) the calling thread's default event loop.
 * With ret == NULL this only reports whether one already exists.
 * NOTE(review): the NULL-ret early-return branch, the error check after
 * sd_event_new(), and the tail (*ret assignment, return) are elided from
 * this extraction. default_event_ptr lets unref clear the thread-local. */
2820 _public_ int sd_event_default(sd_event **ret) {
2822 static thread_local sd_event *default_event = NULL;
2827 return !!default_event;
/* Existing default: hand out a new reference. */
2829 if (default_event) {
2830 *ret = sd_event_ref(default_event);
2834 r = sd_event_new(&e);
2838 e->default_event_ptr = &default_event;
2846 #if 0 /// UNNEEDED by elogind
/* Report the thread ID that owns this event loop (only set for default
 * per-thread loops). NOTE(review): the body that copies e->tid / returns
 * -ENXIO is elided from this extraction. */
2847 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2848 assert_return(e, -EINVAL);
2849 assert_return(tid, -EINVAL);
2850 assert_return(!event_pid_changed(e), -ECHILD);
/* Enable/disable sd_notify()-based watchdog keep-alive pings for this loop.
 * Enabling queries WATCHDOG_USEC via sd_watchdog_enabled(), pings once
 * immediately, creates a CLOCK_MONOTONIC timerfd and registers it in epoll
 * tagged INT_TO_PTR(SOURCE_WATCHDOG); disabling tears the fd down.
 * NOTE(review): error checks, the `fail:` cleanup label, and the final
 * `e->watchdog = !!b` / return lines are elided from this extraction. */
2861 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2864 assert_return(e, -EINVAL);
2865 assert_return(!event_pid_changed(e), -ECHILD);
/* Already in the requested state: nothing to do. */
2867 if (e->watchdog == !!b)
2871 struct epoll_event ev = {};
/* Returns 0 if the manager did not configure a watchdog at all. */
2873 r = sd_watchdog_enabled(false, &e->watchdog_period);
2877 /* Issue first ping immediately */
2878 sd_notify(false, "WATCHDOG=1");
2879 e->watchdog_last = now(CLOCK_MONOTONIC);
2881 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2882 if (e->watchdog_fd < 0)
2885 r = arm_watchdog(e);
2889 ev.events = EPOLLIN;
/* The watchdog fd is tagged with a plain enum rather than a WakeupType
 * pointer; sd_event_wait() special-cases it. */
2890 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2892 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
/* Disable path: deregister and close the timerfd if present. */
2899 if (e->watchdog_fd >= 0) {
2900 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2901 e->watchdog_fd = safe_close(e->watchdog_fd);
2909 e->watchdog_fd = safe_close(e->watchdog_fd);
2913 #if 0 /// UNNEEDED by elogind
2914 _public_ int sd_event_get_watchdog(sd_event *e) {
2915 assert_return(e, -EINVAL);
2916 assert_return(!event_pid_changed(e), -ECHILD);