2 This file is part of systemd.
4 Copyright 2013 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 #include <sys/epoll.h>
21 #include <sys/timerfd.h>
24 #include "sd-daemon.h"
28 #include "alloc-util.h"
35 #include "process-util.h"
37 #include "signal-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "time-util.h"
43 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
45 typedef enum EventSourceType {
49 SOURCE_TIME_MONOTONIC,
50 SOURCE_TIME_REALTIME_ALARM,
51 SOURCE_TIME_BOOTTIME_ALARM,
58 _SOURCE_EVENT_SOURCE_TYPE_MAX,
59 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
62 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
64 [SOURCE_TIME_REALTIME] = "realtime",
65 [SOURCE_TIME_BOOTTIME] = "boottime",
66 [SOURCE_TIME_MONOTONIC] = "monotonic",
67 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
68 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
69 [SOURCE_SIGNAL] = "signal",
70 [SOURCE_CHILD] = "child",
71 [SOURCE_DEFER] = "defer",
72 [SOURCE_POST] = "post",
73 [SOURCE_EXIT] = "exit",
74 [SOURCE_WATCHDOG] = "watchdog",
77 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
79 /* All objects we use in epoll events start with a field of this type, so that
80  * we know how to dispatch them */
81 typedef enum WakeupType {
87 _WAKEUP_TYPE_INVALID = -1,
90 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
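/* Illustrative sketch (not part of the original file): because
 * sd_event_source, struct clock_data and struct signal_data all store
 * a WakeupType as their first member, the epoll dispatcher below can
 * safely do
 *
 *     WakeupType *t = ev.data.ptr;
 *     switch (*t) { ... }
 *
 * and only then cast data.ptr to the concrete type. */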
92 struct sd_event_source {
99 sd_event_handler_t prepare;
103 EventSourceType type:5;
110 unsigned pending_index;
111 unsigned prepare_index;
112 uint64_t pending_iteration;
113 uint64_t prepare_iteration;
115 LIST_FIELDS(sd_event_source, sources);
119 sd_event_io_handler_t callback;
126 sd_event_time_handler_t callback;
127 usec_t next, accuracy;
128 unsigned earliest_index;
129 unsigned latest_index;
132 sd_event_signal_handler_t callback;
133 struct signalfd_siginfo siginfo;
137 sd_event_child_handler_t callback;
143 sd_event_handler_t callback;
146 sd_event_handler_t callback;
149 sd_event_handler_t callback;
150 unsigned prioq_index;
159 /* For all clocks we maintain two priority queues each, one
160  * ordered by the earliest times the events may be
161 * dispatched, and one ordered by the latest times they must
162 * have been dispatched. The range between the top entries in
163 * the two prioqs is the time window we can freely schedule
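 *
 * Illustrative example (made-up numbers): a timer with next=1000ms and
 * accuracy=250ms sits in the "earliest" prioq keyed on 1000ms and in
 * the "latest" prioq keyed on 1250ms (next + accuracy, see
 * time_event_source_latest() below); any wakeup within
 * [1000ms, 1250ms] satisfies it.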
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a
183 sd_event_source *current;
195 /* timerfd_create() only supports these five clocks so far. We
196 * can add support for more clocks when the kernel learns to
197 * deal with them, too. */
198 struct clock_data realtime;
199 struct clock_data boottime;
200 struct clock_data monotonic;
201 struct clock_data realtime_alarm;
202 struct clock_data boottime_alarm;
206 sd_event_source **signal_sources; /* indexed by signal number */
207 Hashmap *signal_data; /* indexed by priority */
209 Hashmap *child_sources;
210 unsigned n_enabled_child_sources;
219 triple_timestamp timestamp;
222 bool exit_requested:1;
223 bool need_process_child:1;
225 bool profile_delays:1;
230 sd_event **default_event_ptr;
232 usec_t watchdog_last, watchdog_period;
236 LIST_HEAD(sd_event_source, sources);
238 usec_t last_run, last_log;
239 unsigned delays[sizeof(usec_t) * 8];
242 static void source_disconnect(sd_event_source *s);
244 static int pending_prioq_compare(const void *a, const void *b) {
245 const sd_event_source *x = a, *y = b;
250 /* Enabled ones first */
251 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
253 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
256 /* Lower priority values first */
257 if (x->priority < y->priority)
259 if (x->priority > y->priority)
262 /* Older entries first */
263 if (x->pending_iteration < y->pending_iteration)
265 if (x->pending_iteration > y->pending_iteration)
271 static int prepare_prioq_compare(const void *a, const void *b) {
272 const sd_event_source *x = a, *y = b;
277 /* Enabled ones first */
278 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
280 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
283 /* Move most recently prepared ones last, so that we can stop
284 * preparing as soon as we hit one that has already been
285 * prepared in the current iteration */
286 if (x->prepare_iteration < y->prepare_iteration)
288 if (x->prepare_iteration > y->prepare_iteration)
291 /* Lower priority values first */
292 if (x->priority < y->priority)
294 if (x->priority > y->priority)
300 static int earliest_time_prioq_compare(const void *a, const void *b) {
301 const sd_event_source *x = a, *y = b;
303 assert(EVENT_SOURCE_IS_TIME(x->type));
304 assert(x->type == y->type);
306 /* Enabled ones first */
307 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
309 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
312 /* Move the pending ones to the end */
313 if (!x->pending && y->pending)
315 if (x->pending && !y->pending)
319 if (x->time.next < y->time.next)
321 if (x->time.next > y->time.next)
327 static usec_t time_event_source_latest(const sd_event_source *s) {
328 return usec_add(s->time.next, s->time.accuracy);
331 static int latest_time_prioq_compare(const void *a, const void *b) {
332 const sd_event_source *x = a, *y = b;
334 assert(EVENT_SOURCE_IS_TIME(x->type));
335 assert(x->type == y->type);
337 /* Enabled ones first */
338 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
340 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
343 /* Move the pending ones to the end */
344 if (!x->pending && y->pending)
346 if (x->pending && !y->pending)
350 if (time_event_source_latest(x) < time_event_source_latest(y))
352 if (time_event_source_latest(x) > time_event_source_latest(y))
358 static int exit_prioq_compare(const void *a, const void *b) {
359 const sd_event_source *x = a, *y = b;
361 assert(x->type == SOURCE_EXIT);
362 assert(y->type == SOURCE_EXIT);
364 /* Enabled ones first */
365 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
367 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
370 /* Lower priority values first */
371 if (x->priority < y->priority)
373 if (x->priority > y->priority)
379 static void free_clock_data(struct clock_data *d) {
381 assert(d->wakeup == WAKEUP_CLOCK_DATA);
384 prioq_free(d->earliest);
385 prioq_free(d->latest);
388 static void event_free(sd_event *e) {
393 while ((s = e->sources)) {
395 source_disconnect(s);
396 sd_event_source_unref(s);
399 assert(e->n_sources == 0);
401 if (e->default_event_ptr)
402 *(e->default_event_ptr) = NULL;
404 safe_close(e->epoll_fd);
405 safe_close(e->watchdog_fd);
407 free_clock_data(&e->realtime);
408 free_clock_data(&e->boottime);
409 free_clock_data(&e->monotonic);
410 free_clock_data(&e->realtime_alarm);
411 free_clock_data(&e->boottime_alarm);
413 prioq_free(e->pending);
414 prioq_free(e->prepare);
417 free(e->signal_sources);
418 hashmap_free(e->signal_data);
420 hashmap_free(e->child_sources);
421 set_free(e->post_sources);
425 _public_ int sd_event_new(sd_event** ret) {
429 assert_return(ret, -EINVAL);
431 e = new0(sd_event, 1);
436 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
437 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
438 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
439 e->original_pid = getpid();
440 e->perturb = USEC_INFINITY;
442 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
446 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
447 if (e->epoll_fd < 0) {
452 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
453 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
454 e->profile_delays = true;
465 _public_ sd_event* sd_event_ref(sd_event *e) {
470 assert(e->n_ref >= 1);
476 _public_ sd_event* sd_event_unref(sd_event *e) {
481 assert(e->n_ref >= 1);
490 static bool event_pid_changed(sd_event *e) {
493 /* We don't support people creating an event loop and keeping
494 * it around over a fork(). Let's complain. */
496 return e->original_pid != getpid();
499 static void source_io_unregister(sd_event_source *s) {
503 assert(s->type == SOURCE_IO);
505 if (event_pid_changed(s->event))
508 if (!s->io.registered)
511 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
513 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
514 strna(s->description), event_source_type_to_string(s->type));
516 s->io.registered = false;
519 static int source_io_register(
524 struct epoll_event ev = {};
528 assert(s->type == SOURCE_IO);
529 assert(enabled != SD_EVENT_OFF);
534 if (enabled == SD_EVENT_ONESHOT)
535 ev.events |= EPOLLONESHOT;
537 if (s->io.registered)
538 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
540 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
544 s->io.registered = true;
549 #if 0 /// UNNEEDED by elogind
550 static clockid_t event_source_type_to_clock(EventSourceType t) {
554 case SOURCE_TIME_REALTIME:
555 return CLOCK_REALTIME;
557 case SOURCE_TIME_BOOTTIME:
558 return CLOCK_BOOTTIME;
560 case SOURCE_TIME_MONOTONIC:
561 return CLOCK_MONOTONIC;
563 case SOURCE_TIME_REALTIME_ALARM:
564 return CLOCK_REALTIME_ALARM;
566 case SOURCE_TIME_BOOTTIME_ALARM:
567 return CLOCK_BOOTTIME_ALARM;
570 return (clockid_t) -1;
575 static EventSourceType clock_to_event_source_type(clockid_t clock) {
580 return SOURCE_TIME_REALTIME;
583 return SOURCE_TIME_BOOTTIME;
585 case CLOCK_MONOTONIC:
586 return SOURCE_TIME_MONOTONIC;
588 case CLOCK_REALTIME_ALARM:
589 return SOURCE_TIME_REALTIME_ALARM;
591 case CLOCK_BOOTTIME_ALARM:
592 return SOURCE_TIME_BOOTTIME_ALARM;
595 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
599 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
604 case SOURCE_TIME_REALTIME:
607 case SOURCE_TIME_BOOTTIME:
610 case SOURCE_TIME_MONOTONIC:
611 return &e->monotonic;
613 case SOURCE_TIME_REALTIME_ALARM:
614 return &e->realtime_alarm;
616 case SOURCE_TIME_BOOTTIME_ALARM:
617 return &e->boottime_alarm;
624 static int event_make_signal_data(
627 struct signal_data **ret) {
629 struct epoll_event ev = {};
630 struct signal_data *d;
638 if (event_pid_changed(e))
641 if (e->signal_sources && e->signal_sources[sig])
642 priority = e->signal_sources[sig]->priority;
646 d = hashmap_get(e->signal_data, &priority);
648 if (sigismember(&d->sigset, sig) > 0) {
654 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
658 d = new0(struct signal_data, 1);
662 d->wakeup = WAKEUP_SIGNAL_DATA;
664 d->priority = priority;
666 r = hashmap_put(e->signal_data, &d->priority, d);
676 assert_se(sigaddset(&ss_copy, sig) >= 0);
678 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
697 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
710 d->fd = safe_close(d->fd);
711 hashmap_remove(e->signal_data, &d->priority);
718 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
722 /* Turns off the specified signal in the signal data
723 * object. If the signal mask of the object becomes empty that
726 if (sigismember(&d->sigset, sig) == 0)
729 assert_se(sigdelset(&d->sigset, sig) >= 0);
731 if (sigisemptyset(&d->sigset)) {
733 /* If the mask is now all-zero we can get rid of the structure */
734 hashmap_remove(e->signal_data, &d->priority);
742 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
743 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
746 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
747 struct signal_data *d;
748 static const int64_t zero_priority = 0;
752 /* Rechecks if the specified signal is still something we are
753 * interested in. If not, we'll unmask it, and possibly drop
754 * the signalfd for it. */
756 if (sig == SIGCHLD &&
757 e->n_enabled_child_sources > 0)
760 if (e->signal_sources &&
761 e->signal_sources[sig] &&
762 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
766 * The specified signal might be enabled in three different queues:
768 * 1) the one that belongs to the priority passed (if it is non-NULL)
769 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
770 * 3) the 0 priority (to cover the SIGCHLD case)
772 * Hence, let's remove it from all three here.
776 d = hashmap_get(e->signal_data, priority);
778 event_unmask_signal_data(e, d, sig);
781 if (e->signal_sources && e->signal_sources[sig]) {
782 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
784 event_unmask_signal_data(e, d, sig);
787 d = hashmap_get(e->signal_data, &zero_priority);
789 event_unmask_signal_data(e, d, sig);
792 static void source_disconnect(sd_event_source *s) {
800 assert(s->event->n_sources > 0);
806 source_io_unregister(s);
810 case SOURCE_TIME_REALTIME:
811 case SOURCE_TIME_BOOTTIME:
812 case SOURCE_TIME_MONOTONIC:
813 case SOURCE_TIME_REALTIME_ALARM:
814 case SOURCE_TIME_BOOTTIME_ALARM: {
815 struct clock_data *d;
817 d = event_get_clock_data(s->event, s->type);
820 prioq_remove(d->earliest, s, &s->time.earliest_index);
821 prioq_remove(d->latest, s, &s->time.latest_index);
822 d->needs_rearm = true;
827 if (s->signal.sig > 0) {
829 if (s->event->signal_sources)
830 s->event->signal_sources[s->signal.sig] = NULL;
832 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
838 if (s->child.pid > 0) {
839 if (s->enabled != SD_EVENT_OFF) {
840 assert(s->event->n_enabled_child_sources > 0);
841 s->event->n_enabled_child_sources--;
844 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
845 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
855 set_remove(s->event->post_sources, s);
859 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
863 assert_not_reached("Wut? I shouldn't exist.");
867 prioq_remove(s->event->pending, s, &s->pending_index);
870 prioq_remove(s->event->prepare, s, &s->prepare_index);
874 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
876 LIST_REMOVE(sources, event->sources, s);
880 sd_event_unref(event);
883 static void source_free(sd_event_source *s) {
886 source_disconnect(s);
887 free(s->description);
891 static int source_set_pending(sd_event_source *s, bool b) {
895 assert(s->type != SOURCE_EXIT);
903 s->pending_iteration = s->event->iteration;
905 r = prioq_put(s->event->pending, s, &s->pending_index);
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
916 d = event_get_clock_data(s->event, s->type);
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
935 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
940 s = new0(sd_event_source, 1);
946 s->floating = floating;
948 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
953 LIST_PREPEND(sources, e->sources, s);
959 _public_ int sd_event_add_io(
961 sd_event_source **ret,
964 sd_event_io_handler_t callback,
970 assert_return(e, -EINVAL);
971 assert_return(fd >= 0, -EBADF);
972 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
973 assert_return(callback, -EINVAL);
974 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
975 assert_return(!event_pid_changed(e), -ECHILD);
977 s = source_new(e, !ret, SOURCE_IO);
981 s->wakeup = WAKEUP_EVENT_SOURCE;
983 s->io.events = events;
984 s->io.callback = callback;
985 s->userdata = userdata;
986 s->enabled = SD_EVENT_ON;
988 r = source_io_register(s, s->enabled, events);
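/* Usage sketch (illustrative, not from this file): watching a socket
 * for readability:
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, on_io, NULL);
 *
 * where on_io() is a hypothetical handler with the
 * sd_event_io_handler_t signature; returning a negative errno from it
 * disables the source (see source_dispatch() below). */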
1000 static void initialize_perturb(sd_event *e) {
1001 sd_id128_t bootid = {};
1003 /* When we sleep for longer, we try to realign the wakeup to
1004 the same time within each minute/second/250ms, so that
1005 events all across the system can be coalesced into a single
1006 CPU wakeup. However, let's take some system-specific
1007 randomness for this value, so that in a network of systems
1008 with synced clocks timer events are distributed a
1009 bit. Here, we calculate a perturbation usec offset from the
1012 if (_likely_(e->perturb != USEC_INFINITY))
1015 if (sd_id128_get_boot(&bootid) >= 0)
1016 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1019 static int event_setup_timer_fd(
1021 struct clock_data *d,
1024 struct epoll_event ev = {};
1030 if (_likely_(d->fd >= 0))
1033 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1037 ev.events = EPOLLIN;
1040 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1050 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1053 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1056 _public_ int sd_event_add_time(
1058 sd_event_source **ret,
1062 sd_event_time_handler_t callback,
1065 EventSourceType type;
1067 struct clock_data *d;
1070 assert_return(e, -EINVAL);
1071 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1072 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1073 assert_return(!event_pid_changed(e), -ECHILD);
1075 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1078 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1083 callback = time_exit_callback;
1085 d = event_get_clock_data(e, type);
1088 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1092 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1097 r = event_setup_timer_fd(e, d, clock);
1102 s = source_new(e, !ret, type);
1106 s->time.next = usec;
1107 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1108 s->time.callback = callback;
1109 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1110 s->userdata = userdata;
1111 s->enabled = SD_EVENT_ONESHOT;
1113 d->needs_rearm = true;
1115 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1119 r = prioq_put(d->latest, s, &s->time.latest_index);
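/* Usage sketch (illustrative): arming a one-shot timer one second from
 * now with 100ms accuracy, using a hypothetical handler on_time():
 *
 *     uint64_t t;
 *     sd_event_now(e, CLOCK_MONOTONIC, &t);
 *     sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                       t + USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                       on_time, NULL);
 *
 * Passing accuracy=0 selects DEFAULT_ACCURACY_USEC (250ms), see
 * above. */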
1133 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1136 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1139 _public_ int sd_event_add_signal(
1141 sd_event_source **ret,
1143 sd_event_signal_handler_t callback,
1147 struct signal_data *d;
1151 assert_return(e, -EINVAL);
1152 assert_return(SIGNAL_VALID(sig), -EINVAL);
1153 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1154 assert_return(!event_pid_changed(e), -ECHILD);
1157 callback = signal_exit_callback;
1159 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1163 if (!sigismember(&ss, sig))
1166 if (!e->signal_sources) {
1167 e->signal_sources = new0(sd_event_source*, _NSIG);
1168 if (!e->signal_sources)
1170 } else if (e->signal_sources[sig])
1173 s = source_new(e, !ret, SOURCE_SIGNAL);
1177 s->signal.sig = sig;
1178 s->signal.callback = callback;
1179 s->userdata = userdata;
1180 s->enabled = SD_EVENT_ON;
1182 e->signal_sources[sig] = s;
1184 r = event_make_signal_data(e, sig, &d);
1190 /* Use the signal name as description for the event source by default */
1191 (void) sd_event_source_set_description(s, signal_to_string(sig));
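/* Usage sketch (illustrative): the signal must already be blocked in
 * the calling thread, otherwise the sigismember() check above rejects
 * the request:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *     sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *
 * on_sigterm() is a hypothetical sd_event_signal_handler_t. */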
1199 _public_ int sd_event_add_child(
1201 sd_event_source **ret,
1204 sd_event_child_handler_t callback,
1210 assert_return(e, -EINVAL);
1211 assert_return(pid > 1, -EINVAL);
1212 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1213 assert_return(options != 0, -EINVAL);
1214 assert_return(callback, -EINVAL);
1215 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1216 assert_return(!event_pid_changed(e), -ECHILD);
1218 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1222 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1225 s = source_new(e, !ret, SOURCE_CHILD);
1230 s->child.options = options;
1231 s->child.callback = callback;
1232 s->userdata = userdata;
1233 s->enabled = SD_EVENT_ONESHOT;
1235 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1241 e->n_enabled_child_sources++;
1243 r = event_make_signal_data(e, SIGCHLD, NULL);
1245 e->n_enabled_child_sources--;
1250 e->need_process_child = true;
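/* Usage sketch (illustrative): watching a forked-off child until it
 * exits, with a hypothetical sd_event_child_handler_t on_child():
 *
 *     sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 *
 * The handler sees the siginfo_t while the process is still a zombie;
 * it is reaped right after dispatch (see source_dispatch() below). */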
1258 _public_ int sd_event_add_defer(
1260 sd_event_source **ret,
1261 sd_event_handler_t callback,
1267 assert_return(e, -EINVAL);
1268 assert_return(callback, -EINVAL);
1269 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1270 assert_return(!event_pid_changed(e), -ECHILD);
1272 s = source_new(e, !ret, SOURCE_DEFER);
1276 s->defer.callback = callback;
1277 s->userdata = userdata;
1278 s->enabled = SD_EVENT_ONESHOT;
1280 r = source_set_pending(s, true);
1292 _public_ int sd_event_add_post(
1294 sd_event_source **ret,
1295 sd_event_handler_t callback,
1301 assert_return(e, -EINVAL);
1302 assert_return(callback, -EINVAL);
1303 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1304 assert_return(!event_pid_changed(e), -ECHILD);
1306 r = set_ensure_allocated(&e->post_sources, NULL);
1310 s = source_new(e, !ret, SOURCE_POST);
1314 s->post.callback = callback;
1315 s->userdata = userdata;
1316 s->enabled = SD_EVENT_ON;
1318 r = set_put(e->post_sources, s);
1330 _public_ int sd_event_add_exit(
1332 sd_event_source **ret,
1333 sd_event_handler_t callback,
1339 assert_return(e, -EINVAL);
1340 assert_return(callback, -EINVAL);
1341 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1342 assert_return(!event_pid_changed(e), -ECHILD);
1344 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1348 s = source_new(e, !ret, SOURCE_EXIT);
1352 s->exit.callback = callback;
1353 s->userdata = userdata;
1354 s->exit.prioq_index = PRIOQ_IDX_NULL;
1355 s->enabled = SD_EVENT_ONESHOT;
1357 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
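/* Usage sketch (illustrative): exit sources run in priority order once
 * sd_event_exit() has been called, before the loop finishes, with a
 * hypothetical handler on_shutdown():
 *
 *     sd_event_add_exit(e, NULL, on_shutdown, NULL);
 *     ...
 *     sd_event_exit(e, 0);
 */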
1369 #if 0 /// UNNEEDED by elogind
1370 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1375 assert(s->n_ref >= 1);
1382 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1387 assert(s->n_ref >= 1);
1390 if (s->n_ref <= 0) {
1391 /* Here's a special hack: when we are called from a
1392 * dispatch handler we won't free the event source
1393 * immediately, but we will detach the fd from the
1394 * epoll. This way it is safe for the caller to unref
1395 * the event source and immediately close the fd, but
1396 * we still retain a valid event source object after
1399 if (s->dispatching) {
1400 if (s->type == SOURCE_IO)
1401 source_io_unregister(s);
1403 source_disconnect(s);
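/* Illustrative: the above is what makes this common pattern inside an
 * I/O callback safe, even though the source object is freed later:
 *
 *     sd_event_source_unref(s);
 *     close(fd);
 *     return 0;
 */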
1411 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1412 assert_return(s, -EINVAL);
1413 assert_return(!event_pid_changed(s->event), -ECHILD);
1415 return free_and_strdup(&s->description, description);
1418 #if 0 /// UNNEEDED by elogind
1419 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1420 assert_return(s, -EINVAL);
1421 assert_return(description, -EINVAL);
1422 assert_return(s->description, -ENXIO);
1423 assert_return(!event_pid_changed(s->event), -ECHILD);
1425 *description = s->description;
1430 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1431 assert_return(s, NULL);
1436 #if 0 /// UNNEEDED by elogind
1437 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1438 assert_return(s, -EINVAL);
1439 assert_return(s->type != SOURCE_EXIT, -EDOM);
1440 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1441 assert_return(!event_pid_changed(s->event), -ECHILD);
1446 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1447 assert_return(s, -EINVAL);
1448 assert_return(s->type == SOURCE_IO, -EDOM);
1449 assert_return(!event_pid_changed(s->event), -ECHILD);
1455 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1458 assert_return(s, -EINVAL);
1459 assert_return(fd >= 0, -EBADF);
1460 assert_return(s->type == SOURCE_IO, -EDOM);
1461 assert_return(!event_pid_changed(s->event), -ECHILD);
1466 if (s->enabled == SD_EVENT_OFF) {
1468 s->io.registered = false;
1472 saved_fd = s->io.fd;
1473 assert(s->io.registered);
1476 s->io.registered = false;
1478 r = source_io_register(s, s->enabled, s->io.events);
1480 s->io.fd = saved_fd;
1481 s->io.registered = true;
1485 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1491 #if 0 /// UNNEEDED by elogind
1492 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1493 assert_return(s, -EINVAL);
1494 assert_return(events, -EINVAL);
1495 assert_return(s->type == SOURCE_IO, -EDOM);
1496 assert_return(!event_pid_changed(s->event), -ECHILD);
1498 *events = s->io.events;
1503 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1506 assert_return(s, -EINVAL);
1507 assert_return(s->type == SOURCE_IO, -EDOM);
1508 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1509 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1510 assert_return(!event_pid_changed(s->event), -ECHILD);
1512 /* edge-triggered updates are never skipped, so we can reset edges */
1513 if (s->io.events == events && !(events & EPOLLET))
1516 if (s->enabled != SD_EVENT_OFF) {
1517 r = source_io_register(s, s->enabled, events);
1522 s->io.events = events;
1523 source_set_pending(s, false);
1528 #if 0 /// UNNEEDED by elogind
1529 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1530 assert_return(s, -EINVAL);
1531 assert_return(revents, -EINVAL);
1532 assert_return(s->type == SOURCE_IO, -EDOM);
1533 assert_return(s->pending, -ENODATA);
1534 assert_return(!event_pid_changed(s->event), -ECHILD);
1536 *revents = s->io.revents;
1540 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1541 assert_return(s, -EINVAL);
1542 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1543 assert_return(!event_pid_changed(s->event), -ECHILD);
1545 return s->signal.sig;
1549 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1550 assert_return(s, -EINVAL);
1551 assert_return(!event_pid_changed(s->event), -ECHILD);
1553 *priority = s->priority;
1557 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1560 assert_return(s, -EINVAL);
1561 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1562 assert_return(!event_pid_changed(s->event), -ECHILD);
1564 if (s->priority == priority)
1567 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1568 struct signal_data *old, *d;
1570 /* Move us from the signalfd belonging to the old
1571 * priority to the signalfd of the new priority */
1573 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1575 s->priority = priority;
1577 r = event_make_signal_data(s->event, s->signal.sig, &d);
1579 s->priority = old->priority;
1583 event_unmask_signal_data(s->event, old, s->signal.sig);
1585 s->priority = priority;
1588 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1591 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1593 if (s->type == SOURCE_EXIT)
1594 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1599 #if 0 /// UNNEEDED by elogind
1600 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1601 assert_return(s, -EINVAL);
1602 assert_return(m, -EINVAL);
1603 assert_return(!event_pid_changed(s->event), -ECHILD);
1610 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1613 assert_return(s, -EINVAL);
1614 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1615 assert_return(!event_pid_changed(s->event), -ECHILD);
1617 /* If we are dead anyway, we are fine with turning off
1618 * sources, but everything else needs to fail. */
1619 if (s->event->state == SD_EVENT_FINISHED)
1620 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1622 if (s->enabled == m)
1625 if (m == SD_EVENT_OFF) {
1630 source_io_unregister(s);
1634 case SOURCE_TIME_REALTIME:
1635 case SOURCE_TIME_BOOTTIME:
1636 case SOURCE_TIME_MONOTONIC:
1637 case SOURCE_TIME_REALTIME_ALARM:
1638 case SOURCE_TIME_BOOTTIME_ALARM: {
1639 struct clock_data *d;
1642 d = event_get_clock_data(s->event, s->type);
1645 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1646 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1647 d->needs_rearm = true;
1654 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1660 assert(s->event->n_enabled_child_sources > 0);
1661 s->event->n_enabled_child_sources--;
1663 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1668 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1677 assert_not_reached("Wut? I shouldn't exist.");
1684 r = source_io_register(s, m, s->io.events);
1691 case SOURCE_TIME_REALTIME:
1692 case SOURCE_TIME_BOOTTIME:
1693 case SOURCE_TIME_MONOTONIC:
1694 case SOURCE_TIME_REALTIME_ALARM:
1695 case SOURCE_TIME_BOOTTIME_ALARM: {
1696 struct clock_data *d;
1699 d = event_get_clock_data(s->event, s->type);
1702 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1703 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1704 d->needs_rearm = true;
1712 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1714 s->enabled = SD_EVENT_OFF;
1715 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1723 if (s->enabled == SD_EVENT_OFF)
1724 s->event->n_enabled_child_sources++;
1728 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1730 s->enabled = SD_EVENT_OFF;
1731 s->event->n_enabled_child_sources--;
1732 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1740 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1749 assert_not_reached("Wut? I shouldn't exist.");
1754 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1757 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1762 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1763 assert_return(s, -EINVAL);
1764 assert_return(usec, -EINVAL);
1765 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1766 assert_return(!event_pid_changed(s->event), -ECHILD);
1768 *usec = s->time.next;
1772 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1773 struct clock_data *d;
1775 assert_return(s, -EINVAL);
1776 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1777 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1778 assert_return(!event_pid_changed(s->event), -ECHILD);
1780 s->time.next = usec;
1782 source_set_pending(s, false);
1784 d = event_get_clock_data(s->event, s->type);
1787 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1788 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1789 d->needs_rearm = true;
1794 #if 0 /// UNNEEDED by elogind
1795 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1796 assert_return(s, -EINVAL);
1797 assert_return(usec, -EINVAL);
1798 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1799 assert_return(!event_pid_changed(s->event), -ECHILD);
1801 *usec = s->time.accuracy;
1805 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1806 struct clock_data *d;
1808 assert_return(s, -EINVAL);
1809 assert_return(usec != (uint64_t) -1, -EINVAL);
1810 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1811 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1812 assert_return(!event_pid_changed(s->event), -ECHILD);
1815 usec = DEFAULT_ACCURACY_USEC;
1817 s->time.accuracy = usec;
1819 source_set_pending(s, false);
1821 d = event_get_clock_data(s->event, s->type);
1824 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1825 d->needs_rearm = true;
1830 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1831 assert_return(s, -EINVAL);
1832 assert_return(clock, -EINVAL);
1833 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1834 assert_return(!event_pid_changed(s->event), -ECHILD);
1836 *clock = event_source_type_to_clock(s->type);
1840 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1841 assert_return(s, -EINVAL);
1842 assert_return(pid, -EINVAL);
1843 assert_return(s->type == SOURCE_CHILD, -EDOM);
1844 assert_return(!event_pid_changed(s->event), -ECHILD);
1846 *pid = s->child.pid;
1851 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1854 assert_return(s, -EINVAL);
1855 assert_return(s->type != SOURCE_EXIT, -EDOM);
1856 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1857 assert_return(!event_pid_changed(s->event), -ECHILD);
1859 if (s->prepare == callback)
1862 if (callback && s->prepare) {
1863 s->prepare = callback;
1867 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1871 s->prepare = callback;
1874 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1878 prioq_remove(s->event->prepare, s, &s->prepare_index);
1883 #if 0 /// UNNEEDED by elogind
1884 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1885 assert_return(s, NULL);
1890 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1893 assert_return(s, NULL);
1896 s->userdata = userdata;
1902 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1909 if (a >= USEC_INFINITY)
1910 return USEC_INFINITY;
1915 initialize_perturb(e);
1918 Find a good time to wake up again between times a and b. We
1919 have two goals here:
1921 a) We want to wake up as seldom as possible, hence prefer
1922 later times over earlier times.
1924 b) But if we have to wake up, then let's make sure to
1925 dispatch as much as possible on the entire system.
1927 We implement this by waking up everywhere at the same time
1928 within any given minute if we can, synchronised via the
1929 perturbation value determined from the boot ID. If we can't,
1930 then we try to find the same spot within every 10s window,
1931 then every 1s, and finally every 250ms step. Otherwise, we pick the last possible time
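
Worked example (illustrative, made-up numbers): assume
e->perturb == 7 * USEC_PER_SEC and a window of a=55s, b=130s
since boot. The minute-aligned candidate is
(130 / 60) * 60s + 7s = 127s, which lies inside [a, b], so every
loop on this machine sleeping over that window wakes up at the
same 127s mark.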
1935 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1937 if (_unlikely_(c < USEC_PER_MINUTE))
1940 c -= USEC_PER_MINUTE;
1946 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1948 if (_unlikely_(c < USEC_PER_SEC*10))
1951 c -= USEC_PER_SEC*10;
1957 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1959 if (_unlikely_(c < USEC_PER_SEC))
1968 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1970 if (_unlikely_(c < USEC_PER_MSEC*250))
1973 c -= USEC_PER_MSEC*250;
1982 static int event_arm_timer(
1984 struct clock_data *d) {
1986 struct itimerspec its = {};
1987 sd_event_source *a, *b;
1994 if (!d->needs_rearm)
1997 d->needs_rearm = false;
1999 a = prioq_peek(d->earliest);
2000 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2005 if (d->next == USEC_INFINITY)
2009 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2013 d->next = USEC_INFINITY;
2017 b = prioq_peek(d->latest);
2018 assert_se(b && b->enabled != SD_EVENT_OFF);
2020 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2024 assert_se(d->fd >= 0);
2027 /* We don't want to disarm here, so just set some time looooong ago. */
2028 its.it_value.tv_sec = 0;
2029 its.it_value.tv_nsec = 1;
2031 timespec_store(&its.it_value, t);
2033 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2041 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2044 assert(s->type == SOURCE_IO);
2046 /* If the event source was already pending, we just OR in the
2047 * new revents, otherwise we reset the value. The ORing is
2048 * necessary to handle EPOLLONESHOT events properly where
2049 * readability might happen independently of writability, and
2050 * we need to keep track of both */
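/* Illustrative: with EPOLLONESHOT a first wakeup may report EPOLLIN
 * and a later one EPOLLOUT before the source gets dispatched; ORing
 * yields revents == (EPOLLIN|EPOLLOUT), so neither edge is lost. */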
2053 s->io.revents |= revents;
2055 s->io.revents = revents;
2057 return source_set_pending(s, true);
2060 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2067 assert_return(events == EPOLLIN, -EIO);
2069 ss = read(fd, &x, sizeof(x));
2071 if (errno == EAGAIN || errno == EINTR)
2077 if (_unlikely_(ss != sizeof(x)))
2081 *next = USEC_INFINITY;
2086 static int process_timer(
2089 struct clock_data *d) {
2098 s = prioq_peek(d->earliest);
2101 s->enabled == SD_EVENT_OFF ||
2105 r = source_set_pending(s, true);
2109 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2110 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2111 d->needs_rearm = true;
2117 static int process_child(sd_event *e) {
2124 e->need_process_child = false;
2127 So, this is ugly. We iteratively invoke waitid() with P_PID
2128 + WNOHANG for each PID we wait for, instead of using
2129 P_ALL. This is because we only want to get child
2130 information of very specific child processes, and not all
2131 of them. We might not have processed the SIGCHLD event of a
2132 previous invocation and we don't want to maintain an
2133 unbounded *per-child* event queue, hence we really don't
2134 want anything flushed out of the kernel's queue that we
2135 don't care about. Since this is O(n) this means that if you
2136 have a lot of processes you probably want to handle SIGCHLD
2139 We do not reap the children here (we pass WNOWAIT); reaping
2140 is only done after the event source is dispatched, so that
2141 the callback still sees the process as a zombie.
2144 HASHMAP_FOREACH(s, e->child_sources, i) {
2145 assert(s->type == SOURCE_CHILD);
2150 if (s->enabled == SD_EVENT_OFF)
2153 zero(s->child.siginfo);
2154 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2155 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2159 if (s->child.siginfo.si_pid != 0) {
2161 s->child.siginfo.si_code == CLD_EXITED ||
2162 s->child.siginfo.si_code == CLD_KILLED ||
2163 s->child.siginfo.si_code == CLD_DUMPED;
2165 if (!zombie && (s->child.options & WEXITED)) {
2166 /* If the child isn't dead then let's
2167 * immediately remove the state change
2168 * from the queue, since there's no
2169 * benefit in leaving it queued */
2171 assert(s->child.options & (WSTOPPED|WCONTINUED));
2172 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2175 r = source_set_pending(s, true);
2184 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2185 bool read_one = false;
2189 assert_return(events == EPOLLIN, -EIO);
2191 /* If there's a signal queued on this priority and SIGCHLD is
2192 on this priority too, then make sure to recheck the
2193 children we watch. This is because we only ever dequeue
2194 the first signal per priority, so if we dequeue one, a
2195 SIGCHLD enqueued behind it would go unnoticed, even though
2196 there might be higher priority children we care about; hence
2197 we need to check for them explicitly. */
2199 if (sigismember(&d->sigset, SIGCHLD))
2200 e->need_process_child = true;
2202 /* If there's already an event source pending for this
2203 * priority we don't read another */
2208 struct signalfd_siginfo si;
2210 sd_event_source *s = NULL;
2212 n = read(d->fd, &si, sizeof(si));
2214 if (errno == EAGAIN || errno == EINTR)
2220 if (_unlikely_(n != sizeof(si)))
2223 assert(SIGNAL_VALID(si.ssi_signo));
2227 if (e->signal_sources)
2228 s = e->signal_sources[si.ssi_signo];
2234 s->signal.siginfo = si;
2237 r = source_set_pending(s, true);
2245 static int source_dispatch(sd_event_source *s) {
2246 EventSourceType saved_type;
2250 assert(s->pending || s->type == SOURCE_EXIT);
2252 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2254 saved_type = s->type;
2256 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2257 r = source_set_pending(s, false);
2262 if (s->type != SOURCE_POST) {
2266 /* If we execute a non-post source, let's mark all
2267 * post sources as pending */
2269 SET_FOREACH(z, s->event->post_sources, i) {
2270 if (z->enabled == SD_EVENT_OFF)
2273 r = source_set_pending(z, true);
2279 if (s->enabled == SD_EVENT_ONESHOT) {
2280 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2285 s->dispatching = true;
2290 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2293 case SOURCE_TIME_REALTIME:
2294 case SOURCE_TIME_BOOTTIME:
2295 case SOURCE_TIME_MONOTONIC:
2296 case SOURCE_TIME_REALTIME_ALARM:
2297 case SOURCE_TIME_BOOTTIME_ALARM:
2298 r = s->time.callback(s, s->time.next, s->userdata);
2302 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2305 case SOURCE_CHILD: {
2308 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2309 s->child.siginfo.si_code == CLD_KILLED ||
2310 s->child.siginfo.si_code == CLD_DUMPED;
2312 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2314 /* Now, reap the PID for good. */
2316 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2322 r = s->defer.callback(s, s->userdata);
2326 r = s->post.callback(s, s->userdata);
2330 r = s->exit.callback(s, s->userdata);
2333 case SOURCE_WATCHDOG:
2334 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2335 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2336 assert_not_reached("Wut? I shouldn't exist.");
2339 s->dispatching = false;
2342 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2343 strna(s->description), event_source_type_to_string(saved_type));
2348 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2353 static int event_prepare(sd_event *e) {
2361 s = prioq_peek(e->prepare);
2362 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2365 s->prepare_iteration = e->iteration;
2366 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2372 s->dispatching = true;
2373 r = s->prepare(s, s->userdata);
2374 s->dispatching = false;
2377 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2378 strna(s->description), event_source_type_to_string(s->type));
2383 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2389 static int dispatch_exit(sd_event *e) {
2395 p = prioq_peek(e->exit);
2396 if (!p || p->enabled == SD_EVENT_OFF) {
2397 e->state = SD_EVENT_FINISHED;
2403 e->state = SD_EVENT_EXITING;
2405 r = source_dispatch(p);
2407 e->state = SD_EVENT_INITIAL;
2413 static sd_event_source* event_next_pending(sd_event *e) {
2418 p = prioq_peek(e->pending);
2422 if (p->enabled == SD_EVENT_OFF)
2428 static int arm_watchdog(sd_event *e) {
2429 struct itimerspec its = {};
2434 assert(e->watchdog_fd >= 0);
2436 t = sleep_between(e,
2437 e->watchdog_last + (e->watchdog_period / 2),
2438 e->watchdog_last + (e->watchdog_period * 3 / 4));
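/* Illustrative: with a 20s watchdog period and watchdog_last == 100s,
 * this schedules the next ping somewhere within [110s, 115s], i.e. in
 * the [1/2, 3/4] span of the period, coalesced via sleep_between(). */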
2440 timespec_store(&its.it_value, t);
2442 /* Make sure we never set the watchdog to 0, which tells the
2443 * kernel to disable it. */
2444 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2445 its.it_value.tv_nsec = 1;
2447 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2454 static int process_watchdog(sd_event *e) {
2460 /* Don't notify watchdog too often */
2461 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2464 sd_notify(false, "WATCHDOG=1");
2465 e->watchdog_last = e->timestamp.monotonic;
2467 return arm_watchdog(e);
2470 _public_ int sd_event_prepare(sd_event *e) {
2473 assert_return(e, -EINVAL);
2474 assert_return(!event_pid_changed(e), -ECHILD);
2475 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2476 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2478 if (e->exit_requested)
2483 e->state = SD_EVENT_PREPARING;
2484 r = event_prepare(e);
2485 e->state = SD_EVENT_INITIAL;
2489 r = event_arm_timer(e, &e->realtime);
2493 r = event_arm_timer(e, &e->boottime);
2497 r = event_arm_timer(e, &e->monotonic);
2501 r = event_arm_timer(e, &e->realtime_alarm);
2505 r = event_arm_timer(e, &e->boottime_alarm);
2509 if (event_next_pending(e) || e->need_process_child)
2512 e->state = SD_EVENT_ARMED;
2517 e->state = SD_EVENT_ARMED;
2518 r = sd_event_wait(e, 0);
2520 e->state = SD_EVENT_ARMED;
2525 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2526 struct epoll_event *ev_queue;
2527 unsigned ev_queue_max;
2530 assert_return(e, -EINVAL);
2531 assert_return(!event_pid_changed(e), -ECHILD);
2532 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2533 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2535 if (e->exit_requested) {
2536 e->state = SD_EVENT_PENDING;
2540 ev_queue_max = MAX(e->n_sources, 1u);
2541 ev_queue = newa(struct epoll_event, ev_queue_max);
2543 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2544 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2546 if (errno == EINTR) {
2547 e->state = SD_EVENT_PENDING;
2555 triple_timestamp_get(&e->timestamp);
2557 for (i = 0; i < m; i++) {
2559 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2560 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2562 WakeupType *t = ev_queue[i].data.ptr;
2566 case WAKEUP_EVENT_SOURCE:
2567 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2570 case WAKEUP_CLOCK_DATA: {
2571 struct clock_data *d = ev_queue[i].data.ptr;
2572 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2576 case WAKEUP_SIGNAL_DATA:
2577 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2581 assert_not_reached("Invalid wake-up pointer");
2588 r = process_watchdog(e);
2592 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2596 r = process_timer(e, e->timestamp.boottime, &e->boottime);
2600 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2604 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2608 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2612 if (e->need_process_child) {
2613 r = process_child(e);
2618 if (event_next_pending(e)) {
2619 e->state = SD_EVENT_PENDING;
2627 e->state = SD_EVENT_INITIAL;
2632 _public_ int sd_event_dispatch(sd_event *e) {
2636 assert_return(e, -EINVAL);
2637 assert_return(!event_pid_changed(e), -ECHILD);
2638 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2639 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2641 if (e->exit_requested)
2642 return dispatch_exit(e);
2644 p = event_next_pending(e);
2648 e->state = SD_EVENT_RUNNING;
2649 r = source_dispatch(p);
2650 e->state = SD_EVENT_INITIAL;
2657 e->state = SD_EVENT_INITIAL;
2662 static void event_log_delays(sd_event *e) {
2663 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2667 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2668 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2671 log_debug("Event loop iterations: %.*s", o, b);
2674 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2677 assert_return(e, -EINVAL);
2678 assert_return(!event_pid_changed(e), -ECHILD);
2679 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2680 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2682 if (e->profile_delays && e->last_run) {
2686 this_run = now(CLOCK_MONOTONIC);
2688 l = u64log2(this_run - e->last_run);
2689 assert(l < sizeof(e->delays));
2692 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2693 event_log_delays(e);
2694 e->last_log = this_run;
2698 r = sd_event_prepare(e);
2700 /* There was nothing? Then wait... */
2701 r = sd_event_wait(e, timeout);
2703 if (e->profile_delays)
2704 e->last_run = now(CLOCK_MONOTONIC);
2707 /* There's something now, so let's dispatch it */
2708 r = sd_event_dispatch(e);
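/* Illustrative: the three phases above are public API, so a caller
 * embedding sd-event in a foreign loop can drive one iteration by
 * hand, mirroring what sd_event_run() does:
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)
 *         r = sd_event_wait(e, timeout);
 *     if (r > 0)
 *         r = sd_event_dispatch(e);
 */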
2718 _public_ int sd_event_loop(sd_event *e) {
2721 assert_return(e, -EINVAL);
2722 assert_return(!event_pid_changed(e), -ECHILD);
2723 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2727 while (e->state != SD_EVENT_FINISHED) {
2728 r = sd_event_run(e, (uint64_t) -1);
2740 #if 0 /// UNNEEDED by elogind
2741 _public_ int sd_event_get_fd(sd_event *e) {
2743 assert_return(e, -EINVAL);
2744 assert_return(!event_pid_changed(e), -ECHILD);
2750 _public_ int sd_event_get_state(sd_event *e) {
2751 assert_return(e, -EINVAL);
2752 assert_return(!event_pid_changed(e), -ECHILD);
2757 #if 0 /// UNNEEDED by elogind
2758 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2759 assert_return(e, -EINVAL);
2760 assert_return(code, -EINVAL);
2761 assert_return(!event_pid_changed(e), -ECHILD);
2763 if (!e->exit_requested)
2766 *code = e->exit_code;
2771 _public_ int sd_event_exit(sd_event *e, int code) {
2772 assert_return(e, -EINVAL);
2773 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2774 assert_return(!event_pid_changed(e), -ECHILD);
2776 e->exit_requested = true;
2777 e->exit_code = code;
2782 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2783 assert_return(e, -EINVAL);
2784 assert_return(usec, -EINVAL);
2785 assert_return(!event_pid_changed(e), -ECHILD);
2787 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2790 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
2791 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
2792 * the purpose of getting the time this doesn't matter. */
2793 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2796 if (!triple_timestamp_is_set(&e->timestamp)) {
2797 /* Implicitly fall back to now() if we never ran
2798 * before and thus have no cached time. */
2803 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
2807 _public_ int sd_event_default(sd_event **ret) {
2809 static thread_local sd_event *default_event = NULL;
2814 return !!default_event;
2816 if (default_event) {
2817 *ret = sd_event_ref(default_event);
2821 r = sd_event_new(&e);
2825 e->default_event_ptr = &default_event;
2833 #if 0 /// UNNEEDED by elogind
2834 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2835 assert_return(e, -EINVAL);
2836 assert_return(tid, -EINVAL);
2837 assert_return(!event_pid_changed(e), -ECHILD);
2848 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2851 assert_return(e, -EINVAL);
2852 assert_return(!event_pid_changed(e), -ECHILD);
2854 if (e->watchdog == !!b)
2858 struct epoll_event ev = {};
2860 r = sd_watchdog_enabled(false, &e->watchdog_period);
2864 /* Issue first ping immediately */
2865 sd_notify(false, "WATCHDOG=1");
2866 e->watchdog_last = now(CLOCK_MONOTONIC);
2868 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2869 if (e->watchdog_fd < 0)
2872 r = arm_watchdog(e);
2876 ev.events = EPOLLIN;
2877 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2879 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2886 if (e->watchdog_fd >= 0) {
2887 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2888 e->watchdog_fd = safe_close(e->watchdog_fd);
2896 e->watchdog_fd = safe_close(e->watchdog_fd);
2900 #if 0 /// UNNEEDED by elogind
2901 _public_ int sd_event_get_watchdog(sd_event *e) {
2902 assert_return(e, -EINVAL);
2903 assert_return(!event_pid_changed(e), -ECHILD);
2909 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2910 assert_return(e, -EINVAL);
2911 assert_return(!event_pid_changed(e), -ECHILD);
2913 *ret = e->iteration;