1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
26 #include "sd-daemon.h"
30 #include "alloc-util.h"
37 #include "process-util.h"
39 #include "signal-util.h"
40 #include "string-table.h"
41 #include "string-util.h"
42 #include "time-util.h"
/* Default accuracy window granted to timer event sources that do not
 * request one explicitly: 250 ms, which permits wakeup coalescing. */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* The kinds of event sources an sd_event loop can host. The order must
 * match event_source_type_table[] below. NOTE(review): members not visible
 * in the truncated original were reconstructed from upstream — confirm. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
64 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
66 [SOURCE_TIME_REALTIME] = "realtime",
67 [SOURCE_TIME_BOOTTIME] = "bootime",
68 [SOURCE_TIME_MONOTONIC] = "monotonic",
69 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
70 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
71 [SOURCE_SIGNAL] = "signal",
72 [SOURCE_CHILD] = "child",
73 [SOURCE_DEFER] = "defer",
74 [SOURCE_POST] = "post",
75 [SOURCE_EXIT] = "exit",
76 [SOURCE_WATCHDOG] = "watchdog",
79 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

/* True for every event source type driven by one of the timerfd clocks. */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
94 struct sd_event_source {
101 sd_event_handler_t prepare;
105 EventSourceType type:5;
112 unsigned pending_index;
113 unsigned prepare_index;
114 unsigned pending_iteration;
115 unsigned prepare_iteration;
117 LIST_FIELDS(sd_event_source, sources);
121 sd_event_io_handler_t callback;
128 sd_event_time_handler_t callback;
129 usec_t next, accuracy;
130 unsigned earliest_index;
131 unsigned latest_index;
134 sd_event_signal_handler_t callback;
135 struct signalfd_siginfo siginfo;
139 sd_event_child_handler_t callback;
145 sd_event_handler_t callback;
148 sd_event_handler_t callback;
151 sd_event_handler_t callback;
152 unsigned prioq_index;
161 /* For all clocks we maintain two priority queues each, one
162 * ordered for the earliest times the events may be
163 * dispatched, and one ordered by the latest times they must
164 * have been dispatched. The range between the top entries in
165 * the two prioqs is the time window we can freely schedule
178 /* For each priority we maintain one signal fd, so that we
179 * only have to dequeue a single event per priority at a
185 sd_event_source *current;
197 /* timerfd_create() only supports these five clocks so far. We
198 * can add support for more clocks when the kernel learns to
199 * deal with them, too. */
200 struct clock_data realtime;
201 struct clock_data boottime;
202 struct clock_data monotonic;
203 struct clock_data realtime_alarm;
204 struct clock_data boottime_alarm;
208 sd_event_source **signal_sources; /* indexed by signal number */
209 Hashmap *signal_data; /* indexed by priority */
211 Hashmap *child_sources;
212 unsigned n_enabled_child_sources;
221 dual_timestamp timestamp;
222 usec_t timestamp_boottime;
225 bool exit_requested:1;
226 bool need_process_child:1;
228 bool profile_delays:1;
233 sd_event **default_event_ptr;
235 usec_t watchdog_last, watchdog_period;
239 LIST_HEAD(sd_event_source, sources);
241 usec_t last_run, last_log;
242 unsigned delays[sizeof(usec_t) * 8];
245 static void source_disconnect(sd_event_source *s);
247 static int pending_prioq_compare(const void *a, const void *b) {
248 const sd_event_source *x = a, *y = b;
253 /* Enabled ones first */
254 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
256 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
259 /* Lower priority values first */
260 if (x->priority < y->priority)
262 if (x->priority > y->priority)
265 /* Older entries first */
266 if (x->pending_iteration < y->pending_iteration)
268 if (x->pending_iteration > y->pending_iteration)
274 static int prepare_prioq_compare(const void *a, const void *b) {
275 const sd_event_source *x = a, *y = b;
280 /* Enabled ones first */
281 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
283 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
286 /* Move most recently prepared ones last, so that we can stop
287 * preparing as soon as we hit one that has already been
288 * prepared in the current iteration */
289 if (x->prepare_iteration < y->prepare_iteration)
291 if (x->prepare_iteration > y->prepare_iteration)
294 /* Lower priority values first */
295 if (x->priority < y->priority)
297 if (x->priority > y->priority)
303 static int earliest_time_prioq_compare(const void *a, const void *b) {
304 const sd_event_source *x = a, *y = b;
306 assert(EVENT_SOURCE_IS_TIME(x->type));
307 assert(x->type == y->type);
309 /* Enabled ones first */
310 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
312 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
315 /* Move the pending ones to the end */
316 if (!x->pending && y->pending)
318 if (x->pending && !y->pending)
322 if (x->time.next < y->time.next)
324 if (x->time.next > y->time.next)
330 static usec_t time_event_source_latest(const sd_event_source *s) {
331 return usec_add(s->time.next, s->time.accuracy);
334 static int latest_time_prioq_compare(const void *a, const void *b) {
335 const sd_event_source *x = a, *y = b;
337 assert(EVENT_SOURCE_IS_TIME(x->type));
338 assert(x->type == y->type);
340 /* Enabled ones first */
341 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
343 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
346 /* Move the pending ones to the end */
347 if (!x->pending && y->pending)
349 if (x->pending && !y->pending)
353 if (time_event_source_latest(x) < time_event_source_latest(y))
355 if (time_event_source_latest(x) > time_event_source_latest(y))
361 static int exit_prioq_compare(const void *a, const void *b) {
362 const sd_event_source *x = a, *y = b;
364 assert(x->type == SOURCE_EXIT);
365 assert(y->type == SOURCE_EXIT);
367 /* Enabled ones first */
368 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
370 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
373 /* Lower priority values first */
374 if (x->priority < y->priority)
376 if (x->priority > y->priority)
382 static void free_clock_data(struct clock_data *d) {
384 assert(d->wakeup == WAKEUP_CLOCK_DATA);
387 prioq_free(d->earliest);
388 prioq_free(d->latest);
391 static void event_free(sd_event *e) {
396 while ((s = e->sources)) {
398 source_disconnect(s);
399 sd_event_source_unref(s);
402 assert(e->n_sources == 0);
404 if (e->default_event_ptr)
405 *(e->default_event_ptr) = NULL;
407 safe_close(e->epoll_fd);
408 safe_close(e->watchdog_fd);
410 free_clock_data(&e->realtime);
411 free_clock_data(&e->boottime);
412 free_clock_data(&e->monotonic);
413 free_clock_data(&e->realtime_alarm);
414 free_clock_data(&e->boottime_alarm);
416 prioq_free(e->pending);
417 prioq_free(e->prepare);
420 free(e->signal_sources);
421 hashmap_free(e->signal_data);
423 hashmap_free(e->child_sources);
424 set_free(e->post_sources);
428 _public_ int sd_event_new(sd_event** ret) {
432 assert_return(ret, -EINVAL);
434 e = new0(sd_event, 1);
439 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
440 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
441 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
442 e->original_pid = getpid();
443 e->perturb = USEC_INFINITY;
445 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
449 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
450 if (e->epoll_fd < 0) {
455 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
456 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
457 e->profile_delays = true;
468 _public_ sd_event* sd_event_ref(sd_event *e) {
469 assert_return(e, NULL);
471 assert(e->n_ref >= 1);
477 _public_ sd_event* sd_event_unref(sd_event *e) {
482 assert(e->n_ref >= 1);
491 static bool event_pid_changed(sd_event *e) {
494 /* We don't support people creating an event loop and keeping
495 * it around over a fork(). Let's complain. */
497 return e->original_pid != getpid();
500 static void source_io_unregister(sd_event_source *s) {
504 assert(s->type == SOURCE_IO);
506 if (event_pid_changed(s->event))
509 if (!s->io.registered)
512 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
514 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
515 strna(s->description), event_source_type_to_string(s->type));
517 s->io.registered = false;
520 static int source_io_register(
525 struct epoll_event ev = {};
529 assert(s->type == SOURCE_IO);
530 assert(enabled != SD_EVENT_OFF);
535 if (enabled == SD_EVENT_ONESHOT)
536 ev.events |= EPOLLONESHOT;
538 if (s->io.registered)
539 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
541 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
545 s->io.registered = true;
#if 0 /// UNNEEDED by elogind
/* Maps a time event source type back to its POSIX clock id, or
 * (clockid_t) -1 for non-time types. */
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
576 static EventSourceType clock_to_event_source_type(clockid_t clock) {
581 return SOURCE_TIME_REALTIME;
584 return SOURCE_TIME_BOOTTIME;
586 case CLOCK_MONOTONIC:
587 return SOURCE_TIME_MONOTONIC;
589 case CLOCK_REALTIME_ALARM:
590 return SOURCE_TIME_REALTIME_ALARM;
592 case CLOCK_BOOTTIME_ALARM:
593 return SOURCE_TIME_BOOTTIME_ALARM;
596 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
600 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
605 case SOURCE_TIME_REALTIME:
608 case SOURCE_TIME_BOOTTIME:
611 case SOURCE_TIME_MONOTONIC:
612 return &e->monotonic;
614 case SOURCE_TIME_REALTIME_ALARM:
615 return &e->realtime_alarm;
617 case SOURCE_TIME_BOOTTIME_ALARM:
618 return &e->boottime_alarm;
625 static int event_make_signal_data(
628 struct signal_data **ret) {
630 struct epoll_event ev = {};
631 struct signal_data *d;
639 if (event_pid_changed(e))
642 if (e->signal_sources && e->signal_sources[sig])
643 priority = e->signal_sources[sig]->priority;
647 d = hashmap_get(e->signal_data, &priority);
649 if (sigismember(&d->sigset, sig) > 0) {
655 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
659 d = new0(struct signal_data, 1);
663 d->wakeup = WAKEUP_SIGNAL_DATA;
665 d->priority = priority;
667 r = hashmap_put(e->signal_data, &d->priority, d);
675 assert_se(sigaddset(&ss_copy, sig) >= 0);
677 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
696 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
709 d->fd = safe_close(d->fd);
710 hashmap_remove(e->signal_data, &d->priority);
717 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
721 /* Turns off the specified signal in the signal data
722 * object. If the signal mask of the object becomes empty that
725 if (sigismember(&d->sigset, sig) == 0)
728 assert_se(sigdelset(&d->sigset, sig) >= 0);
730 if (sigisemptyset(&d->sigset)) {
732 /* If all the mask is all-zero we can get rid of the structure */
733 hashmap_remove(e->signal_data, &d->priority);
742 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
743 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
746 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
747 struct signal_data *d;
748 static const int64_t zero_priority = 0;
752 /* Rechecks if the specified signal is still something we are
753 * interested in. If not, we'll unmask it, and possibly drop
754 * the signalfd for it. */
756 if (sig == SIGCHLD &&
757 e->n_enabled_child_sources > 0)
760 if (e->signal_sources &&
761 e->signal_sources[sig] &&
762 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
766 * The specified signal might be enabled in three different queues:
768 * 1) the one that belongs to the priority passed (if it is non-NULL)
769 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
770 * 3) the 0 priority (to cover the SIGCHLD case)
772 * Hence, let's remove it from all three here.
776 d = hashmap_get(e->signal_data, priority);
778 event_unmask_signal_data(e, d, sig);
781 if (e->signal_sources && e->signal_sources[sig]) {
782 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
784 event_unmask_signal_data(e, d, sig);
787 d = hashmap_get(e->signal_data, &zero_priority);
789 event_unmask_signal_data(e, d, sig);
792 static void source_disconnect(sd_event_source *s) {
800 assert(s->event->n_sources > 0);
806 source_io_unregister(s);
810 case SOURCE_TIME_REALTIME:
811 case SOURCE_TIME_BOOTTIME:
812 case SOURCE_TIME_MONOTONIC:
813 case SOURCE_TIME_REALTIME_ALARM:
814 case SOURCE_TIME_BOOTTIME_ALARM: {
815 struct clock_data *d;
817 d = event_get_clock_data(s->event, s->type);
820 prioq_remove(d->earliest, s, &s->time.earliest_index);
821 prioq_remove(d->latest, s, &s->time.latest_index);
822 d->needs_rearm = true;
827 if (s->signal.sig > 0) {
829 if (s->event->signal_sources)
830 s->event->signal_sources[s->signal.sig] = NULL;
832 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
838 if (s->child.pid > 0) {
839 if (s->enabled != SD_EVENT_OFF) {
840 assert(s->event->n_enabled_child_sources > 0);
841 s->event->n_enabled_child_sources--;
844 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
845 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
855 set_remove(s->event->post_sources, s);
859 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
863 assert_not_reached("Wut? I shouldn't exist.");
867 prioq_remove(s->event->pending, s, &s->pending_index);
870 prioq_remove(s->event->prepare, s, &s->prepare_index);
874 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
876 LIST_REMOVE(sources, event->sources, s);
880 sd_event_unref(event);
883 static void source_free(sd_event_source *s) {
886 source_disconnect(s);
887 free(s->description);
891 static int source_set_pending(sd_event_source *s, bool b) {
895 assert(s->type != SOURCE_EXIT);
903 s->pending_iteration = s->event->iteration;
905 r = prioq_put(s->event->pending, s, &s->pending_index);
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
916 d = event_get_clock_data(s->event, s->type);
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
935 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
940 s = new0(sd_event_source, 1);
946 s->floating = floating;
948 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
953 LIST_PREPEND(sources, e->sources, s);
959 _public_ int sd_event_add_io(
961 sd_event_source **ret,
964 sd_event_io_handler_t callback,
970 assert_return(e, -EINVAL);
971 assert_return(fd >= 0, -EBADF);
972 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
973 assert_return(callback, -EINVAL);
974 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
975 assert_return(!event_pid_changed(e), -ECHILD);
977 s = source_new(e, !ret, SOURCE_IO);
981 s->wakeup = WAKEUP_EVENT_SOURCE;
983 s->io.events = events;
984 s->io.callback = callback;
985 s->userdata = userdata;
986 s->enabled = SD_EVENT_ON;
988 r = source_io_register(s, s->enabled, events);
1000 static void initialize_perturb(sd_event *e) {
1001 sd_id128_t bootid = {};
1003 /* When we sleep for longer, we try to realign the wakeup to
1004 the same time wihtin each minute/second/250ms, so that
1005 events all across the system can be coalesced into a single
1006 CPU wakeup. However, let's take some system-specific
1007 randomness for this value, so that in a network of systems
1008 with synced clocks timer events are distributed a
1009 bit. Here, we calculate a perturbation usec offset from the
1012 if (_likely_(e->perturb != USEC_INFINITY))
1015 if (sd_id128_get_boot(&bootid) >= 0)
1016 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1019 static int event_setup_timer_fd(
1021 struct clock_data *d,
1024 struct epoll_event ev = {};
1030 if (_likely_(d->fd >= 0))
1033 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1037 ev.events = EPOLLIN;
1040 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1050 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1053 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1056 _public_ int sd_event_add_time(
1058 sd_event_source **ret,
1062 sd_event_time_handler_t callback,
1065 EventSourceType type;
1067 struct clock_data *d;
1070 assert_return(e, -EINVAL);
1071 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1072 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1073 assert_return(!event_pid_changed(e), -ECHILD);
1076 callback = time_exit_callback;
1078 type = clock_to_event_source_type(clock);
1079 assert_return(type >= 0, -EOPNOTSUPP);
1081 d = event_get_clock_data(e, type);
1084 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1088 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1093 r = event_setup_timer_fd(e, d, clock);
1098 s = source_new(e, !ret, type);
1102 s->time.next = usec;
1103 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1104 s->time.callback = callback;
1105 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1106 s->userdata = userdata;
1107 s->enabled = SD_EVENT_ONESHOT;
1109 d->needs_rearm = true;
1111 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1115 r = prioq_put(d->latest, s, &s->time.latest_index);
1129 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1132 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1135 _public_ int sd_event_add_signal(
1137 sd_event_source **ret,
1139 sd_event_signal_handler_t callback,
1143 struct signal_data *d;
1147 assert_return(e, -EINVAL);
1148 assert_return(sig > 0, -EINVAL);
1149 assert_return(sig < _NSIG, -EINVAL);
1150 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1151 assert_return(!event_pid_changed(e), -ECHILD);
1154 callback = signal_exit_callback;
1156 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1160 if (!sigismember(&ss, sig))
1163 if (!e->signal_sources) {
1164 e->signal_sources = new0(sd_event_source*, _NSIG);
1165 if (!e->signal_sources)
1167 } else if (e->signal_sources[sig])
1170 s = source_new(e, !ret, SOURCE_SIGNAL);
1174 s->signal.sig = sig;
1175 s->signal.callback = callback;
1176 s->userdata = userdata;
1177 s->enabled = SD_EVENT_ON;
1179 e->signal_sources[sig] = s;
1181 r = event_make_signal_data(e, sig, &d);
1187 /* Use the signal name as description for the event source by default */
1188 (void) sd_event_source_set_description(s, signal_to_string(sig));
#if 0 /// UNNEEDED by elogind
/* Adds a child-watch source for the given pid (waitid()-style options).
 * Only one watch per pid is permitted (-EBUSY otherwise); SIGCHLD must be
 * routable via a signal_data object. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1256 _public_ int sd_event_add_defer(
1258 sd_event_source **ret,
1259 sd_event_handler_t callback,
1265 assert_return(e, -EINVAL);
1266 assert_return(callback, -EINVAL);
1267 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1268 assert_return(!event_pid_changed(e), -ECHILD);
1270 s = source_new(e, !ret, SOURCE_DEFER);
1274 s->defer.callback = callback;
1275 s->userdata = userdata;
1276 s->enabled = SD_EVENT_ONESHOT;
1278 r = source_set_pending(s, true);
1291 _public_ int sd_event_add_post(
1293 sd_event_source **ret,
1294 sd_event_handler_t callback,
1300 assert_return(e, -EINVAL);
1301 assert_return(callback, -EINVAL);
1302 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1303 assert_return(!event_pid_changed(e), -ECHILD);
1305 r = set_ensure_allocated(&e->post_sources, NULL);
1309 s = source_new(e, !ret, SOURCE_POST);
1313 s->post.callback = callback;
1314 s->userdata = userdata;
1315 s->enabled = SD_EVENT_ON;
1317 r = set_put(e->post_sources, s);
1329 _public_ int sd_event_add_exit(
1331 sd_event_source **ret,
1332 sd_event_handler_t callback,
1338 assert_return(e, -EINVAL);
1339 assert_return(callback, -EINVAL);
1340 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1341 assert_return(!event_pid_changed(e), -ECHILD);
1343 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1347 s = source_new(e, !ret, SOURCE_EXIT);
1351 s->exit.callback = callback;
1352 s->userdata = userdata;
1353 s->exit.prioq_index = PRIOQ_IDX_NULL;
1354 s->enabled = SD_EVENT_ONESHOT;
1356 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
#if 0 /// UNNEEDED by elogind
/* Takes an additional reference on the event source. Returns s. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1379 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1384 assert(s->n_ref >= 1);
1387 if (s->n_ref <= 0) {
1388 /* Here's a special hack: when we are called from a
1389 * dispatch handler we won't free the event source
1390 * immediately, but we will detach the fd from the
1391 * epoll. This way it is safe for the caller to unref
1392 * the event source and immediately close the fd, but
1393 * we still retain a valid event source object after
1396 if (s->dispatching) {
1397 if (s->type == SOURCE_IO)
1398 source_io_unregister(s);
1400 source_disconnect(s);
1408 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1409 assert_return(s, -EINVAL);
1410 assert_return(!event_pid_changed(s->event), -ECHILD);
1412 return free_and_strdup(&s->description, description);
#if 0 /// UNNEEDED by elogind
/* Returns the source's description, or -ENXIO if none was set. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1427 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1428 assert_return(s, NULL);
#if 0 /// UNNEEDED by elogind
/* Returns whether the source is queued for dispatch. Not defined for
 * exit sources. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1443 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1444 assert_return(s, -EINVAL);
1445 assert_return(s->type == SOURCE_IO, -EDOM);
1446 assert_return(!event_pid_changed(s->event), -ECHILD);
1452 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1455 assert_return(s, -EINVAL);
1456 assert_return(fd >= 0, -EBADF);
1457 assert_return(s->type == SOURCE_IO, -EDOM);
1458 assert_return(!event_pid_changed(s->event), -ECHILD);
1463 if (s->enabled == SD_EVENT_OFF) {
1465 s->io.registered = false;
1469 saved_fd = s->io.fd;
1470 assert(s->io.registered);
1473 s->io.registered = false;
1475 r = source_io_register(s, s->enabled, s->io.events);
1477 s->io.fd = saved_fd;
1478 s->io.registered = true;
1482 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
#if 0 /// UNNEEDED by elogind
/* Returns the epoll event mask an IO source is configured for. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1500 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1503 assert_return(s, -EINVAL);
1504 assert_return(s->type == SOURCE_IO, -EDOM);
1505 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1506 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1507 assert_return(!event_pid_changed(s->event), -ECHILD);
1509 /* edge-triggered updates are never skipped, so we can reset edges */
1510 if (s->io.events == events && !(events & EPOLLET))
1513 if (s->enabled != SD_EVENT_OFF) {
1514 r = source_io_register(s, s->enabled, events);
1519 s->io.events = events;
1520 source_set_pending(s, false);
#if 0 /// UNNEEDED by elogind
/* Returns the triggered epoll events of a currently-pending IO source. */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1537 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1538 assert_return(s, -EINVAL);
1539 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1540 assert_return(!event_pid_changed(s->event), -ECHILD);
1542 return s->signal.sig;
1545 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1546 assert_return(s, -EINVAL);
1547 assert_return(!event_pid_changed(s->event), -ECHILD);
1553 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1556 assert_return(s, -EINVAL);
1557 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1558 assert_return(!event_pid_changed(s->event), -ECHILD);
1560 if (s->priority == priority)
1563 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1564 struct signal_data *old, *d;
1566 /* Move us from the signalfd belonging to the old
1567 * priority to the signalfd of the new priority */
1569 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1571 s->priority = priority;
1573 r = event_make_signal_data(s->event, s->signal.sig, &d);
1575 s->priority = old->priority;
1579 event_unmask_signal_data(s->event, old, s->signal.sig);
1581 s->priority = priority;
1584 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1587 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1589 if (s->type == SOURCE_EXIT)
1590 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
#if 0 /// UNNEEDED by elogind
/* Returns the enablement state (SD_EVENT_ON/OFF/ONESHOT). */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1606 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1609 assert_return(s, -EINVAL);
1610 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1611 assert_return(!event_pid_changed(s->event), -ECHILD);
1613 /* If we are dead anyway, we are fine with turning off
1614 * sources, but everything else needs to fail. */
1615 if (s->event->state == SD_EVENT_FINISHED)
1616 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1618 if (s->enabled == m)
1621 if (m == SD_EVENT_OFF) {
1626 source_io_unregister(s);
1630 case SOURCE_TIME_REALTIME:
1631 case SOURCE_TIME_BOOTTIME:
1632 case SOURCE_TIME_MONOTONIC:
1633 case SOURCE_TIME_REALTIME_ALARM:
1634 case SOURCE_TIME_BOOTTIME_ALARM: {
1635 struct clock_data *d;
1638 d = event_get_clock_data(s->event, s->type);
1641 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1642 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1643 d->needs_rearm = true;
1650 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1656 assert(s->event->n_enabled_child_sources > 0);
1657 s->event->n_enabled_child_sources--;
1659 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1664 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1673 assert_not_reached("Wut? I shouldn't exist.");
1680 r = source_io_register(s, m, s->io.events);
1687 case SOURCE_TIME_REALTIME:
1688 case SOURCE_TIME_BOOTTIME:
1689 case SOURCE_TIME_MONOTONIC:
1690 case SOURCE_TIME_REALTIME_ALARM:
1691 case SOURCE_TIME_BOOTTIME_ALARM: {
1692 struct clock_data *d;
1695 d = event_get_clock_data(s->event, s->type);
1698 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1699 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1700 d->needs_rearm = true;
1708 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1710 s->enabled = SD_EVENT_OFF;
1711 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1719 if (s->enabled == SD_EVENT_OFF)
1720 s->event->n_enabled_child_sources++;
1724 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1726 s->enabled = SD_EVENT_OFF;
1727 s->event->n_enabled_child_sources--;
1728 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1736 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1745 assert_not_reached("Wut? I shouldn't exist.");
1750 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1753 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1758 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1759 assert_return(s, -EINVAL);
1760 assert_return(usec, -EINVAL);
1761 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1762 assert_return(!event_pid_changed(s->event), -ECHILD);
1764 *usec = s->time.next;
1768 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1769 struct clock_data *d;
1771 assert_return(s, -EINVAL);
1772 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1773 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1774 assert_return(!event_pid_changed(s->event), -ECHILD);
1776 s->time.next = usec;
1778 source_set_pending(s, false);
1780 d = event_get_clock_data(s->event, s->type);
1783 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1784 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1785 d->needs_rearm = true;
#if 0 /// UNNEEDED by elogind
/* Returns the accuracy window of a timer source. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
/* NOTE(review): #endif placement reconstructed — confirm against upstream */
#endif // 0
1801 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1802 struct clock_data *d;
1804 assert_return(s, -EINVAL);
1805 assert_return(usec != (uint64_t) -1, -EINVAL);
1806 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1807 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1808 assert_return(!event_pid_changed(s->event), -ECHILD);
1811 usec = DEFAULT_ACCURACY_USEC;
1813 s->time.accuracy = usec;
1815 source_set_pending(s, false);
1817 d = event_get_clock_data(s->event, s->type);
1820 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1821 d->needs_rearm = true;
1826 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1827 assert_return(s, -EINVAL);
1828 assert_return(clock, -EINVAL);
1829 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1830 assert_return(!event_pid_changed(s->event), -ECHILD);
1832 *clock = event_source_type_to_clock(s->type);
1836 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1837 assert_return(s, -EINVAL);
1838 assert_return(pid, -EINVAL);
1839 assert_return(s->type == SOURCE_CHILD, -EDOM);
1840 assert_return(!event_pid_changed(s->event), -ECHILD);
1842 *pid = s->child.pid;
/* Install, replace, or (with a NULL callback) remove the prepare
 * callback invoked for this source before each loop iteration. Exit
 * sources cannot carry prepare callbacks. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* No change? Then there's nothing to do. */
        if (s->prepare == callback)

        /* Replacing one callback with another: the source already sits
         * in the prepare priority queue, just swap the pointer. */
        if (callback && s->prepare) {
                s->prepare = callback;

        /* First callback being installed: make sure the event loop's
         * prepare queue exists before we enqueue. */
        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        /* Enqueue when a callback was set… */
        r = prioq_put(s->event->prepare, s, &s->prepare_index);

        /* …and dequeue when it was cleared. */
        prioq_remove(s->event->prepare, s, &s->prepare_index);
1879 #if 0 /// UNNEEDED by elogind
1880 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1881 assert_return(s, NULL);
1886 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1889 assert_return(s, NULL);
1892 s->userdata = userdata;
/* Choose a wake-up time within the window [a, b], coalescing wake-ups
 * across the whole system via the per-boot perturbation value. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        /* No earliest deadline at all? Then sleep forever. */
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        /* Candidate on the whole-minute grid, offset by the perturb value. */
        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;

        /* Guard against underflow when stepping one grid slot back. */
        if (_unlikely_(c < USEC_PER_MINUTE))

                c -= USEC_PER_MINUTE;

        /* Minute grid missed the window — try the 10s grid. */
        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));

        if (_unlikely_(c < USEC_PER_SEC*10))

                c -= USEC_PER_SEC*10;

        /* Then the 1s grid… */
        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);

        if (_unlikely_(c < USEC_PER_SEC))

        /* …and finally the 250ms grid. */
        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));

        if (_unlikely_(c < USEC_PER_MSEC*250))

                c -= USEC_PER_MSEC*250;
/* Program the timerfd of one clock to fire at a coalesced wake-up time
 * between the earliest deadline and the latest acceptable time. */
static int event_arm_timer(

                sd_event *e,

                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;

        /* Nothing changed since the last arming? Skip the syscall. */
        if (!d->needs_rearm)

        d->needs_rearm = false;

        /* Earliest enabled deadline among this clock's sources. */
        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                /* Already disarmed? Then there's nothing to do. */
                if (d->next == USEC_INFINITY)

                /* Disarm: its is still all-zero here. */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

                d->next = USEC_INFINITY;

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        /* Coalesce within [earliest deadline, latest acceptable time]. */
        t = sleep_between(e, a->time.next, time_event_source_latest(b));

        assert_se(d->fd >= 0);

        /* We don't want to disarm here, just mean some time looooong ago. */
        its.it_value.tv_sec = 0;
        its.it_value.tv_nsec = 1;

        timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2037 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2040 assert(s->type == SOURCE_IO);
2042 /* If the event source was already pending, we just OR in the
2043 * new revents, otherwise we reset the value. The ORing is
2044 * necessary to handle EPOLLONESHOT events properly where
2045 * readability might happen independently of writability, and
2046 * we need to keep track of both */
2049 s->io.revents |= revents;
2051 s->io.revents = revents;
2053 return source_set_pending(s, true);
2056 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2063 assert_return(events == EPOLLIN, -EIO);
2065 ss = read(fd, &x, sizeof(x));
2067 if (errno == EAGAIN || errno == EINTR)
2073 if (_unlikely_(ss != sizeof(x)))
2077 *next = USEC_INFINITY;
/* Mark every elapsed timer source of one clock as pending, in
 * earliest-deadline order. */
static int process_timer(

                sd_event *e,

                struct clock_data *d) {

                /* Stop at the first source that has not elapsed yet, is
                 * disabled, or is already pending. */
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->enabled == SD_EVENT_OFF ||

                r = source_set_pending(s, true);

                /* The source's state changed, so refresh its position in
                 * both queues and request a timerfd re-arm. */
                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
/* Poll every watched child with waitid() and mark sources whose child
 * reported a state change as pending. */
static int process_child(sd_event *e) {

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)

                /* Non-blocking poll; WNOWAIT keeps a terminated child's
                 * zombie in place when the caller asked for WEXITED. */
                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);

                /* si_pid != 0 means a state change was actually reported. */
                if (s->child.siginfo.si_pid != 0) {
                        /* Did the child actually terminate (exit/kill/dump)? */
                        s->child.siginfo.si_code == CLD_EXITED ||
                        s->child.siginfo.si_code == CLD_KILLED ||
                        s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));

                        r = source_set_pending(s, true);
/* Dequeue one signal from this priority's signalfd and mark the
 * matching signal event source pending. */
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;

        /* A signalfd only ever signals readability. */
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */

                struct signalfd_siginfo si;

                sd_event_source *s = NULL;

                /* Dequeue one queued signal from the signalfd. */
                n = read(d->fd, &si, sizeof(si));

                /* Nothing queued (or interrupted): not an error. */
                if (errno == EAGAIN || errno == EINTR)

                /* signalfd reads are all-or-nothing. */
                if (_unlikely_(n != sizeof(si)))

                assert(si.ssi_signo < _NSIG);

                /* Route the siginfo to the source registered for this
                 * signal number, if any. */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                s->signal.siginfo = si;

                r = source_set_pending(s, true);
/* Invoke the user callback of one event source, handling ONESHOT
 * disabling, post-source propagation, and error-triggered disabling. */
static int source_dispatch(sd_event_source *s) {

        assert(s->pending || s->type == SOURCE_EXIT);

        /* Defer and exit sources are not unqueued before dispatch;
         * every other type is. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);

        if (s->type != SOURCE_POST) {

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)

                        r = source_set_pending(z, true);

        /* ONESHOT sources are disabled before the callback runs, so
         * the callback may safely re-enable them. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

        s->dispatching = true;

                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);

                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

        case SOURCE_CHILD: {

                /* Did the child actually terminate (vs. stop/continue)? */
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */

                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                r = s->defer.callback(s, s->userdata);

                r = s->post.callback(s, s->userdata);

                r = s->exit.callback(s, s->userdata);

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");

        s->dispatching = false;

                /* A failing callback gets its source disabled below. */
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(s->type));

                sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Run user prepare callbacks, highest priority first, each source at
 * most once per loop iteration. */
static int event_prepare(sd_event *e) {

                /* Stop once we reach a source already prepared in this
                 * iteration, or a disabled one. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)

                /* Record the iteration so this source is skipped on the
                 * next peek, and re-sort it accordingly. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                        /* A failing prepare callback disables the source. */
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Dispatch the highest-priority enabled exit source, or mark the loop
 * finished when none remains. */
static int dispatch_exit(sd_event *e) {

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                /* No exit sources left to run: the loop is done. */
                e->state = SD_EVENT_FINISHED;

        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;
2404 static sd_event_source* event_next_pending(sd_event *e) {
2409 p = prioq_peek(e->pending);
2413 if (p->enabled == SD_EVENT_OFF)
2419 static int arm_watchdog(sd_event *e) {
2420 struct itimerspec its = {};
2425 assert(e->watchdog_fd >= 0);
2427 t = sleep_between(e,
2428 e->watchdog_last + (e->watchdog_period / 2),
2429 e->watchdog_last + (e->watchdog_period * 3 / 4));
2431 timespec_store(&its.it_value, t);
2433 /* Make sure we never set the watchdog to 0, which tells the
2434 * kernel to disable it. */
2435 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2436 its.it_value.tv_nsec = 1;
2438 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2445 static int process_watchdog(sd_event *e) {
2451 /* Don't notify watchdog too often */
2452 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2455 sd_notify(false, "WATCHDOG=1");
2456 e->watchdog_last = e->timestamp.monotonic;
2458 return arm_watchdog(e);
/* First stage of an iteration: run prepare callbacks and arm the
 * per-clock timerfds, then move the loop into ARMED (or report
 * already-pending work). */
_public_ int sd_event_prepare(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* With an exit request outstanding there is nothing to prepare. */
        if (e->exit_requested)

        /* Run user prepare callbacks. */
        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;

        /* Re-arm the timerfd of each clock whose queues changed. */
        r = event_arm_timer(e, &e->realtime);

        r = event_arm_timer(e, &e->boottime);

        r = event_arm_timer(e, &e->monotonic);

        r = event_arm_timer(e, &e->realtime_alarm);

        r = event_arm_timer(e, &e->boottime_alarm);

        /* Anything already dispatchable (or children to recheck)? */
        if (event_next_pending(e) || e->need_process_child)

        e->state = SD_EVENT_ARMED;

        /* Pending path: poll once with a zero timeout so the state
         * machine advances through ARMED as callers expect. */
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);

                e->state = SD_EVENT_ARMED;
/* Second stage of an iteration: block in epoll_wait() up to the given
 * µs timeout, then convert the wake-ups into pending event sources. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        /* An outstanding exit request counts as pending work. */
        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;

        /* One epoll slot per source, at least one. */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* (uint64_t) -1 means block forever; otherwise round the µs
         * timeout up to whole milliseconds for epoll. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));

                /* Interrupted by a signal: report as pending, not error. */
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;

        /* Cache "now" on all clocks once per iteration so all handlers
         * observe a consistent timestamp. */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        /* First pass: route each wake-up by the WakeupType tag stored
         * in its epoll data pointer. */
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);

                        WakeupType *t = ev_queue[i].data.ptr;

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);

                                assert_not_reached("Invalid wake-up pointer");

        r = process_watchdog(e);

        /* Second pass: mark elapsed timer sources pending, once per
         * clock; the alarm clocks reuse the realtime/boottime stamps. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);

        r = process_timer(e, e->timestamp_boottime, &e->boottime);

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);

        if (e->need_process_child) {
                r = process_child(e);

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

        e->state = SD_EVENT_INITIAL;
/* Third stage of an iteration: run the callback of the next pending
 * event source (or the exit sources once an exit was requested). */
_public_ int sd_event_dispatch(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        /* After an exit request only exit sources are run. */
        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;

        /* Nothing was pending after all. */
        e->state = SD_EVENT_INITIAL;
/* Dump the iteration-delay histogram as a single debug log line. */
static void event_log_delays(sd_event *e) {
        /* Sized for one decimal unsigned plus separator per bucket. */
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                /* Append this bucket's counter to the output buffer. */
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);

        log_debug("Event loop iterations: %.*s", o, b);
/* Run one full iteration: prepare, wait (up to the given µs timeout),
 * and dispatch a single pending source if there is one. */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Optional profiling: histogram the log2 of the gap between
         * consecutive runs, dumping the buckets every 5 seconds. */
        if (e->profile_delays && e->last_run) {

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                /* NOTE(review): bound uses sizeof (a byte count);
                 * ELEMENTSOF would be the tighter check — confirm the
                 * sizing of delays[]. */
                assert(l < sizeof(e->delays));

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;

        r = sd_event_prepare(e);

                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        /* There's something now, then let's dispatch it */
        r = sd_event_dispatch(e);
2710 #if 0 /// UNNEEDED by elogind
/* Run full iterations with an infinite timeout until some handler
 * marks the loop finished. */
_public_ int sd_event_loop(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                /* (uint64_t) -1 = block until something happens. */
                r = sd_event_run(e, (uint64_t) -1);
2733 _public_ int sd_event_get_fd(sd_event *e) {
2735 assert_return(e, -EINVAL);
2736 assert_return(!event_pid_changed(e), -ECHILD);
2742 _public_ int sd_event_get_state(sd_event *e) {
2743 assert_return(e, -EINVAL);
2744 assert_return(!event_pid_changed(e), -ECHILD);
2749 #if 0 /// UNNEEDED by elogind
2750 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2751 assert_return(e, -EINVAL);
2752 assert_return(code, -EINVAL);
2753 assert_return(!event_pid_changed(e), -ECHILD);
2755 if (!e->exit_requested)
2758 *code = e->exit_code;
2763 _public_ int sd_event_exit(sd_event *e, int code) {
2764 assert_return(e, -EINVAL);
2765 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2766 assert_return(!event_pid_changed(e), -ECHILD);
2768 e->exit_requested = true;
2769 e->exit_code = code;
2774 #if 0 /// UNNEEDED by elogind
/* Return the timestamp cached at the start of the current iteration,
 * so all handlers of one iteration observe the same notion of "now". */
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(IN_SET(clock,
                             CLOCK_REALTIME_ALARM,
                             CLOCK_BOOTTIME_ALARM), -EOPNOTSUPP);

        if (!dual_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */

        /* The alarm clocks share the cached stamp of their base clock. */
        case CLOCK_REALTIME:
        case CLOCK_REALTIME_ALARM:
                *usec = e->timestamp.realtime;

        case CLOCK_MONOTONIC:
                *usec = e->timestamp.monotonic;

                *usec = e->timestamp_boottime;
/* Return (creating lazily on first use) the calling thread's default
 * event loop. A NULL ret acts as a pure existence query. */
_public_ int sd_event_default(sd_event **ret) {

        /* One cached default loop per thread. */
        static thread_local sd_event *default_event = NULL;

        return !!default_event;

        /* Already created? Hand out another reference. */
        if (default_event) {
                *ret = sd_event_ref(default_event);

        r = sd_event_new(&e);

        /* Remember where to clear the cache when this loop is freed. */
        e->default_event_ptr = &default_event;
2839 #if 0 /// UNNEEDED by elogind
/* Report the thread id this loop is attached to — presumably only set
 * for loops obtained via sd_event_default(); confirm against e->tid
 * assignment elsewhere in the file. */
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
/* Enable or disable sd_notify()-based watchdog support for this loop:
 * creates (or closes) a CLOCK_MONOTONIC timerfd registered in the
 * epoll set under the SOURCE_WATCHDOG tag. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* No change? Then there's nothing to do. */
        if (e->watchdog == !!b)

                struct epoll_event ev = {};

                /* Query the configured watchdog period; the 'false'
                 * leaves $WATCHDOG_USEC in the environment. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)

                r = arm_watchdog(e);

                /* Register the timerfd under the watchdog sentinel tag so
                 * sd_event_wait() can tell it apart from event sources. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);

                /* Disable path: unregister and close the timerfd. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);

        /* Error path: make sure the fd does not leak. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
2906 #if 0 /// UNNEEDED by elogind
2907 _public_ int sd_event_get_watchdog(sd_event *e) {
2908 assert_return(e, -EINVAL);
2909 assert_return(!event_pid_changed(e), -ECHILD);