/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <sys/epoll.h>
#include <sys/timerfd.h>

#include "sd-daemon.h"

#include "alloc-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

typedef enum EventSourceType {
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        sd_event_handler_t prepare;

        EventSourceType type:5;

        unsigned pending_index;
        unsigned prepare_index;
        unsigned pending_iteration;
        unsigned prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

                sd_event_io_handler_t callback;

                sd_event_time_handler_t callback;
                usec_t next, accuracy;
                unsigned earliest_index;
                unsigned latest_index;

                sd_event_signal_handler_t callback;
                struct signalfd_siginfo siginfo;

                sd_event_child_handler_t callback;

                sd_event_handler_t callback;

                sd_event_handler_t callback;

                sd_event_handler_t callback;
                unsigned prioq_index;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        sd_event_source *current;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        dual_timestamp timestamp;
        usec_t timestamp_boottime;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
static void source_disconnect(sd_event_source *s);

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
                return -1;
        if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
                return 1;

        return 0;
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static void free_clock_data(struct clock_data *d) {
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        prioq_free(d->earliest);
        prioq_free(d->latest);

static void event_free(sd_event *e) {

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

_public_ int sd_event_new(sd_event** ret) {

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);

        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY;

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_info("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
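#if 0 /* Illustrative sketch, not compiled: the minimal lifecycle of a
       * loop as allocated above. Error handling is trimmed; a real
       * caller would check every return value. */
static int example_event_lifecycle(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* ... attach event sources here ... */

        /* Dropping the last reference frees the loop; event_free()
         * above disconnects and unrefs any remaining sources. */
        sd_event_unref(e);
        return 0;
}
#endif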
_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);

_public_ sd_event* sd_event_unref(sd_event *e) {

        assert(e->n_ref >= 1);

static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();

static void source_io_unregister(sd_event_source *s) {

        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))

        if (!s->io.registered)

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
                log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));

        s->io.registered = false;

static int source_io_register(

        struct epoll_event ev = {};

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        s->io.registered = true;

#if 0 /// UNNEEDED by elogind
static clockid_t event_source_type_to_clock(EventSourceType t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        return (clockid_t) -1;

static EventSourceType clock_to_event_source_type(clockid_t clock) {

                return SOURCE_TIME_REALTIME;

                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        return _SOURCE_EVENT_SOURCE_TYPE_INVALID;

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {

        case SOURCE_TIME_REALTIME:

        case SOURCE_TIME_BOOTTIME:

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

static int event_make_signal_data(

                struct signal_data **ret) {

        struct epoll_event ev = {};
        struct signal_data *d;

        if (event_pid_changed(e))

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;

        d = hashmap_get(e->signal_data, &priority);
                if (sigismember(&d->sigset, sig) > 0) {

        r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);

        d = new0(struct signal_data, 1);

        d->wakeup = WAKEUP_SIGNAL_DATA;
        d->priority = priority;

        r = hashmap_put(e->signal_data, &d->priority, d);

        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);

        d->fd = safe_close(d->fd);
        hashmap_remove(e->signal_data, &d->priority);

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that

        if (sigismember(&d->sigset, sig) == 0)

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)

        /* The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here. */

        d = hashmap_get(e->signal_data, priority);
                event_unmask_signal_data(e, d, sig);

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                        event_unmask_signal_data(e, d, sig);

        d = hashmap_get(e->signal_data, &zero_priority);
                event_unmask_signal_data(e, d, sig);

static void source_disconnect(sd_event_source *s) {

        assert(s->event->n_sources > 0);

                source_io_unregister(s);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;

                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                set_remove(s->event->post_sources, s);

                prioq_remove(s->event->exit, s, &s->exit.prioq_index);

                assert_not_reached("Wut? I shouldn't exist.");

        prioq_remove(s->event->pending, s, &s->pending_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;

        LIST_REMOVE(sources, event->sources, s);

        sd_event_unref(event);

static void source_free(sd_event_source *s) {

        source_disconnect(s);
        free(s->description);

static int source_set_pending(sd_event_source *s, bool b) {

        assert(s->type != SOURCE_EXIT);

                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);

                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {

        s = new0(sd_event_source, 1);

        s->floating = floating;

        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        LIST_PREPEND(sources, e->sources, s);

_public_ int sd_event_add_io(
                sd_event_source **ret,
                sd_event_io_handler_t callback,

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
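#if 0 /* Illustrative sketch, not compiled: how a caller might register an
       * fd with sd_event_add_io(). The fd and handler names are invented
       * for the example; only the EPOLLIN/EPOLLOUT/... flags accepted by
       * the assert_return() above may be passed. */
static int example_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* Called when the fd becomes readable; draining it here keeps a
         * level-triggered source from firing again immediately. */
        char buf[256];
        (void) read(fd, buf, sizeof(buf));
        return 0;
}

static int example_add_io(sd_event *e, int fd) {
        sd_event_source *s = NULL;

        /* A non-NULL source pointer keeps a reference for the caller;
         * passing NULL would create a floating source owned by the loop
         * (see the !ret argument to source_new() above). */
        return sd_event_add_io(e, &s, fd, EPOLLIN, example_io_handler, NULL);
}
#endif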
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
static int event_setup_timer_fd(
                struct clock_data *d,

        struct epoll_event ev = {};

        if (_likely_(d->fd >= 0))

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);

        ev.events = EPOLLIN;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));

_public_ int sd_event_add_time(
                sd_event_source **ret,
                sd_event_time_handler_t callback,

        EventSourceType type;
        struct clock_data *d;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);

                r = event_setup_timer_fd(e, d, clock);

        s = source_new(e, !ret, type);

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);

        r = prioq_put(d->latest, s, &s->time.latest_index);
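#if 0 /* Illustrative sketch, not compiled: arming a one-shot timer
       * relative to CLOCK_MONOTONIC. The 5s delay and the accuracy of 0
       * (meaning DEFAULT_ACCURACY_USEC, see above) are example values. */
static int example_time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        /* usec is the time the timer elapsed for, i.e. s->time.next */
        return 0;
}

static int example_add_time(sd_event *e) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        /* NULL for the source pointer: the source is floating and
         * owned by the event loop. Being SD_EVENT_ONESHOT, it is
         * disabled again after firing once. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 now_usec + 5 * USEC_PER_SEC, 0,
                                 example_time_handler, NULL);
}
#endif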
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));

_public_ int sd_event_add_signal(
                sd_event_source **ret,
                sd_event_signal_handler_t callback,

        struct signal_data *d;

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);

        if (!sigismember(&ss, sig))

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
        } else if (e->signal_sources[sig])

        s = source_new(e, !ret, SOURCE_SIGNAL);

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));
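#if 0 /* Illustrative sketch, not compiled: watching SIGTERM. As the
       * pthread_sigmask() check above enforces, the signal must already
       * be blocked before sd_event_add_signal() is called. */
static int example_sigterm_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Ask the loop to terminate; the code is delivered via
         * sd_event_loop()/sd_event_get_exit_code(). */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int example_add_signal(sd_event *e) {
        sigset_t ss;
        int r;

        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);

        r = pthread_sigmask(SIG_BLOCK, &ss, NULL);
        if (r != 0)
                return -r;

        return sd_event_add_signal(e, NULL, SIGTERM, example_sigterm_handler, NULL);
}
#endif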
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_add_child(
                sd_event_source **ret,
                sd_event_child_handler_t callback,

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))

        s = source_new(e, !ret, SOURCE_CHILD);

        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
                e->n_enabled_child_sources--;

        e->need_process_child = true;
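#if 0 /* Illustrative sketch, not compiled: waiting for a forked child.
       * pid is an assumed, already-forked child, and SIGCHLD must be
       * blocked just as for sd_event_add_signal(). Because of the
       * WNOWAIT logic in process_child() below, the callback still sees
       * the process as a zombie; it is reaped for good right after the
       * callback returns (see source_dispatch()). */
static int example_child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* For CLD_EXITED, si->si_status carries the exit status */
        return 0;
}

static int example_add_child(sd_event *e, pid_t pid) {
        return sd_event_add_child(e, NULL, pid, WEXITED, example_child_handler, NULL);
}
#endif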
_public_ int sd_event_add_defer(
                sd_event_source **ret,
                sd_event_handler_t callback,

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
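#if 0 /* Illustrative sketch, not compiled: a defer source runs on the
       * next loop iteration (it is marked pending right away above) and,
       * being SD_EVENT_ONESHOT, is disabled again after one dispatch. */
static int example_defer_handler(sd_event_source *s, void *userdata) {
        return 0;
}

static int example_add_defer(sd_event *e) {
        return sd_event_add_defer(e, NULL, example_defer_handler, NULL);
}
#endif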
_public_ int sd_event_add_post(
                sd_event_source **ret,
                sd_event_handler_t callback,

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);

        s = source_new(e, !ret, SOURCE_POST);

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);

_public_ int sd_event_add_exit(
                sd_event_source **ret,
                sd_event_handler_t callback,

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);

        s = source_new(e, !ret, SOURCE_EXIT);

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);

#if 0 /// UNNEEDED by elogind
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        assert(s->n_ref >= 1);

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == SD_EVENT_OFF) {
                s->io.registered = false;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                        s->io.fd = saved_fd;
                        s->io.registered = true;

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);

        s->io.events = events;
        source_set_pending(s, false);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                        s->priority = old->priority;

                event_unmask_signal_data(s->event, old, s->signal.sig);

                s->priority = priority;

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)

        if (m == SD_EVENT_OFF) {

                        source_io_unregister(s);

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

                        assert_not_reached("Wut? I shouldn't exist.");

                        r = source_io_register(s, m, s->io.events);

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

                        assert_not_reached("Wut? I shouldn't exist.");

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)

        if (callback && s->prepare) {
                s->prepare = callback;

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

                r = prioq_put(s->event->prepare, s, &s->prepare_index);

                prioq_remove(s->event->prepare, s, &s->prepare_index);

#if 0 /// UNNEEDED by elogind
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {

        assert_return(s, NULL);

        s->userdata = userdata;
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
                if (_unlikely_(c < USEC_PER_MINUTE))

                c -= USEC_PER_MINUTE;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
                if (_unlikely_(c < USEC_PER_SEC*10))

                c -= USEC_PER_SEC*10;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
                if (_unlikely_(c < USEC_PER_SEC))

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
                if (_unlikely_(c < USEC_PER_MSEC*250))

                c -= USEC_PER_MSEC*250;
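/* Worked example for the coalescing arithmetic above (numbers assumed):
 * with e->perturb = 13 * USEC_PER_SEC, a = 60.5 s and b = 119.0 s, the
 * first candidate is c = 60 s + 13 s = 73 s, which lies inside [a, b]
 * and is returned: every machine with the same perturbation wakes at
 * second 13 of the minute. If instead b = 70.0 s, c = 73 s overshoots
 * b, subtracting a minute yields 13 s < a, and the code falls back to
 * the 10 s step: c = 70 s + (13 s % 10 s) = 73 s >= b, minus 10 s gives
 * 63 s, which is >= a and is returned. */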
static int event_arm_timer(
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;

        if (!d->needs_rearm)

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                if (d->next == USEC_INFINITY)

                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

                d->next = USEC_INFINITY;

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);

        assert_se(d->fd >= 0);

                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;

                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {

        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

                s->io.revents |= revents;

                s->io.revents = revents;

        return source_set_pending(s, true);

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
                if (errno == EAGAIN || errno == EINTR)

        if (_unlikely_(ss != sizeof(x)))

                *next = USEC_INFINITY;

static int process_timer(
                struct clock_data *d) {

                s = prioq_peek(d->earliest);
                    s->enabled == SD_EVENT_OFF ||

                r = source_set_pending(s, true);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
static int process_child(sd_event *e) {

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));

                        r = source_set_pending(s, true);
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;

        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */

                struct signalfd_siginfo si;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                        if (errno == EAGAIN || errno == EINTR)

                if (_unlikely_(n != sizeof(si)))

                assert(si.ssi_signo < _NSIG);

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];

                s->signal.siginfo = si;

                r = source_set_pending(s, true);
static int source_dispatch(sd_event_source *s) {

        assert(s->pending || s->type == SOURCE_EXIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);

        if (s->type != SOURCE_POST) {

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)

                        r = source_set_pending(z, true);

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

        s->dispatching = true;

                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);

                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

        case SOURCE_CHILD: {

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                r = s->defer.callback(s, s->userdata);

                r = s->post.callback(s, s->userdata);

                r = s->exit.callback(s, s->userdata);

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");

        s->dispatching = false;

                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);

                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);

                sd_event_source_set_enabled(s, SD_EVENT_OFF);
static int event_prepare(sd_event *e) {

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);

                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);

                        sd_event_source_set_enabled(s, SD_EVENT_OFF);

static int dispatch_exit(sd_event *e) {

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;

        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;

static sd_event_source* event_next_pending(sd_event *e) {

        p = prioq_peek(e->pending);

        if (p->enabled == SD_EVENT_OFF)

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};

        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);

static int process_watchdog(sd_event *e) {

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
_public_ int sd_event_prepare(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;

        r = event_arm_timer(e, &e->realtime);

        r = event_arm_timer(e, &e->boottime);

        r = event_arm_timer(e, &e->monotonic);

        r = event_arm_timer(e, &e->realtime_alarm);

        r = event_arm_timer(e, &e->boottime_alarm);

        if (event_next_pending(e) || e->need_process_child)

        e->state = SD_EVENT_ARMED;

        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        e->state = SD_EVENT_ARMED;

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;

        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);

                        WakeupType *t = ev_queue[i].data.ptr;

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);

                                assert_not_reached("Invalid wake-up pointer");

                r = process_watchdog(e);

        r = process_timer(e, e->timestamp.realtime, &e->realtime);

        r = process_timer(e, e->timestamp_boottime, &e->boottime);

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);

        if (e->need_process_child) {
                r = process_child(e);

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

        e->state = SD_EVENT_INITIAL;

_public_ int sd_event_dispatch(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;

        e->state = SD_EVENT_INITIAL;
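#if 0 /* Illustrative sketch, not compiled: the prepare/wait/dispatch
       * triple above can be driven manually, which is what sd_event_run()
       * below does internally. This is useful when embedding the loop's
       * fd (see sd_event_get_fd()) into a foreign event loop. */
static int example_manual_iteration(sd_event *e) {
        int r;

        r = sd_event_prepare(e);                      /* > 0: already pending */
        if (r == 0)
                r = sd_event_wait(e, (uint64_t) -1);  /* block until ready */
        if (r > 0)
                r = sd_event_dispatch(e);             /* run one callback */

        return r;
}
#endif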
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);

        log_info("Event loop iterations: %.*s", o, b);

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;

        r = sd_event_prepare(e);
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_loop(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
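#if 0 /* Illustrative sketch, not compiled: the typical top level of an
       * sd-event consumer, combining the calls above. Assumes a SIGTERM
       * source like the one sketched near sd_event_add_signal(). */
static int example_main_loop(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* ... add IO/timer/signal sources here ... */

        /* Runs until sd_event_exit() is called; returns the exit code. */
        r = sd_event_loop(e);

        sd_event_unref(e);
        return r;
}
#endif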
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)

        *code = e->exit_code;

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!dual_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */

        case CLOCK_REALTIME:
        case CLOCK_REALTIME_ALARM:
                *usec = e->timestamp.realtime;

        case CLOCK_MONOTONIC:
                *usec = e->timestamp.monotonic;

        case CLOCK_BOOTTIME:
        case CLOCK_BOOTTIME_ALARM:
                *usec = e->timestamp_boottime;

_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;

                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);

        r = sd_event_new(&e);

        e->default_event_ptr = &default_event;

#if 0 /// UNNEEDED by elogind
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

_public_ int sd_event_set_watchdog(sd_event *e, int b) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)

                struct epoll_event ev = {};

                r = sd_watchdog_enabled(false, &e->watchdog_period);

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)

                r = arm_watchdog(e);

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);

                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);

        e->watchdog_fd = safe_close(e->watchdog_fd);
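#if 0 /* Illustrative sketch, not compiled: with WatchdogSec= set in a
       * service's unit file, the service manager exports WATCHDOG_USEC
       * (read via sd_watchdog_enabled() above) and a single call makes
       * the loop ping the watchdog automatically, at roughly half to
       * three quarters of the period (see arm_watchdog() above). */
static int example_enable_watchdog(sd_event *e) {
        return sd_event_set_watchdog(e, true);
}
#endif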
#if 0 /// UNNEEDED by elogind
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);