1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
28 #include "sd-daemon.h"
33 #include "time-util.h"
40 #define EPOLL_QUEUE_MAX 512U
41 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* Event source types. The five SOURCE_TIME_* values correspond 1:1 to the
 * clocks supported by timerfd; EVENT_SOURCE_IS_TIME() below relies on this
 * set. (NOTE(review): earlier enum members are not visible in this view.) */
43 typedef enum EventSourceType {
47 SOURCE_TIME_MONOTONIC,
48 SOURCE_TIME_REALTIME_ALARM,
49 SOURCE_TIME_BOOTTIME_ALARM,
56 _SOURCE_EVENT_SOURCE_TYPE_MAX,
57 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
/* True iff 't' is one of the five clock-based source types */
60 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
/* A single event source. The repeated 'callback' members below belong to
 * per-type union arms keyed off 'type' (union braces elided in this view). */
62 struct sd_event_source {
67 sd_event_handler_t prepare;
/* 5 bits is enough for _SOURCE_EVENT_SOURCE_TYPE_MAX */
71 EventSourceType type:5;
/* Positions in the event loop's pending/prepare priority queues */
78 unsigned pending_index;
79 unsigned prepare_index;
/* Iteration counters used by the comparators for FIFO ordering */
80 unsigned pending_iteration;
81 unsigned prepare_iteration;
83 LIST_FIELDS(sd_event_source, sources);
87 sd_event_io_handler_t callback;
94 sd_event_time_handler_t callback;
95 usec_t next, accuracy;
96 unsigned earliest_index;
97 unsigned latest_index;
100 sd_event_signal_handler_t callback;
101 struct signalfd_siginfo siginfo;
105 sd_event_child_handler_t callback;
111 sd_event_handler_t callback;
114 sd_event_handler_t callback;
117 sd_event_handler_t callback;
118 unsigned prioq_index;
126 /* For all clocks we maintain two priority queues each, one
127 * ordered for the earliest times the events may be
128 * dispatched, and one ordered by the latest times they must
129 * have been dispatched. The range between the top entries in
130 * the two prioqs is the time window we can freely schedule
150 /* timerfd_create() only supports these five clocks so far. We
151 * can add support for more clocks when the kernel learns to
152 * deal with them, too. */
153 struct clock_data realtime;
154 struct clock_data boottime;
155 struct clock_data monotonic;
156 struct clock_data realtime_alarm;
157 struct clock_data boottime_alarm;
/* Per-signal source table, indexed by signal number (_NSIG entries) */
162 sd_event_source **signal_sources;
/* pid -> SOURCE_CHILD source map, plus count of enabled child sources */
164 Hashmap *child_sources;
165 unsigned n_enabled_child_sources;
174 dual_timestamp timestamp;
175 usec_t timestamp_boottime;
178 bool exit_requested:1;
179 bool need_process_child:1;
/* If this loop is the thread default, points at the TLS slot to clear on free */
185 sd_event **default_event_ptr;
187 usec_t watchdog_last, watchdog_period;
191 LIST_HEAD(sd_event_source, sources);
194 static void source_disconnect(sd_event_source *s);
/* Ordering for the 'pending' prioq: enabled < disabled, then ascending
 * priority, then oldest pending_iteration first (FIFO fairness). */
196 static int pending_prioq_compare(const void *a, const void *b) {
197 const sd_event_source *x = a, *y = b;
202 /* Enabled ones first */
203 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
205 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
208 /* Lower priority values first */
209 if (x->priority < y->priority)
211 if (x->priority > y->priority)
214 /* Older entries first */
215 if (x->pending_iteration < y->pending_iteration)
217 if (x->pending_iteration > y->pending_iteration)
220 /* Stability for the rest */
/* Ordering for the 'prepare' prioq: iteration key comes FIRST here (unlike
 * the pending comparator) so the prepare loop can stop at the first source
 * already prepared this iteration. */
229 static int prepare_prioq_compare(const void *a, const void *b) {
230 const sd_event_source *x = a, *y = b;
235 /* Move most recently prepared ones last, so that we can stop
236 * preparing as soon as we hit one that has already been
237 * prepared in the current iteration */
238 if (x->prepare_iteration < y->prepare_iteration)
240 if (x->prepare_iteration > y->prepare_iteration)
243 /* Enabled ones first */
244 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
246 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
249 /* Lower priority values first */
250 if (x->priority < y->priority)
252 if (x->priority > y->priority)
255 /* Stability for the rest */
/* Orders a clock's sources by the earliest time they may fire (time.next).
 * Both entries are guaranteed to be of the same time type. */
264 static int earliest_time_prioq_compare(const void *a, const void *b) {
265 const sd_event_source *x = a, *y = b;
267 assert(EVENT_SOURCE_IS_TIME(x->type));
268 assert(x->type == y->type);
270 /* Enabled ones first */
271 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
273 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
276 /* Move the pending ones to the end */
277 if (!x->pending && y->pending)
279 if (x->pending && !y->pending)
283 if (x->time.next < y->time.next)
285 if (x->time.next > y->time.next)
288 /* Stability for the rest */
/* Orders a clock's sources by the latest time they must have fired
 * (time.next + time.accuracy). NOTE(review): next + accuracy could in
 * principle overflow usec_t for values near USEC_INFINITY — verify callers
 * keep these bounded. */
297 static int latest_time_prioq_compare(const void *a, const void *b) {
298 const sd_event_source *x = a, *y = b;
300 assert(EVENT_SOURCE_IS_TIME(x->type));
301 assert(x->type == y->type);
303 /* Enabled ones first */
304 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
306 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
309 /* Move the pending ones to the end */
310 if (!x->pending && y->pending)
312 if (x->pending && !y->pending)
316 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
318 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
321 /* Stability for the rest */
/* Ordering for the 'exit' prioq: enabled first, then ascending priority. */
330 static int exit_prioq_compare(const void *a, const void *b) {
331 const sd_event_source *x = a, *y = b;
333 assert(x->type == SOURCE_EXIT);
334 assert(y->type == SOURCE_EXIT);
336 /* Enabled ones first */
337 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
339 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
342 /* Lower priority values first */
343 if (x->priority < y->priority)
345 if (x->priority > y->priority)
348 /* Stability for the rest */
/* Releases the two prioqs of one clock (the timerfd itself is presumably
 * closed elsewhere — not visible in this view). */
357 static void free_clock_data(struct clock_data *d) {
361 prioq_free(d->earliest);
362 prioq_free(d->latest);
/* Tears down an event loop: disconnects and unrefs all remaining sources,
 * clears the thread-default pointer, closes all fds and frees all containers. */
365 static void event_free(sd_event *e) {
370 while ((s = e->sources)) {
372 source_disconnect(s);
373 sd_event_source_unref(s);
376 assert(e->n_sources == 0);
/* If this was the thread's default event loop, unregister it */
378 if (e->default_event_ptr)
379 *(e->default_event_ptr) = NULL;
381 safe_close(e->epoll_fd);
382 safe_close(e->signal_fd);
383 safe_close(e->watchdog_fd);
385 free_clock_data(&e->realtime);
386 free_clock_data(&e->boottime);
387 free_clock_data(&e->monotonic);
388 free_clock_data(&e->realtime_alarm);
389 free_clock_data(&e->boottime_alarm);
391 prioq_free(e->pending);
392 prioq_free(e->prepare);
395 free(e->signal_sources);
397 hashmap_free(e->child_sources);
398 set_free(e->post_sources);
/* Allocates a new event loop object. All fds start at -1 and all timer
 * deadlines at USEC_INFINITY so nothing fires before it is armed. */
402 _public_ int sd_event_new(sd_event** ret) {
406 assert_return(ret, -EINVAL);
408 e = new0(sd_event, 1);
413 e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
414 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
/* Remember the creating PID so we can detect use across fork() */
415 e->original_pid = getpid();
416 e->perturb = USEC_INFINITY;
418 assert_se(sigemptyset(&e->sigset) == 0);
420 e->pending = prioq_new(pending_prioq_compare);
426 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
427 if (e->epoll_fd < 0) {
/* Takes an additional reference on the loop; returns e. */
440 _public_ sd_event* sd_event_ref(sd_event *e) {
441 assert_return(e, NULL);
443 assert(e->n_ref >= 1);
/* Drops a reference; frees the loop when the last one goes away. */
449 _public_ sd_event* sd_event_unref(sd_event *e) {
454 assert(e->n_ref >= 1);
/* Returns true if the calling process is not the one that created the loop. */
463 static bool event_pid_changed(sd_event *e) {
466 /* We don't support people creating an event loop and keeping
467 * it around over a fork(). Let's complain. */
469 return e->original_pid != getpid();
/* Removes an IO source's fd from the epoll set, if it was registered. */
472 static int source_io_unregister(sd_event_source *s) {
476 assert(s->type == SOURCE_IO);
478 if (!s->io.registered)
481 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
485 s->io.registered = false;
/* Adds or updates an IO source's fd in the epoll set. ONESHOT enablement
 * maps to EPOLLONESHOT; uses MOD vs ADD depending on current registration. */
489 static int source_io_register(
494 struct epoll_event ev = {};
498 assert(s->type == SOURCE_IO);
499 assert(enabled != SD_EVENT_OFF);
504 if (enabled == SD_EVENT_ONESHOT)
505 ev.events |= EPOLLONESHOT;
507 if (s->io.registered)
508 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
510 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
515 s->io.registered = true;
/* Maps a time source type to its clockid_t; (clockid_t) -1 for non-time types. */
520 static clockid_t event_source_type_to_clock(EventSourceType t) {
524 case SOURCE_TIME_REALTIME:
525 return CLOCK_REALTIME;
527 case SOURCE_TIME_BOOTTIME:
528 return CLOCK_BOOTTIME;
530 case SOURCE_TIME_MONOTONIC:
531 return CLOCK_MONOTONIC;
533 case SOURCE_TIME_REALTIME_ALARM:
534 return CLOCK_REALTIME_ALARM;
536 case SOURCE_TIME_BOOTTIME_ALARM:
537 return CLOCK_BOOTTIME_ALARM;
540 return (clockid_t) -1;
/* Inverse mapping: clockid_t to source type; _INVALID for unsupported clocks. */
544 static EventSourceType clock_to_event_source_type(clockid_t clock) {
549 return SOURCE_TIME_REALTIME;
552 return SOURCE_TIME_BOOTTIME;
554 case CLOCK_MONOTONIC:
555 return SOURCE_TIME_MONOTONIC;
557 case CLOCK_REALTIME_ALARM:
558 return SOURCE_TIME_REALTIME_ALARM;
560 case CLOCK_BOOTTIME_ALARM:
561 return SOURCE_TIME_BOOTTIME_ALARM;
564 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
/* Returns the per-clock bookkeeping struct of the loop for a time source type. */
568 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
573 case SOURCE_TIME_REALTIME:
576 case SOURCE_TIME_BOOTTIME:
579 case SOURCE_TIME_MONOTONIC:
580 return &e->monotonic;
582 case SOURCE_TIME_REALTIME_ALARM:
583 return &e->realtime_alarm;
585 case SOURCE_TIME_BOOTTIME_ALARM:
586 return &e->boottime_alarm;
/* Whether the loop still needs to watch 'signal': either an enabled signal
 * source exists for it, or it is SIGCHLD and enabled child sources remain. */
593 static bool need_signal(sd_event *e, int signal) {
594 return (e->signal_sources && e->signal_sources[signal] &&
595 e->signal_sources[signal]->enabled != SD_EVENT_OFF)
597 (signal == SIGCHLD &&
598 e->n_enabled_child_sources > 0);
/* (Re)creates the signalfd for the current e->sigset and registers it with
 * epoll on first creation. On epoll failure the signalfd is closed again. */
601 static int event_update_signal_fd(sd_event *e) {
602 struct epoll_event ev = {};
/* signalfd(-1, ...) creates a new fd; otherwise updates the mask in place */
608 add_to_epoll = e->signal_fd < 0;
610 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
/* Tag the epoll entry with the source type so the dispatcher can route it */
620 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
622 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
624 e->signal_fd = safe_close(e->signal_fd);
/* Detaches a source from its event loop: per-type deregistration (epoll,
 * prioqs, signal mask, child map), then removal from the generic pending/
 * prepare queues and the global source list. Drops the loop reference at
 * the end, so 's->event' must not be touched afterwards. */
631 static void source_disconnect(sd_event_source *s) {
639 assert(s->event->n_sources > 0);
645 source_io_unregister(s);
649 case SOURCE_TIME_REALTIME:
650 case SOURCE_TIME_BOOTTIME:
651 case SOURCE_TIME_MONOTONIC:
652 case SOURCE_TIME_REALTIME_ALARM:
653 case SOURCE_TIME_BOOTTIME_ALARM: {
654 struct clock_data *d;
656 d = event_get_clock_data(s->event, s->type);
659 prioq_remove(d->earliest, s, &s->time.earliest_index);
660 prioq_remove(d->latest, s, &s->time.latest_index);
661 d->needs_rearm = true;
666 if (s->signal.sig > 0) {
667 if (s->event->signal_sources)
668 s->event->signal_sources[s->signal.sig] = NULL;
670 /* If the signal was on and now it is off... */
671 if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
672 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
674 (void) event_update_signal_fd(s->event);
675 /* If disabling failed, we might get a spurious event,
676 * but otherwise nothing bad should happen. */
683 if (s->child.pid > 0) {
684 if (s->enabled != SD_EVENT_OFF) {
685 assert(s->event->n_enabled_child_sources > 0);
686 s->event->n_enabled_child_sources--;
688 /* We know the signal was on, if it is off now... */
689 if (!need_signal(s->event, SIGCHLD)) {
690 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
692 (void) event_update_signal_fd(s->event);
693 /* If disabling failed, we might get a spurious event,
694 * but otherwise nothing bad should happen. */
698 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
708 set_remove(s->event->post_sources, s);
712 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
716 assert_not_reached("Wut? I shouldn't exist.");
720 prioq_remove(s->event->pending, s, &s->pending_index);
723 prioq_remove(s->event->prepare, s, &s->prepare_index);
727 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
729 LIST_REMOVE(sources, event->sources, s);
/* Drops the reference the source held on its loop */
733 sd_event_unref(event);
/* Disconnects and frees a source object. */
736 static void source_free(sd_event_source *s) {
739 source_disconnect(s);
/* Marks a source as pending (b=true) or not pending (b=false), maintaining
 * the pending prioq; time sources additionally reshuffle their clock prioqs
 * and flag the clock for rearming, since pending-ness affects their ordering. */
744 static int source_set_pending(sd_event_source *s, bool b) {
748 assert(s->type != SOURCE_EXIT);
/* Record when this became pending, for FIFO ordering in the prioq */
756 s->pending_iteration = s->event->iteration;
758 r = prioq_put(s->event->pending, s, &s->pending_index);
764 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
766 if (EVENT_SOURCE_IS_TIME(s->type)) {
767 struct clock_data *d;
769 d = event_get_clock_data(s->event, s->type);
772 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
773 prioq_reshuffle(d->latest, s, &s->time.latest_index);
774 d->needs_rearm = true;
/* Allocates a bare source of the given type, links it into the loop's source
 * list. 'floating' sources (created with ret == NULL) are owned by the loop. */
780 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
785 s = new0(sd_event_source, 1);
791 s->floating = floating;
793 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
798 LIST_PREPEND(sources, e->sources, s);
/* Public: adds an IO event source watching 'fd' for 'events', enabled
 * SD_EVENT_ON, and registers it with epoll immediately. */
804 _public_ int sd_event_add_io(
806 sd_event_source **ret,
809 sd_event_io_handler_t callback,
815 assert_return(e, -EINVAL);
816 assert_return(fd >= 0, -EINVAL);
817 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
818 assert_return(callback, -EINVAL);
819 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
820 assert_return(!event_pid_changed(e), -ECHILD);
/* No 'ret' means the source floats, owned by the loop */
822 s = source_new(e, !ret, SOURCE_IO);
827 s->io.events = events;
828 s->io.callback = callback;
829 s->userdata = userdata;
830 s->enabled = SD_EVENT_ON;
832 r = source_io_register(s, s->enabled, events);
/* Lazily computes a per-machine wakeup perturbation from the boot ID, used
 * by sleep_between() to spread coalesced wakeups across machines. */
844 static void initialize_perturb(sd_event *e) {
845 sd_id128_t bootid = {};
847 /* When we sleep for longer, we try to realign the wakeup to
848 the same time within each minute/second/250ms, so that
849 events all across the system can be coalesced into a single
850 CPU wakeup. However, let's take some system-specific
851 randomness for this value, so that in a network of systems
852 with synced clocks timer events are distributed a
853 bit. Here, we calculate a perturbation usec offset from the
856 if (_likely_(e->perturb != USEC_INFINITY))
859 if (sd_id128_get_boot(&bootid) >= 0)
860 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
/* Lazily creates the timerfd for one clock and registers it with epoll,
 * tagging the epoll entry with the corresponding source type. */
863 static int event_setup_timer_fd(
865 struct clock_data *d,
868 struct epoll_event ev = {};
874 if (_likely_(d->fd >= 0))
877 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
882 ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
884 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
/* Public: adds a timer source for 'clock' firing at absolute time 'usec'
 * with the given accuracy (0 selects DEFAULT_ACCURACY_USEC). Allocates the
 * clock's prioqs and timerfd on first use; new timers start SD_EVENT_ONESHOT. */
894 _public_ int sd_event_add_time(
896 sd_event_source **ret,
900 sd_event_time_handler_t callback,
903 EventSourceType type;
905 struct clock_data *d;
908 assert_return(e, -EINVAL);
909 assert_return(usec != (uint64_t) -1, -EINVAL);
910 assert_return(accuracy != (uint64_t) -1, -EINVAL);
911 assert_return(callback, -EINVAL);
912 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
913 assert_return(!event_pid_changed(e), -ECHILD);
915 type = clock_to_event_source_type(clock);
916 assert_return(type >= 0, -ENOTSUP);
918 d = event_get_clock_data(e, type);
/* Lazily allocate the two per-clock prioqs and the timerfd */
922 d->earliest = prioq_new(earliest_time_prioq_compare);
928 d->latest = prioq_new(latest_time_prioq_compare);
934 r = event_setup_timer_fd(e, d, clock);
939 s = source_new(e, !ret, type);
944 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
945 s->time.callback = callback;
946 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
947 s->userdata = userdata;
948 s->enabled = SD_EVENT_ONESHOT;
950 d->needs_rearm = true;
952 r = prioq_put(d->earliest, s, &s->time.earliest_index);
956 r = prioq_put(d->latest, s, &s->time.latest_index);
/* Default handler used when no callback is supplied: requests loop exit with
 * the integer stashed in userdata as the exit code. */
970 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
973 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
/* Public: adds a signal source for 'sig'. The signal must already be blocked
 * in the calling thread (we verify via pthread_sigmask); only one source per
 * signal is allowed. Adds the signal to the loop's sigset and updates the
 * shared signalfd. */
976 _public_ int sd_event_add_signal(
978 sd_event_source **ret,
980 sd_event_signal_handler_t callback,
988 assert_return(e, -EINVAL);
989 assert_return(sig > 0, -EINVAL);
990 assert_return(sig < _NSIG, -EINVAL);
991 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992 assert_return(!event_pid_changed(e), -ECHILD);
995 callback = signal_exit_callback;
/* Refuse if the caller has not blocked the signal — signalfd needs that */
997 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1001 if (!sigismember(&ss, sig))
1004 if (!e->signal_sources) {
1005 e->signal_sources = new0(sd_event_source*, _NSIG);
1006 if (!e->signal_sources)
1008 } else if (e->signal_sources[sig])
/* Remember whether the signal was already being watched, to decide whether
 * the signalfd mask needs updating below */
1011 previous = need_signal(e, sig);
1013 s = source_new(e, !ret, SOURCE_SIGNAL);
1017 s->signal.sig = sig;
1018 s->signal.callback = callback;
1019 s->userdata = userdata;
1020 s->enabled = SD_EVENT_ON;
1022 e->signal_sources[sig] = s;
1025 assert_se(sigaddset(&e->sigset, sig) == 0);
1027 r = event_update_signal_fd(e);
/* Public: adds a child-process watch for 'pid' using waitid() semantics
 * (options from WEXITED|WSTOPPED|WCONTINUED). One source per pid; child
 * sources are driven off SIGCHLD via the shared signalfd. */
1040 _public_ int sd_event_add_child(
1042 sd_event_source **ret,
1045 sd_event_child_handler_t callback,
1052 assert_return(e, -EINVAL);
1053 assert_return(pid > 1, -EINVAL);
1054 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1055 assert_return(options != 0, -EINVAL);
1056 assert_return(callback, -EINVAL);
1057 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1058 assert_return(!event_pid_changed(e), -ECHILD);
1060 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1064 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
/* Was SIGCHLD already being watched before this addition? */
1067 previous = need_signal(e, SIGCHLD);
1069 s = source_new(e, !ret, SOURCE_CHILD);
1074 s->child.options = options;
1075 s->child.callback = callback;
1076 s->userdata = userdata;
1077 s->enabled = SD_EVENT_ONESHOT;
1079 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1085 e->n_enabled_child_sources ++;
1088 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
1090 r = event_update_signal_fd(e);
/* The child may already have exited; force a waitid() pass next iteration */
1097 e->need_process_child = true;
/* Public: adds a defer source — dispatched on the next iteration; created
 * ONESHOT and immediately marked pending. */
1105 _public_ int sd_event_add_defer(
1107 sd_event_source **ret,
1108 sd_event_handler_t callback,
1114 assert_return(e, -EINVAL);
1115 assert_return(callback, -EINVAL);
1116 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1117 assert_return(!event_pid_changed(e), -ECHILD);
1119 s = source_new(e, !ret, SOURCE_DEFER);
1123 s->defer.callback = callback;
1124 s->userdata = userdata;
1125 s->enabled = SD_EVENT_ONESHOT;
1127 r = source_set_pending(s, true);
/* Public: adds a post source — run after other sources dispatched in an
 * iteration; kept in the loop's post_sources set, enabled SD_EVENT_ON. */
1139 _public_ int sd_event_add_post(
1141 sd_event_source **ret,
1142 sd_event_handler_t callback,
1148 assert_return(e, -EINVAL);
1149 assert_return(callback, -EINVAL);
1150 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1151 assert_return(!event_pid_changed(e), -ECHILD);
1153 r = set_ensure_allocated(&e->post_sources, NULL);
1157 s = source_new(e, !ret, SOURCE_POST);
1161 s->post.callback = callback;
1162 s->userdata = userdata;
1163 s->enabled = SD_EVENT_ON;
1165 r = set_put(e->post_sources, s);
/* Public: adds an exit source — dispatched (by priority) when the loop is
 * asked to exit; kept in the 'exit' prioq, created ONESHOT. */
1177 _public_ int sd_event_add_exit(
1179 sd_event_source **ret,
1180 sd_event_handler_t callback,
1186 assert_return(e, -EINVAL);
1187 assert_return(callback, -EINVAL);
1188 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1189 assert_return(!event_pid_changed(e), -ECHILD);
1192 e->exit = prioq_new(exit_prioq_compare);
1197 s = source_new(e, !ret, SOURCE_EXIT);
1201 s->exit.callback = callback;
1202 s->userdata = userdata;
1203 s->exit.prioq_index = PRIOQ_IDX_NULL;
1204 s->enabled = SD_EVENT_ONESHOT;
1206 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
/* Takes an additional reference on the source; returns s. */
1218 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1219 assert_return(s, NULL);
1221 assert(s->n_ref >= 1);
/* Drops a reference. If we are inside the source's own dispatch, only the
 * fd is detached from epoll now and the object is freed after dispatch. */
1227 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1232 assert(s->n_ref >= 1);
1235 if (s->n_ref <= 0) {
1236 /* Here's a special hack: when we are called from a
1237 * dispatch handler we won't free the event source
1238 * immediately, but we will detach the fd from the
1239 * epoll. This way it is safe for the caller to unref
1240 * the event source and immediately close the fd, but
1241 * we still retain a valid event source object after
1244 if (s->dispatching) {
1245 if (s->type == SOURCE_IO)
1246 source_io_unregister(s);
1248 source_disconnect(s);
/* Sets a (copied) debugging name for the source. */
1256 _public_ int sd_event_source_set_name(sd_event_source *s, const char *name) {
1257 assert_return(s, -EINVAL);
1259 return free_and_strdup(&s->name, name);
/* Returns the source's debugging name via *name (may be NULL if unset). */
1262 _public_ int sd_event_source_get_name(sd_event_source *s, const char **name) {
1263 assert_return(s, -EINVAL);
1264 assert_return(name, -EINVAL);
/* Returns the event loop this source belongs to. */
1271 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1272 assert_return(s, NULL);
/* Returns whether the source is currently pending; not defined for exit sources. */
1277 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1278 assert_return(s, -EINVAL);
1279 assert_return(s->type != SOURCE_EXIT, -EDOM);
1280 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1281 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Returns the fd an IO source watches. */
1286 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1287 assert_return(s, -EINVAL);
1288 assert_return(s->type == SOURCE_IO, -EDOM);
1289 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Replaces the fd an IO source watches. For an enabled source this re-adds
 * the new fd to epoll first and only then deletes the old registration; on
 * failure the old fd and registration are restored. */
1294 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1297 assert_return(s, -EINVAL);
1298 assert_return(fd >= 0, -EINVAL);
1299 assert_return(s->type == SOURCE_IO, -EDOM);
1300 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Disabled source: nothing registered in epoll, just swap the fd */
1305 if (s->enabled == SD_EVENT_OFF) {
1307 s->io.registered = false;
1311 saved_fd = s->io.fd;
1312 assert(s->io.registered);
/* Clear 'registered' so source_io_register() uses EPOLL_CTL_ADD for the new fd */
1315 s->io.registered = false;
1317 r = source_io_register(s, s->enabled, s->io.events);
/* Roll back on failure */
1319 s->io.fd = saved_fd;
1320 s->io.registered = true;
/* New fd registered; drop the old registration (errors ignored) */
1324 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
/* Returns the epoll event mask an IO source was configured with. */
1330 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1331 assert_return(s, -EINVAL);
1332 assert_return(events, -EINVAL);
1333 assert_return(s->type == SOURCE_IO, -EDOM);
1334 assert_return(!event_pid_changed(s->event), -ECHILD);
1336 *events = s->io.events;
/* Changes the epoll event mask of an IO source, re-registering with epoll
 * if the source is enabled, and clears any stale pending state. */
1340 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1343 assert_return(s, -EINVAL);
1344 assert_return(s->type == SOURCE_IO, -EDOM);
1345 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1346 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1347 assert_return(!event_pid_changed(s->event), -ECHILD);
1349 /* edge-triggered updates are never skipped, so we can reset edges */
1350 if (s->io.events == events && !(events & EPOLLET))
1353 if (s->enabled != SD_EVENT_OFF) {
1354 r = source_io_register(s, s->enabled, events);
1359 s->io.events = events;
1360 source_set_pending(s, false);
/* Returns the triggered epoll events of a pending IO source. */
1365 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1366 assert_return(s, -EINVAL);
1367 assert_return(revents, -EINVAL);
1368 assert_return(s->type == SOURCE_IO, -EDOM);
1369 assert_return(s->pending, -ENODATA);
1370 assert_return(!event_pid_changed(s->event), -ECHILD);
1372 *revents = s->io.revents;
/* Returns the signal number a signal source watches. */
1376 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1377 assert_return(s, -EINVAL);
1378 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1379 assert_return(!event_pid_changed(s->event), -ECHILD);
1381 return s->signal.sig;
/* Returns the source's dispatch priority via *priority. */
1384 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1385 assert_return(s, -EINVAL);
1386 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Changes the dispatch priority and reshuffles every prioq that orders by
 * priority (pending, prepare, and — for exit sources — the exit queue). */
1391 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1392 assert_return(s, -EINVAL);
1393 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1394 assert_return(!event_pid_changed(s->event), -ECHILD);
1396 if (s->priority == priority)
1399 s->priority = priority;
1402 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1405 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1407 if (s->type == SOURCE_EXIT)
1408 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
/* Returns the source's enablement state (SD_EVENT_ON/OFF/ONESHOT) via *m. */
1413 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1414 assert_return(s, -EINVAL);
1415 assert_return(m, -EINVAL);
1416 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Enables/disables a source, performing the per-type bookkeeping: epoll
 * registration for IO, prioq reshuffles + rearm for timers, signalfd mask
 * maintenance for signal/child sources, exit-prioq reshuffle for exit
 * sources, and finally pending/prepare prioq reshuffles. */
1422 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1425 assert_return(s, -EINVAL);
1426 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1427 assert_return(!event_pid_changed(s->event), -ECHILD);
1429 /* If we are dead anyway, we are fine with turning off
1430 * sources, but everything else needs to fail. */
1431 if (s->event->state == SD_EVENT_FINISHED)
1432 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1434 if (s->enabled == m)
/* --- Disabling path --- */
1437 if (m == SD_EVENT_OFF) {
1442 r = source_io_unregister(s);
1449 case SOURCE_TIME_REALTIME:
1450 case SOURCE_TIME_BOOTTIME:
1451 case SOURCE_TIME_MONOTONIC:
1452 case SOURCE_TIME_REALTIME_ALARM:
1453 case SOURCE_TIME_BOOTTIME_ALARM: {
1454 struct clock_data *d;
1457 d = event_get_clock_data(s->event, s->type);
1460 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1461 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1462 d->needs_rearm = true;
1467 assert(need_signal(s->event, s->signal.sig));
1471 if (!need_signal(s->event, s->signal.sig)) {
1472 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1474 (void) event_update_signal_fd(s->event);
1475 /* If disabling failed, we might get a spurious event,
1476 * but otherwise nothing bad should happen. */
1482 assert(need_signal(s->event, SIGCHLD));
1486 assert(s->event->n_enabled_child_sources > 0);
1487 s->event->n_enabled_child_sources--;
1489 if (!need_signal(s->event, SIGCHLD)) {
1490 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1492 (void) event_update_signal_fd(s->event);
1499 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1508 assert_not_reached("Wut? I shouldn't exist.");
/* --- Enabling path (ON or ONESHOT) --- */
1515 r = source_io_register(s, m, s->io.events);
1522 case SOURCE_TIME_REALTIME:
1523 case SOURCE_TIME_BOOTTIME:
1524 case SOURCE_TIME_MONOTONIC:
1525 case SOURCE_TIME_REALTIME_ALARM:
1526 case SOURCE_TIME_BOOTTIME_ALARM: {
1527 struct clock_data *d;
1530 d = event_get_clock_data(s->event, s->type);
1533 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1534 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1535 d->needs_rearm = true;
1540 /* Check status before enabling. */
1541 if (!need_signal(s->event, s->signal.sig)) {
1542 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1544 r = event_update_signal_fd(s->event);
1546 s->enabled = SD_EVENT_OFF;
1555 /* Check status before enabling. */
1556 if (s->enabled == SD_EVENT_OFF) {
1557 if (!need_signal(s->event, SIGCHLD)) {
/* NOTE(review): this is the SOURCE_CHILD enable path, yet it adds
 * s->signal.sig — which aliases the child union arm and is not a valid
 * signal number here. It should almost certainly be SIGCHLD, matching
 * the sigdelset(SIGCHLD) in the disable path above — verify against
 * upstream sd-event fixes. */
1558 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1560 r = event_update_signal_fd(s->event);
1562 s->enabled = SD_EVENT_OFF;
1567 s->event->n_enabled_child_sources++;
1575 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1584 assert_not_reached("Wut? I shouldn't exist.");
/* Enablement affects ordering in both generic prioqs */
1589 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1592 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
/* Returns a timer source's absolute trigger time via *usec. */
1597 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1598 assert_return(s, -EINVAL);
1599 assert_return(usec, -EINVAL);
1600 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1601 assert_return(!event_pid_changed(s->event), -ECHILD);
1603 *usec = s->time.next;
/* Changes a timer source's absolute trigger time, un-pends it and reshuffles
 * both clock prioqs so the next iteration rearms the timerfd. */
1607 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1608 struct clock_data *d;
1610 assert_return(s, -EINVAL);
1611 assert_return(usec != (uint64_t) -1, -EINVAL);
1612 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1613 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1614 assert_return(!event_pid_changed(s->event), -ECHILD);
1616 s->time.next = usec;
1618 source_set_pending(s, false);
1620 d = event_get_clock_data(s->event, s->type);
1623 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1624 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1625 d->needs_rearm = true;
/* Returns a timer source's accuracy via *usec. */
1630 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1631 assert_return(s, -EINVAL);
1632 assert_return(usec, -EINVAL);
1633 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1634 assert_return(!event_pid_changed(s->event), -ECHILD);
1636 *usec = s->time.accuracy;
/* Changes a timer source's accuracy (0 selects the default); only the
 * 'latest' prioq orders by accuracy, hence only it is reshuffled. */
1640 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1641 struct clock_data *d;
1643 assert_return(s, -EINVAL);
1644 assert_return(usec != (uint64_t) -1, -EINVAL);
1645 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1646 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1647 assert_return(!event_pid_changed(s->event), -ECHILD);
1650 usec = DEFAULT_ACCURACY_USEC;
1652 s->time.accuracy = usec;
1654 source_set_pending(s, false);
1656 d = event_get_clock_data(s->event, s->type);
1659 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1660 d->needs_rearm = true;
/* Returns the clockid_t a timer source runs on. */
1665 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1666 assert_return(s, -EINVAL);
1667 assert_return(clock, -EINVAL);
1668 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1669 assert_return(!event_pid_changed(s->event), -ECHILD);
1671 *clock = event_source_type_to_clock(s->type);
/* Returns the PID a child source watches. */
1675 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1676 assert_return(s, -EINVAL);
1677 assert_return(pid, -EINVAL);
1678 assert_return(s->type == SOURCE_CHILD, -EDOM);
1679 assert_return(!event_pid_changed(s->event), -ECHILD);
1681 *pid = s->child.pid;
/* Installs (or removes, with NULL) a prepare callback invoked before each
 * poll; adds/removes the source from the 'prepare' prioq accordingly. */
1685 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1688 assert_return(s, -EINVAL);
1689 assert_return(s->type != SOURCE_EXIT, -EDOM);
1690 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1691 assert_return(!event_pid_changed(s->event), -ECHILD);
1693 if (s->prepare == callback)
/* Already in the prepare prioq; just swap the function pointer */
1696 if (callback && s->prepare) {
1697 s->prepare = callback;
1701 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1705 s->prepare = callback;
1708 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1712 prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Returns the userdata pointer associated with the source. */
1717 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1718 assert_return(s, NULL);
/* Replaces the userdata pointer; returns the previous value. */
1723 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1726 assert_return(s, NULL);
1729 s->userdata = userdata;
/* Picks a wakeup time within [a, b]: the latest system-wide coalescing point
 * (per-minute, then 10s, 1s, 250ms grid, each offset by the boot-ID
 * perturbation) that still falls inside the window, else b itself. */
1734 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1745 initialize_perturb(e);
1748 Find a good time to wake up again between times a and b. We
1749 have two goals here:
1751 a) We want to wake up as seldom as possible, hence prefer
1752 later times over earlier times.
1754 b) But if we have to wake up, then let's make sure to
1755 dispatch as much as possible on the entire system.
1757 We implement this by waking up everywhere at the same time
1758 within any given minute if we can, synchronised via the
1759 perturbation value determined from the boot ID. If we can't,
1760 then we try to find the same spot in every 10s, then 1s and
1761 then 250ms step. Otherwise, we pick the last possible time
/* Try the per-minute grid point first */
1765 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1767 if (_unlikely_(c < USEC_PER_MINUTE))
1770 c -= USEC_PER_MINUTE;
/* Fall back to the 10s grid */
1776 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1778 if (_unlikely_(c < USEC_PER_SEC*10))
1781 c -= USEC_PER_SEC*10;
/* Fall back to the 1s grid */
1787 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1789 if (_unlikely_(c < USEC_PER_SEC))
/* Fall back to the 250ms grid */
1798 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1800 if (_unlikely_(c < USEC_PER_MSEC*250))
1803 c -= USEC_PER_MSEC*250;
/* Programs one clock's timerfd from the top of its earliest/latest prioqs:
 * disarms (far-future value) when nothing is enabled, otherwise picks a
 * wakeup via sleep_between(). No-op unless d->needs_rearm is set. */
1812 static int event_arm_timer(
1814 struct clock_data *d) {
1816 struct itimerspec its = {};
1817 sd_event_source *a, *b;
1824 if (!d->needs_rearm)
1827 d->needs_rearm = false;
1829 a = prioq_peek(d->earliest);
1830 if (!a || a->enabled == SD_EVENT_OFF) {
/* Nothing enabled on this clock: disarm unless already disarmed */
1835 if (d->next == USEC_INFINITY)
1839 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1843 d->next = USEC_INFINITY;
1847 b = prioq_peek(d->latest);
1848 assert_se(b && b->enabled != SD_EVENT_OFF);
/* Schedule inside [earliest may-fire, latest must-fire] window */
1850 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1854 assert_se(d->fd >= 0);
1857 /* We don't want to disarm here, just set some time looooong ago. */
1858 its.it_value.tv_sec = 0;
1859 its.it_value.tv_nsec = 1;
1861 timespec_store(&its.it_value, t);
1863 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1871 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1874 assert(s->type == SOURCE_IO);
1876 /* If the event source was already pending, we just OR in the
1877 * new revents, otherwise we reset the value. The ORing is
1878 * necessary to handle EPOLLONESHOT events properly where
1879 * readability might happen independently of writability, and
1880 * we need to keep track of both */
1883 s->io.revents |= revents;
1885 s->io.revents = revents;
1887 return source_set_pending(s, true);
1890 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1897 assert_return(events == EPOLLIN, -EIO);
1899 ss = read(fd, &x, sizeof(x));
1901 if (errno == EAGAIN || errno == EINTR)
1907 if (_unlikely_(ss != sizeof(x)))
1911 *next = USEC_INFINITY;
1916 static int process_timer(
1919 struct clock_data *d) {
1928 s = prioq_peek(d->earliest);
1931 s->enabled == SD_EVENT_OFF ||
1935 r = source_set_pending(s, true);
1939 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1940 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1941 d->needs_rearm = true;
1947 static int process_child(sd_event *e) {
1954 e->need_process_child = false;
1957 So, this is ugly. We iteratively invoke waitid() with P_PID
1958 + WNOHANG for each PID we wait for, instead of using
1959 P_ALL. This is because we only want to get child
1960 information of very specific child processes, and not all
1961 of them. We might not have processed the SIGCHLD even of a
1962 previous invocation and we don't want to maintain a
1963 unbounded *per-child* event queue, hence we really don't
1964 want anything flushed out of the kernel's queue that we
1965 don't care about. Since this is O(n) this means that if you
1966 have a lot of processes you probably want to handle SIGCHLD
1969 We do not reap the children here (by using WNOWAIT), this
1970 is only done after the event source is dispatched so that
1971 the callback still sees the process as a zombie.
1974 HASHMAP_FOREACH(s, e->child_sources, i) {
1975 assert(s->type == SOURCE_CHILD);
1980 if (s->enabled == SD_EVENT_OFF)
1983 zero(s->child.siginfo);
1984 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1985 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1989 if (s->child.siginfo.si_pid != 0) {
1991 s->child.siginfo.si_code == CLD_EXITED ||
1992 s->child.siginfo.si_code == CLD_KILLED ||
1993 s->child.siginfo.si_code == CLD_DUMPED;
1995 if (!zombie && (s->child.options & WEXITED)) {
1996 /* If the child isn't dead then let's
1997 * immediately remove the state change
1998 * from the queue, since there's no
1999 * benefit in leaving it queued */
2001 assert(s->child.options & (WSTOPPED|WCONTINUED));
2002 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2005 r = source_set_pending(s, true);
2014 static int process_signal(sd_event *e, uint32_t events) {
2015 bool read_one = false;
2020 assert_return(events == EPOLLIN, -EIO);
2023 struct signalfd_siginfo si;
2025 sd_event_source *s = NULL;
2027 n = read(e->signal_fd, &si, sizeof(si));
2029 if (errno == EAGAIN || errno == EINTR)
2035 if (_unlikely_(n != sizeof(si)))
2038 assert(si.ssi_signo < _NSIG);
2042 if (si.ssi_signo == SIGCHLD) {
2043 r = process_child(e);
2050 if (e->signal_sources)
2051 s = e->signal_sources[si.ssi_signo];
2056 s->signal.siginfo = si;
2057 r = source_set_pending(s, true);
2063 static int source_dispatch(sd_event_source *s) {
2067 assert(s->pending || s->type == SOURCE_EXIT);
2069 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2070 r = source_set_pending(s, false);
2075 if (s->type != SOURCE_POST) {
2079 /* If we execute a non-post source, let's mark all
2080 * post sources as pending */
2082 SET_FOREACH(z, s->event->post_sources, i) {
2083 if (z->enabled == SD_EVENT_OFF)
2086 r = source_set_pending(z, true);
2092 if (s->enabled == SD_EVENT_ONESHOT) {
2093 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2098 s->dispatching = true;
2103 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2106 case SOURCE_TIME_REALTIME:
2107 case SOURCE_TIME_BOOTTIME:
2108 case SOURCE_TIME_MONOTONIC:
2109 case SOURCE_TIME_REALTIME_ALARM:
2110 case SOURCE_TIME_BOOTTIME_ALARM:
2111 r = s->time.callback(s, s->time.next, s->userdata);
2115 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2118 case SOURCE_CHILD: {
2121 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2122 s->child.siginfo.si_code == CLD_KILLED ||
2123 s->child.siginfo.si_code == CLD_DUMPED;
2125 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2127 /* Now, reap the PID for good. */
2129 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2135 r = s->defer.callback(s, s->userdata);
2139 r = s->post.callback(s, s->userdata);
2143 r = s->exit.callback(s, s->userdata);
2146 case SOURCE_WATCHDOG:
2147 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2148 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2149 assert_not_reached("Wut? I shouldn't exist.");
2152 s->dispatching = false;
2156 log_debug("Event source '%s' returned error, disabling: %s", s->name, strerror(-r));
2158 log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
2164 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2169 static int event_prepare(sd_event *e) {
2177 s = prioq_peek(e->prepare);
2178 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2181 s->prepare_iteration = e->iteration;
2182 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2188 s->dispatching = true;
2189 r = s->prepare(s, s->userdata);
2190 s->dispatching = false;
2194 log_debug("Prepare callback of event source '%s' returned error, disabling: %s", s->name, strerror(-r));
2196 log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
2202 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2208 static int dispatch_exit(sd_event *e) {
2214 p = prioq_peek(e->exit);
2215 if (!p || p->enabled == SD_EVENT_OFF) {
2216 e->state = SD_EVENT_FINISHED;
2222 e->state = SD_EVENT_EXITING;
2224 r = source_dispatch(p);
2226 e->state = SD_EVENT_PASSIVE;
2232 static sd_event_source* event_next_pending(sd_event *e) {
2237 p = prioq_peek(e->pending);
2241 if (p->enabled == SD_EVENT_OFF)
2247 static int arm_watchdog(sd_event *e) {
2248 struct itimerspec its = {};
2253 assert(e->watchdog_fd >= 0);
2255 t = sleep_between(e,
2256 e->watchdog_last + (e->watchdog_period / 2),
2257 e->watchdog_last + (e->watchdog_period * 3 / 4));
2259 timespec_store(&its.it_value, t);
2261 /* Make sure we never set the watchdog to 0, which tells the
2262 * kernel to disable it. */
2263 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2264 its.it_value.tv_nsec = 1;
2266 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2273 static int process_watchdog(sd_event *e) {
2279 /* Don't notify watchdog too often */
2280 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2283 sd_notify(false, "WATCHDOG=1");
2284 e->watchdog_last = e->timestamp.monotonic;
2286 return arm_watchdog(e);
2289 _public_ int sd_event_prepare(sd_event *e) {
2292 assert_return(e, -EINVAL);
2293 assert_return(!event_pid_changed(e), -ECHILD);
2294 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2295 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2297 if (e->exit_requested)
2302 r = event_prepare(e);
2306 r = event_arm_timer(e, &e->realtime);
2310 r = event_arm_timer(e, &e->boottime);
2314 r = event_arm_timer(e, &e->monotonic);
2318 r = event_arm_timer(e, &e->realtime_alarm);
2322 r = event_arm_timer(e, &e->boottime_alarm);
2326 if (event_next_pending(e) || e->need_process_child)
2329 e->state = SD_EVENT_PREPARED;
2334 e->state = SD_EVENT_PREPARED;
2335 r = sd_event_wait(e, 0);
2337 e->state = SD_EVENT_PREPARED;
2342 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2343 struct epoll_event *ev_queue;
2344 unsigned ev_queue_max;
2347 assert_return(e, -EINVAL);
2348 assert_return(!event_pid_changed(e), -ECHILD);
2349 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2350 assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);
2352 if (e->exit_requested) {
2353 e->state = SD_EVENT_PENDING;
2357 ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
2358 ev_queue = newa(struct epoll_event, ev_queue_max);
2360 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2361 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2363 if (errno == EINTR) {
2364 e->state = SD_EVENT_PENDING;
2373 dual_timestamp_get(&e->timestamp);
2374 e->timestamp_boottime = now(CLOCK_BOOTTIME);
2376 for (i = 0; i < m; i++) {
2378 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
2379 r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
2380 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
2381 r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
2382 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
2383 r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
2384 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
2385 r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
2386 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
2387 r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
2388 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
2389 r = process_signal(e, ev_queue[i].events);
2390 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2391 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2393 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2399 r = process_watchdog(e);
2403 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2407 r = process_timer(e, e->timestamp_boottime, &e->boottime);
2411 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2415 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2419 r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2423 if (e->need_process_child) {
2424 r = process_child(e);
2429 if (event_next_pending(e)) {
2430 e->state = SD_EVENT_PENDING;
2438 e->state = SD_EVENT_PASSIVE;
2443 _public_ int sd_event_dispatch(sd_event *e) {
2447 assert_return(e, -EINVAL);
2448 assert_return(!event_pid_changed(e), -ECHILD);
2449 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2450 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2452 if (e->exit_requested)
2453 return dispatch_exit(e);
2455 p = event_next_pending(e);
2459 e->state = SD_EVENT_RUNNING;
2460 r = source_dispatch(p);
2461 e->state = SD_EVENT_PASSIVE;
2468 e->state = SD_EVENT_PASSIVE;
2473 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2476 assert_return(e, -EINVAL);
2477 assert_return(!event_pid_changed(e), -ECHILD);
2478 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2479 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2481 r = sd_event_prepare(e);
2483 return sd_event_dispatch(e);
2487 r = sd_event_wait(e, timeout);
2489 return sd_event_dispatch(e);
2494 _public_ int sd_event_loop(sd_event *e) {
2497 assert_return(e, -EINVAL);
2498 assert_return(!event_pid_changed(e), -ECHILD);
2499 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2503 while (e->state != SD_EVENT_FINISHED) {
2504 r = sd_event_run(e, (uint64_t) -1);
2516 _public_ int sd_event_get_fd(sd_event *e) {
2518 assert_return(e, -EINVAL);
2519 assert_return(!event_pid_changed(e), -ECHILD);
2524 _public_ int sd_event_get_state(sd_event *e) {
2525 assert_return(e, -EINVAL);
2526 assert_return(!event_pid_changed(e), -ECHILD);
2531 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2532 assert_return(e, -EINVAL);
2533 assert_return(code, -EINVAL);
2534 assert_return(!event_pid_changed(e), -ECHILD);
2536 if (!e->exit_requested)
2539 *code = e->exit_code;
2543 _public_ int sd_event_exit(sd_event *e, int code) {
2544 assert_return(e, -EINVAL);
2545 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2546 assert_return(!event_pid_changed(e), -ECHILD);
2548 e->exit_requested = true;
2549 e->exit_code = code;
2554 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2555 assert_return(e, -EINVAL);
2556 assert_return(usec, -EINVAL);
2557 assert_return(!event_pid_changed(e), -ECHILD);
2559 /* If we haven't run yet, just get the actual time */
2560 if (!dual_timestamp_is_set(&e->timestamp))
2565 case CLOCK_REALTIME:
2566 case CLOCK_REALTIME_ALARM:
2567 *usec = e->timestamp.realtime;
2570 case CLOCK_MONOTONIC:
2571 *usec = e->timestamp.monotonic;
2574 case CLOCK_BOOTTIME:
2575 case CLOCK_BOOTTIME_ALARM:
2576 *usec = e->timestamp_boottime;
2583 _public_ int sd_event_default(sd_event **ret) {
2585 static thread_local sd_event *default_event = NULL;
2590 return !!default_event;
2592 if (default_event) {
2593 *ret = sd_event_ref(default_event);
2597 r = sd_event_new(&e);
2601 e->default_event_ptr = &default_event;
2609 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2610 assert_return(e, -EINVAL);
2611 assert_return(tid, -EINVAL);
2612 assert_return(!event_pid_changed(e), -ECHILD);
2622 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2625 assert_return(e, -EINVAL);
2626 assert_return(!event_pid_changed(e), -ECHILD);
2628 if (e->watchdog == !!b)
2632 struct epoll_event ev = {};
2634 r = sd_watchdog_enabled(false, &e->watchdog_period);
2638 /* Issue first ping immediately */
2639 sd_notify(false, "WATCHDOG=1");
2640 e->watchdog_last = now(CLOCK_MONOTONIC);
2642 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2643 if (e->watchdog_fd < 0)
2646 r = arm_watchdog(e);
2650 ev.events = EPOLLIN;
2651 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2653 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2660 if (e->watchdog_fd >= 0) {
2661 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2662 e->watchdog_fd = safe_close(e->watchdog_fd);
2670 e->watchdog_fd = safe_close(e->watchdog_fd);
2674 _public_ int sd_event_get_watchdog(sd_event *e) {
2675 assert_return(e, -EINVAL);
2676 assert_return(!event_pid_changed(e), -ECHILD);