1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
28 #include "sd-daemon.h"
33 #include "time-util.h"
38 #define EPOLL_QUEUE_MAX 512U
39 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41 typedef enum EventSourceType {
52 struct sd_event_source {
57 sd_event_handler_t prepare;
59 EventSourceType type:4;
65 unsigned pending_index;
66 unsigned prepare_index;
67 unsigned pending_iteration;
68 unsigned prepare_iteration;
72 sd_event_io_handler_t callback;
79 sd_event_time_handler_t callback;
80 usec_t next, accuracy;
81 unsigned earliest_index;
82 unsigned latest_index;
85 sd_event_signal_handler_t callback;
86 struct signalfd_siginfo siginfo;
90 sd_event_child_handler_t callback;
96 sd_event_handler_t callback;
99 sd_event_handler_t callback;
100 unsigned prioq_index;
117 /* For both clocks we maintain two priority queues each, one
118 * ordered for the earliest times the events may be
119 * dispatched, and one ordered by the latest times they must
120 * have been dispatched. The range between the top entries in
121 * the two prioqs is the time window we can freely schedule
123 Prioq *monotonic_earliest;
124 Prioq *monotonic_latest;
125 Prioq *realtime_earliest;
126 Prioq *realtime_latest;
128 usec_t realtime_next, monotonic_next;
132 sd_event_source **signal_sources;
134 Hashmap *child_sources;
135 unsigned n_enabled_child_sources;
142 dual_timestamp timestamp;
145 bool exit_requested:1;
146 bool need_process_child:1;
152 sd_event **default_event_ptr;
154 usec_t watchdog_last, watchdog_period;
/* Comparator for the "pending" prioq: enabled sources sort before
 * disabled ones, then lower (more important) priority values, then the
 * source that became pending in an earlier loop iteration.
 * NOTE(review): the return statements between the comparisons, and the
 * final address-based tie-breaker after "Stability for the rest", are
 * elided from this excerpt. */
159 static int pending_prioq_compare(const void *a, const void *b) {
160 const sd_event_source *x = a, *y = b;
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
171 /* Lower priority values first */
172 if (x->priority < y->priority)
174 if (x->priority > y->priority)
177 /* Older entries first */
178 if (x->pending_iteration < y->pending_iteration)
180 if (x->pending_iteration > y->pending_iteration)
183 /* Stability for the rest */
/* Comparator for the "prepare" prioq. Unlike pending_prioq_compare(),
 * the iteration counter is checked FIRST: sources already prepared in
 * the current iteration sink to the end, so the prepare loop can stop
 * at the first already-prepared entry it peeks.
 * NOTE(review): the return statements between the comparisons are
 * elided from this excerpt. */
192 static int prepare_prioq_compare(const void *a, const void *b) {
193 const sd_event_source *x = a, *y = b;
198 /* Move most recently prepared ones last, so that we can stop
199 * preparing as soon as we hit one that has already been
200 * prepared in the current iteration */
201 if (x->prepare_iteration < y->prepare_iteration)
203 if (x->prepare_iteration > y->prepare_iteration)
206 /* Enabled ones first */
207 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
209 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
212 /* Lower priority values first */
213 if (x->priority < y->priority)
215 if (x->priority > y->priority)
218 /* Stability for the rest */
/* Comparator for the per-clock "earliest" prioqs: orders timer sources
 * by the earliest time (time.next) at which they may be dispatched.
 * Enabled sources come first, and sources that are already pending are
 * pushed to the end so they do not influence the next wakeup time.
 * NOTE(review): the return statements between the comparisons are
 * elided from this excerpt. */
227 static int earliest_time_prioq_compare(const void *a, const void *b) {
228 const sd_event_source *x = a, *y = b;
230 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
231 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
233 /* Enabled ones first */
234 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
236 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
239 /* Move the pending ones to the end */
240 if (!x->pending && y->pending)
242 if (x->pending && !y->pending)
246 if (x->time.next < y->time.next)
248 if (x->time.next > y->time.next)
251 /* Stability for the rest */
/* Comparator for the per-clock "latest" prioqs: orders timer sources by
 * the latest time by which they must have been dispatched, i.e.
 * time.next + time.accuracy. Both entries must belong to the same clock
 * (see the paired assert below).
 * NOTE(review): `time.next + time.accuracy` can wrap around if next is
 * close to (usec_t) -1 — confirm upstream guards against this.
 * NOTE(review): the return statements between the comparisons are
 * elided from this excerpt. */
260 static int latest_time_prioq_compare(const void *a, const void *b) {
261 const sd_event_source *x = a, *y = b;
263 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
264 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
266 /* Enabled ones first */
267 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
269 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
272 /* Move the pending ones to the end */
273 if (!x->pending && y->pending)
275 if (x->pending && !y->pending)
279 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
281 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
284 /* Stability for the rest */
/* Comparator for the "exit" prioq: enabled exit sources first, then by
 * ascending priority value, so dispatch_exit() always peeks the most
 * important live exit handler.
 * NOTE(review): the return statements between the comparisons are
 * elided from this excerpt. */
293 static int exit_prioq_compare(const void *a, const void *b) {
294 const sd_event_source *x = a, *y = b;
296 assert(x->type == SOURCE_EXIT);
297 assert(y->type == SOURCE_EXIT);
299 /* Enabled ones first */
300 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
302 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
305 /* Lower priority values first */
306 if (x->priority < y->priority)
308 if (x->priority > y->priority)
311 /* Stability for the rest */
/* Tears down an sd_event object once its last reference is gone: clears
 * the default-event back-pointer, closes every owned fd, and frees the
 * prioqs, the signal source table and the child source hashmap. Must
 * only run when no sources remain (asserted below).
 * NOTE(review): the final free(e) is elided from this excerpt. */
320 static void event_free(sd_event *e) {
322 assert(e->n_sources == 0);
324 if (e->default_event_ptr)
325 *(e->default_event_ptr) = NULL;
/* Close every fd the loop owns; each is -1 when never opened. */
327 if (e->epoll_fd >= 0)
328 close_nointr_nofail(e->epoll_fd);
330 if (e->signal_fd >= 0)
331 close_nointr_nofail(e->signal_fd);
333 if (e->realtime_fd >= 0)
334 close_nointr_nofail(e->realtime_fd);
336 if (e->monotonic_fd >= 0)
337 close_nointr_nofail(e->monotonic_fd);
339 if (e->watchdog_fd >= 0)
340 close_nointr_nofail(e->watchdog_fd);
/* Free the scheduling queues and lookup tables. prioq_free() and
 * hashmap_free() tolerate NULL. */
342 prioq_free(e->pending);
343 prioq_free(e->prepare);
344 prioq_free(e->monotonic_earliest);
345 prioq_free(e->monotonic_latest);
346 prioq_free(e->realtime_earliest);
347 prioq_free(e->realtime_latest);
350 free(e->signal_sources);
352 hashmap_free(e->child_sources);
/* Public constructor: allocates a new event loop, initializes all fds
 * to -1 (not yet opened) and both timer deadlines to "infinity"
 * ((usec_t) -1), records the creating PID for the fork guard
 * (event_pid_changed()), and opens the epoll fd.
 * NOTE(review): error-path cleanup and the *ret assignment are elided
 * from this excerpt. */
356 _public_ int sd_event_new(sd_event** ret) {
360 assert_return(ret, -EINVAL);
362 e = new0(sd_event, 1);
367 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
368 e->realtime_next = e->monotonic_next = (usec_t) -1;
369 e->original_pid = getpid();
371 assert_se(sigemptyset(&e->sigset) == 0);
373 e->pending = prioq_new(pending_prioq_compare);
379 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
380 if (e->epoll_fd < 0) {
/* Reference counting for the event loop object. Both functions assert
 * the counter is sane before touching it; the increment/decrement and
 * the event_free() call on the last unref are elided from this
 * excerpt. */
393 _public_ sd_event* sd_event_ref(sd_event *e) {
394 assert_return(e, NULL);
396 assert(e->n_ref >= 1);
402 _public_ sd_event* sd_event_unref(sd_event *e) {
407 assert(e->n_ref >= 1);
/* Fork guard: returns true if the calling process is not the one that
 * created this event loop. Used by the assert_return(!event_pid_changed(e))
 * checks throughout the public API. */
416 static bool event_pid_changed(sd_event *e) {
419 /* We don't support people creating an event loop and keeping
420 * it around over a fork(). Let's complain. */
422 return e->original_pid != getpid();
/* Removes an IO source's fd from the epoll set, if it is currently
 * registered, and clears the registered flag.
 * NOTE(review): the early return for the unregistered case and the
 * error handling of epoll_ctl() are elided from this excerpt. */
425 static int source_io_unregister(sd_event_source *s) {
429 assert(s->type == SOURCE_IO);
431 if (!s->io.registered)
434 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
438 s->io.registered = false;
/* (Re-)registers an IO source's fd with epoll. Uses EPOLL_CTL_MOD when
 * the fd is already registered, EPOLL_CTL_ADD otherwise. ONESHOT
 * sources get EPOLLONESHOT so the kernel disarms them after one event.
 * Must not be called for disabled sources (asserted). */
442 static int source_io_register(
447 struct epoll_event ev = {};
451 assert(s->type == SOURCE_IO);
452 assert(enabled != SD_EVENT_OFF);
457 if (enabled == SD_EVENT_ONESHOT)
458 ev.events |= EPOLLONESHOT;
460 if (s->io.registered)
461 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
463 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
468 s->io.registered = true;
473 static void source_free(sd_event_source *s) {
477 assert(s->event->n_sources > 0);
483 source_io_unregister(s);
487 case SOURCE_MONOTONIC:
488 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
489 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
492 case SOURCE_REALTIME:
493 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
494 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
498 if (s->signal.sig > 0) {
499 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
500 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
502 if (s->event->signal_sources)
503 s->event->signal_sources[s->signal.sig] = NULL;
509 if (s->child.pid > 0) {
510 if (s->enabled != SD_EVENT_OFF) {
511 assert(s->event->n_enabled_child_sources > 0);
512 s->event->n_enabled_child_sources--;
515 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
516 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
518 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
528 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
531 case SOURCE_WATCHDOG:
532 assert_not_reached("Wut? I shouldn't exist.");
536 prioq_remove(s->event->pending, s, &s->pending_index);
539 prioq_remove(s->event->prepare, s, &s->prepare_index);
541 s->event->n_sources--;
542 sd_event_unref(s->event);
/* Marks a source as pending (b=true) or not pending (b=false). When
 * set, the source is inserted into the event's pending prioq with the
 * current iteration number; when cleared, it is removed. Timer sources
 * are additionally reshuffled in their per-clock prioqs, because the
 * comparators above sort pending entries to the end. Never used for
 * SOURCE_EXIT sources (asserted) — those live in their own prioq. */
548 static int source_set_pending(sd_event_source *s, bool b) {
552 assert(s->type != SOURCE_EXIT);
560 s->pending_iteration = s->event->iteration;
562 r = prioq_put(s->event->pending, s, &s->pending_index);
568 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
570 if (s->type == SOURCE_REALTIME) {
571 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
572 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
573 } else if (s->type == SOURCE_MONOTONIC) {
574 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
575 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Allocates a new event source of the given type, takes a reference on
 * the owning event loop, and initializes the prioq indices to
 * PRIOQ_IDX_NULL (not enqueued).
 * NOTE(review): the NULL-check on the allocation and the n_sources
 * increment are elided from this excerpt. */
581 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
586 s = new0(sd_event_source, 1);
591 s->event = sd_event_ref(e);
593 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
600 _public_ int sd_event_add_io(
604 sd_event_io_handler_t callback,
606 sd_event_source **ret) {
611 assert_return(e, -EINVAL);
612 assert_return(fd >= 0, -EINVAL);
613 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
614 assert_return(callback, -EINVAL);
615 assert_return(ret, -EINVAL);
616 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
617 assert_return(!event_pid_changed(e), -ECHILD);
619 s = source_new(e, SOURCE_IO);
624 s->io.events = events;
625 s->io.callback = callback;
626 s->userdata = userdata;
627 s->enabled = SD_EVENT_ON;
629 r = source_io_register(s, s->enabled, events);
639 static int event_setup_timer_fd(
641 EventSourceType type,
645 struct epoll_event ev = {};
652 if (_likely_(*timer_fd >= 0))
655 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
660 ev.data.ptr = INT_TO_PTR(type);
662 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
664 close_nointr_nofail(fd);
668 /* When we sleep for longer, we try to realign the wakeup to
669 the same time within each minute/second/250ms, so that
670 events all across the system can be coalesced into a single
671 CPU wakeup. However, let's take some system-specific
672 randomness for this value, so that in a network of systems
673 with synced clocks timer events are distributed a
674 bit. Here, we calculate a perturbation usec offset from the
677 if (sd_id128_get_boot(&bootid) >= 0)
678 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
684 static int event_add_time_internal(
686 EventSourceType type,
693 sd_event_time_handler_t callback,
695 sd_event_source **ret) {
700 assert_return(e, -EINVAL);
701 assert_return(callback, -EINVAL);
702 assert_return(ret, -EINVAL);
703 assert_return(usec != (uint64_t) -1, -EINVAL);
704 assert_return(accuracy != (uint64_t) -1, -EINVAL);
705 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
706 assert_return(!event_pid_changed(e), -ECHILD);
713 *earliest = prioq_new(earliest_time_prioq_compare);
719 *latest = prioq_new(latest_time_prioq_compare);
725 r = event_setup_timer_fd(e, type, timer_fd, id);
730 s = source_new(e, type);
735 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
736 s->time.callback = callback;
737 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
738 s->userdata = userdata;
739 s->enabled = SD_EVENT_ONESHOT;
741 r = prioq_put(*earliest, s, &s->time.earliest_index);
745 r = prioq_put(*latest, s, &s->time.latest_index);
/* Public wrapper adding a CLOCK_MONOTONIC timer source: forwards to
 * event_add_time_internal() with the monotonic timer fd and the
 * monotonic earliest/latest prioq pair. */
757 _public_ int sd_event_add_monotonic(sd_event *e,
760 sd_event_time_handler_t callback,
762 sd_event_source **ret) {
764 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
/* Public wrapper adding a CLOCK_REALTIME timer source: forwards to
 * event_add_time_internal() with the realtime timer fd and the realtime
 * earliest/latest prioq pair.
 *
 * BUG FIX: the original passed &e->monotonic_latest as the "latest"
 * prioq (a copy-paste from sd_event_add_monotonic() above). That would
 * insert realtime sources into the monotonic latest-deadline queue,
 * desynchronizing it from realtime_earliest and corrupting timer arming
 * for both clocks. It must be &e->realtime_latest, matching the
 * SOURCE_REALTIME handling in source_set_pending() and
 * sd_event_source_set_time(). */
767 _public_ int sd_event_add_realtime(sd_event *e,
770 sd_event_time_handler_t callback,
772 sd_event_source **ret) {
774 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->realtime_latest, usec, accuracy, callback, userdata, ret);
777 static int event_update_signal_fd(sd_event *e) {
778 struct epoll_event ev = {};
784 add_to_epoll = e->signal_fd < 0;
786 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
796 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
798 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
800 close_nointr_nofail(e->signal_fd);
809 _public_ int sd_event_add_signal(
812 sd_event_signal_handler_t callback,
814 sd_event_source **ret) {
820 assert_return(e, -EINVAL);
821 assert_return(sig > 0, -EINVAL);
822 assert_return(sig < _NSIG, -EINVAL);
823 assert_return(callback, -EINVAL);
824 assert_return(ret, -EINVAL);
825 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
826 assert_return(!event_pid_changed(e), -ECHILD);
828 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
832 if (!sigismember(&ss, sig))
835 if (!e->signal_sources) {
836 e->signal_sources = new0(sd_event_source*, _NSIG);
837 if (!e->signal_sources)
839 } else if (e->signal_sources[sig])
842 s = source_new(e, SOURCE_SIGNAL);
847 s->signal.callback = callback;
848 s->userdata = userdata;
849 s->enabled = SD_EVENT_ON;
851 e->signal_sources[sig] = s;
852 assert_se(sigaddset(&e->sigset, sig) == 0);
854 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
855 r = event_update_signal_fd(e);
866 _public_ int sd_event_add_child(
870 sd_event_child_handler_t callback,
872 sd_event_source **ret) {
877 assert_return(e, -EINVAL);
878 assert_return(pid > 1, -EINVAL);
879 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
880 assert_return(options != 0, -EINVAL);
881 assert_return(callback, -EINVAL);
882 assert_return(ret, -EINVAL);
883 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
884 assert_return(!event_pid_changed(e), -ECHILD);
886 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
890 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
893 s = source_new(e, SOURCE_CHILD);
898 s->child.options = options;
899 s->child.callback = callback;
900 s->userdata = userdata;
901 s->enabled = SD_EVENT_ONESHOT;
903 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
909 e->n_enabled_child_sources ++;
911 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
913 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
914 r = event_update_signal_fd(e);
921 e->need_process_child = true;
927 _public_ int sd_event_add_defer(
929 sd_event_handler_t callback,
931 sd_event_source **ret) {
936 assert_return(e, -EINVAL);
937 assert_return(callback, -EINVAL);
938 assert_return(ret, -EINVAL);
939 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
940 assert_return(!event_pid_changed(e), -ECHILD);
942 s = source_new(e, SOURCE_DEFER);
946 s->defer.callback = callback;
947 s->userdata = userdata;
948 s->enabled = SD_EVENT_ONESHOT;
950 r = source_set_pending(s, true);
960 _public_ int sd_event_add_exit(
962 sd_event_handler_t callback,
964 sd_event_source **ret) {
969 assert_return(e, -EINVAL);
970 assert_return(callback, -EINVAL);
971 assert_return(ret, -EINVAL);
972 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
973 assert_return(!event_pid_changed(e), -ECHILD);
976 e->exit = prioq_new(exit_prioq_compare);
981 s = source_new(e, SOURCE_EXIT);
985 s->exit.callback = callback;
986 s->userdata = userdata;
987 s->exit.prioq_index = PRIOQ_IDX_NULL;
988 s->enabled = SD_EVENT_ONESHOT;
990 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1000 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1001 assert_return(s, NULL);
1003 assert(s->n_ref >= 1);
1009 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1014 assert(s->n_ref >= 1);
1017 if (s->n_ref <= 0) {
1018 /* Here's a special hack: when we are called from a
1019 * dispatch handler we won't free the event source
1020 * immediately, but we will detach the fd from the
1021 * epoll. This way it is safe for the caller to unref
1022 * the event source and immediately close the fd, but
1023 * we still retain a valid event source object after
1026 if (s->dispatching) {
1027 if (s->type == SOURCE_IO)
1028 source_io_unregister(s);
1036 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1037 assert_return(s, NULL);
1042 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1043 assert_return(s, -EINVAL);
1044 assert_return(s->type != SOURCE_EXIT, -EDOM);
1045 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1046 assert_return(!event_pid_changed(s->event), -ECHILD);
1051 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1052 assert_return(s, -EINVAL);
1053 assert_return(s->type == SOURCE_IO, -EDOM);
1054 assert_return(!event_pid_changed(s->event), -ECHILD);
1059 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1062 assert_return(s, -EINVAL);
1063 assert_return(fd >= 0, -EINVAL);
1064 assert_return(s->type == SOURCE_IO, -EDOM);
1065 assert_return(!event_pid_changed(s->event), -ECHILD);
1070 if (s->enabled == SD_EVENT_OFF) {
1072 s->io.registered = false;
1076 saved_fd = s->io.fd;
1077 assert(s->io.registered);
1080 s->io.registered = false;
1082 r = source_io_register(s, s->enabled, s->io.events);
1084 s->io.fd = saved_fd;
1085 s->io.registered = true;
1089 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1095 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1096 assert_return(s, -EINVAL);
1097 assert_return(events, -EINVAL);
1098 assert_return(s->type == SOURCE_IO, -EDOM);
1099 assert_return(!event_pid_changed(s->event), -ECHILD);
1101 *events = s->io.events;
1105 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1108 assert_return(s, -EINVAL);
1109 assert_return(s->type == SOURCE_IO, -EDOM);
1110 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1111 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1112 assert_return(!event_pid_changed(s->event), -ECHILD);
1114 if (s->io.events == events)
1117 if (s->enabled != SD_EVENT_OFF) {
1118 r = source_io_register(s, s->enabled, events);
1123 s->io.events = events;
1124 source_set_pending(s, false);
1129 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1130 assert_return(s, -EINVAL);
1131 assert_return(revents, -EINVAL);
1132 assert_return(s->type == SOURCE_IO, -EDOM);
1133 assert_return(s->pending, -ENODATA);
1134 assert_return(!event_pid_changed(s->event), -ECHILD);
1136 *revents = s->io.revents;
1140 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1141 assert_return(s, -EINVAL);
1142 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1143 assert_return(!event_pid_changed(s->event), -ECHILD);
1145 return s->signal.sig;
1148 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1149 assert_return(s, -EINVAL);
1150 assert_return(!event_pid_changed(s->event), -ECHILD);
1155 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1156 assert_return(s, -EINVAL);
1157 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1158 assert_return(!event_pid_changed(s->event), -ECHILD);
1160 if (s->priority == priority)
1163 s->priority = priority;
1166 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1169 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1171 if (s->type == SOURCE_EXIT)
1172 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1177 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1178 assert_return(s, -EINVAL);
1179 assert_return(m, -EINVAL);
1180 assert_return(!event_pid_changed(s->event), -ECHILD);
1186 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1189 assert_return(s, -EINVAL);
1190 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1191 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1192 assert_return(!event_pid_changed(s->event), -ECHILD);
1194 if (s->enabled == m)
1197 if (m == SD_EVENT_OFF) {
1202 r = source_io_unregister(s);
1209 case SOURCE_MONOTONIC:
1211 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1212 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1215 case SOURCE_REALTIME:
1217 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1218 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1223 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1224 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1225 event_update_signal_fd(s->event);
1233 assert(s->event->n_enabled_child_sources > 0);
1234 s->event->n_enabled_child_sources--;
1236 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1237 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1238 event_update_signal_fd(s->event);
1245 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1252 case SOURCE_WATCHDOG:
1253 assert_not_reached("Wut? I shouldn't exist.");
1260 r = source_io_register(s, m, s->io.events);
1267 case SOURCE_MONOTONIC:
1269 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1270 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1273 case SOURCE_REALTIME:
1275 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1276 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1282 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1283 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1284 event_update_signal_fd(s->event);
1291 if (s->enabled == SD_EVENT_OFF) {
1292 s->event->n_enabled_child_sources++;
1294 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1295 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1296 event_update_signal_fd(s->event);
1303 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1310 case SOURCE_WATCHDOG:
1311 assert_not_reached("Wut? I shouldn't exist.");
1316 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1319 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1324 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1325 assert_return(s, -EINVAL);
1326 assert_return(usec, -EINVAL);
1327 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1328 assert_return(!event_pid_changed(s->event), -ECHILD);
1330 *usec = s->time.next;
1334 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1335 assert_return(s, -EINVAL);
1336 assert_return(usec != (uint64_t) -1, -EINVAL);
1337 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1338 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1339 assert_return(!event_pid_changed(s->event), -ECHILD);
1341 s->time.next = usec;
1343 source_set_pending(s, false);
1345 if (s->type == SOURCE_REALTIME) {
1346 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1347 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1349 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1350 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1356 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1357 assert_return(s, -EINVAL);
1358 assert_return(usec, -EINVAL);
1359 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1360 assert_return(!event_pid_changed(s->event), -ECHILD);
1362 *usec = s->time.accuracy;
1366 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1367 assert_return(s, -EINVAL);
1368 assert_return(usec != (uint64_t) -1, -EINVAL);
1369 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1370 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1371 assert_return(!event_pid_changed(s->event), -ECHILD);
1374 usec = DEFAULT_ACCURACY_USEC;
1376 s->time.accuracy = usec;
1378 source_set_pending(s, false);
1380 if (s->type == SOURCE_REALTIME)
1381 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1383 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1388 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1389 assert_return(s, -EINVAL);
1390 assert_return(pid, -EINVAL);
1391 assert_return(s->type == SOURCE_CHILD, -EDOM);
1392 assert_return(!event_pid_changed(s->event), -ECHILD);
1394 *pid = s->child.pid;
1398 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1401 assert_return(s, -EINVAL);
1402 assert_return(s->type != SOURCE_EXIT, -EDOM);
1403 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1404 assert_return(!event_pid_changed(s->event), -ECHILD);
1406 if (s->prepare == callback)
1409 if (callback && s->prepare) {
1410 s->prepare = callback;
1414 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1418 s->prepare = callback;
1421 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1425 prioq_remove(s->event->prepare, s, &s->prepare_index);
1430 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1431 assert_return(s, NULL);
1436 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1439 assert_return(s, NULL);
1442 s->userdata = userdata;
/* Picks a wakeup time inside the window [a, b]: it tries to land on the
 * boot-ID-derived perturbation offset within successively finer grids
 * (1min, 10s, 1s, 250ms) so that timers across the whole system
 * coalesce into shared CPU wakeups, falling back to the latest
 * permissible time.
 * NOTE(review): the asserts, the `if (c >= a) return c;` checks after
 * each candidate, and the final fallback return are elided from this
 * excerpt. */
1447 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1459 Find a good time to wake up again between times a and b. We
1460 have two goals here:
1462 a) We want to wake up as seldom as possible, hence prefer
1463 later times over earlier times.
1465 b) But if we have to wake up, then let's make sure to
1466 dispatch as much as possible on the entire system.
1468 We implement this by waking up everywhere at the same time
1469 within any given minute if we can, synchronised via the
1470 perturbation value determined from the boot ID. If we can't,
1471 then we try to find the same spot in every 10s, then 1s and
1472 then 250ms step. Otherwise, we pick the last possible time
/* Candidate 1: the perturbation point within b's minute; if that lands
 * after b we step back one minute (guarding against underflow). */
1476 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1478 if (_unlikely_(c < USEC_PER_MINUTE))
1481 c -= USEC_PER_MINUTE;
/* Candidate 2: same idea on a 10s grid. */
1487 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1489 if (_unlikely_(c < USEC_PER_SEC*10))
1492 c -= USEC_PER_SEC*10;
/* Candidate 3: 1s grid. */
1498 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1500 if (_unlikely_(c < USEC_PER_SEC))
/* Candidate 4: 250ms grid, the finest coalescing step. */
1509 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1511 if (_unlikely_(c < USEC_PER_MSEC*250))
1514 c -= USEC_PER_MSEC*250;
1523 static int event_arm_timer(
1530 struct itimerspec its = {};
1531 sd_event_source *a, *b;
1538 a = prioq_peek(earliest);
1539 if (!a || a->enabled == SD_EVENT_OFF) {
1544 if (*next == (usec_t) -1)
1548 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1552 *next = (usec_t) -1;
1557 b = prioq_peek(latest);
1558 assert_se(b && b->enabled != SD_EVENT_OFF);
1560 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1564 assert_se(timer_fd >= 0);
1567 /* We don't want to disarm here, just mean some time looooong ago. */
1568 its.it_value.tv_sec = 0;
1569 its.it_value.tv_nsec = 1;
1571 timespec_store(&its.it_value, t);
1573 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Records the epoll revents for an IO source and marks it pending.
 * Revents are OR-ed in when the source was already pending (so no edge
 * is lost with EPOLLONESHOT), otherwise replaced.
 * NOTE(review): the `if (s->pending)` / `else` lines around the two
 * assignments are elided from this excerpt. */
1581 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1584 assert(s->type == SOURCE_IO);
1586 /* If the event source was already pending, we just OR in the
1587 * new revents, otherwise we reset the value. The ORing is
1588 * necessary to handle EPOLLONESHOT events properly where
1589 * readability might happen independently of writability, and
1590 * we need to keep track of both */
1593 s->io.revents |= revents;
1595 s->io.revents = revents;
1597 return source_set_pending(s, true);
/* Drains a timerfd after it fired: reads the 8-byte expiration counter
 * (tolerating EAGAIN/EINTR) and resets *next to "infinity" so the timer
 * will be re-armed from the prioqs on the next iteration.
 * NOTE(review): the short-read error return and the ss < 0 branch
 * structure are elided from this excerpt. */
1600 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1607 assert_return(events == EPOLLIN, -EIO);
1609 ss = read(fd, &x, sizeof(x));
1611 if (errno == EAGAIN || errno == EINTR)
1617 if (_unlikely_(ss != sizeof(x)))
1621 *next = (usec_t) -1;
1626 static int process_timer(
1638 s = prioq_peek(earliest);
1641 s->enabled == SD_EVENT_OFF ||
1645 r = source_set_pending(s, true);
1649 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1650 prioq_reshuffle(latest, s, &s->time.latest_index);
1656 static int process_child(sd_event *e) {
1663 e->need_process_child = false;
1666 So, this is ugly. We iteratively invoke waitid() with P_PID
1667 + WNOHANG for each PID we wait for, instead of using
1668 P_ALL. This is because we only want to get child
1669 information of very specific child processes, and not all
1670 of them. We might not have processed the SIGCHLD even of a
1671 previous invocation and we don't want to maintain a
1672 unbounded *per-child* event queue, hence we really don't
1673 want anything flushed out of the kernel's queue that we
1674 don't care about. Since this is O(n) this means that if you
1675 have a lot of processes you probably want to handle SIGCHLD
1678 We do not reap the children here (by using WNOWAIT), this
1679 is only done after the event source is dispatched so that
1680 the callback still sees the process as a zombie.
1683 HASHMAP_FOREACH(s, e->child_sources, i) {
1684 assert(s->type == SOURCE_CHILD);
1689 if (s->enabled == SD_EVENT_OFF)
1692 zero(s->child.siginfo);
1693 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1694 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1698 if (s->child.siginfo.si_pid != 0) {
1700 s->child.siginfo.si_code == CLD_EXITED ||
1701 s->child.siginfo.si_code == CLD_KILLED ||
1702 s->child.siginfo.si_code == CLD_DUMPED;
1704 if (!zombie && (s->child.options & WEXITED)) {
1705 /* If the child isn't dead then let's
1706 * immediately remove the state change
1707 * from the queue, since there's no
1708 * benefit in leaving it queued */
1710 assert(s->child.options & (WSTOPPED|WCONTINUED));
1711 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
1714 r = source_set_pending(s, true);
1723 static int process_signal(sd_event *e, uint32_t events) {
1724 bool read_one = false;
1728 assert(e->signal_sources);
1730 assert_return(events == EPOLLIN, -EIO);
1733 struct signalfd_siginfo si;
1737 ss = read(e->signal_fd, &si, sizeof(si));
1739 if (errno == EAGAIN || errno == EINTR)
1745 if (_unlikely_(ss != sizeof(si)))
1750 s = e->signal_sources[si.ssi_signo];
1751 if (si.ssi_signo == SIGCHLD) {
1752 r = process_child(e);
1761 s->signal.siginfo = si;
1762 r = source_set_pending(s, true);
/* Invokes the user callback for one pending source. Before dispatching
 * it clears the pending flag (except for DEFER/EXIT, which stay hot)
 * and turns ONESHOT sources off. s->dispatching is set around the
 * callback so sd_event_source_unref() knows to defer freeing. A
 * negative callback return disables the source with a debug log rather
 * than failing the loop.
 * NOTE(review): the switch(s->type) header, break statements and the
 * final return are elided from this excerpt. */
1770 static int source_dispatch(sd_event_source *s) {
1774 assert(s->pending || s->type == SOURCE_EXIT);
1776 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
1777 r = source_set_pending(s, false);
1782 if (s->enabled == SD_EVENT_ONESHOT) {
1783 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1788 s->dispatching = true;
1793 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1796 case SOURCE_MONOTONIC:
1797 r = s->time.callback(s, s->time.next, s->userdata);
1800 case SOURCE_REALTIME:
1801 r = s->time.callback(s, s->time.next, s->userdata);
1805 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1808 case SOURCE_CHILD: {
/* Detect whether the child is already a zombie; if so it was left
 * unreaped by process_child() (WNOWAIT) for the callback's benefit. */
1811 zombie = s->child.siginfo.si_code == CLD_EXITED ||
1812 s->child.siginfo.si_code == CLD_KILLED ||
1813 s->child.siginfo.si_code == CLD_DUMPED;
1815 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1817 /* Now, reap the PID for good. */
1819 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
1825 r = s->defer.callback(s, s->userdata);
1829 r = s->exit.callback(s, s->userdata);
1832 case SOURCE_WATCHDOG:
1833 assert_not_reached("Wut? I shouldn't exist.");
1836 s->dispatching = false;
1839 log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
1844 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* event_prepare() — run the prepare callbacks of sources that have one,
 * at most once per event-loop iteration each.
 *
 * NOTE(review): loop construct and error-return lines are elided in
 * this view; code below kept byte-identical. */
1849 static int event_prepare(sd_event *e) {
/* Prepare prioq is ordered so un-run sources for this iteration
 * surface first; stop once the top entry needs no preparation. */
1857 s = prioq_peek(e->prepare);
1858 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
/* Record that this source was prepared this iteration and reshuffle
 * it down the queue before invoking the callback. */
1861 s->prepare_iteration = e->iteration;
1862 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1868 s->dispatching = true;
1869 r = s->prepare(s, s->userdata);
1870 s->dispatching = false;
/* A failing prepare callback disables the source, mirroring
 * source_dispatch() error handling. */
1873 log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
1878 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* dispatch_exit() — run the highest-priority enabled exit source, or
 * finish the loop when none remain.
 *
 * NOTE(review): ref-counting/cleanup lines appear elided in this view;
 * code below kept byte-identical. */
1884 static int dispatch_exit(sd_event *e) {
1890 p = prioq_peek(e->exit);
/* No enabled exit sources left: the event loop is done. */
1891 if (!p || p->enabled == SD_EVENT_OFF) {
1892 e->state = SD_EVENT_FINISHED;
/* Dispatch one exit source while in EXITING state, then drop back
 * to PASSIVE so the loop can be driven again. */
1898 e->state = SD_EVENT_EXITING;
1900 r = source_dispatch(p);
1902 e->state = SD_EVENT_PASSIVE;
1908 static sd_event_source* event_next_pending(sd_event *e) {
1913 p = prioq_peek(e->pending);
1917 if (p->enabled == SD_EVENT_OFF)
/* arm_watchdog() — program the watchdog timerfd to fire somewhere
 * between 1/2 and 3/4 of the watchdog period after the last ping,
 * letting sleep_between() pick a slack-friendly point in that window. */
1923 static int arm_watchdog(sd_event *e) {
1924 struct itimerspec its = {};
1929 assert(e->watchdog_fd >= 0);
1931 t = sleep_between(e,
1932 e->watchdog_last + (e->watchdog_period / 2),
1933 e->watchdog_last + (e->watchdog_period * 3 / 4));
/* Absolute CLOCK_MONOTONIC deadline, stored as a timespec. */
1935 timespec_store(&its.it_value, t);
1937 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* process_watchdog() — send a keep-alive ping to the service manager
 * and re-arm the watchdog timer, rate-limited to once per quarter
 * period. */
1944 static int process_watchdog(sd_event *e) {
1950 /* Don't notify watchdog too often */
1951 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
1954 sd_notify(false, "WATCHDOG=1");
1955 e->watchdog_last = e->timestamp.monotonic;
1957 return arm_watchdog(e);
/* sd_event_run() — run a single event-loop iteration: prepare sources,
 * arm the timerfds, wait on epoll (up to `timeout` usec, or forever for
 * (uint64_t) -1), process what fired, and dispatch one pending source.
 *
 * NOTE(review): many intermediate lines (error checks, the epoll
 * timeout-zero shortcut, closing braces) are elided in this view;
 * code below kept byte-identical. */
1960 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1961 struct epoll_event *ev_queue;
1962 unsigned ev_queue_max;
1966 assert_return(e, -EINVAL);
1967 assert_return(!event_pid_changed(e), -ECHILD);
1968 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1969 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
/* Once exiting was requested, only exit sources are dispatched. */
1971 if (e->exit_requested)
1972 return dispatch_exit(e);
1976 e->state = SD_EVENT_RUNNING;
1978 r = event_prepare(e);
/* Arm both clock timerfds from their earliest/latest prioq windows. */
1982 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1986 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
/* If something is already pending (or SIGCHLD needs handling) don't
 * block in epoll — presumably the timeout is forced to 0 here. */
1990 if (event_next_pending(e) || e->need_process_child)
/* Size the on-stack event array by source count, capped at
 * EPOLL_QUEUE_MAX and at least 1. */
1992 ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
1993 ev_queue = newa(struct epoll_event, ev_queue_max);
/* usec timeout rounded up to whole milliseconds for epoll_wait(). */
1995 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
1996 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
/* EAGAIN/EINTR are treated as a successful, empty wakeup. */
1998 r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
/* Snapshot both clocks once; used for timer and watchdog processing. */
2002 dual_timestamp_get(&e->timestamp);
2004 for (i = 0; i < m; i++) {
/* Internal fds are tagged with their SOURCE_* constant as data.ptr;
 * anything else is a user I/O source pointer. */
2006 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
2007 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
2008 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
2009 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
2010 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
2011 r = process_signal(e, ev_queue[i].events);
2012 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2013 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2015 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2021 r = process_watchdog(e);
/* Queue elapsed timer sources from both clocks as pending. */
2025 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
2029 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
2033 if (e->need_process_child) {
2034 r = process_child(e);
/* Dispatch exactly one pending source per iteration. */
2039 p = event_next_pending(e);
2045 r = source_dispatch(p);
2048 e->state = SD_EVENT_PASSIVE;
/* sd_event_loop() — run sd_event_run() with an infinite timeout until
 * the loop reaches SD_EVENT_FINISHED.
 * NOTE(review): error-propagation and return lines are elided in this
 * view; code below kept byte-identical. */
2054 _public_ int sd_event_loop(sd_event *e) {
2057 assert_return(e, -EINVAL);
2058 assert_return(!event_pid_changed(e), -ECHILD);
2059 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2063 while (e->state != SD_EVENT_FINISHED) {
2064 r = sd_event_run(e, (uint64_t) -1);
/* sd_event_get_state() — return the loop's current SD_EVENT_* state.
 * NOTE(review): the return statement is elided in this view. */
2076 _public_ int sd_event_get_state(sd_event *e) {
2077 assert_return(e, -EINVAL);
2078 assert_return(!event_pid_changed(e), -ECHILD);
/* sd_event_get_exit_code() — fetch the exit code set via
 * sd_event_exit(); fails (line elided here) when no exit was
 * requested yet. */
2083 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2084 assert_return(e, -EINVAL);
2085 assert_return(code, -EINVAL);
2086 assert_return(!event_pid_changed(e), -ECHILD);
2088 if (!e->exit_requested)
2091 *code = e->exit_code;
/* sd_event_exit() — request loop termination with the given exit code.
 * From the next iteration on, sd_event_run() only dispatches exit
 * sources (see dispatch_exit()). */
2095 _public_ int sd_event_exit(sd_event *e, int code) {
2096 assert_return(e, -EINVAL);
2097 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2098 assert_return(!event_pid_changed(e), -ECHILD);
2100 e->exit_requested = true;
2101 e->exit_code = code;
/* sd_event_get_now_realtime() — return the realtime timestamp taken at
 * the start of the current/last iteration (not a fresh clock read);
 * -ENODATA before the first wakeup. */
2106 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
2107 assert_return(e, -EINVAL);
2108 assert_return(usec, -EINVAL);
2109 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
2110 assert_return(!event_pid_changed(e), -ECHILD);
2112 *usec = e->timestamp.realtime;
/* sd_event_get_now_monotonic() — monotonic counterpart of
 * sd_event_get_now_realtime(): cached iteration timestamp, -ENODATA
 * before the first wakeup. */
2116 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
2117 assert_return(e, -EINVAL);
2118 assert_return(usec, -EINVAL);
2119 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
2120 assert_return(!event_pid_changed(e), -ECHILD);
2122 *usec = e->timestamp.monotonic;
/* sd_event_default() — return (a reference to) the calling thread's
 * default event loop, creating it on first use.
 * NOTE(review): error handling and the returning tail of the function
 * are elided in this view; code below kept byte-identical. */
2126 _public_ int sd_event_default(sd_event **ret) {
/* One default loop per thread. */
2128 static thread_local sd_event *default_event = NULL;
/* With ret == NULL (presumably) this just reports existence. */
2133 return !!default_event;
2135 if (default_event) {
2136 *ret = sd_event_ref(default_event);
2140 r = sd_event_new(&e);
/* Remember where to NULL the pointer when the loop is freed. */
2144 e->default_event_ptr = &default_event;
/* sd_event_get_tid() — report the thread id associated with the loop.
 * NOTE(review): the lines that copy the tid (or return -ENXIO when
 * unset) are elided in this view. */
2152 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2153 assert_return(e, -EINVAL);
2154 assert_return(tid, -EINVAL);
2155 assert_return(!event_pid_changed(e), -ECHILD);
/* sd_event_set_watchdog() — enable/disable automatic sd_notify()
 * watchdog pings driven by a CLOCK_MONOTONIC timerfd hooked into the
 * loop's epoll as SOURCE_WATCHDOG.
 *
 * NOTE(review): several error-path lines and the enable/disable branch
 * structure are elided in this view; code below kept byte-identical. */
2165 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2168 assert_return(e, -EINVAL);
2169 assert_return(!event_pid_changed(e), -ECHILD);
/* Already in the requested state: nothing to do. */
2171 if (e->watchdog == !!b)
2175 struct epoll_event ev = {};
/* Query WATCHDOG_USEC from the environment without unsetting it. */
2177 r = sd_watchdog_enabled(false, &e->watchdog_period);
2181 /* Issue first ping immediately */
2182 sd_notify(false, "WATCHDOG=1");
2183 e->watchdog_last = now(CLOCK_MONOTONIC);
2185 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2186 if (e->watchdog_fd < 0)
2189 r = arm_watchdog(e);
/* Tag the fd with SOURCE_WATCHDOG so sd_event_run() recognizes it. */
2193 ev.events = EPOLLIN;
2194 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2196 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
/* Disable path: unhook and close the timerfd if it exists. */
2203 if (e->watchdog_fd >= 0) {
2204 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2205 close_nointr_nofail(e->watchdog_fd);
2206 e->watchdog_fd = -1;
/* Error-cleanup tail (label elided): drop the half-set-up timerfd. */
2214 close_nointr_nofail(e->watchdog_fd);
2215 e->watchdog_fd = -1;
2219 _public_ int sd_event_get_watchdog(sd_event *e) {
2220 assert_return(e, -EINVAL);
2221 assert_return(!event_pid_changed(e), -ECHILD);