1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
27 #include "sd-daemon.h"
32 #include "time-util.h"
37 #define EPOLL_QUEUE_MAX 64
38 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
40 typedef enum EventSourceType {
51 struct sd_event_source {
56 sd_event_handler_t prepare;
58 EventSourceType type:4;
64 unsigned pending_index;
65 unsigned prepare_index;
66 unsigned pending_iteration;
67 unsigned prepare_iteration;
71 sd_event_io_handler_t callback;
78 sd_event_time_handler_t callback;
79 usec_t next, accuracy;
80 unsigned earliest_index;
81 unsigned latest_index;
84 sd_event_signal_handler_t callback;
85 struct signalfd_siginfo siginfo;
89 sd_event_child_handler_t callback;
95 sd_event_handler_t callback;
98 sd_event_handler_t callback;
116 /* For both clocks we maintain two priority queues each, one
117 * ordered for the earliest times the events may be
118 * dispatched, and one ordered by the latest times they must
119 * have been dispatched. The range between the top entries in
120 * the two prioqs is the time window we can freely schedule
122 Prioq *monotonic_earliest;
123 Prioq *monotonic_latest;
124 Prioq *realtime_earliest;
125 Prioq *realtime_latest;
127 usec_t realtime_next, monotonic_next;
131 sd_event_source **signal_sources;
133 Hashmap *child_sources;
134 unsigned n_enabled_child_sources;
141 dual_timestamp timestamp;
144 bool exit_requested:1;
145 bool need_process_child:1;
151 sd_event **default_event_ptr;
153 usec_t watchdog_last, watchdog_period;
/* Comparator for the "pending" priority queue: orders enabled sources
 * before disabled ones, then by ascending priority value, then by the
 * iteration in which they became pending (older first).
 * NOTE(review): the return statements between comparisons were lost in
 * extraction; each `if` below returned -1 or +1 in the original. */
156 static int pending_prioq_compare(const void *a, const void *b) {
157 const sd_event_source *x = a, *y = b;
162 /* Enabled ones first */
163 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
165 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
168 /* Lower priority values first */
169 if (x->priority < y->priority)
171 if (x->priority > y->priority)
174 /* Older entries first */
175 if (x->pending_iteration < y->pending_iteration)
177 if (x->pending_iteration > y->pending_iteration)
180 /* Stability for the rest */
/* Comparator for the "prepare" priority queue: sources prepared in the
 * current iteration sort last so the prepare loop can stop at the first
 * already-prepared entry; otherwise enabled-first, then by priority. */
189 static int prepare_prioq_compare(const void *a, const void *b) {
190 const sd_event_source *x = a, *y = b;
195 /* Move most recently prepared ones last, so that we can stop
196 * preparing as soon as we hit one that has already been
197 * prepared in the current iteration */
198 if (x->prepare_iteration < y->prepare_iteration)
200 if (x->prepare_iteration > y->prepare_iteration)
203 /* Enabled ones first */
204 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
206 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
209 /* Lower priority values first */
210 if (x->priority < y->priority)
212 if (x->priority > y->priority)
215 /* Stability for the rest */
/* Comparator for the per-clock "earliest" queue: orders timer sources by
 * the earliest time (time.next) they may be dispatched. Disabled and
 * already-pending sources sink to the end so prioq_peek() yields the next
 * armable timer. */
224 static int earliest_time_prioq_compare(const void *a, const void *b) {
225 const sd_event_source *x = a, *y = b;
227 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
228 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
230 /* Enabled ones first */
231 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
233 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
236 /* Move the pending ones to the end */
237 if (!x->pending && y->pending)
239 if (x->pending && !y->pending)
243 if (x->time.next < y->time.next)
245 if (x->time.next > y->time.next)
248 /* Stability for the rest */
/* Comparator for the per-clock "latest" queue: orders timer sources by
 * the latest permissible dispatch time, i.e. time.next + time.accuracy.
 * Both entries must belong to the same clock (the assert enforces this,
 * unlike the earliest-queue comparator which allows mixed types). */
257 static int latest_time_prioq_compare(const void *a, const void *b) {
258 const sd_event_source *x = a, *y = b;
260 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
261 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
263 /* Enabled ones first */
264 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
266 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
269 /* Move the pending ones to the end */
270 if (!x->pending && y->pending)
272 if (x->pending && !y->pending)
276 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
278 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
281 /* Stability for the rest */
/* Comparator for the exit-handler queue: enabled sources first, then by
 * ascending priority value. Only SOURCE_EXIT entries may appear here. */
290 static int exit_prioq_compare(const void *a, const void *b) {
291 const sd_event_source *x = a, *y = b;
293 assert(x->type == SOURCE_EXIT);
294 assert(y->type == SOURCE_EXIT);
296 /* Enabled ones first */
297 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
299 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
302 /* Lower priority values first */
303 if (x->priority < y->priority)
305 if (x->priority > y->priority)
308 /* Stability for the rest */
/* Frees an sd_event object: clears the default-event back-pointer,
 * closes all owned fds (epoll, signalfd, both timerfds, watchdog), and
 * releases every priority queue plus the signal/child source tables.
 * Called only when the reference count drops to zero. */
317 static void event_free(sd_event *e) {
320 if (e->default_event_ptr)
321 *(e->default_event_ptr) = NULL;
323 if (e->epoll_fd >= 0)
324 close_nointr_nofail(e->epoll_fd);
326 if (e->signal_fd >= 0)
327 close_nointr_nofail(e->signal_fd);
329 if (e->realtime_fd >= 0)
330 close_nointr_nofail(e->realtime_fd);
332 if (e->monotonic_fd >= 0)
333 close_nointr_nofail(e->monotonic_fd);
335 if (e->watchdog_fd >= 0)
336 close_nointr_nofail(e->watchdog_fd);
338 prioq_free(e->pending);
339 prioq_free(e->prepare);
340 prioq_free(e->monotonic_earliest);
341 prioq_free(e->monotonic_latest);
342 prioq_free(e->realtime_earliest);
343 prioq_free(e->realtime_latest);
346 free(e->signal_sources);
348 hashmap_free(e->child_sources);
352 _public_ int sd_event_new(sd_event** ret) {
356 assert_return(ret, -EINVAL);
358 e = new0(sd_event, 1);
363 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
364 e->realtime_next = e->monotonic_next = (usec_t) -1;
365 e->original_pid = getpid();
367 assert_se(sigemptyset(&e->sigset) == 0);
369 e->pending = prioq_new(pending_prioq_compare);
375 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
376 if (e->epoll_fd < 0) {
389 _public_ sd_event* sd_event_ref(sd_event *e) {
390 assert_return(e, NULL);
392 assert(e->n_ref >= 1);
398 _public_ sd_event* sd_event_unref(sd_event *e) {
403 assert(e->n_ref >= 1);
412 static bool event_pid_changed(sd_event *e) {
415 /* We don't support people creating am event loop and keeping
416 * it around over a fork(). Let's complain. */
418 return e->original_pid != getpid();
/* Removes an IO source's fd from the epoll set, if it was registered. */
421 static int source_io_unregister(sd_event_source *s) {
425 assert(s->type == SOURCE_IO);
427 if (!s->io.registered)
430 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
434 s->io.registered = false;

/* Adds or updates an IO source's fd in the epoll set. ONESHOT sources
 * get EPOLLONESHOT so the kernel disarms them after one event; an
 * already-registered fd is modified (EPOLL_CTL_MOD) rather than added. */
438 static int source_io_register(
443 struct epoll_event ev = {};
447 assert(s->type == SOURCE_IO);
448 assert(enabled != SD_EVENT_OFF);
453 if (enabled == SD_EVENT_ONESHOT)
454 ev.events |= EPOLLONESHOT;
456 if (s->io.registered)
457 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
459 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
464 s->io.registered = true;
/* Tears down an event source according to its type: detaches it from
 * epoll/prioqs/sigset/child table, then from the generic pending and
 * prepare queues, and finally drops its reference on the event loop.
 * For SIGCHLD the signal is only removed from the sigset when no other
 * consumer (child sources / a SIGCHLD signal source) still needs it. */
469 static void source_free(sd_event_source *s) {
477 source_io_unregister(s);
481 case SOURCE_MONOTONIC:
482 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
483 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
486 case SOURCE_REALTIME:
487 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
488 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
492 if (s->signal.sig > 0) {
493 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
494 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
496 if (s->event->signal_sources)
497 s->event->signal_sources[s->signal.sig] = NULL;
503 if (s->child.pid > 0) {
504 if (s->enabled != SD_EVENT_OFF) {
505 assert(s->event->n_enabled_child_sources > 0);
506 s->event->n_enabled_child_sources--;
509 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
510 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
512 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
522 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
525 case SOURCE_WATCHDOG:
526 assert_not_reached("Wut? I shouldn't exist.");
530 prioq_remove(s->event->pending, s, &s->pending_index);
533 prioq_remove(s->event->prepare, s, &s->prepare_index);
535 sd_event_unref(s->event);
/* Marks a source pending (queued for dispatch) or not. When set, the
 * current loop iteration is recorded so the pending comparator can order
 * older entries first; timer sources are additionally reshuffled in their
 * clock's prioqs because the "pending" flag changes their sort position. */
541 static int source_set_pending(sd_event_source *s, bool b) {
545 assert(s->type != SOURCE_EXIT);
553 s->pending_iteration = s->event->iteration;
555 r = prioq_put(s->event->pending, s, &s->pending_index);
561 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
563 if (s->type == SOURCE_REALTIME) {
564 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
565 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
566 } else if (s->type == SOURCE_MONOTONIC) {
567 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
568 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
574 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
579 s = new0(sd_event_source, 1);
584 s->event = sd_event_ref(e);
586 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
591 _public_ int sd_event_add_io(
595 sd_event_io_handler_t callback,
597 sd_event_source **ret) {
602 assert_return(e, -EINVAL);
603 assert_return(fd >= 0, -EINVAL);
604 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
605 assert_return(callback, -EINVAL);
606 assert_return(ret, -EINVAL);
607 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
608 assert_return(!event_pid_changed(e), -ECHILD);
610 s = source_new(e, SOURCE_IO);
615 s->io.events = events;
616 s->io.callback = callback;
617 s->userdata = userdata;
618 s->enabled = SD_EVENT_ON;
620 r = source_io_register(s, s->enabled, events);
/* Lazily creates the timerfd for one clock (monotonic or realtime),
 * registers it with epoll (the epoll data pointer encodes the source
 * type so the dispatcher can tell the two clocks apart), and derives the
 * per-machine wakeup perturbation from the boot ID. */
630 static int event_setup_timer_fd(
632 EventSourceType type,
636 struct epoll_event ev = {};
643 if (_likely_(*timer_fd >= 0))
646 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
651 ev.data.ptr = INT_TO_PTR(type);
653 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
655 close_nointr_nofail(fd);
659 /* When we sleep for longer, we try to realign the wakeup to
660 the same time within each minute/second/250ms, so that
661 events all across the system can be coalesced into a single
662 CPU wakeup. However, let's take some system-specific
663 randomness for this value, so that in a network of systems
664 with synced clocks timer events are distributed a
665 bit. Here, we calculate a perturbation usec offset from the
668 if (sd_id128_get_boot(&bootid) >= 0)
669 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
675 static int event_add_time_internal(
677 EventSourceType type,
684 sd_event_time_handler_t callback,
686 sd_event_source **ret) {
691 assert_return(e, -EINVAL);
692 assert_return(callback, -EINVAL);
693 assert_return(ret, -EINVAL);
694 assert_return(usec != (uint64_t) -1, -EINVAL);
695 assert_return(accuracy != (uint64_t) -1, -EINVAL);
696 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
697 assert_return(!event_pid_changed(e), -ECHILD);
704 *earliest = prioq_new(earliest_time_prioq_compare);
710 *latest = prioq_new(latest_time_prioq_compare);
716 r = event_setup_timer_fd(e, type, timer_fd, id);
721 s = source_new(e, type);
726 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
727 s->time.callback = callback;
728 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
729 s->userdata = userdata;
730 s->enabled = SD_EVENT_ONESHOT;
732 r = prioq_put(*earliest, s, &s->time.earliest_index);
736 r = prioq_put(*latest, s, &s->time.latest_index);
/* Public: adds a CLOCK_MONOTONIC timer source. Thin wrapper passing the
 * monotonic timerfd and the monotonic earliest/latest prioqs to
 * event_add_time_internal(). */
748 _public_ int sd_event_add_monotonic(sd_event *e,
751 sd_event_time_handler_t callback,
753 sd_event_source **ret) {
755 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
758 _public_ int sd_event_add_realtime(sd_event *e,
761 sd_event_time_handler_t callback,
763 sd_event_source **ret) {
765 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
768 static int event_update_signal_fd(sd_event *e) {
769 struct epoll_event ev = {};
775 add_to_epoll = e->signal_fd < 0;
777 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
787 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
789 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
791 close_nointr_nofail(e->signal_fd);
/* Public: adds a signal event source. Allocates the signal_sources table
 * on first use, rejects duplicate sources for the same signal, adds the
 * signal to the loop's sigset and refreshes the signalfd — except for
 * SIGCHLD when child sources already keep it subscribed. The caller must
 * have the signal blocked; this only wires up signalfd delivery. */
800 _public_ int sd_event_add_signal(
803 sd_event_signal_handler_t callback,
805 sd_event_source **ret) {
810 assert_return(e, -EINVAL);
811 assert_return(sig > 0, -EINVAL);
812 assert_return(sig < _NSIG, -EINVAL);
813 assert_return(callback, -EINVAL);
814 assert_return(ret, -EINVAL);
815 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
816 assert_return(!event_pid_changed(e), -ECHILD);
818 if (!e->signal_sources) {
819 e->signal_sources = new0(sd_event_source*, _NSIG);
820 if (!e->signal_sources)
822 } else if (e->signal_sources[sig])
825 s = source_new(e, SOURCE_SIGNAL);
830 s->signal.callback = callback;
831 s->userdata = userdata;
832 s->enabled = SD_EVENT_ON;
834 e->signal_sources[sig] = s;
835 assert_se(sigaddset(&e->sigset, sig) == 0);
837 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
838 r = event_update_signal_fd(e);
/* Public: adds a child-process (waitid) event source keyed by PID.
 * One source per PID; delivery rides on SIGCHLD via the signalfd, so the
 * loop's sigset/signalfd are updated unless a SIGCHLD signal source
 * already exists. need_process_child forces an immediate waitid() sweep
 * in case the child already changed state before we subscribed. */
849 _public_ int sd_event_add_child(
853 sd_event_child_handler_t callback,
855 sd_event_source **ret) {
860 assert_return(e, -EINVAL);
861 assert_return(pid > 1, -EINVAL);
862 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
863 assert_return(options != 0, -EINVAL);
864 assert_return(callback, -EINVAL);
865 assert_return(ret, -EINVAL);
866 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
867 assert_return(!event_pid_changed(e), -ECHILD);
869 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
873 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
876 s = source_new(e, SOURCE_CHILD);
881 s->child.options = options;
882 s->child.callback = callback;
883 s->userdata = userdata;
884 s->enabled = SD_EVENT_ONESHOT;
886 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
892 e->n_enabled_child_sources ++;
894 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
896 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
897 r = event_update_signal_fd(e);
904 e->need_process_child = true;
910 _public_ int sd_event_add_defer(
912 sd_event_handler_t callback,
914 sd_event_source **ret) {
919 assert_return(e, -EINVAL);
920 assert_return(callback, -EINVAL);
921 assert_return(ret, -EINVAL);
922 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
923 assert_return(!event_pid_changed(e), -ECHILD);
925 s = source_new(e, SOURCE_DEFER);
929 s->defer.callback = callback;
930 s->userdata = userdata;
931 s->enabled = SD_EVENT_ONESHOT;
933 r = source_set_pending(s, true);
943 _public_ int sd_event_add_exit(
945 sd_event_handler_t callback,
947 sd_event_source **ret) {
952 assert_return(e, -EINVAL);
953 assert_return(callback, -EINVAL);
954 assert_return(ret, -EINVAL);
955 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
956 assert_return(!event_pid_changed(e), -ECHILD);
959 e->exit = prioq_new(exit_prioq_compare);
964 s = source_new(e, SOURCE_EXIT);
968 s->exit.callback = callback;
969 s->userdata = userdata;
970 s->exit.prioq_index = PRIOQ_IDX_NULL;
971 s->enabled = SD_EVENT_ONESHOT;
973 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
983 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
984 assert_return(s, NULL);
986 assert(s->n_ref >= 1);
992 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
997 assert(s->n_ref >= 1);
1000 if (s->n_ref <= 0) {
1001 /* Here's a special hack: when we are called from a
1002 * dispatch handler we won't free the event source
1003 * immediately, but we will detach the fd from the
1004 * epoll. This way it is safe for the caller to unref
1005 * the event source and immediately close the fd, but
1006 * we still retain a valid event source object after
1009 if (s->dispatching) {
1010 if (s->type == SOURCE_IO)
1011 source_io_unregister(s);
1019 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1020 assert_return(s, NULL);
1025 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1026 assert_return(s, -EINVAL);
1027 assert_return(s->type != SOURCE_EXIT, -EDOM);
1028 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1029 assert_return(!event_pid_changed(s->event), -ECHILD);
1034 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1035 assert_return(s, -EINVAL);
1036 assert_return(s->type == SOURCE_IO, -EDOM);
1037 assert_return(!event_pid_changed(s->event), -ECHILD);
1042 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1045 assert_return(s, -EINVAL);
1046 assert_return(fd >= 0, -EINVAL);
1047 assert_return(s->type == SOURCE_IO, -EDOM);
1048 assert_return(!event_pid_changed(s->event), -ECHILD);
1053 if (s->enabled == SD_EVENT_OFF) {
1055 s->io.registered = false;
1059 saved_fd = s->io.fd;
1060 assert(s->io.registered);
1063 s->io.registered = false;
1065 r = source_io_register(s, s->enabled, s->io.events);
1067 s->io.fd = saved_fd;
1068 s->io.registered = true;
1072 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1078 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1079 assert_return(s, -EINVAL);
1080 assert_return(events, -EINVAL);
1081 assert_return(s->type == SOURCE_IO, -EDOM);
1082 assert_return(!event_pid_changed(s->event), -ECHILD);
1084 *events = s->io.events;
1088 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1091 assert_return(s, -EINVAL);
1092 assert_return(s->type == SOURCE_IO, -EDOM);
1093 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1094 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1095 assert_return(!event_pid_changed(s->event), -ECHILD);
1097 if (s->io.events == events)
1100 if (s->enabled != SD_EVENT_OFF) {
1101 r = source_io_register(s, s->enabled, events);
1106 s->io.events = events;
1107 source_set_pending(s, false);
1112 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1113 assert_return(s, -EINVAL);
1114 assert_return(revents, -EINVAL);
1115 assert_return(s->type == SOURCE_IO, -EDOM);
1116 assert_return(s->pending, -ENODATA);
1117 assert_return(!event_pid_changed(s->event), -ECHILD);
1119 *revents = s->io.revents;
1123 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1124 assert_return(s, -EINVAL);
1125 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1126 assert_return(!event_pid_changed(s->event), -ECHILD);
1128 return s->signal.sig;
1131 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1132 assert_return(s, -EINVAL);
1133 assert_return(!event_pid_changed(s->event), -ECHILD);
1138 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1139 assert_return(s, -EINVAL);
1140 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1141 assert_return(!event_pid_changed(s->event), -ECHILD);
1143 if (s->priority == priority)
1146 s->priority = priority;
1149 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1152 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1154 if (s->type == SOURCE_EXIT)
1155 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1160 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1161 assert_return(s, -EINVAL);
1162 assert_return(m, -EINVAL);
1163 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Public: switches a source between OFF / ON / ONESHOT. The two big
 * type-switches mirror each other: the first handles transitions to OFF
 * (unregister from epoll, reshuffle timer prioqs, drop signals from the
 * sigset where no other consumer needs them, decrement the enabled-child
 * counter); the second handles OFF -> ON/ONESHOT (re-register, re-add
 * signals). Finally the generic pending/prepare queues are reshuffled,
 * since "enabled" participates in every comparator's ordering. */
1169 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1172 assert_return(s, -EINVAL);
1173 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1174 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1175 assert_return(!event_pid_changed(s->event), -ECHILD);
1177 if (s->enabled == m)
1180 if (m == SD_EVENT_OFF) {
1185 r = source_io_unregister(s);
1192 case SOURCE_MONOTONIC:
1194 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1195 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1198 case SOURCE_REALTIME:
1200 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1201 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1206 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1207 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1208 event_update_signal_fd(s->event);
1216 assert(s->event->n_enabled_child_sources > 0);
1217 s->event->n_enabled_child_sources--;
1219 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1220 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1221 event_update_signal_fd(s->event);
1228 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1235 case SOURCE_WATCHDOG:
1236 assert_not_reached("Wut? I shouldn't exist.");
1243 r = source_io_register(s, m, s->io.events);
1250 case SOURCE_MONOTONIC:
1252 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1253 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1256 case SOURCE_REALTIME:
1258 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1259 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1265 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1266 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1267 event_update_signal_fd(s->event);
1274 if (s->enabled == SD_EVENT_OFF) {
1275 s->event->n_enabled_child_sources++;
1277 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1278 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1279 event_update_signal_fd(s->event);
1286 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1293 case SOURCE_WATCHDOG:
1294 assert_not_reached("Wut? I shouldn't exist.");
1299 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1302 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1307 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1308 assert_return(s, -EINVAL);
1309 assert_return(usec, -EINVAL);
1310 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1311 assert_return(!event_pid_changed(s->event), -ECHILD);
1313 *usec = s->time.next;
1317 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1318 assert_return(s, -EINVAL);
1319 assert_return(usec != (uint64_t) -1, -EINVAL);
1320 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1321 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1322 assert_return(!event_pid_changed(s->event), -ECHILD);
1324 s->time.next = usec;
1326 source_set_pending(s, false);
1328 if (s->type == SOURCE_REALTIME) {
1329 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1330 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1332 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1333 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1339 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1340 assert_return(s, -EINVAL);
1341 assert_return(usec, -EINVAL);
1342 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1343 assert_return(!event_pid_changed(s->event), -ECHILD);
1345 *usec = s->time.accuracy;
1349 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1350 assert_return(s, -EINVAL);
1351 assert_return(usec != (uint64_t) -1, -EINVAL);
1352 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1353 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1354 assert_return(!event_pid_changed(s->event), -ECHILD);
1357 usec = DEFAULT_ACCURACY_USEC;
1359 s->time.accuracy = usec;
1361 source_set_pending(s, false);
1363 if (s->type == SOURCE_REALTIME)
1364 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1366 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1371 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1372 assert_return(s, -EINVAL);
1373 assert_return(pid, -EINVAL);
1374 assert_return(s->type == SOURCE_CHILD, -EDOM);
1375 assert_return(!event_pid_changed(s->event), -ECHILD);
1377 *pid = s->child.pid;
1381 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1384 assert_return(s, -EINVAL);
1385 assert_return(s->type != SOURCE_EXIT, -EDOM);
1386 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1387 assert_return(!event_pid_changed(s->event), -ECHILD);
1389 if (s->prepare == callback)
1392 if (callback && s->prepare) {
1393 s->prepare = callback;
1397 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1401 s->prepare = callback;
1404 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1408 prioq_remove(s->event->prepare, s, &s->prepare_index);
1413 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1414 assert_return(s, NULL);
1419 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1422 assert_return(s, NULL);
1425 s->userdata = userdata;
/* Picks a wakeup time in [a, b]: the latest point in the window that is
 * aligned to the per-machine perturbation offset, trying progressively
 * finer grids (1min, 10s, 1s, 250ms) until an aligned point fits, so
 * timers across the whole system coalesce their CPU wakeups. */
1430 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1442 Find a good time to wake up again between times a and b. We
1443 have two goals here:
1445 a) We want to wake up as seldom as possible, hence prefer
1446 later times over earlier times.
1448 b) But if we have to wake up, then let's make sure to
1449 dispatch as much as possible on the entire system.
1451 We implement this by waking up everywhere at the same time
1452 within any given minute if we can, synchronised via the
1453 perturbation value determined from the boot ID. If we can't,
1454 then we try to find the same spot in every 10s, then 1s and
1455 then 250ms step. Otherwise, we pick the last possible time
1459 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1461 if (_unlikely_(c < USEC_PER_MINUTE))
1464 c -= USEC_PER_MINUTE;
1470 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1472 if (_unlikely_(c < USEC_PER_SEC*10))
1475 c -= USEC_PER_SEC*10;
1481 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1483 if (_unlikely_(c < USEC_PER_SEC))
1492 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1494 if (_unlikely_(c < USEC_PER_MSEC*250))
1497 c -= USEC_PER_MSEC*250;
/* Programs one clock's timerfd from its two prioqs: peeks the earliest
 * possible and latest permissible times, picks a coalesced wakeup with
 * sleep_between(), and arms the fd with TFD_TIMER_ABSTIME. If nothing is
 * enabled the fd is disarmed (zeroed itimerspec) — but only if it was
 * previously armed, to avoid a redundant syscall. */
1506 static int event_arm_timer(
1513 struct itimerspec its = {};
1514 sd_event_source *a, *b;
1521 a = prioq_peek(earliest);
1522 if (!a || a->enabled == SD_EVENT_OFF) {
1527 if (*next == (usec_t) -1)
1531 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1535 *next = (usec_t) -1;
1540 b = prioq_peek(latest);
1541 assert_se(b && b->enabled != SD_EVENT_OFF);
1543 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1547 assert_se(timer_fd >= 0);
1550 /* We don't want to disarm here, just mean some time looooong ago. */
1551 its.it_value.tv_sec = 0;
1552 its.it_value.tv_nsec = 1;
1554 timespec_store(&its.it_value, t);
1556 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Marks an IO source pending with the epoll revents that fired. */
1564 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1567 assert(s->type == SOURCE_IO);
1569 s->io.revents = events;
1571 return source_set_pending(s, true);

/* Drains a timerfd after it fired: reads the 8-byte expiration counter
 * (EAGAIN/EINTR are tolerated) and resets *next so the timer will be
 * re-armed on the next iteration. */
1574 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1581 assert_return(events == EPOLLIN, -EIO);
1583 ss = read(fd, &x, sizeof(x));
1585 if (errno == EAGAIN || errno == EINTR)
1591 if (ss != sizeof(x))
1595 *next = (usec_t) -1;

/* Walks one clock's earliest-queue and marks every elapsed, enabled,
 * not-yet-pending timer source as pending, reshuffling both prioqs as
 * each source's pending flag changes its sort position. */
1600 static int process_timer(
1612 s = prioq_peek(earliest);
1615 s->enabled == SD_EVENT_OFF ||
1619 r = source_set_pending(s, true);
1623 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1624 prioq_reshuffle(latest, s, &s->time.latest_index);
/* Polls every registered child source with waitid(P_PID, ..., WNOHANG),
 * marking those with a state change as pending. WNOWAIT keeps exited
 * children as zombies so the dispatch callback can still inspect them;
 * non-exit state changes are consumed immediately since nothing else
 * will reap them. O(n) in the number of child sources by design. */
1630 static int process_child(sd_event *e) {
1637 e->need_process_child = false;
1640 So, this is ugly. We iteratively invoke waitid() with P_PID
1641 + WNOHANG for each PID we wait for, instead of using
1642 P_ALL. This is because we only want to get child
1643 information of very specific child processes, and not all
1644 of them. We might not have processed the SIGCHLD event of a
1645 previous invocation and we don't want to maintain a
1646 unbounded *per-child* event queue, hence we really don't
1647 want anything flushed out of the kernel's queue that we
1648 don't care about. Since this is O(n) this means that if you
1649 have a lot of processes you probably want to handle SIGCHLD
1652 We do not reap the children here (by using WNOWAIT), this
1653 is only done after the event source is dispatched so that
1654 the callback still sees the process as a zombie.
1657 HASHMAP_FOREACH(s, e->child_sources, i) {
1658 assert(s->type == SOURCE_CHILD);
1663 if (s->enabled == SD_EVENT_OFF)
1666 zero(s->child.siginfo);
1667 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1668 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1672 if (s->child.siginfo.si_pid != 0) {
1674 s->child.siginfo.si_code == CLD_EXITED ||
1675 s->child.siginfo.si_code == CLD_KILLED ||
1676 s->child.siginfo.si_code == CLD_DUMPED;
1678 if (!zombie && (s->child.options & WEXITED)) {
1679 /* If the child isn't dead then let's
1680 * immediately remove the state change
1681 * from the queue, since there's no
1682 * benefit in leaving it queued */
1684 assert(s->child.options & (WSTOPPED|WCONTINUED));
1685 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
1688 r = source_set_pending(s, true);
/* Drains the signalfd: for each received siginfo, SIGCHLD triggers a
 * child-source sweep via process_child(), and a matching signal source
 * (if registered) gets the siginfo stored and is marked pending. Reads
 * until EAGAIN; EINTR retries. */
1697 static int process_signal(sd_event *e, uint32_t events) {
1698 bool read_one = false;
1702 assert(e->signal_sources);
1704 assert_return(events == EPOLLIN, -EIO);
1707 struct signalfd_siginfo si;
1711 ss = read(e->signal_fd, &si, sizeof(si));
1713 if (errno == EAGAIN || errno == EINTR)
1719 if (ss != sizeof(si))
1724 s = e->signal_sources[si.ssi_signo];
1725 if (si.ssi_signo == SIGCHLD) {
1726 r = process_child(e);
1735 s->signal.siginfo = si;
1736 r = source_set_pending(s, true);
/* Invokes one source's user callback. Clears the pending flag first
 * (except DEFER/EXIT, which have no pending state to clear), disables
 * ONESHOT sources before dispatch, and sets s->dispatching around the
 * callback so unref-during-dispatch is handled safely elsewhere. Child
 * sources are reaped for good after the callback ran. A callback
 * returning an error gets the source disabled with a debug log rather
 * than failing the loop. */
1744 static int source_dispatch(sd_event_source *s) {
1748 assert(s->pending || s->type == SOURCE_EXIT);
1750 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
1751 r = source_set_pending(s, false);
1756 if (s->enabled == SD_EVENT_ONESHOT) {
1757 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1762 s->dispatching = true;
1767 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1770 case SOURCE_MONOTONIC:
1771 r = s->time.callback(s, s->time.next, s->userdata);
1774 case SOURCE_REALTIME:
1775 r = s->time.callback(s, s->time.next, s->userdata);
1779 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1782 case SOURCE_CHILD: {
1785 zombie = s->child.siginfo.si_code == CLD_EXITED ||
1786 s->child.siginfo.si_code == CLD_KILLED ||
1787 s->child.siginfo.si_code == CLD_DUMPED;
1789 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1791 /* Now, reap the PID for good. */
1793 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
1799 r = s->defer.callback(s, s->userdata);
1803 r = s->exit.callback(s, s->userdata);
1806 case SOURCE_WATCHDOG:
1807 assert_not_reached("Wut? I shouldn't exist.");
1810 s->dispatching = false;
1813 log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
1818 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Run the prepare callbacks of all sources queued in the prepare
 * prioq, at most once per loop iteration each.
 * NOTE(review): elided listing — the loop construct, error checks and
 * final return are not visible; comments cover the visible lines. */
1823 static int event_prepare(sd_event *e) {
1831 s = prioq_peek(e->prepare);
/* Stop once every source has either been prepared this iteration or
 * is disabled (the prioq presumably orders prepared sources last —
 * TODO confirm against the comparator). */
1832 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
/* Record that this source was prepared in the current iteration and
 * push it down the queue accordingly. */
1835 s->prepare_iteration = e->iteration;
1836 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
/* The prepare callback counts as user code, too. */
1842 s->dispatching = true;
1843 r = s->prepare(s, s->userdata);
1844 s->dispatching = false;
/* Like dispatch failures: log and disable, don't abort the loop. */
1847 log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
1852 sd_event_source_set_enabled(s, SD_EVENT_OFF);
/* Dispatch the highest-priority enabled exit source. When none is
 * left, the loop transitions to FINISHED.
 * NOTE(review): elided listing — returns and braces not visible. */
1858 static int dispatch_exit(sd_event *e) {
1864 p = prioq_peek(e->exit);
/* No (enabled) exit sources remain: the event loop is done. */
1865 if (!p || p->enabled == SD_EVENT_OFF) {
1866 e->state = SD_EVENT_FINISHED;
/* Enter EXITING only for the duration of the dispatch, then return
 * to PASSIVE so the caller may iterate again. */
1872 e->state = SD_EVENT_EXITING;
1874 r = source_dispatch(p);
1876 e->state = SD_EVENT_PASSIVE;
/* Return the highest-priority pending event source, or NULL if there
 * is none ready to dispatch.
 * NOTE(review): elided listing — the return statements themselves are
 * not visible here. */
1882 static sd_event_source* event_next_pending(sd_event *e) {
1887 p = prioq_peek(e->pending);
/* A disabled source at the top means nothing dispatchable is queued
 * (presumably the prioq comparator sorts disabled sources last —
 * TODO confirm). */
1891 if (p->enabled == SD_EVENT_OFF)
1897 static int arm_watchdog(sd_event *e) {
1898 struct itimerspec its = {};
1903 assert(e->watchdog_fd >= 0);
1905 t = sleep_between(e,
1906 e->watchdog_last + (e->watchdog_period / 2),
1907 e->watchdog_last + (e->watchdog_period * 3 / 4));
1909 timespec_store(&its.it_value, t);
1911 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Send a keep-alive ping to the service manager and re-arm the
 * watchdog timer. Rate-limited to at most one ping per quarter
 * period. */
1918 static int process_watchdog(sd_event *e) {
1924 /* Don't notify watchdog too often */
1925 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
1928 sd_notify(false, "WATCHDOG=1");
1929 e->watchdog_last = e->timestamp.monotonic;
1931 return arm_watchdog(e);
/* Run a single iteration of the event loop: run prepare callbacks,
 * arm the timerfds, poll, translate poll results into pending
 * sources, then dispatch one pending source. 'timeout' is in µs;
 * (uint64_t) -1 blocks indefinitely.
 * NOTE(review): elided listing — error checks after each call, the
 * 'finish' label and several braces are not visible here; comments
 * cover the visible lines only. */
1934 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1935 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1939 assert_return(e, -EINVAL);
1940 assert_return(!event_pid_changed(e), -ECHILD);
1941 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1942 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
/* An exit was requested: unwind the exit sources instead of polling. */
1944 if (e->exit_requested)
1945 return dispatch_exit(e);
1949 e->state = SD_EVENT_RUNNING;
/* Let prepare callbacks adjust their sources before we arm timers. */
1951 r = event_prepare(e);
/* Re-arm both clock timerfds from their earliest/latest prioqs. */
1955 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1959 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
/* Something is already dispatchable: presumably poll without blocking
 * (the branch body is elided — TODO confirm). */
1963 if (event_next_pending(e) || e->need_process_child)
/* Round the µs timeout up to whole milliseconds for epoll_wait(). */
1966 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1967 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
/* EAGAIN/EINTR are not failures, merely an empty iteration. */
1969 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
/* Snapshot "now" once; timer processing below uses this timestamp. */
1973 dual_timestamp_get(&e->timestamp);
1975 for (i = 0; i < m; i++) {
/* Internal fds carry a source-type tag in data.ptr; everything else
 * is a user I/O source whose pointer is the source itself. */
1977 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1978 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1979 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1980 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1981 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1982 r = process_signal(e, ev_queue[i].events);
1983 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
1984 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
1986 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
/* Keep the service manager's watchdog fed. */
1992 r = process_watchdog(e);
/* Queue all timer sources whose deadline has passed. */
1996 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
2000 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
/* A SIGCHLD arrived earlier: re-check all watched children. */
2004 if (e->need_process_child) {
2005 r = process_child(e);
/* Dispatch exactly one pending source per iteration. */
2010 p = event_next_pending(e);
2016 r = source_dispatch(p);
2019 e->state = SD_EVENT_PASSIVE;
/* Run the event loop until it reaches the FINISHED state, blocking in
 * each iteration. NOTE(review): elided listing — the error check on r
 * and the final return are not visible here. */
2025 _public_ int sd_event_loop(sd_event *e) {
2028 assert_return(e, -EINVAL);
2029 assert_return(!event_pid_changed(e), -ECHILD);
2030 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
2034 while (e->state != SD_EVENT_FINISHED) {
/* Block indefinitely in each iteration. */
2035 r = sd_event_run(e, (uint64_t) -1);
/* Return the loop's current state (SD_EVENT_PASSIVE/RUNNING/...).
 * NOTE(review): the return line itself is elided from this listing. */
2047 _public_ int sd_event_get_state(sd_event *e) {
2048 assert_return(e, -EINVAL);
2049 assert_return(!event_pid_changed(e), -ECHILD);
/* Store the code passed to sd_event_exit() in *code. Fails (with an
 * elided error return) when no exit was requested yet. */
2054 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2055 assert_return(e, -EINVAL);
2056 assert_return(code, -EINVAL);
2057 assert_return(!event_pid_changed(e), -ECHILD);
/* No exit requested: the elided branch presumably returns an error —
 * TODO confirm the exact errno. */
2059 if (!e->exit_requested)
2062 *code = e->exit_code;
/* Request termination of the event loop: the next sd_event_run()
 * iteration dispatches the exit sources instead of polling, and
 * 'code' becomes retrievable via sd_event_get_exit_code(). */
2066 _public_ int sd_event_exit(sd_event *e, int code) {
2067 assert_return(e, -EINVAL);
2068 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2069 assert_return(!event_pid_changed(e), -ECHILD);
2071 e->exit_requested = true;
2072 e->exit_code = code;
/* Return the CLOCK_REALTIME timestamp taken at the start of the
 * current/last loop iteration — cheaper and more consistent than
 * calling the clock again. -ENODATA before the first iteration. */
2077 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
2078 assert_return(e, -EINVAL);
2079 assert_return(usec, -EINVAL);
2080 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
2081 assert_return(!event_pid_changed(e), -ECHILD);
2083 *usec = e->timestamp.realtime;
/* Return the CLOCK_MONOTONIC timestamp taken at the start of the
 * current/last loop iteration. -ENODATA before the first iteration. */
2087 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
2088 assert_return(e, -EINVAL);
2089 assert_return(usec, -EINVAL);
2090 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
2091 assert_return(!event_pid_changed(e), -ECHILD);
2093 *usec = e->timestamp.monotonic;
/* Return the calling thread's default event loop, creating it on
 * first use. The loop object records &default_event so it can clear
 * the thread-local on destruction.
 * NOTE(review): elided listing — the NULL-ret branch, error handling
 * and final return are not visible here. */
2097 _public_ int sd_event_default(sd_event **ret) {
/* One default loop per thread. */
2099 static __thread sd_event *default_event = NULL;
/* With ret == NULL this is presumably just an existence probe (the
 * guarding condition is elided — TODO confirm). */
2104 return !!default_event;
/* Hand out another reference to the already-created default loop. */
2106 if (default_event) {
2107 *ret = sd_event_ref(default_event);
2111 r = sd_event_new(&e);
/* Let the loop reset this thread-local when it is freed. */
2115 e->default_event_ptr = &default_event;
/* Return the TID of the thread the loop is attached to.
 * NOTE(review): the body that fills *tid is elided from this listing. */
2123 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2124 assert_return(e, -EINVAL);
2125 assert_return(tid, -EINVAL);
2126 assert_return(!event_pid_changed(e), -ECHILD);
/* Enable or disable sd_notify() watchdog integration. When enabled
 * and $WATCHDOG_USEC is set, a timerfd is created and added to the
 * epoll so the loop pings the service manager periodically.
 * NOTE(review): elided listing — several error checks, the enable/
 * disable branch structure, the 'fail' label and returns are not
 * visible here; comments cover the visible lines only. */
2136 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2139 assert_return(e, -EINVAL);
2140 assert_return(!event_pid_changed(e), -ECHILD);
/* Already in the requested state: nothing to do. */
2142 if (e->watchdog == !!b)
2146 struct epoll_event ev = {};
/* The service manager communicates the watchdog period via env var. */
2149 env = getenv("WATCHDOG_USEC");
2153 r = safe_atou64(env, &e->watchdog_period);
2156 if (e->watchdog_period <= 0)
2159 /* Issue first ping immediately */
2160 sd_notify(false, "WATCHDOG=1");
2161 e->watchdog_last = now(CLOCK_MONOTONIC);
2163 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2164 if (e->watchdog_fd < 0)
2167 r = arm_watchdog(e);
/* Tag the fd so the poll loop recognizes it as the watchdog timer. */
2171 ev.events = EPOLLIN;
2172 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2174 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
/* Disable path: unhook and close the timerfd if we had one. */
2181 if (e->watchdog_fd >= 0) {
2182 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2183 close_nointr_nofail(e->watchdog_fd);
2184 e->watchdog_fd = -1;
/* Failure path for the enable branch: clean up the half-set-up fd. */
2192 close_nointr_nofail(e->watchdog_fd);
2193 e->watchdog_fd = -1;
2197 _public_ int sd_event_get_watchdog(sd_event *e) {
2198 assert_return(e, -EINVAL);
2199 assert_return(!event_pid_changed(e), -ECHILD);