1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
27 #include "sd-daemon.h"
32 #include "time-util.h"
37 #define EPOLL_QUEUE_MAX 64
38 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
40 typedef enum EventSourceType {
51 struct sd_event_source {
56 sd_event_handler_t prepare;
58 EventSourceType type:4;
63 unsigned pending_index;
64 unsigned prepare_index;
65 unsigned pending_iteration;
66 unsigned prepare_iteration;
70 sd_event_io_handler_t callback;
77 sd_event_time_handler_t callback;
78 usec_t next, accuracy;
79 unsigned earliest_index;
80 unsigned latest_index;
83 sd_event_signal_handler_t callback;
84 struct signalfd_siginfo siginfo;
88 sd_event_child_handler_t callback;
94 sd_event_handler_t callback;
97 sd_event_handler_t callback;
115 /* For both clocks we maintain two priority queues each, one
116 * ordered for the earliest times the events may be
117 * dispatched, and one ordered by the latest times they must
118 * have been dispatched. The range between the top entries in
119 * the two prioqs is the time window we can freely schedule
121 Prioq *monotonic_earliest;
122 Prioq *monotonic_latest;
123 Prioq *realtime_earliest;
124 Prioq *realtime_latest;
126 usec_t realtime_next, monotonic_next;
130 sd_event_source **signal_sources;
132 Hashmap *child_sources;
133 unsigned n_enabled_child_sources;
140 dual_timestamp timestamp;
143 bool quit_requested:1;
144 bool need_process_child:1;
148 sd_event **default_event_ptr;
150 usec_t watchdog_last, watchdog_period;
153 static int pending_prioq_compare(const void *a, const void *b) {
154 const sd_event_source *x = a, *y = b;
159 /* Enabled ones first */
160 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
162 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
165 /* Lower priority values first */
166 if (x->priority < y->priority)
168 if (x->priority > y->priority)
171 /* Older entries first */
172 if (x->pending_iteration < y->pending_iteration)
174 if (x->pending_iteration > y->pending_iteration)
177 /* Stability for the rest */
186 static int prepare_prioq_compare(const void *a, const void *b) {
187 const sd_event_source *x = a, *y = b;
192 /* Move most recently prepared ones last, so that we can stop
193 * preparing as soon as we hit one that has already been
194 * prepared in the current iteration */
195 if (x->prepare_iteration < y->prepare_iteration)
197 if (x->prepare_iteration > y->prepare_iteration)
200 /* Enabled ones first */
201 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206 /* Lower priority values first */
207 if (x->priority < y->priority)
209 if (x->priority > y->priority)
212 /* Stability for the rest */
221 static int earliest_time_prioq_compare(const void *a, const void *b) {
222 const sd_event_source *x = a, *y = b;
224 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
225 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
227 /* Enabled ones first */
228 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
230 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233 /* Move the pending ones to the end */
234 if (!x->pending && y->pending)
236 if (x->pending && !y->pending)
240 if (x->time.next < y->time.next)
242 if (x->time.next > y->time.next)
245 /* Stability for the rest */
254 static int latest_time_prioq_compare(const void *a, const void *b) {
255 const sd_event_source *x = a, *y = b;
257 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
258 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
260 /* Enabled ones first */
261 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
263 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
266 /* Move the pending ones to the end */
267 if (!x->pending && y->pending)
269 if (x->pending && !y->pending)
273 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
275 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
278 /* Stability for the rest */
287 static int quit_prioq_compare(const void *a, const void *b) {
288 const sd_event_source *x = a, *y = b;
290 assert(x->type == SOURCE_QUIT);
291 assert(y->type == SOURCE_QUIT);
293 /* Enabled ones first */
294 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
296 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
299 /* Lower priority values first */
300 if (x->priority < y->priority)
302 if (x->priority > y->priority)
305 /* Stability for the rest */
314 static void event_free(sd_event *e) {
317 if (e->default_event_ptr)
318 *(e->default_event_ptr) = NULL;
320 if (e->epoll_fd >= 0)
321 close_nointr_nofail(e->epoll_fd);
323 if (e->signal_fd >= 0)
324 close_nointr_nofail(e->signal_fd);
326 if (e->realtime_fd >= 0)
327 close_nointr_nofail(e->realtime_fd);
329 if (e->monotonic_fd >= 0)
330 close_nointr_nofail(e->monotonic_fd);
332 if (e->watchdog_fd >= 0)
333 close_nointr_nofail(e->watchdog_fd);
335 prioq_free(e->pending);
336 prioq_free(e->prepare);
337 prioq_free(e->monotonic_earliest);
338 prioq_free(e->monotonic_latest);
339 prioq_free(e->realtime_earliest);
340 prioq_free(e->realtime_latest);
343 free(e->signal_sources);
345 hashmap_free(e->child_sources);
349 _public_ int sd_event_new(sd_event** ret) {
353 assert_return(ret, -EINVAL);
355 e = new0(sd_event, 1);
360 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
361 e->realtime_next = e->monotonic_next = (usec_t) -1;
362 e->original_pid = getpid();
364 assert_se(sigemptyset(&e->sigset) == 0);
366 e->pending = prioq_new(pending_prioq_compare);
372 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
373 if (e->epoll_fd < 0) {
386 _public_ sd_event* sd_event_ref(sd_event *e) {
387 assert_return(e, NULL);
389 assert(e->n_ref >= 1);
395 _public_ sd_event* sd_event_unref(sd_event *e) {
400 assert(e->n_ref >= 1);
409 static bool event_pid_changed(sd_event *e) {
412 /* We don't support people creating am event loop and keeping
413 * it around over a fork(). Let's complain. */
415 return e->original_pid != getpid();
418 static int source_io_unregister(sd_event_source *s) {
422 assert(s->type == SOURCE_IO);
424 if (!s->io.registered)
427 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
431 s->io.registered = false;
435 static int source_io_register(
440 struct epoll_event ev = {};
444 assert(s->type == SOURCE_IO);
445 assert(enabled != SD_EVENT_OFF);
450 if (enabled == SD_EVENT_ONESHOT)
451 ev.events |= EPOLLONESHOT;
453 if (s->io.registered)
454 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
456 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
461 s->io.registered = true;
466 static void source_free(sd_event_source *s) {
474 source_io_unregister(s);
478 case SOURCE_MONOTONIC:
479 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
480 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
483 case SOURCE_REALTIME:
484 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
485 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
489 if (s->signal.sig > 0) {
490 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
491 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
493 if (s->event->signal_sources)
494 s->event->signal_sources[s->signal.sig] = NULL;
500 if (s->child.pid > 0) {
501 if (s->enabled != SD_EVENT_OFF) {
502 assert(s->event->n_enabled_child_sources > 0);
503 s->event->n_enabled_child_sources--;
506 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
507 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
509 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
519 prioq_remove(s->event->quit, s, &s->quit.prioq_index);
524 prioq_remove(s->event->pending, s, &s->pending_index);
527 prioq_remove(s->event->prepare, s, &s->prepare_index);
529 sd_event_unref(s->event);
535 static int source_set_pending(sd_event_source *s, bool b) {
539 assert(s->type != SOURCE_QUIT);
547 s->pending_iteration = s->event->iteration;
549 r = prioq_put(s->event->pending, s, &s->pending_index);
555 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
557 if (s->type == SOURCE_REALTIME) {
558 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
559 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
560 } else if (s->type == SOURCE_MONOTONIC) {
561 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
562 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
568 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
573 s = new0(sd_event_source, 1);
578 s->event = sd_event_ref(e);
580 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
585 _public_ int sd_event_add_io(
589 sd_event_io_handler_t callback,
591 sd_event_source **ret) {
596 assert_return(e, -EINVAL);
597 assert_return(fd >= 0, -EINVAL);
598 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
599 assert_return(callback, -EINVAL);
600 assert_return(ret, -EINVAL);
601 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
602 assert_return(!event_pid_changed(e), -ECHILD);
604 s = source_new(e, SOURCE_IO);
609 s->io.events = events;
610 s->io.callback = callback;
611 s->userdata = userdata;
612 s->enabled = SD_EVENT_ON;
614 r = source_io_register(s, s->enabled, events);
624 static int event_setup_timer_fd(
626 EventSourceType type,
630 struct epoll_event ev = {};
637 if (_likely_(*timer_fd >= 0))
640 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
645 ev.data.ptr = INT_TO_PTR(type);
647 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
649 close_nointr_nofail(fd);
653 /* When we sleep for longer, we try to realign the wakeup to
654 the same time within each minute/second/250ms, so that
655 events all across the system can be coalesced into a single
656 CPU wakeup. However, let's take some system-specific
657 randomness for this value, so that in a network of systems
658 with synced clocks timer events are distributed a
659 bit. Here, we calculate a perturbation usec offset from the
662 if (sd_id128_get_boot(&bootid) >= 0)
663 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
669 static int event_add_time_internal(
671 EventSourceType type,
678 sd_event_time_handler_t callback,
680 sd_event_source **ret) {
685 assert_return(e, -EINVAL);
686 assert_return(callback, -EINVAL);
687 assert_return(ret, -EINVAL);
688 assert_return(usec != (uint64_t) -1, -EINVAL);
689 assert_return(accuracy != (uint64_t) -1, -EINVAL);
690 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
691 assert_return(!event_pid_changed(e), -ECHILD);
698 *earliest = prioq_new(earliest_time_prioq_compare);
704 *latest = prioq_new(latest_time_prioq_compare);
710 r = event_setup_timer_fd(e, type, timer_fd, id);
715 s = source_new(e, type);
720 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
721 s->time.callback = callback;
722 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
723 s->userdata = userdata;
724 s->enabled = SD_EVENT_ONESHOT;
726 r = prioq_put(*earliest, s, &s->time.earliest_index);
730 r = prioq_put(*latest, s, &s->time.latest_index);
742 _public_ int sd_event_add_monotonic(sd_event *e,
745 sd_event_time_handler_t callback,
747 sd_event_source **ret) {
749 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
752 _public_ int sd_event_add_realtime(sd_event *e,
755 sd_event_time_handler_t callback,
757 sd_event_source **ret) {
759 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
762 static int event_update_signal_fd(sd_event *e) {
763 struct epoll_event ev = {};
769 add_to_epoll = e->signal_fd < 0;
771 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
781 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
783 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
785 close_nointr_nofail(e->signal_fd);
794 _public_ int sd_event_add_signal(
797 sd_event_signal_handler_t callback,
799 sd_event_source **ret) {
804 assert_return(e, -EINVAL);
805 assert_return(sig > 0, -EINVAL);
806 assert_return(sig < _NSIG, -EINVAL);
807 assert_return(callback, -EINVAL);
808 assert_return(ret, -EINVAL);
809 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
810 assert_return(!event_pid_changed(e), -ECHILD);
812 if (!e->signal_sources) {
813 e->signal_sources = new0(sd_event_source*, _NSIG);
814 if (!e->signal_sources)
816 } else if (e->signal_sources[sig])
819 s = source_new(e, SOURCE_SIGNAL);
824 s->signal.callback = callback;
825 s->userdata = userdata;
826 s->enabled = SD_EVENT_ON;
828 e->signal_sources[sig] = s;
829 assert_se(sigaddset(&e->sigset, sig) == 0);
831 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
832 r = event_update_signal_fd(e);
843 _public_ int sd_event_add_child(
847 sd_event_child_handler_t callback,
849 sd_event_source **ret) {
854 assert_return(e, -EINVAL);
855 assert_return(pid > 1, -EINVAL);
856 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
857 assert_return(options != 0, -EINVAL);
858 assert_return(callback, -EINVAL);
859 assert_return(ret, -EINVAL);
860 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
861 assert_return(!event_pid_changed(e), -ECHILD);
863 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
867 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
870 s = source_new(e, SOURCE_CHILD);
875 s->child.options = options;
876 s->child.callback = callback;
877 s->userdata = userdata;
878 s->enabled = SD_EVENT_ONESHOT;
880 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
886 e->n_enabled_child_sources ++;
888 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
890 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
891 r = event_update_signal_fd(e);
898 e->need_process_child = true;
904 _public_ int sd_event_add_defer(
906 sd_event_handler_t callback,
908 sd_event_source **ret) {
913 assert_return(e, -EINVAL);
914 assert_return(callback, -EINVAL);
915 assert_return(ret, -EINVAL);
916 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
917 assert_return(!event_pid_changed(e), -ECHILD);
919 s = source_new(e, SOURCE_DEFER);
923 s->defer.callback = callback;
924 s->userdata = userdata;
925 s->enabled = SD_EVENT_ONESHOT;
927 r = source_set_pending(s, true);
937 _public_ int sd_event_add_quit(
939 sd_event_handler_t callback,
941 sd_event_source **ret) {
946 assert_return(e, -EINVAL);
947 assert_return(callback, -EINVAL);
948 assert_return(ret, -EINVAL);
949 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
950 assert_return(!event_pid_changed(e), -ECHILD);
953 e->quit = prioq_new(quit_prioq_compare);
958 s = source_new(e, SOURCE_QUIT);
962 s->quit.callback = callback;
963 s->userdata = userdata;
964 s->quit.prioq_index = PRIOQ_IDX_NULL;
965 s->enabled = SD_EVENT_ONESHOT;
967 r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
977 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
978 assert_return(s, NULL);
980 assert(s->n_ref >= 1);
986 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
991 assert(s->n_ref >= 1);
1000 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1001 assert_return(s, NULL);
1006 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1007 assert_return(s, -EINVAL);
1008 assert_return(s->type != SOURCE_QUIT, -EDOM);
1009 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1010 assert_return(!event_pid_changed(s->event), -ECHILD);
1015 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1016 assert_return(s, -EINVAL);
1017 assert_return(s->type == SOURCE_IO, -EDOM);
1018 assert_return(!event_pid_changed(s->event), -ECHILD);
1023 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1024 assert_return(s, -EINVAL);
1025 assert_return(events, -EINVAL);
1026 assert_return(s->type == SOURCE_IO, -EDOM);
1027 assert_return(!event_pid_changed(s->event), -ECHILD);
1029 *events = s->io.events;
1033 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1036 assert_return(s, -EINVAL);
1037 assert_return(s->type == SOURCE_IO, -EDOM);
1038 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1039 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1040 assert_return(!event_pid_changed(s->event), -ECHILD);
1042 if (s->io.events == events)
1045 if (s->enabled != SD_EVENT_OFF) {
1046 r = source_io_register(s, s->enabled, events);
1051 s->io.events = events;
1052 source_set_pending(s, false);
1057 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1058 assert_return(s, -EINVAL);
1059 assert_return(revents, -EINVAL);
1060 assert_return(s->type == SOURCE_IO, -EDOM);
1061 assert_return(s->pending, -ENODATA);
1062 assert_return(!event_pid_changed(s->event), -ECHILD);
1064 *revents = s->io.revents;
1068 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1069 assert_return(s, -EINVAL);
1070 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1071 assert_return(!event_pid_changed(s->event), -ECHILD);
1073 return s->signal.sig;
1076 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1077 assert_return(s, -EINVAL);
1078 assert_return(!event_pid_changed(s->event), -ECHILD);
1083 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1084 assert_return(s, -EINVAL);
1085 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1086 assert_return(!event_pid_changed(s->event), -ECHILD);
1088 if (s->priority == priority)
1091 s->priority = priority;
1094 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1097 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1099 if (s->type == SOURCE_QUIT)
1100 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1105 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1106 assert_return(s, -EINVAL);
1107 assert_return(m, -EINVAL);
1108 assert_return(!event_pid_changed(s->event), -ECHILD);
1114 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1117 assert_return(s, -EINVAL);
1118 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1119 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1120 assert_return(!event_pid_changed(s->event), -ECHILD);
1122 if (s->enabled == m)
1125 if (m == SD_EVENT_OFF) {
1130 r = source_io_unregister(s);
1137 case SOURCE_MONOTONIC:
1139 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1140 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1143 case SOURCE_REALTIME:
1145 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1146 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1151 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1152 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1153 event_update_signal_fd(s->event);
1161 assert(s->event->n_enabled_child_sources > 0);
1162 s->event->n_enabled_child_sources--;
1164 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1165 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1166 event_update_signal_fd(s->event);
1173 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1185 r = source_io_register(s, m, s->io.events);
1192 case SOURCE_MONOTONIC:
1194 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1195 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1198 case SOURCE_REALTIME:
1200 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1201 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1207 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1208 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1209 event_update_signal_fd(s->event);
1216 if (s->enabled == SD_EVENT_OFF) {
1217 s->event->n_enabled_child_sources++;
1219 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1220 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1221 event_update_signal_fd(s->event);
1228 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1238 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1241 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1246 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1247 assert_return(s, -EINVAL);
1248 assert_return(usec, -EINVAL);
1249 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1250 assert_return(!event_pid_changed(s->event), -ECHILD);
1252 *usec = s->time.next;
1256 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1257 assert_return(s, -EINVAL);
1258 assert_return(usec != (uint64_t) -1, -EINVAL);
1259 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1260 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1261 assert_return(!event_pid_changed(s->event), -ECHILD);
1263 s->time.next = usec;
1265 source_set_pending(s, false);
1267 if (s->type == SOURCE_REALTIME) {
1268 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1269 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1271 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1272 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1278 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1279 assert_return(s, -EINVAL);
1280 assert_return(usec, -EINVAL);
1281 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1282 assert_return(!event_pid_changed(s->event), -ECHILD);
1284 *usec = s->time.accuracy;
1288 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1289 assert_return(s, -EINVAL);
1290 assert_return(usec != (uint64_t) -1, -EINVAL);
1291 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1292 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1293 assert_return(!event_pid_changed(s->event), -ECHILD);
1296 usec = DEFAULT_ACCURACY_USEC;
1298 s->time.accuracy = usec;
1300 source_set_pending(s, false);
1302 if (s->type == SOURCE_REALTIME)
1303 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1305 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1310 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1311 assert_return(s, -EINVAL);
1312 assert_return(pid, -EINVAL);
1313 assert_return(s->type == SOURCE_CHILD, -EDOM);
1314 assert_return(!event_pid_changed(s->event), -ECHILD);
1316 *pid = s->child.pid;
1320 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1323 assert_return(s, -EINVAL);
1324 assert_return(s->type != SOURCE_QUIT, -EDOM);
1325 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1326 assert_return(!event_pid_changed(s->event), -ECHILD);
1328 if (s->prepare == callback)
1331 if (callback && s->prepare) {
1332 s->prepare = callback;
1336 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1340 s->prepare = callback;
1343 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1347 prioq_remove(s->event->prepare, s, &s->prepare_index);
1352 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1353 assert_return(s, NULL);
1358 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1370 Find a good time to wake up again between times a and b. We
1371 have two goals here:
1373 a) We want to wake up as seldom as possible, hence prefer
1374 later times over earlier times.
1376 b) But if we have to wake up, then let's make sure to
1377 dispatch as much as possible on the entire system.
1379 We implement this by waking up everywhere at the same time
1380 within any given minute if we can, synchronised via the
1381 perturbation value determined from the boot ID. If we can't,
1382 then we try to find the same spot in every 10s, then 1s and
1383 then 250ms step. Otherwise, we pick the last possible time
1387 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1389 if (_unlikely_(c < USEC_PER_MINUTE))
1392 c -= USEC_PER_MINUTE;
1398 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1400 if (_unlikely_(c < USEC_PER_SEC*10))
1403 c -= USEC_PER_SEC*10;
1409 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1411 if (_unlikely_(c < USEC_PER_SEC))
1420 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1422 if (_unlikely_(c < USEC_PER_MSEC*250))
1425 c -= USEC_PER_MSEC*250;
1434 static int event_arm_timer(
1441 struct itimerspec its = {};
1442 sd_event_source *a, *b;
1449 a = prioq_peek(earliest);
1450 if (!a || a->enabled == SD_EVENT_OFF) {
1455 if (*next == (usec_t) -1)
1459 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1463 *next = (usec_t) -1;
1468 b = prioq_peek(latest);
1469 assert_se(b && b->enabled != SD_EVENT_OFF);
1471 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1475 assert_se(timer_fd >= 0);
1478 /* We don't want to disarm here, just mean some time looooong ago. */
1479 its.it_value.tv_sec = 0;
1480 its.it_value.tv_nsec = 1;
1482 timespec_store(&its.it_value, t);
1484 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1492 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1495 assert(s->type == SOURCE_IO);
1497 s->io.revents = events;
1499 return source_set_pending(s, true);
1502 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1509 assert_return(events == EPOLLIN, -EIO);
1511 ss = read(fd, &x, sizeof(x));
1513 if (errno == EAGAIN || errno == EINTR)
1519 if (ss != sizeof(x))
1523 *next = (usec_t) -1;
1528 static int process_timer(
1540 s = prioq_peek(earliest);
1543 s->enabled == SD_EVENT_OFF ||
1547 r = source_set_pending(s, true);
1551 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1552 prioq_reshuffle(latest, s, &s->time.latest_index);
1558 static int process_child(sd_event *e) {
1565 e->need_process_child = false;
1568 So, this is ugly. We iteratively invoke waitid() with P_PID
1569 + WNOHANG for each PID we wait for, instead of using
1570 P_ALL. This is because we only want to get child
1571 information of very specific child processes, and not all
1572 of them. We might not have processed the SIGCHLD even of a
1573 previous invocation and we don't want to maintain a
1574 unbounded *per-child* event queue, hence we really don't
1575 want anything flushed out of the kernel's queue that we
1576 don't care about. Since this is O(n) this means that if you
1577 have a lot of processes you probably want to handle SIGCHLD
1580 We do not reap the children here (by using WNOWAIT), this
1581 is only done after the event source is dispatched so that
1582 the callback still sees the process as a zombie.
1585 HASHMAP_FOREACH(s, e->child_sources, i) {
1586 assert(s->type == SOURCE_CHILD);
1591 if (s->enabled == SD_EVENT_OFF)
1594 zero(s->child.siginfo);
1595 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1596 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1600 if (s->child.siginfo.si_pid != 0) {
1602 s->child.siginfo.si_code == CLD_EXITED ||
1603 s->child.siginfo.si_code == CLD_KILLED ||
1604 s->child.siginfo.si_code == CLD_DUMPED;
1606 if (!zombie && (s->child.options & WEXITED)) {
1607 /* If the child isn't dead then let's
1608 * immediately remove the state change
1609 * from the queue, since there's no
1610 * benefit in leaving it queued */
1612 assert(s->child.options & (WSTOPPED|WCONTINUED));
1613 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
1616 r = source_set_pending(s, true);
1625 static int process_signal(sd_event *e, uint32_t events) {
1626 bool read_one = false;
1630 assert(e->signal_sources);
1632 assert_return(events == EPOLLIN, -EIO);
1635 struct signalfd_siginfo si;
1639 ss = read(e->signal_fd, &si, sizeof(si));
1641 if (errno == EAGAIN || errno == EINTR)
1647 if (ss != sizeof(si))
1652 s = e->signal_sources[si.ssi_signo];
1653 if (si.ssi_signo == SIGCHLD) {
1654 r = process_child(e);
1663 s->signal.siginfo = si;
1664 r = source_set_pending(s, true);
1672 static int source_dispatch(sd_event_source *s) {
1676 assert(s->pending || s->type == SOURCE_QUIT);
1678 if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1679 r = source_set_pending(s, false);
1684 if (s->enabled == SD_EVENT_ONESHOT) {
1685 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1690 sd_event_source_ref(s);
1695 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1698 case SOURCE_MONOTONIC:
1699 r = s->time.callback(s, s->time.next, s->userdata);
1702 case SOURCE_REALTIME:
1703 r = s->time.callback(s, s->time.next, s->userdata);
1707 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1710 case SOURCE_CHILD: {
1713 zombie = s->child.siginfo.si_code == CLD_EXITED ||
1714 s->child.siginfo.si_code == CLD_KILLED ||
1715 s->child.siginfo.si_code == CLD_DUMPED;
1717 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1719 /* Now, reap the PID for good. */
1721 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
1727 r = s->defer.callback(s, s->userdata);
1731 r = s->quit.callback(s, s->userdata);
1735 sd_event_source_unref(s);
1740 static int event_prepare(sd_event *e) {
1748 s = prioq_peek(e->prepare);
1749 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
1752 s->prepare_iteration = e->iteration;
1753 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1758 r = s->prepare(s, s->userdata);
1767 static int dispatch_quit(sd_event *e) {
1773 p = prioq_peek(e->quit);
1774 if (!p || p->enabled == SD_EVENT_OFF) {
1775 e->state = SD_EVENT_FINISHED;
1781 e->state = SD_EVENT_QUITTING;
1783 r = source_dispatch(p);
1785 e->state = SD_EVENT_PASSIVE;
1791 static sd_event_source* event_next_pending(sd_event *e) {
1796 p = prioq_peek(e->pending);
1800 if (p->enabled == SD_EVENT_OFF)
1806 static int arm_watchdog(sd_event *e) {
1807 struct itimerspec its = {};
1812 assert(e->watchdog_fd >= 0);
1814 t = sleep_between(e,
1815 e->watchdog_last + (e->watchdog_period / 2),
1816 e->watchdog_last + (e->watchdog_period * 3 / 4));
1818 timespec_store(&its.it_value, t);
1820 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Sends a keep-alive ping to the service manager and re-arms the
 * watchdog timer.  Pings are rate-limited to at most once per quarter
 * of the watchdog period. */
1827 static int process_watchdog(sd_event *e) {
1833 /* Don't notify watchdog too often */
1834 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
/* Notify the manager; sd_notify(false, ...) keeps $NOTIFY_SOCKET set. */
1837 sd_notify(false, "WATCHDOG=1");
1838 e->watchdog_last = e->timestamp.monotonic;
1840 return arm_watchdog(e);
/* Runs a single iteration of the event loop: runs prepare callbacks,
 * arms the monotonic/realtime timerfds, waits on epoll (up to
 * `timeout` usec, (uint64_t)-1 meaning forever), processes the
 * returned events, and dispatches at most one pending event source.
 * (NOTE(review): many interior lines — error paths, gotos, the `finish`
 * label — are elided in this excerpt; comments cover visible code.) */
1843 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1844 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1848 assert_return(e, -EINVAL);
1849 assert_return(!event_pid_changed(e), -ECHILD);
1850 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1851 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
/* A requested quit short-circuits the iteration and runs quit
 * handlers instead. */
1853 if (e->quit_requested)
1854 return dispatch_quit(e);
1858 e->state = SD_EVENT_RUNNING;
1860 r = event_prepare(e);
/* Arm both timerfds from their earliest/latest prioq pairs. */
1864 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1868 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
/* If something is already pending (or a SIGCHLD needs processing),
 * presumably the wait below is made non-blocking — the adjustment is
 * in elided lines; verify. */
1872 if (event_next_pending(e) || e->need_process_child)
/* Convert the usec timeout to ms, rounding up so we never wake early. */
1875 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1876 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
/* EAGAIN/EINTR are not errors, just an empty iteration. */
1878 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
/* Take one consistent realtime+monotonic timestamp for this
 * iteration; the getters below report it. */
1882 dual_timestamp_get(&e->timestamp);
/* Demultiplex: internal fds are tagged with INT_TO_PTR(source-type)
 * in epoll data; anything else is a user I/O source pointer. */
1884 for (i = 0; i < m; i++) {
1886 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1887 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1888 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1889 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1890 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1891 r = process_signal(e, ev_queue[i].events);
1892 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
1893 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
1895 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
1901 r = process_watchdog(e);
/* Queue any timer sources whose deadlines have passed. */
1905 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
1909 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
1913 if (e->need_process_child) {
1914 r = process_child(e);
/* Dispatch exactly one pending source per iteration, keeping the
 * loop responsive between dispatches. */
1919 p = event_next_pending(e);
1925 r = source_dispatch(p);
1928 e->state = SD_EVENT_PASSIVE;
/* Runs the event loop until it reaches SD_EVENT_FINISHED, blocking
 * indefinitely in each iteration.  (NOTE(review): the error handling
 * inside the loop and the final return are elided in this excerpt.) */
1934 _public_ int sd_event_loop(sd_event *e) {
1937 assert_return(e, -EINVAL);
1938 assert_return(!event_pid_changed(e), -ECHILD);
1939 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1943 while (e->state != SD_EVENT_FINISHED) {
/* (uint64_t)-1 == infinite timeout per sd_event_run(). */
1944 r = sd_event_run(e, (uint64_t) -1);
/* Returns the loop's current state (the return statement is elided in
 * this excerpt; presumably `return e->state;`). */
1956 _public_ int sd_event_get_state(sd_event *e) {
1957 assert_return(e, -EINVAL);
1958 assert_return(!event_pid_changed(e), -ECHILD);
/* Returns whether a quit has been requested via
 * sd_event_request_quit() (non-zero if so). */
1963 _public_ int sd_event_get_quit(sd_event *e) {
1964 assert_return(e, -EINVAL);
1965 assert_return(!event_pid_changed(e), -ECHILD);
1967 return e->quit_requested;
/* Flags the loop for termination; the next sd_event_run() call will
 * dispatch quit handlers instead of a normal iteration. */
1970 _public_ int sd_event_request_quit(sd_event *e) {
1971 assert_return(e, -EINVAL);
1972 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1973 assert_return(!event_pid_changed(e), -ECHILD);
1975 e->quit_requested = true;
/* Reports the CLOCK_REALTIME timestamp captured at the start of the
 * current/last loop iteration (not a fresh clock read); fails with
 * -ENODATA before the first iteration has set the timestamp. */
1979 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
1980 assert_return(e, -EINVAL);
1981 assert_return(usec, -EINVAL);
1982 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1983 assert_return(!event_pid_changed(e), -ECHILD);
1985 *usec = e->timestamp.realtime;
/* Reports the CLOCK_MONOTONIC timestamp captured at the start of the
 * current/last loop iteration (not a fresh clock read); fails with
 * -ENODATA before the first iteration has set the timestamp. */
1989 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
1990 assert_return(e, -EINVAL);
1991 assert_return(usec, -EINVAL);
1992 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1993 assert_return(!event_pid_changed(e), -ECHILD);
1995 *usec = e->timestamp.monotonic;
/* Returns a reference to the calling thread's default event loop,
 * creating it on first use.  The loop is stored in thread-local
 * storage; default_event_ptr lets the loop clear the slot on
 * destruction.  (NOTE(review): several interior lines are elided.) */
1999 _public_ int sd_event_default(sd_event **ret) {
2001 static __thread sd_event *default_event = NULL;
/* With no out-pointer, just report whether a default exists. */
2006 return !!default_event;
/* Hand out a new reference to the already-created default. */
2008 if (default_event) {
2009 *ret = sd_event_ref(default_event);
2013 r = sd_event_new(&e);
/* Back-pointer so the event can NULL the TLS slot when freed. */
2017 e->default_event_ptr = &default_event;
/* Reports the thread ID the event loop is bound to (the body that
 * reads/returns the tid is elided in this excerpt — presumably it
 * copies e->tid into *tid when set). */
2025 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2026 assert_return(e, -EINVAL);
2027 assert_return(tid, -EINVAL);
2028 assert_return(!event_pid_changed(e), -ECHILD);
2038 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2041 assert_return(e, -EINVAL);
2043 if (e->watchdog == !!b)
2047 struct epoll_event ev = {};
2050 env = getenv("WATCHDOG_USEC");
2054 r = safe_atou64(env, &e->watchdog_period);
2057 if (e->watchdog_period <= 0)
2060 /* Issue first ping immediately */
2061 sd_notify(false, "WATCHDOG=1");
2062 e->watchdog_last = now(CLOCK_MONOTONIC);
2064 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2065 if (e->watchdog_fd < 0)
2068 r = arm_watchdog(e);
2072 ev.events = EPOLLIN;
2073 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2075 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2082 if (e->watchdog_fd >= 0) {
2083 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2084 close_nointr_nofail(e->watchdog_fd);
2085 e->watchdog_fd = -1;
2093 close_nointr_nofail(e->watchdog_fd);
2094 e->watchdog_fd = -1;