1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
27 #include "sd-daemon.h"
32 #include "time-util.h"
37 #define EPOLL_QUEUE_MAX 64
38 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
40 typedef enum EventSourceType {
51 struct sd_event_source {
56 sd_event_handler_t prepare;
58 EventSourceType type:4;
63 unsigned pending_index;
64 unsigned prepare_index;
65 unsigned pending_iteration;
66 unsigned prepare_iteration;
70 sd_event_io_handler_t callback;
77 sd_event_time_handler_t callback;
78 usec_t next, accuracy;
79 unsigned earliest_index;
80 unsigned latest_index;
83 sd_event_signal_handler_t callback;
84 struct signalfd_siginfo siginfo;
88 sd_event_child_handler_t callback;
94 sd_event_handler_t callback;
97 sd_event_handler_t callback;
115 /* For both clocks we maintain two priority queues each, one
116 * ordered for the earliest times the events may be
117 * dispatched, and one ordered by the latest times they must
118 * have been dispatched. The range between the top entries in
119 * the two prioqs is the time window we can freely schedule
121 Prioq *monotonic_earliest;
122 Prioq *monotonic_latest;
123 Prioq *realtime_earliest;
124 Prioq *realtime_latest;
126 usec_t realtime_next, monotonic_next;
130 sd_event_source **signal_sources;
132 Hashmap *child_sources;
133 unsigned n_enabled_child_sources;
140 dual_timestamp timestamp;
143 bool quit_requested:1;
144 bool need_process_child:1;
148 sd_event **default_event_ptr;
150 usec_t watchdog_last, watchdog_period;
/* Prioq comparator for e->pending: orders sources that have become
 * pending for dispatch. Enabled sources sort before disabled ones,
 * then ascending priority value (lower value = dispatched first),
 * then the iteration in which the source became pending (older
 * first), with a final stable tie-break.
 * NOTE(review): the return statements between comparisons are elided
 * in this excerpt. */
153 static int pending_prioq_compare(const void *a, const void *b) {
154 const sd_event_source *x = a, *y = b;
159 /* Enabled ones first */
160 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
162 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
165 /* Lower priority values first */
166 if (x->priority < y->priority)
168 if (x->priority > y->priority)
171 /* Older entries first */
172 if (x->pending_iteration < y->pending_iteration)
174 if (x->pending_iteration > y->pending_iteration)
177 /* Stability for the rest */
/* Prioq comparator for e->prepare: orders sources that carry a
 * prepare() callback. Sources already prepared in the current
 * iteration sort last, so event_prepare() can stop iterating as soon
 * as it peeks one; otherwise enabled-first, then ascending
 * priority. */
186 static int prepare_prioq_compare(const void *a, const void *b) {
187 const sd_event_source *x = a, *y = b;
192 /* Move most recently prepared ones last, so that we can stop
193 * preparing as soon as we hit one that has already been
194 * prepared in the current iteration */
195 if (x->prepare_iteration < y->prepare_iteration)
197 if (x->prepare_iteration > y->prepare_iteration)
200 /* Enabled ones first */
201 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206 /* Lower priority values first */
207 if (x->priority < y->priority)
209 if (x->priority > y->priority)
212 /* Stability for the rest */
/* Comparator for the per-clock "earliest" prioqs: keyed on
 * time.next, the earliest instant a timer source may fire. Disabled
 * and already-pending sources sink to the end so the queue head is
 * always the next timer that still needs arming. */
221 static int earliest_time_prioq_compare(const void *a, const void *b) {
222 const sd_event_source *x = a, *y = b;
224 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
225 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
227 /* Enabled ones first */
228 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
230 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
233 /* Move the pending ones to the end */
234 if (!x->pending && y->pending)
236 if (x->pending && !y->pending)
240 if (x->time.next < y->time.next)
242 if (x->time.next > y->time.next)
245 /* Stability for the rest */
/* Comparator for the per-clock "latest" prioqs: keyed on
 * time.next + time.accuracy, the latest instant by which the timer
 * must have been dispatched. Together with the "earliest" queue this
 * defines the coalescing window for each clock. */
254 static int latest_time_prioq_compare(const void *a, const void *b) {
255 const sd_event_source *x = a, *y = b;
257 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
258 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
260 /* Enabled ones first */
261 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
263 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
266 /* Move the pending ones to the end */
267 if (!x->pending && y->pending)
269 if (x->pending && !y->pending)
273 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
275 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
278 /* Stability for the rest */
/* Comparator for e->quit: orders SOURCE_QUIT handlers that run at
 * loop shutdown — enabled first, then ascending priority value. */
287 static int quit_prioq_compare(const void *a, const void *b) {
288 const sd_event_source *x = a, *y = b;
290 assert(x->type == SOURCE_QUIT);
291 assert(y->type == SOURCE_QUIT);
293 /* Enabled ones first */
294 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
296 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
299 /* Lower priority values first */
300 if (x->priority < y->priority)
302 if (x->priority > y->priority)
305 /* Stability for the rest */
/* Tears down an sd_event object: unhooks it from the default-event
 * pointer (if it was installed as the per-thread default), closes
 * every owned fd, and frees all priority queues and source
 * registries. Called when the last reference is dropped;
 * the sources themselves hold references on the event, so by the
 * time we get here they are presumably all gone — TODO confirm. */
314 static void event_free(sd_event *e) {
317 if (e->default_event_ptr)
318 *(e->default_event_ptr) = NULL;
320 if (e->epoll_fd >= 0)
321 close_nointr_nofail(e->epoll_fd);
323 if (e->signal_fd >= 0)
324 close_nointr_nofail(e->signal_fd);
326 if (e->realtime_fd >= 0)
327 close_nointr_nofail(e->realtime_fd);
329 if (e->monotonic_fd >= 0)
330 close_nointr_nofail(e->monotonic_fd);
332 if (e->watchdog_fd >= 0)
333 close_nointr_nofail(e->watchdog_fd);
335 prioq_free(e->pending);
336 prioq_free(e->prepare);
337 prioq_free(e->monotonic_earliest);
338 prioq_free(e->monotonic_latest);
339 prioq_free(e->realtime_earliest);
340 prioq_free(e->realtime_latest);
343 free(e->signal_sources);
345 hashmap_free(e->child_sources);
/* Allocates a new event loop object: all fds start at -1 (lazily
 * created), the next-wakeup caches at "infinity" ((usec_t) -1), and
 * the owning PID is recorded so event_pid_changed() can detect use
 * across fork(). Only the epoll fd is created eagerly. */
349 _public_ int sd_event_new(sd_event** ret) {
353 assert_return(ret, -EINVAL);
355 e = new0(sd_event, 1);
360 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
361 e->realtime_next = e->monotonic_next = (usec_t) -1;
362 e->original_pid = getpid();
364 assert_se(sigemptyset(&e->sigset) == 0);
366 e->pending = prioq_new(pending_prioq_compare);
372 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
373 if (e->epoll_fd < 0) {
386 _public_ sd_event* sd_event_ref(sd_event *e) {
387 assert_return(e, NULL);
389 assert(e->n_ref >= 1);
395 _public_ sd_event* sd_event_unref(sd_event *e) {
400 assert(e->n_ref >= 1);
409 static bool event_pid_changed(sd_event *e) {
412 /* We don't support people creating am event loop and keeping
413 * it around over a fork(). Let's complain. */
415 return e->original_pid != getpid();
/* Removes an IO source's fd from the epoll set, if it is currently
 * registered. Idempotent: a no-op when not registered. */
418 static int source_io_unregister(sd_event_source *s) {
422 assert(s->type == SOURCE_IO);
424 if (!s->io.registered)
427 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
431 s->io.registered = false;
/* (Re-)registers an IO source's fd with epoll, using EPOLL_CTL_MOD
 * when already registered and EPOLL_CTL_ADD otherwise.
 * SD_EVENT_ONESHOT maps to EPOLLONESHOT so the kernel disarms the fd
 * after one wakeup. Must not be called with SD_EVENT_OFF. */
435 static int source_io_register(
440 struct epoll_event ev = {};
444 assert(s->type == SOURCE_IO);
445 assert(enabled != SD_EVENT_OFF);
450 if (enabled == SD_EVENT_ONESHOT)
451 ev.events |= EPOLLONESHOT;
453 if (s->io.registered)
454 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
456 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
461 s->io.registered = true;
/* Destroys an event source: detaches it from every per-type registry
 * (epoll, the per-clock prioqs, the signal set/array, the child
 * hashmap, the quit prioq), then from the generic pending/prepare
 * prioqs, and finally drops the reference it held on the event
 * loop. */
466 static void source_free(sd_event_source *s) {
474 source_io_unregister(s);
478 case SOURCE_MONOTONIC:
479 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
480 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
483 case SOURCE_REALTIME:
484 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
485 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
489 if (s->signal.sig > 0) {
490 /* Keep SIGCHLD blocked while child sources still need it */
490 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
491 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
493 if (s->event->signal_sources)
494 s->event->signal_sources[s->signal.sig] = NULL;
500 if (s->child.pid > 0) {
501 if (s->enabled != SD_EVENT_OFF) {
502 assert(s->event->n_enabled_child_sources > 0);
503 s->event->n_enabled_child_sources--;
506 /* Only unblock SIGCHLD if no explicit signal source wants it */
506 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
507 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
509 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
519 prioq_remove(s->event->quit, s, &s->quit.prioq_index);
524 prioq_remove(s->event->pending, s, &s->pending_index);
527 prioq_remove(s->event->prepare, s, &s->prepare_index);
529 sd_event_unref(s->event);
/* Marks a source pending (b=true) or clears it (b=false), inserting
 * into / removing from e->pending accordingly. For timer sources the
 * per-clock prioqs are reshuffled, since their comparators sink
 * pending entries to the end. SOURCE_QUIT sources never go through
 * the pending queue. */
535 static int source_set_pending(sd_event_source *s, bool b) {
539 assert(s->type != SOURCE_QUIT);
547 s->pending_iteration = s->event->iteration;
549 r = prioq_put(s->event->pending, s, &s->pending_index);
555 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
557 if (s->type == SOURCE_REALTIME) {
558 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
559 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
560 } else if (s->type == SOURCE_MONOTONIC) {
561 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
562 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
568 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
573 s = new0(sd_event_source, 1);
578 s->event = sd_event_ref(e);
580 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
585 _public_ int sd_event_add_io(
589 sd_event_io_handler_t callback,
591 sd_event_source **ret) {
596 assert_return(e, -EINVAL);
597 assert_return(fd >= 0, -EINVAL);
598 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
599 assert_return(callback, -EINVAL);
600 assert_return(ret, -EINVAL);
601 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
602 assert_return(!event_pid_changed(e), -ECHILD);
604 s = source_new(e, SOURCE_IO);
609 s->io.events = events;
610 s->io.callback = callback;
611 s->userdata = userdata;
612 s->enabled = SD_EVENT_ON;
614 r = source_io_register(s, s->enabled, events);
/* Lazily creates the per-clock timerfd (*timer_fd) and adds it to the
 * epoll set with the source type as the epoll data tag. Also
 * initializes e->perturb, the boot-ID-derived wakeup offset used by
 * sleep_between() for cross-machine timer de-synchronization. */
624 static int event_setup_timer_fd(
626 EventSourceType type,
630 struct epoll_event ev = {};
637 if (_likely_(*timer_fd >= 0))
640 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
645 ev.data.ptr = INT_TO_PTR(type);
647 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
649 close_nointr_nofail(fd);
653 /* When we sleep for longer, we try to realign the wakeup to
654 the same time within each minute/second/250ms, so that
655 events all across the system can be coalesced into a single
656 CPU wakeup. However, let's take some system-specific
657 randomness for this value, so that in a network of systems
658 with synced clocks timer events are distributed a
659 bit. Here, we calculate a perturbation usec offset from the
662 if (sd_id128_get_boot(&bootid) >= 0)
663 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
/* Shared implementation of sd_event_add_monotonic()/_realtime():
 * ensures the two per-clock prioqs and the per-clock timerfd exist,
 * then allocates a timer source keyed on (usec, accuracy) and inserts
 * it into both the earliest and latest queues. accuracy == 0 means
 * DEFAULT_ACCURACY_USEC (250ms). New timers start as
 * SD_EVENT_ONESHOT. The earliest/latest prioq pointers passed in
 * must be the ones matching 'type'. */
669 static int event_add_time_internal(
671 EventSourceType type,
678 sd_event_time_handler_t callback,
680 sd_event_source **ret) {
685 assert_return(e, -EINVAL);
686 assert_return(callback, -EINVAL);
687 assert_return(ret, -EINVAL);
688 assert_return(usec != (uint64_t) -1, -EINVAL);
689 assert_return(accuracy != (uint64_t) -1, -EINVAL);
690 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
691 assert_return(!event_pid_changed(e), -ECHILD);
698 *earliest = prioq_new(earliest_time_prioq_compare);
704 *latest = prioq_new(latest_time_prioq_compare);
710 r = event_setup_timer_fd(e, type, timer_fd, id);
715 s = source_new(e, type);
720 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
721 s->time.callback = callback;
722 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
723 s->userdata = userdata;
724 s->enabled = SD_EVENT_ONESHOT;
726 r = prioq_put(*earliest, s, &s->time.earliest_index);
730 r = prioq_put(*latest, s, &s->time.latest_index);
/* Adds a CLOCK_MONOTONIC timer source; thin wrapper that binds the
 * monotonic fd and the monotonic earliest/latest prioqs. */
742 _public_ int sd_event_add_monotonic(sd_event *e,
745 sd_event_time_handler_t callback,
747 sd_event_source **ret) {
749 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
752 _public_ int sd_event_add_realtime(sd_event *e,
755 sd_event_time_handler_t callback,
757 sd_event_source **ret) {
759 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
762 static int event_update_signal_fd(sd_event *e) {
763 struct epoll_event ev = {};
769 add_to_epoll = e->signal_fd < 0;
771 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
781 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
783 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
785 close_nointr_nofail(e->signal_fd);
/* Adds a signal event source for 'sig'. At most one source per
 * signal number: signal_sources[] is a lazily-allocated _NSIG-sized
 * lookup array. The signal is added to the loop's sigset and the
 * signalfd refreshed — except for SIGCHLD when child sources already
 * keep it in the set. */
794 _public_ int sd_event_add_signal(
797 sd_event_signal_handler_t callback,
799 sd_event_source **ret) {
804 assert_return(e, -EINVAL);
805 assert_return(sig > 0, -EINVAL);
806 assert_return(sig < _NSIG, -EINVAL);
807 assert_return(callback, -EINVAL);
808 assert_return(ret, -EINVAL);
809 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
810 assert_return(!event_pid_changed(e), -ECHILD);
812 if (!e->signal_sources) {
813 e->signal_sources = new0(sd_event_source*, _NSIG);
814 if (!e->signal_sources)
816 } else if (e->signal_sources[sig])
819 s = source_new(e, SOURCE_SIGNAL);
824 s->signal.callback = callback;
825 s->userdata = userdata;
826 s->enabled = SD_EVENT_ON;
828 e->signal_sources[sig] = s;
829 assert_se(sigaddset(&e->sigset, sig) == 0);
831 /* SIGCHLD may already be routed to the signalfd for child sources */
831 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
832 r = event_update_signal_fd(e);
/* Adds a child-process watch for 'pid' (one per PID, tracked in the
 * child_sources hashmap), driven by SIGCHLD via the signalfd.
 * 'options' is the waitid() option set (WEXITED/WSTOPPED/WCONTINUED).
 * need_process_child is set so the next loop iteration polls the
 * child immediately, in case it already changed state before we
 * started listening for SIGCHLD. */
843 _public_ int sd_event_add_child(
847 sd_event_child_handler_t callback,
849 sd_event_source **ret) {
854 assert_return(e, -EINVAL);
855 assert_return(pid > 1, -EINVAL);
856 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
857 assert_return(options != 0, -EINVAL);
858 assert_return(callback, -EINVAL);
859 assert_return(ret, -EINVAL);
860 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
861 assert_return(!event_pid_changed(e), -ECHILD);
863 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
867 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
870 s = source_new(e, SOURCE_CHILD);
875 s->child.options = options;
876 s->child.callback = callback;
877 s->userdata = userdata;
878 s->enabled = SD_EVENT_ONESHOT;
880 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
886 e->n_enabled_child_sources ++;
888 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
890 /* Only refresh the signalfd if SIGCHLD wasn't already routed there */
890 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
891 r = event_update_signal_fd(e);
898 e->need_process_child = true;
904 _public_ int sd_event_add_defer(
906 sd_event_handler_t callback,
908 sd_event_source **ret) {
913 assert_return(e, -EINVAL);
914 assert_return(callback, -EINVAL);
915 assert_return(ret, -EINVAL);
916 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
917 assert_return(!event_pid_changed(e), -ECHILD);
919 s = source_new(e, SOURCE_DEFER);
923 s->defer.callback = callback;
924 s->userdata = userdata;
925 s->enabled = SD_EVENT_ONESHOT;
927 r = source_set_pending(s, true);
937 _public_ int sd_event_add_quit(
939 sd_event_handler_t callback,
941 sd_event_source **ret) {
946 assert_return(e, -EINVAL);
947 assert_return(callback, -EINVAL);
948 assert_return(ret, -EINVAL);
949 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
950 assert_return(!event_pid_changed(e), -ECHILD);
953 e->quit = prioq_new(quit_prioq_compare);
958 s = source_new(e, SOURCE_QUIT);
962 s->quit.callback = callback;
963 s->userdata = userdata;
964 s->quit.prioq_index = PRIOQ_IDX_NULL;
965 s->enabled = SD_EVENT_ONESHOT;
967 r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
977 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
978 assert_return(s, NULL);
980 assert(s->n_ref >= 1);
986 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
991 assert(s->n_ref >= 1);
1000 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1001 assert_return(s, NULL);
1006 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1007 assert_return(s, -EINVAL);
1008 assert_return(s->type != SOURCE_QUIT, -EDOM);
1009 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1010 assert_return(!event_pid_changed(s->event), -ECHILD);
1015 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1016 assert_return(s, -EINVAL);
1017 assert_return(s->type == SOURCE_IO, -EDOM);
1018 assert_return(!event_pid_changed(s->event), -ECHILD);
1023 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1024 assert_return(s, -EINVAL);
1025 assert_return(events, -EINVAL);
1026 assert_return(s->type == SOURCE_IO, -EDOM);
1027 assert_return(!event_pid_changed(s->event), -ECHILD);
1029 *events = s->io.events;
1033 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1036 assert_return(s, -EINVAL);
1037 assert_return(s->type == SOURCE_IO, -EDOM);
1038 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1039 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1040 assert_return(!event_pid_changed(s->event), -ECHILD);
1042 if (s->io.events == events)
1045 if (s->enabled != SD_EVENT_OFF) {
1046 r = source_io_register(s, s->enabled, events);
1051 s->io.events = events;
1052 source_set_pending(s, false);
1057 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1058 assert_return(s, -EINVAL);
1059 assert_return(revents, -EINVAL);
1060 assert_return(s->type == SOURCE_IO, -EDOM);
1061 assert_return(s->pending, -ENODATA);
1062 assert_return(!event_pid_changed(s->event), -ECHILD);
1064 *revents = s->io.revents;
1068 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1069 assert_return(s, -EINVAL);
1070 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1071 assert_return(!event_pid_changed(s->event), -ECHILD);
1073 return s->signal.sig;
1076 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1077 assert_return(s, -EINVAL);
1078 assert_return(!event_pid_changed(s->event), -ECHILD);
1083 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1084 assert_return(s, -EINVAL);
1085 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1086 assert_return(!event_pid_changed(s->event), -ECHILD);
1088 if (s->priority == priority)
1091 s->priority = priority;
1094 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1097 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1099 if (s->type == SOURCE_QUIT)
1100 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1105 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1106 assert_return(s, -EINVAL);
1107 assert_return(m, -EINVAL);
1108 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Changes a source's enable state (OFF/ON/ONESHOT). Each source type
 * needs type-specific (de)registration on the OFF<->non-OFF
 * transition: IO sources (un)register with epoll, timer sources
 * reshuffle their per-clock prioqs (the comparators key on
 * 'enabled'), signal sources update the sigset/signalfd, child
 * sources adjust n_enabled_child_sources and the SIGCHLD routing,
 * and quit sources reshuffle the quit prioq. Finally the generic
 * pending/prepare prioqs are reshuffled. */
1114 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1117 assert_return(s, -EINVAL);
1118 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1119 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1120 assert_return(!event_pid_changed(s->event), -ECHILD);
1122 if (s->enabled == m)
1125 if (m == SD_EVENT_OFF) {
1130 r = source_io_unregister(s);
1137 case SOURCE_MONOTONIC:
1139 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1140 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1143 case SOURCE_REALTIME:
1145 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1146 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1151 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1152 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1153 event_update_signal_fd(s->event);
1161 assert(s->event->n_enabled_child_sources > 0);
1162 s->event->n_enabled_child_sources--;
1164 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1165 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1166 event_update_signal_fd(s->event);
1173 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1185 r = source_io_register(s, m, s->io.events);
1192 case SOURCE_MONOTONIC:
1194 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1195 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1198 case SOURCE_REALTIME:
1200 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1201 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1207 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1208 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1209 event_update_signal_fd(s->event);
1216 if (s->enabled == SD_EVENT_OFF) {
1217 s->event->n_enabled_child_sources++;
1219 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1220 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1221 event_update_signal_fd(s->event);
1228 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1238 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1241 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1246 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1247 assert_return(s, -EINVAL);
1248 assert_return(usec, -EINVAL);
1249 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1250 assert_return(!event_pid_changed(s->event), -ECHILD);
1252 *usec = s->time.next;
/* Re-arms a timer source for a new absolute time 'usec'. Clears any
 * stale pending state, then reshuffles both per-clock prioqs since
 * the sort key (time.next) changed. */
1256 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1257 assert_return(s, -EINVAL);
1258 assert_return(usec != (uint64_t) -1, -EINVAL);
1259 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1260 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1261 assert_return(!event_pid_changed(s->event), -ECHILD);
1263 s->time.next = usec;
1265 source_set_pending(s, false);
1267 if (s->type == SOURCE_REALTIME) {
1268 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1269 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1271 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1272 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1278 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1279 assert_return(s, -EINVAL);
1280 assert_return(usec, -EINVAL);
1281 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1282 assert_return(!event_pid_changed(s->event), -ECHILD);
1284 *usec = s->time.accuracy;
1288 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1289 assert_return(s, -EINVAL);
1290 assert_return(usec != (uint64_t) -1, -EINVAL);
1291 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1292 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1293 assert_return(!event_pid_changed(s->event), -ECHILD);
1296 usec = DEFAULT_ACCURACY_USEC;
1298 s->time.accuracy = usec;
1300 source_set_pending(s, false);
1302 if (s->type == SOURCE_REALTIME)
1303 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1305 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1310 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1311 assert_return(s, -EINVAL);
1312 assert_return(pid, -EINVAL);
1313 assert_return(s->type == SOURCE_CHILD, -EDOM);
1314 assert_return(!event_pid_changed(s->event), -ECHILD);
1316 *pid = s->child.pid;
1320 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1323 assert_return(s, -EINVAL);
1324 assert_return(s->type != SOURCE_QUIT, -EDOM);
1325 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1326 assert_return(!event_pid_changed(s->event), -ECHILD);
1328 if (s->prepare == callback)
1331 if (callback && s->prepare) {
1332 s->prepare = callback;
1336 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1340 s->prepare = callback;
1343 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1347 prioq_remove(s->event->prepare, s, &s->prepare_index);
1352 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1353 assert_return(s, NULL);
/* Picks a wakeup time in [a, b]: the latest possible moment, aligned
 * (via the boot-ID perturbation offset) to a per-minute, per-second,
 * or per-250ms grid so wakeups across the system coalesce. */
1358 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1370 Find a good time to wake up again between times a and b. We
1371 have two goals here:
1373 a) We want to wake up as seldom as possible, hence prefer
1374 later times over earlier times.
1376 b) But if we have to wake up, then let's make sure to
1377 dispatch as much as possible on the entire system.
1379 We implement this by waking up everywhere at the same time
1380 within any given minute if we can, synchronised via the
1381 perturbation value determined from the boot ID. If we can't,
1382 then we try to find the same spot in every 1s and then 250ms
1383 step. Otherwise, we pick the last possible time to wake up.
1386 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1388 if (_unlikely_(c < USEC_PER_MINUTE))
1391 c -= USEC_PER_MINUTE;
1397 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1399 if (_unlikely_(c < USEC_PER_SEC))
1408 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1410 if (_unlikely_(c < USEC_PER_MSEC*250))
1413 c -= USEC_PER_MSEC*250;
/* Programs 'timer_fd' for one clock: peeks the head of the earliest
 * queue; if no enabled timer remains, the fd is disarmed (cached via
 * *next). Otherwise the wakeup is chosen by sleep_between() within
 * [earliest head, latest head + accuracy] and armed absolutely. */
1422 static int event_arm_timer(
1429 struct itimerspec its = {};
1430 sd_event_source *a, *b;
1437 a = prioq_peek(earliest);
1438 if (!a || a->enabled == SD_EVENT_OFF) {
1443 /* Already disarmed — nothing to do */
1443 if (*next == (usec_t) -1)
1447 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1451 *next = (usec_t) -1;
1456 b = prioq_peek(latest);
1457 assert_se(b && b->enabled != SD_EVENT_OFF);
1459 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1463 assert_se(timer_fd >= 0);
1466 /* We don't want to disarm here, just set some time long ago. */
1467 its.it_value.tv_sec = 0;
1468 its.it_value.tv_nsec = 1;
1470 timespec_store(&its.it_value, t);
1472 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Records the epoll revents on an IO source and marks it pending for
 * dispatch. */
1480 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1483 assert(s->type == SOURCE_IO);
1485 s->io.revents = events;
1487 return source_set_pending(s, true);
/* Drains a timerfd's expiration counter after an EPOLLIN wakeup and
 * invalidates the cached next-wakeup time (if 'next' is non-NULL) so
 * the timer gets re-armed. EAGAIN/EINTR are tolerated. */
1490 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1497 assert_return(events == EPOLLIN, -EIO);
1499 ss = read(fd, &x, sizeof(x));
1501 if (errno == EAGAIN || errno == EINTR)
1507 if (ss != sizeof(x))
1511 *next = (usec_t) -1;
/* Marks pending every enabled timer source at the head of the
 * earliest queue whose deadline has been reached, reshuffling both
 * per-clock prioqs as each one is taken (pending entries sink to the
 * end, exposing the next candidate). */
1516 static int process_timer(
1528 s = prioq_peek(earliest);
1531 s->enabled == SD_EVENT_OFF ||
1535 r = source_set_pending(s, true);
1539 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1540 prioq_reshuffle(latest, s, &s->time.latest_index);
/* Polls every registered child source with a non-blocking waitid()
 * and marks those with a state change pending. Uses P_PID+WNOHANG
 * per child (not P_ALL) so only watched PIDs are consumed, and
 * WNOWAIT for exit events so the zombie survives until the callback
 * has seen it (the reap happens in source_dispatch()). */
1546 static int process_child(sd_event *e) {
1553 e->need_process_child = false;
1556 So, this is ugly. We iteratively invoke waitid() with P_PID
1557 + WNOHANG for each PID we wait for, instead of using
1558 P_ALL. This is because we only want to get child
1559 information of very specific child processes, and not all
1560 of them. We might not have processed the SIGCHLD even of a
1561 previous invocation and we don't want to maintain a
1562 unbounded *per-child* event queue, hence we really don't
1563 want anything flushed out of the kernel's queue that we
1564 don't care about. Since this is O(n) this means that if you
1565 have a lot of processes you probably want to handle SIGCHLD
1568 We do not reap the children here (by using WNOWAIT), this
1569 is only done after the event source is dispatched so that
1570 the callback still sees the process as a zombie.
1573 HASHMAP_FOREACH(s, e->child_sources, i) {
1574 assert(s->type == SOURCE_CHILD);
1579 if (s->enabled == SD_EVENT_OFF)
1582 zero(s->child.siginfo);
1583 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
1584 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
1588 /* si_pid != 0 means a state change was reported */
1588 if (s->child.siginfo.si_pid != 0) {
1590 s->child.siginfo.si_code == CLD_EXITED ||
1591 s->child.siginfo.si_code == CLD_KILLED ||
1592 s->child.siginfo.si_code == CLD_DUMPED;
1594 if (!zombie && (s->child.options & WEXITED)) {
1595 /* If the child isn't dead then let's
1596 * immediately remove the state change
1597 * from the queue, since there's no
1598 * benefit in leaving it queued */
1600 assert(s->child.options & (WSTOPPED|WCONTINUED));
1601 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
1604 r = source_set_pending(s, true);
/* Drains the signalfd after an EPOLLIN wakeup: for each siginfo
 * read, SIGCHLD triggers process_child(), and any signal with a
 * registered source gets its siginfo stashed and the source marked
 * pending. */
1613 static int process_signal(sd_event *e, uint32_t events) {
1614 bool read_one = false;
1618 assert(e->signal_sources);
1620 assert_return(events == EPOLLIN, -EIO);
1623 struct signalfd_siginfo si;
1627 ss = read(e->signal_fd, &si, sizeof(si));
1629 if (errno == EAGAIN || errno == EINTR)
1635 if (ss != sizeof(si))
1640 s = e->signal_sources[si.ssi_signo];
1641 if (si.ssi_signo == SIGCHLD) {
1642 r = process_child(e);
1651 s->signal.siginfo = si;
1652 r = source_set_pending(s, true);
/* Invokes a pending source's callback. Clears the pending flag
 * first (except DEFER/QUIT, which have none), disables ONESHOT
 * sources before the call, and holds a temporary reference across
 * the callback so it may safely unref itself. Child sources that
 * died are reaped for good here, after the callback has observed the
 * zombie. */
1660 static int source_dispatch(sd_event_source *s) {
1664 assert(s->pending || s->type == SOURCE_QUIT);
1666 if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1667 r = source_set_pending(s, false);
1672 if (s->enabled == SD_EVENT_ONESHOT) {
1673 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1678 sd_event_source_ref(s);
1683 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1686 case SOURCE_MONOTONIC:
1687 r = s->time.callback(s, s->time.next, s->userdata);
1690 case SOURCE_REALTIME:
1691 r = s->time.callback(s, s->time.next, s->userdata);
1695 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1698 case SOURCE_CHILD: {
1701 zombie = s->child.siginfo.si_code == CLD_EXITED ||
1702 s->child.siginfo.si_code == CLD_KILLED ||
1703 s->child.siginfo.si_code == CLD_DUMPED;
1705 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1707 /* Now, reap the PID for good. */
1709 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
1715 r = s->defer.callback(s, s->userdata);
1719 r = s->quit.callback(s, s->userdata);
1723 sd_event_source_unref(s);
1728 static int event_prepare(sd_event *e) {
1736 s = prioq_peek(e->prepare);
1737 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
1740 s->prepare_iteration = e->iteration;
1741 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1746 r = s->prepare(s, s->userdata);
/* Runs the highest-priority enabled quit handler, transitioning the
 * loop to QUITTING for the duration of the dispatch; with no handler
 * left, the loop goes straight to FINISHED. */
1755 static int dispatch_quit(sd_event *e) {
1761 p = prioq_peek(e->quit);
1762 if (!p || p->enabled == SD_EVENT_OFF) {
1763 e->state = SD_EVENT_FINISHED;
1769 e->state = SD_EVENT_QUITTING;
1771 r = source_dispatch(p);
1773 e->state = SD_EVENT_PASSIVE;
1779 static sd_event_source* event_next_pending(sd_event *e) {
1784 p = prioq_peek(e->pending);
1788 if (p->enabled == SD_EVENT_OFF)
1794 static int arm_watchdog(sd_event *e) {
1795 struct itimerspec its = {};
1800 assert(e->watchdog_fd >= 0);
1802 t = sleep_between(e,
1803 e->watchdog_last + (e->watchdog_period / 2),
1804 e->watchdog_last + (e->watchdog_period * 3 / 4));
1806 timespec_store(&its.it_value, t);
1808 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
1815 static int process_watchdog(sd_event *e) {
1821 /* Don't notify watchdog too often */
1822 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
1825 sd_notify(false, "WATCHDOG=1");
1826 e->watchdog_last = e->timestamp.monotonic;
1828 return arm_watchdog(e);
/* Runs one iteration of the event loop: prepare callbacks, arm both
 * timerfds, epoll_wait (skipped/zero-timeout when work is already
 * pending — TODO confirm, the branch body is elided here), then
 * process watchdog/timers/children and dispatch the next pending
 * source. 'timeout' is in usec; (uint64_t) -1 blocks indefinitely. */
1831 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1832 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1836 assert_return(e, -EINVAL);
1837 assert_return(!event_pid_changed(e), -ECHILD);
1838 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1839 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1841 if (e->quit_requested)
1842 return dispatch_quit(e);
1846 e->state = SD_EVENT_RUNNING;
1848 r = event_prepare(e);
1852 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1856 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
1860 if (event_next_pending(e) || e->need_process_child)
1863 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1864 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
1866 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
1870 dual_timestamp_get(&e->timestamp);
1872 /* Route each wakeup by its epoll data tag: the special timer/
1873 * signal/watchdog fds are tagged with their SOURCE_* constant,
1874 * anything else is an IO source pointer. */
1872 for (i = 0; i < m; i++) {
1874 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1875 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1876 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1877 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1878 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1879 r = process_signal(e, ev_queue[i].events);
1880 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
1881 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
1883 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
1889 r = process_watchdog(e);
1893 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
1897 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
1901 if (e->need_process_child) {
1902 r = process_child(e);
1907 p = event_next_pending(e);
1913 r = source_dispatch(p);
1916 e->state = SD_EVENT_PASSIVE;
1922 _public_ int sd_event_loop(sd_event *e) {

        /* Run sd_event_run() iterations with an infinite timeout until the
         * loop reaches SD_EVENT_FINISHED (i.e. a quit was requested and no
         * enabled quit handler remains). */

1925 assert_return(e, -EINVAL);
1926 assert_return(!event_pid_changed(e), -ECHILD);
1927 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

1931 while (e->state != SD_EVENT_FINISHED) {
1932 r = sd_event_run(e, (uint64_t) -1);
1944 _public_ int sd_event_get_state(sd_event *e) {

        /* Return the loop's current SD_EVENT_* state. */

1945 assert_return(e, -EINVAL);
1946 assert_return(!event_pid_changed(e), -ECHILD);
1951 _public_ int sd_event_get_quit(sd_event *e) {

        /* Return whether a quit has been requested on this loop
         * (boolean). */

1952 assert_return(e, -EINVAL);
1953 assert_return(!event_pid_changed(e), -ECHILD);

1955 return e->quit_requested;
1958 _public_ int sd_event_request_quit(sd_event *e) {

        /* Ask the loop to quit: the next sd_event_run() call will dispatch
         * the quit handlers instead of normal sources (see the
         * quit_requested check there). */

1959 assert_return(e, -EINVAL);
1960 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1961 assert_return(!event_pid_changed(e), -ECHILD);

1963 e->quit_requested = true;
1967 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {

        /* Return the CLOCK_REALTIME timestamp cached at the start of the
         * current/most recent loop iteration. Fails with -ENODATA before
         * the first iteration has taken a timestamp. */

1968 assert_return(e, -EINVAL);
1969 assert_return(usec, -EINVAL);
1970 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1971 assert_return(!event_pid_changed(e), -ECHILD);

1973 *usec = e->timestamp.realtime;
1977 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {

        /* Return the CLOCK_MONOTONIC timestamp cached at the start of the
         * current/most recent loop iteration. Fails with -ENODATA before
         * the first iteration has taken a timestamp. */

1978 assert_return(e, -EINVAL);
1979 assert_return(usec, -EINVAL);
1980 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1981 assert_return(!event_pid_changed(e), -ECHILD);

1983 *usec = e->timestamp.monotonic;
1987 _public_ int sd_event_default(sd_event **ret) {

        /* Return (a new reference to) the calling thread's default event
         * loop, creating it on first use. */

1989 static __thread sd_event *default_event = NULL;

        /* Presumably guarded by a NULL-'ret' check in the elided lines
         * above: with a NULL argument only report whether a default loop
         * exists — TODO(review): confirm. */
1994 return !!default_event;

        /* An existing default loop is handed out with an extra
         * reference. */
1996 if (default_event) {
1997 *ret = sd_event_ref(default_event);

1998 r = sd_event_new(&e);

        /* NOTE(review): the fused line number above should read 2001 in
         * the original; kept as extracted. Let the loop clear the
         * thread-local pointer when it is eventually freed. */
2005 e->default_event_ptr = &default_event;
2013 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {

        /* Query the thread ID associated with this event loop — the
         * actual lookup is in lines elided from this excerpt; presumably
         * it returns a stored tid or -ENXIO when none is set —
         * TODO(review): confirm. */

2014 assert_return(e, -EINVAL);
2015 assert_return(tid, -EINVAL);
2016 assert_return(!event_pid_changed(e), -ECHILD);
2026 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2029 assert_return(e, -EINVAL);
2031 if (e->watchdog == !!b)
2035 struct epoll_event ev = {};
2038 env = getenv("WATCHDOG_USEC");
2042 r = safe_atou64(env, &e->watchdog_period);
2045 if (e->watchdog_period <= 0)
2048 /* Issue first ping immediately */
2049 sd_notify(false, "WATCHDOG=1");
2050 e->watchdog_last = now(CLOCK_MONOTONIC);
2052 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2053 if (e->watchdog_fd < 0)
2056 r = arm_watchdog(e);
2060 ev.events = EPOLLIN;
2061 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2063 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2070 if (e->watchdog_fd >= 0) {
2071 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2072 close_nointr_nofail(e->watchdog_fd);
2073 e->watchdog_fd = -1;
2081 close_nointr_nofail(e->watchdog_fd);
2082 e->watchdog_fd = -1;