1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
31 #include "time-util.h"
36 #define EPOLL_QUEUE_MAX 64
37 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
39 typedef enum EventSourceType {
49 struct sd_event_source {
54 sd_event_handler_t prepare;
56 EventSourceType type:4;
61 unsigned pending_index;
62 unsigned prepare_index;
63 unsigned pending_iteration;
64 unsigned prepare_iteration;
68 sd_event_io_handler_t callback;
75 sd_event_time_handler_t callback;
76 usec_t next, accuracy;
77 unsigned earliest_index;
78 unsigned latest_index;
81 sd_event_signal_handler_t callback;
82 struct signalfd_siginfo siginfo;
86 sd_event_child_handler_t callback;
92 sd_event_handler_t callback;
95 sd_event_handler_t callback;
112 /* For both clocks we maintain two priority queues each, one
113 * ordered for the earliest times the events may be
114 * dispatched, and one ordered by the latest times they must
115 * have been dispatched. The range between the top entries in
116 * the two prioqs is the time window we can freely schedule
118 Prioq *monotonic_earliest;
119 Prioq *monotonic_latest;
120 Prioq *realtime_earliest;
121 Prioq *realtime_latest;
123 usec_t realtime_next, monotonic_next;
127 sd_event_source **signal_sources;
129 Hashmap *child_sources;
130 unsigned n_enabled_child_sources;
137 dual_timestamp timestamp;
140 bool quit_requested:1;
141 bool need_process_child:1;
144 sd_event **default_event_ptr;
/* Comparator for the "pending" prioq: enabled sources sort before
 * disabled ones, then by ascending priority value, then by the
 * iteration in which they became pending (older first). */
147 static int pending_prioq_compare(const void *a, const void *b) {
148 const sd_event_source *x = a, *y = b;
153 /* Enabled ones first */
154 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
156 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
159 /* Lower priority values first */
160 if (x->priority < y->priority)
162 if (x->priority > y->priority)
165 /* Older entries first */
166 if (x->pending_iteration < y->pending_iteration)
168 if (x->pending_iteration > y->pending_iteration)
171 /* Stability for the rest */
/* Comparator for the "prepare" prioq. Sorts by prepare_iteration
 * first so that already-prepared sources sink to the end and the
 * prepare loop can stop at the first one prepared this iteration. */
180 static int prepare_prioq_compare(const void *a, const void *b) {
181 const sd_event_source *x = a, *y = b;
186 /* Move most recently prepared ones last, so that we can stop
187 * preparing as soon as we hit one that has already been
188 * prepared in the current iteration */
189 if (x->prepare_iteration < y->prepare_iteration)
191 if (x->prepare_iteration > y->prepare_iteration)
194 /* Enabled ones first */
195 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
197 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
200 /* Lower priority values first */
201 if (x->priority < y->priority)
203 if (x->priority > y->priority)
206 /* Stability for the rest */
/* Comparator for the per-clock "earliest" prioq: orders timer
 * sources by the earliest time (time.next) they may fire. Disabled
 * and already-pending sources are pushed to the end so the head is
 * always the next source that actually needs arming. */
215 static int earliest_time_prioq_compare(const void *a, const void *b) {
216 const sd_event_source *x = a, *y = b;
218 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
219 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
221 /* Enabled ones first */
222 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
224 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 /* Move the pending ones to the end */
228 if (!x->pending && y->pending)
230 if (x->pending && !y->pending)
234 if (x->time.next < y->time.next)
236 if (x->time.next > y->time.next)
239 /* Stability for the rest */
/* Comparator for the per-clock "latest" prioq: orders timer sources
 * by the latest time they must have been dispatched, i.e.
 * time.next + time.accuracy. Note the assert requires both entries
 * to be of the same clock type, unlike earliest_time_prioq_compare. */
248 static int latest_time_prioq_compare(const void *a, const void *b) {
249 const sd_event_source *x = a, *y = b;
251 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
252 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
254 /* Enabled ones first */
255 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
257 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260 /* Move the pending ones to the end */
261 if (!x->pending && y->pending)
263 if (x->pending && !y->pending)
/* NOTE(review): next + accuracy can wrap for usec values near
 * (usec_t) -1 — confirm callers bound these sums. */
267 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
269 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
272 /* Stability for the rest */
/* Comparator for the "quit" prioq: enabled quit handlers first,
 * then by ascending priority value. */
281 static int quit_prioq_compare(const void *a, const void *b) {
282 const sd_event_source *x = a, *y = b;
284 assert(x->type == SOURCE_QUIT);
285 assert(y->type == SOURCE_QUIT);
287 /* Enabled ones first */
288 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
290 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293 /* Lower priority values first */
294 if (x->priority < y->priority)
296 if (x->priority > y->priority)
299 /* Stability for the rest */
/* Tears down an sd_event: unhooks the thread-default pointer, closes
 * all owned fds, and frees all priority queues and lookup tables. */
308 static void event_free(sd_event *e) {
/* Clear the thread's default-event pointer so a later
 * sd_event_default() allocates a fresh loop. */
311 if (e->default_event_ptr)
312 *(e->default_event_ptr) = NULL;
314 if (e->epoll_fd >= 0)
315 close_nointr_nofail(e->epoll_fd);
317 if (e->signal_fd >= 0)
318 close_nointr_nofail(e->signal_fd);
320 if (e->realtime_fd >= 0)
321 close_nointr_nofail(e->realtime_fd);
323 if (e->monotonic_fd >= 0)
324 close_nointr_nofail(e->monotonic_fd);
326 prioq_free(e->pending);
327 prioq_free(e->prepare);
328 prioq_free(e->monotonic_earliest);
329 prioq_free(e->monotonic_latest);
330 prioq_free(e->realtime_earliest);
331 prioq_free(e->realtime_latest);
334 free(e->signal_sources);
336 hashmap_free(e->child_sources);
/* Allocates a new event loop object. All fds start at -1 and both
 * timer deadlines at "infinity" ((usec_t) -1); the original PID is
 * recorded so use across fork() can be detected later. */
340 _public_ int sd_event_new(sd_event** ret) {
344 assert_return(ret, -EINVAL);
346 e = new0(sd_event, 1);
351 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
352 e->realtime_next = e->monotonic_next = (usec_t) -1;
353 e->original_pid = getpid();
355 assert_se(sigemptyset(&e->sigset) == 0);
357 e->pending = prioq_new(pending_prioq_compare);
363 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
364 if (e->epoll_fd < 0) {
/* Takes an additional reference on the event loop. Returns NULL if
 * e is NULL (assert_return). */
377 _public_ sd_event* sd_event_ref(sd_event *e) {
378 assert_return(e, NULL);
380 assert(e->n_ref >= 1);
/* Drops a reference on the event loop; frees it when the last
 * reference goes away. */
386 _public_ sd_event* sd_event_unref(sd_event *e) {
387 assert_return(e, NULL);
389 assert(e->n_ref >= 1);
/* Returns true when the loop is used from a different process than
 * the one that created it (i.e. after a fork()). */
398 static bool event_pid_changed(sd_event *e) {
401 /* We don't support people creating an event loop and keeping
402 * it around over a fork(). Let's complain. */
404 return e->original_pid != getpid();
/* Removes an I/O source's fd from the epoll set, if it was
 * registered; clears the registered flag on success. */
407 static int source_io_unregister(sd_event_source *s) {
411 assert(s->type == SOURCE_IO);
413 if (!s->io.registered)
416 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
420 s->io.registered = false;
/* Adds or updates an I/O source's fd in the epoll set. ONESHOT
 * sources get EPOLLONESHOT so the kernel disarms them after one
 * event; EPOLL_CTL_MOD is used when the fd is already registered. */
424 static int source_io_register(
429 struct epoll_event ev = {};
433 assert(s->type == SOURCE_IO);
434 assert(enabled != SD_EVENT_OFF);
439 if (enabled == SD_EVENT_ONESHOT)
440 ev.events |= EPOLLONESHOT;
442 if (s->io.registered)
443 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
445 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
450 s->io.registered = true;
/* Destroys an event source: detaches it from all type-specific
 * data structures (epoll, timer prioqs, signal mask/array, child
 * hashmap, quit prioq), then from the generic pending/prepare
 * prioqs, and finally drops its reference on the loop. */
455 static void source_free(sd_event_source *s) {
463 source_io_unregister(s);
467 case SOURCE_MONOTONIC:
468 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
469 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
472 case SOURCE_REALTIME:
473 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
474 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
478 if (s->signal.sig > 0) {
/* Keep SIGCHLD in the mask while enabled child sources still
 * depend on it. */
479 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
480 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
482 if (s->event->signal_sources)
483 s->event->signal_sources[s->signal.sig] = NULL;
489 if (s->child.pid > 0) {
490 if (s->enabled != SD_EVENT_OFF) {
491 assert(s->event->n_enabled_child_sources > 0);
492 s->event->n_enabled_child_sources--;
/* Only drop SIGCHLD if no explicit SIGCHLD signal source remains. */
495 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
496 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
498 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
508 prioq_remove(s->event->quit, s, &s->quit.prioq_index);
513 prioq_remove(s->event->pending, s, &s->pending_index);
516 prioq_remove(s->event->prepare, s, &s->prepare_index);
518 sd_event_unref(s->event);
/* Marks a source as pending (b=true) or not (b=false), maintaining
 * the pending prioq. Timer sources are additionally reshuffled in
 * their earliest/latest prioqs, since the comparators there sort on
 * the pending flag. */
524 static int source_set_pending(sd_event_source *s, bool b) {
528 assert(s->type != SOURCE_QUIT);
/* Record when the source became pending, for FIFO ordering among
 * equal-priority entries. */
536 s->pending_iteration = s->event->iteration;
538 r = prioq_put(s->event->pending, s, &s->pending_index);
544 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
546 if (s->type == SOURCE_REALTIME) {
547 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
548 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
549 } else if (s->type == SOURCE_MONOTONIC) {
550 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
551 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Allocates a new event source of the given type; the source holds
 * a reference on the loop, and its prioq indices start unset. */
557 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
562 s = new0(sd_event_source, 1);
567 s->event = sd_event_ref(e);
569 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
/* Adds an I/O event source watching fd for the given epoll events.
 * The new source starts enabled (SD_EVENT_ON) and is registered
 * with epoll immediately. */
574 _public_ int sd_event_add_io(
578 sd_event_io_handler_t callback,
580 sd_event_source **ret) {
585 assert_return(e, -EINVAL);
586 assert_return(fd >= 0, -EINVAL);
587 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
588 assert_return(callback, -EINVAL);
589 assert_return(ret, -EINVAL);
590 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
591 assert_return(!event_pid_changed(e), -ECHILD);
593 s = source_new(e, SOURCE_IO);
598 s->io.events = events;
599 s->io.callback = callback;
600 s->userdata = userdata;
601 s->enabled = SD_EVENT_ON;
603 r = source_io_register(s, s->enabled, events);
/* Lazily creates the timerfd for one clock and adds it to the epoll
 * set, tagging the epoll entry with the source type so the wakeup
 * can be demultiplexed. Also initializes the boot-ID-derived
 * perturbation offset used to spread coalesced wakeups across
 * machines. */
613 static int event_setup_timer_fd(
615 EventSourceType type,
619 struct epoll_event ev = {};
/* Already created on an earlier call — nothing to do. */
626 if (_likely_(*timer_fd >= 0))
629 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
634 ev.data.ptr = INT_TO_PTR(type);
636 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
638 close_nointr_nofail(fd);
642 /* When we sleep for longer, we try to realign the wakeup to
643 the same time within each minute/second/250ms, so that
644 events all across the system can be coalesced into a single
645 CPU wakeup. However, let's take some system-specific
646 randomness for this value, so that in a network of systems
647 with synced clocks timer events are distributed a
648 bit. Here, we calculate a perturbation usec offset from the
651 if (sd_id128_get_boot(&bootid) >= 0)
652 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
/* Shared implementation for sd_event_add_monotonic/realtime: ensures
 * the per-clock earliest/latest prioqs and timerfd exist, then
 * creates a ONESHOT timer source and inserts it into both prioqs.
 * An accuracy of 0 selects DEFAULT_ACCURACY_USEC. */
658 static int event_add_time_internal(
660 EventSourceType type,
667 sd_event_time_handler_t callback,
669 sd_event_source **ret) {
674 assert_return(e, -EINVAL);
675 assert_return(callback, -EINVAL);
676 assert_return(ret, -EINVAL);
677 assert_return(usec != (uint64_t) -1, -EINVAL);
678 assert_return(accuracy != (uint64_t) -1, -EINVAL);
679 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
680 assert_return(!event_pid_changed(e), -ECHILD);
687 *earliest = prioq_new(earliest_time_prioq_compare);
693 *latest = prioq_new(latest_time_prioq_compare);
699 r = event_setup_timer_fd(e, type, timer_fd, id);
704 s = source_new(e, type);
709 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
710 s->time.callback = callback;
711 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
712 s->userdata = userdata;
713 s->enabled = SD_EVENT_ONESHOT;
715 r = prioq_put(*earliest, s, &s->time.earliest_index);
719 r = prioq_put(*latest, s, &s->time.latest_index);
/* Adds a CLOCK_MONOTONIC timer source firing at usec (absolute),
 * with the given accuracy window; thin wrapper over
 * event_add_time_internal() using the monotonic prioqs/fd. */
731 _public_ int sd_event_add_monotonic(sd_event *e,
734 sd_event_time_handler_t callback,
736 sd_event_source **ret) {
738 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
741 _public_ int sd_event_add_realtime(sd_event *e,
744 sd_event_time_handler_t callback,
746 sd_event_source **ret) {
748 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
/* (Re)creates the signalfd for the loop's current signal mask and,
 * on first creation, adds it to the epoll set tagged SOURCE_SIGNAL. */
751 static int event_update_signal_fd(sd_event *e) {
752 struct epoll_event ev = {};
/* signalfd(-1, ...) creates; signalfd(fd, ...) just updates the mask. */
758 add_to_epoll = e->signal_fd < 0;
760 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
770 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
772 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
774 close_nointr_nofail(e->signal_fd);
/* Adds a signal event source for sig. Only one source per signal is
 * allowed (tracked in the signal_sources array); the signal is added
 * to the loop's mask and the signalfd refreshed — except for SIGCHLD
 * when child sources already keep it in the mask. */
783 _public_ int sd_event_add_signal(
786 sd_event_signal_handler_t callback,
788 sd_event_source **ret) {
793 assert_return(e, -EINVAL);
794 assert_return(sig > 0, -EINVAL);
795 assert_return(sig < _NSIG, -EINVAL);
796 assert_return(callback, -EINVAL);
797 assert_return(ret, -EINVAL);
798 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
799 assert_return(!event_pid_changed(e), -ECHILD);
801 if (!e->signal_sources) {
802 e->signal_sources = new0(sd_event_source*, _NSIG);
803 if (!e->signal_sources)
805 } else if (e->signal_sources[sig])
808 s = source_new(e, SOURCE_SIGNAL);
813 s->signal.callback = callback;
814 s->userdata = userdata;
815 s->enabled = SD_EVENT_ON;
817 e->signal_sources[sig] = s;
818 assert_se(sigaddset(&e->sigset, sig) == 0);
/* SIGCHLD may already be in the signalfd mask via child sources. */
820 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
821 r = event_update_signal_fd(e);
/* Adds a child-process (SIGCHLD/waitid) source for pid with the
 * given waitid options (WEXITED/WSTOPPED/WCONTINUED). One source per
 * pid, stored in the child_sources hashmap. Ensures SIGCHLD is in
 * the signalfd mask and forces an immediate process_child() pass in
 * case the child already changed state. */
832 _public_ int sd_event_add_child(
836 sd_event_child_handler_t callback,
838 sd_event_source **ret) {
843 assert_return(e, -EINVAL);
844 assert_return(pid > 1, -EINVAL);
845 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
846 assert_return(options != 0, -EINVAL);
847 assert_return(callback, -EINVAL);
848 assert_return(ret, -EINVAL);
849 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
850 assert_return(!event_pid_changed(e), -ECHILD);
852 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
856 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
859 s = source_new(e, SOURCE_CHILD);
864 s->child.options = options;
865 s->child.callback = callback;
866 s->userdata = userdata;
867 s->enabled = SD_EVENT_ONESHOT;
869 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
875 e->n_enabled_child_sources ++;
877 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
/* Only refresh the signalfd if no explicit SIGCHLD source exists
 * (which would have added SIGCHLD to the mask already). */
879 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
880 r = event_update_signal_fd(e);
/* The child might already have exited — check on next iteration. */
887 e->need_process_child = true;
/* Adds a defer source: a ONESHOT callback that is immediately marked
 * pending and thus dispatched on the next loop iteration. */
893 _public_ int sd_event_add_defer(
895 sd_event_handler_t callback,
897 sd_event_source **ret) {
902 assert_return(e, -EINVAL);
903 assert_return(callback, -EINVAL);
904 assert_return(ret, -EINVAL);
905 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
906 assert_return(!event_pid_changed(e), -ECHILD);
908 s = source_new(e, SOURCE_DEFER);
912 s->defer.callback = callback;
913 s->userdata = userdata;
914 s->enabled = SD_EVENT_ONESHOT;
916 r = source_set_pending(s, true);
/* Adds a quit handler: dispatched (in priority order) when the loop
 * is shutting down rather than during normal iterations. Lazily
 * allocates the quit prioq. */
926 _public_ int sd_event_add_quit(
928 sd_event_handler_t callback,
930 sd_event_source **ret) {
935 assert_return(e, -EINVAL);
936 assert_return(callback, -EINVAL);
937 assert_return(ret, -EINVAL);
938 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
939 assert_return(!event_pid_changed(e), -ECHILD);
942 e->quit = prioq_new(quit_prioq_compare);
947 s = source_new(e, SOURCE_QUIT);
951 s->quit.callback = callback;
952 s->userdata = userdata;
953 s->quit.prioq_index = PRIOQ_IDX_NULL;
954 s->enabled = SD_EVENT_ONESHOT;
956 r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
/* Takes an additional reference on an event source. */
966 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
967 assert_return(s, NULL);
969 assert(s->n_ref >= 1);
/* Drops a reference on an event source; frees it on the last one. */
975 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
976 assert_return(s, NULL);
978 assert(s->n_ref >= 1);
/* Returns the event loop this source is attached to. */
987 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
988 assert_return(s, NULL);
/* Reports whether the source is currently pending dispatch. Not
 * defined for quit sources (-EDOM). */
993 _public_ int sd_event_source_get_pending(sd_event_source *s) {
994 assert_return(s, -EINVAL);
995 assert_return(s->type != SOURCE_QUIT, -EDOM);
996 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
997 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Returns the fd watched by an I/O source (-EDOM for other types). */
1002 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1003 assert_return(s, -EINVAL);
1004 assert_return(s->type == SOURCE_IO, -EDOM);
1005 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Stores the epoll event mask this I/O source is subscribed to. */
1010 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1011 assert_return(s, -EINVAL);
1012 assert_return(events, -EINVAL);
1013 assert_return(s->type == SOURCE_IO, -EDOM);
1014 assert_return(!event_pid_changed(s->event), -ECHILD);
1016 *events = s->io.events;
/* Changes the epoll event mask of an I/O source. Re-registers with
 * epoll when the source is enabled, and clears any stale pending
 * state recorded under the old mask. */
1020 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1023 assert_return(s, -EINVAL);
1024 assert_return(s->type == SOURCE_IO, -EDOM);
1025 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1026 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1027 assert_return(!event_pid_changed(s->event), -ECHILD);
/* No-op if the mask is unchanged. */
1029 if (s->io.events == events)
1032 if (s->enabled != SD_EVENT_OFF) {
1033 r = source_io_register(s, s->enabled, events);
1038 s->io.events = events;
1039 source_set_pending(s, false);
/* Stores the epoll events that triggered this I/O source; only valid
 * while the source is pending (-ENODATA otherwise). */
1044 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1045 assert_return(s, -EINVAL);
1046 assert_return(revents, -EINVAL);
1047 assert_return(s->type == SOURCE_IO, -EDOM);
1048 assert_return(s->pending, -ENODATA);
1049 assert_return(!event_pid_changed(s->event), -ECHILD);
1051 *revents = s->io.revents;
/* Returns the signal number a signal source watches. */
1055 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1056 assert_return(s, -EINVAL);
1057 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1058 assert_return(!event_pid_changed(s->event), -ECHILD);
1060 return s->signal.sig;
/* Stores the source's dispatch priority (lower value = earlier). */
1063 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1064 assert_return(s, -EINVAL);
1065 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Changes a source's dispatch priority and reshuffles every prioq
 * whose ordering depends on it (pending, prepare, quit). */
1070 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1071 assert_return(s, -EINVAL);
1072 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1073 assert_return(!event_pid_changed(s->event), -ECHILD);
1075 if (s->priority == priority)
1078 s->priority = priority;
1081 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1084 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1086 if (s->type == SOURCE_QUIT)
1087 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
/* Stores the source's enable state (SD_EVENT_OFF/ON/ONESHOT). */
1092 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1093 assert_return(s, -EINVAL);
1094 assert_return(m, -EINVAL);
1095 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Switches a source between OFF/ON/ONESHOT. Per type this means:
 * (un)registering I/O fds with epoll, reshuffling timer prioqs,
 * adding/removing signals from the loop's signalfd mask (with the
 * usual SIGCHLD sharing between signal and child sources), adjusting
 * the enabled-child counter, or reshuffling the quit prioq. Finally
 * the generic pending/prepare prioqs are reshuffled, since their
 * comparators look at the enabled state. */
1101 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1104 assert_return(s, -EINVAL);
1105 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1106 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1107 assert_return(!event_pid_changed(s->event), -ECHILD);
1109 if (s->enabled == m)
/* --- Disabling --- */
1112 if (m == SD_EVENT_OFF) {
1117 r = source_io_unregister(s);
1124 case SOURCE_MONOTONIC:
1126 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1127 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1130 case SOURCE_REALTIME:
1132 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1133 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
/* Keep SIGCHLD masked while enabled child sources still need it. */
1138 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1139 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1140 event_update_signal_fd(s->event);
1148 assert(s->event->n_enabled_child_sources > 0);
1149 s->event->n_enabled_child_sources--;
1151 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1152 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1153 event_update_signal_fd(s->event);
1160 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
/* --- Enabling (ON or ONESHOT) --- */
1172 r = source_io_register(s, m, s->io.events);
1179 case SOURCE_MONOTONIC:
1181 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1182 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1185 case SOURCE_REALTIME:
1187 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1188 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1194 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1195 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1196 event_update_signal_fd(s->event);
1203 if (s->enabled == SD_EVENT_OFF) {
1204 s->event->n_enabled_child_sources++;
1206 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1207 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1208 event_update_signal_fd(s->event);
1215 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1225 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1228 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
/* Stores the absolute trigger time (usec) of a timer source. */
1233 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1234 assert_return(s, -EINVAL);
1235 assert_return(usec, -EINVAL);
1236 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1237 assert_return(!event_pid_changed(s->event), -ECHILD);
1239 *usec = s->time.next;
/* Reschedules a timer source to a new absolute time; clears pending
 * state and reshuffles both prioqs of the source's clock so the
 * timerfd can be rearmed correctly. */
1243 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1244 assert_return(s, -EINVAL);
1245 assert_return(usec != (uint64_t) -1, -EINVAL);
1246 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1247 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1248 assert_return(!event_pid_changed(s->event), -ECHILD);
1250 s->time.next = usec;
1252 source_set_pending(s, false);
1254 if (s->type == SOURCE_REALTIME) {
1255 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1256 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1258 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1259 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Stores the accuracy window (usec) of a timer source. */
1265 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1266 assert_return(s, -EINVAL);
1267 assert_return(usec, -EINVAL);
1268 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1269 assert_return(!event_pid_changed(s->event), -ECHILD);
1271 *usec = s->time.accuracy;
/* Changes a timer source's accuracy (0 selects the default). Only
 * the "latest" prioq is reshuffled, since accuracy affects only the
 * latest-dispatch deadline, not the earliest time. */
1275 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1276 assert_return(s, -EINVAL);
1277 assert_return(usec != (uint64_t) -1, -EINVAL);
1278 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1279 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1280 assert_return(!event_pid_changed(s->event), -ECHILD);
1283 usec = DEFAULT_ACCURACY_USEC;
1285 s->time.accuracy = usec;
1287 source_set_pending(s, false);
1289 if (s->type == SOURCE_REALTIME)
1290 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1292 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Stores the PID watched by a child source. */
1297 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1298 assert_return(s, -EINVAL);
1299 assert_return(pid, -EINVAL);
1300 assert_return(s->type == SOURCE_CHILD, -EDOM);
1301 assert_return(!event_pid_changed(s->event), -ECHILD);
1303 *pid = s->child.pid;
/* Installs (or removes, when callback is NULL) a prepare callback,
 * invoked just before polling. Adds the source to / removes it from
 * the prepare prioq accordingly; a plain callback swap only updates
 * the pointer. */
1307 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1310 assert_return(s, -EINVAL);
1311 assert_return(s->type != SOURCE_QUIT, -EDOM);
1312 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1313 assert_return(!event_pid_changed(s->event), -ECHILD);
1315 if (s->prepare == callback)
/* Replacing one callback with another: already in the prioq. */
1318 if (callback && s->prepare) {
1319 s->prepare = callback;
1323 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1327 s->prepare = callback;
1330 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1334 prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Returns the opaque userdata pointer stored at registration time. */
1339 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1340 assert_return(s, NULL);
/* Picks a wakeup time in [a, b]: the latest possible moment that
 * still lines up with the system-wide perturbation offset on a
 * 1min / 1s / 250ms grid, so wakeups across the system coalesce. */
1345 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1357 Find a good time to wake up again between times a and b. We
1358 have two goals here:
1360 a) We want to wake up as seldom as possible, hence prefer
1361 later times over earlier times.
1363 b) But if we have to wake up, then let's make sure to
1364 dispatch as much as possible on the entire system.
1366 We implement this by waking up everywhere at the same time
1367 within any given minute if we can, synchronised via the
1368 perturbation value determined from the boot ID. If we can't,
1369 then we try to find the same spot in every 1s and then 250ms
1370 step. Otherwise, we pick the last possible time to wake up.
/* Minute grid. */
1373 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1375 if (_unlikely_(c < USEC_PER_MINUTE))
1378 c -= USEC_PER_MINUTE;
/* Second grid. */
1384 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1386 if (_unlikely_(c < USEC_PER_SEC))
/* 250ms grid. */
1395 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1397 if (_unlikely_(c < USEC_PER_MSEC*250))
1400 c -= USEC_PER_MSEC*250;
/* Programs one clock's timerfd for the next due timer source. With
 * no enabled source the timer is disarmed; otherwise the wakeup time
 * is chosen between the head of the "earliest" and "latest" prioqs
 * via sleep_between(). A deadline already in the past is mapped to
 * 1ns so the fd fires immediately (settime with 0 would disarm). */
1409 static int event_arm_timer(
1416 struct itimerspec its = {};
1417 sd_event_source *a, *b;
1424 a = prioq_peek(earliest);
1425 if (!a || a->enabled == SD_EVENT_OFF) {
/* Nothing to schedule: disarm, unless already disarmed. */
1430 if (*next == (usec_t) -1)
1434 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1438 *next = (usec_t) -1;
1443 b = prioq_peek(latest);
1444 assert_se(b && b->enabled != SD_EVENT_OFF);
1446 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1450 assert_se(timer_fd >= 0);
1453 /* We don't want to disarm here, just mean some time looooong ago. */
1454 its.it_value.tv_sec = 0;
1455 its.it_value.tv_nsec = 1;
1457 timespec_store(&its.it_value, t);
1459 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Records the triggering epoll events on an I/O source and marks it
 * pending for dispatch. */
1467 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1470 assert(s->type == SOURCE_IO);
1472 s->io.revents = events;
1474 return source_set_pending(s, true);
/* Drains a fired timerfd (reads the expiration counter) and resets
 * the cached next-deadline to "unset" so the fd gets rearmed.
 * EAGAIN/EINTR on the read are tolerated. */
1477 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1485 assert_return(events == EPOLLIN, -EIO);
1487 ss = read(fd, &x, sizeof(x));
1489 if (errno == EAGAIN || errno == EINTR)
1495 if (ss != sizeof(x))
1498 *next = (usec_t) -1;
/* Walks one clock's "earliest" prioq and marks every elapsed,
 * enabled, not-yet-pending source as pending, reshuffling both
 * prioqs after each change. */
1503 static int process_timer(
1515 s = prioq_peek(earliest);
1518 s->enabled == SD_EVENT_OFF ||
1522 r = source_set_pending(s, true);
1526 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1527 prioq_reshuffle(latest, s, &s->time.latest_index);
/* Polls every registered child source once with
 * waitid(P_PID, WNOHANG) and marks those with a state change as
 * pending. See the inline comment for why P_PID is used per child
 * rather than a single P_ALL reap. */
1533 static int process_child(sd_event *e) {
1540 e->need_process_child = false;
1543 So, this is ugly. We iteratively invoke waitid() with P_PID
1544 + WNOHANG for each PID we wait for, instead of using
1545 P_ALL. This is because we only want to get child
1546 information of very specific child processes, and not all
1547 of them. We might not have processed the SIGCHLD even of a
1548 previous invocation and we don't want to maintain a
1549 unbounded *per-child* event queue, hence we really don't
1550 want anything flushed out of the kernel's queue that we
1551 don't care about. Since this is O(n) this means that if you
1552 have a lot of processes you probably want to handle SIGCHLD
1556 HASHMAP_FOREACH(s, e->child_sources, i) {
1557 assert(s->type == SOURCE_CHILD);
1562 if (s->enabled == SD_EVENT_OFF)
1565 zero(s->child.siginfo);
1566 r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
/* si_pid != 0 means waitid() actually reported a state change. */
1570 if (s->child.siginfo.si_pid != 0) {
1571 r = source_set_pending(s, true);
/* Drains the signalfd: for each received signal, SIGCHLD triggers a
 * child-source scan via process_child(); a matching signal source
 * gets the siginfo copied in and is marked pending. Reads until
 * EAGAIN/EINTR. */
1580 static int process_signal(sd_event *e, uint32_t events) {
1581 bool read_one = false;
1585 assert(e->signal_sources);
1587 assert_return(events == EPOLLIN, -EIO);
1590 struct signalfd_siginfo si;
1594 ss = read(e->signal_fd, &si, sizeof(si));
1596 if (errno == EAGAIN || errno == EINTR)
1602 if (ss != sizeof(si))
1607 s = e->signal_sources[si.ssi_signo];
1608 if (si.ssi_signo == SIGCHLD) {
1609 r = process_child(e);
1618 s->signal.siginfo = si;
1619 r = source_set_pending(s, true);
/* Invokes one source's callback. First clears pending state (except
 * for defer/quit sources, which have none to clear in that sense),
 * turns ONESHOT sources off, and holds an extra reference across the
 * callback so the callback may safely unref its own source. */
1628 static int source_dispatch(sd_event_source *s) {
1632 assert(s->pending || s->type == SOURCE_QUIT);
1634 if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1635 r = source_set_pending(s, false);
1640 if (s->enabled == SD_EVENT_ONESHOT) {
1641 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1646 sd_event_source_ref(s);
1651 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1654 case SOURCE_MONOTONIC:
1655 r = s->time.callback(s, s->time.next, s->userdata);
1658 case SOURCE_REALTIME:
1659 r = s->time.callback(s, s->time.next, s->userdata);
1663 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1667 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1671 r = s->defer.callback(s, s->userdata);
1675 r = s->quit.callback(s, s->userdata);
1679 sd_event_source_unref(s);
/* Runs prepare callbacks for this iteration: repeatedly takes the
 * head of the prepare prioq, stops once the head was already
 * prepared this iteration (or is disabled), stamps the iteration and
 * reshuffles before calling out. */
1684 static int event_prepare(sd_event *e) {
1692 s = prioq_peek(e->prepare);
1693 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
1696 s->prepare_iteration = e->iteration;
1697 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1702 r = s->prepare(s, s->userdata);
/* Dispatches the highest-priority enabled quit handler; when none
 * remains, the loop transitions to FINISHED. The state is QUITTING
 * while a handler runs and returns to PASSIVE afterwards. */
1711 static int dispatch_quit(sd_event *e) {
1717 p = prioq_peek(e->quit);
1718 if (!p || p->enabled == SD_EVENT_OFF) {
1719 e->state = SD_EVENT_FINISHED;
1725 e->state = SD_EVENT_QUITTING;
1727 r = source_dispatch(p);
1729 e->state = SD_EVENT_PASSIVE;
/* Peeks the next dispatchable source from the pending prioq;
 * a disabled head means nothing is dispatchable. */
1735 static sd_event_source* event_next_pending(sd_event *e) {
1740 p = prioq_peek(e->pending);
1744 if (p->enabled == SD_EVENT_OFF)
/* Runs one iteration of the loop: prepare callbacks, arm both
 * timerfds, epoll_wait (skipped-by-zero-timeout logic when work is
 * already pending — not fully visible here), demultiplex wakeups by
 * the tag stored in epoll data.ptr, process elapsed timers and child
 * state changes, then dispatch one pending source. `timeout` is a
 * usec value; (uint64_t) -1 means wait indefinitely. */
1750 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1751 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1755 assert_return(e, -EINVAL);
1756 assert_return(!event_pid_changed(e), -ECHILD);
1757 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1758 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1760 if (e->quit_requested)
1761 return dispatch_quit(e);
1765 e->state = SD_EVENT_RUNNING;
1767 r = event_prepare(e);
1771 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1775 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
1779 if (event_next_pending(e) || e->need_process_child)
/* Round the usec timeout up to whole milliseconds for epoll. */
1782 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1783 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
1785 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
1789 dual_timestamp_get(&e->timestamp);
/* Demultiplex: timerfds and the signalfd are tagged with their
 * source type in data.ptr; anything else is an I/O source. */
1791 for (i = 0; i < m; i++) {
1793 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1794 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1795 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1796 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1797 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1798 r = process_signal(e, ev_queue[i].events);
1800 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
1806 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
1810 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
1814 if (e->need_process_child) {
1815 r = process_child(e);
1820 p = event_next_pending(e);
1826 r = source_dispatch(p);
1829 e->state = SD_EVENT_PASSIVE;
/* Runs the loop until it reaches the FINISHED state, iterating
 * sd_event_run() with an infinite timeout. */
1835 _public_ int sd_event_loop(sd_event *e) {
1838 assert_return(e, -EINVAL);
1839 assert_return(!event_pid_changed(e), -ECHILD);
1840 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1844 while (e->state != SD_EVENT_FINISHED) {
1845 r = sd_event_run(e, (uint64_t) -1);
/* Returns the loop's current state (PASSIVE/RUNNING/QUITTING/...). */
1857 _public_ int sd_event_get_state(sd_event *e) {
1858 assert_return(e, -EINVAL);
1859 assert_return(!event_pid_changed(e), -ECHILD);
/* Returns whether a quit has been requested on this loop. */
1864 _public_ int sd_event_get_quit(sd_event *e) {
1865 assert_return(e, -EINVAL);
1866 assert_return(!event_pid_changed(e), -ECHILD);
1868 return e->quit_requested;
/* Flags the loop for shutdown; the next sd_event_run() dispatches
 * quit handlers instead of a normal iteration. */
1871 _public_ int sd_event_request_quit(sd_event *e) {
1872 assert_return(e, -EINVAL);
1873 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1874 assert_return(!event_pid_changed(e), -ECHILD);
1876 e->quit_requested = true;
/* Stores the realtime timestamp taken at the start of the current
 * (or last) iteration; -ENODATA before the first epoll wakeup. */
1880 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
1881 assert_return(e, -EINVAL);
1882 assert_return(usec, -EINVAL);
1883 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1884 assert_return(!event_pid_changed(e), -ECHILD);
1886 *usec = e->timestamp.realtime;
/* Stores the monotonic timestamp taken at the start of the current
 * (or last) iteration; -ENODATA before the first epoll wakeup. */
1890 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
1891 assert_return(e, -EINVAL);
1892 assert_return(usec, -EINVAL);
1893 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1894 assert_return(!event_pid_changed(e), -ECHILD);
1896 *usec = e->timestamp.monotonic;
/* Returns (creating on first use) the calling thread's default event
 * loop. The new loop stores a back-pointer to the thread-local slot
 * so event_free() can clear it on destruction. */
1900 _public_ int sd_event_default(sd_event **ret) {
1902 static __thread sd_event *default_event = NULL;
/* With ret == NULL this only reports whether a default exists. */
1907 return !!default_event;
1909 if (default_event) {
1910 *ret = sd_event_ref(default_event);
1914 r = sd_event_new(&e);
1918 e->default_event_ptr = &default_event;
1926 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
1927 assert_return(e, -EINVAL);
1928 assert_return(tid, -EINVAL);
1929 assert_return(!event_pid_changed(e), -ECHILD);