1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
30 #include "time-util.h"
35 #define EPOLL_QUEUE_MAX 64
36 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* Event source type tag. NOTE(review): enumerator list elided in this
 * excerpt; values such as SOURCE_IO/SOURCE_MONOTONIC/SOURCE_REALTIME/
 * SOURCE_SIGNAL/SOURCE_CHILD/SOURCE_DEFER/SOURCE_QUIT are referenced below. */
38 typedef enum EventSourceType {

/* One registered event source. The union of per-type state is elided in
 * this excerpt; the per-type 'callback' members below belong to it. */
48 struct sd_event_source {
/* Optional per-iteration prepare callback, run before polling. */
53 sd_prepare_handler_t prepare;
/* Type tag; 4 bits are sufficient for the small enum above. */
55 EventSourceType type:4;
/* Positions inside the pending/prepare priority queues (PRIOQ_IDX_NULL
 * when not queued). */
60 unsigned pending_index;
61 unsigned prepare_index;
/* Loop-iteration counters used by the prioq comparators for ordering. */
62 unsigned pending_iteration;
63 unsigned prepare_iteration;
/* I/O source callback. */
67 sd_io_handler_t callback;
/* Timer source: callback, absolute trigger time and allowed slack. */
74 sd_time_handler_t callback;
75 usec_t next, accuracy;
/* Positions in the earliest/latest time prioqs. */
76 unsigned earliest_index;
77 unsigned latest_index;
/* Signal source: callback plus the last siginfo read from the signalfd. */
80 sd_signal_handler_t callback;
81 struct signalfd_siginfo siginfo;
/* Child (SIGCHLD/waitid) source callback. */
85 sd_child_handler_t callback;
/* Defer source callback (dispatched when nothing else is pending). */
91 sd_defer_handler_t callback;
/* Quit source callback (dispatched during loop shutdown). */
94 sd_quit_handler_t callback;

111 /* For both clocks we maintain two priority queues each, one
112 * ordered for the earliest times the events may be
113 * dispatched, and one ordered by the latest times they must
114 * have been dispatched. The range between the top entries in
115 * the two prioqs is the time window we can freely schedule
117 Prioq *monotonic_earliest;
118 Prioq *monotonic_latest;
119 Prioq *realtime_earliest;
120 Prioq *realtime_latest;

/* Next armed wakeup per clock; (usec_t) -1 when unarmed. */
122 usec_t realtime_next, monotonic_next;

/* Per-signal source table, indexed by signal number (_NSIG entries). */
126 sd_event_source **signal_sources;

/* pid -> sd_event_source map for child sources, plus a count of the
 * enabled ones (controls whether SIGCHLD stays in the sigset). */
128 Hashmap *child_sources;
129 unsigned n_enabled_child_sources;

/* Loop state flags. */
138 bool quit_requested:1;
139 bool need_process_child:1;
142 static int pending_prioq_compare(const void *a, const void *b) {
143 const sd_event_source *x = a, *y = b;
148 /* Enabled ones first */
149 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
151 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
154 /* Lower priority values first */
155 if (x->priority < y->priority)
157 if (x->priority > y->priority)
160 /* Older entries first */
161 if (x->pending_iteration < y->pending_iteration)
163 if (x->pending_iteration > y->pending_iteration)
166 /* Stability for the rest */
175 static int prepare_prioq_compare(const void *a, const void *b) {
176 const sd_event_source *x = a, *y = b;
181 /* Move most recently prepared ones last, so that we can stop
182 * preparing as soon as we hit one that has already been
183 * prepared in the current iteration */
184 if (x->prepare_iteration < y->prepare_iteration)
186 if (x->prepare_iteration > y->prepare_iteration)
189 /* Enabled ones first */
190 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
192 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
195 /* Lower priority values first */
196 if (x->priority < y->priority)
198 if (x->priority > y->priority)
201 /* Stability for the rest */
210 static int earliest_time_prioq_compare(const void *a, const void *b) {
211 const sd_event_source *x = a, *y = b;
213 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
214 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
216 /* Enabled ones first */
217 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
219 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
222 /* Move the pending ones to the end */
223 if (!x->pending && y->pending)
225 if (x->pending && !y->pending)
229 if (x->time.next < y->time.next)
231 if (x->time.next > y->time.next)
234 /* Stability for the rest */
243 static int latest_time_prioq_compare(const void *a, const void *b) {
244 const sd_event_source *x = a, *y = b;
246 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
247 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
249 /* Enabled ones first */
250 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
252 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
255 /* Move the pending ones to the end */
256 if (!x->pending && y->pending)
258 if (x->pending && !y->pending)
262 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
264 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
267 /* Stability for the rest */
276 static int quit_prioq_compare(const void *a, const void *b) {
277 const sd_event_source *x = a, *y = b;
279 assert(x->type == SOURCE_QUIT);
280 assert(y->type == SOURCE_QUIT);
282 /* Enabled ones first */
283 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
285 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
288 /* Lower priority values first */
289 if (x->priority < y->priority)
291 if (x->priority > y->priority)
294 /* Stability for the rest */
/* Tear down an event loop object: close all owned fds, free all queues
 * and tables.  NOTE(review): the final free(e) and the ref-count plumbing
 * are elided in this excerpt. */
303 static void event_free(sd_event *e) {

/* Only close fds that were actually opened (-1 means "never created"). */
306 if (e->epoll_fd >= 0)
307 close_nointr_nofail(e->epoll_fd);

309 if (e->signal_fd >= 0)
310 close_nointr_nofail(e->signal_fd);

312 if (e->realtime_fd >= 0)
313 close_nointr_nofail(e->realtime_fd);

315 if (e->monotonic_fd >= 0)
316 close_nointr_nofail(e->monotonic_fd);

318 prioq_free(e->pending);
319 prioq_free(e->prepare);
320 prioq_free(e->monotonic_earliest);
321 prioq_free(e->monotonic_latest);
322 prioq_free(e->realtime_earliest);
323 prioq_free(e->realtime_latest);

326 free(e->signal_sources);

328 hashmap_free(e->child_sources);

/* Allocate and initialize a new event loop.  Returns 0 on success and a
 * negative errno-style code on failure (error paths elided here). */
332 int sd_event_new(sd_event** ret) {

336 assert_return(ret, -EINVAL);

338 e = new0(sd_event, 1);

/* Mark all fds as "not yet created" and both timers as unarmed. */
343 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
344 e->realtime_next = e->monotonic_next = (usec_t) -1;
/* Remember the creating pid so event_pid_changed() can detect fork(). */
345 e->original_pid = getpid();

347 assert_se(sigemptyset(&e->sigset) == 0);

349 e->pending = prioq_new(pending_prioq_compare);

355 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
356 if (e->epoll_fd < 0) {

/* Take a reference; returns e.  NOTE(review): increment elided. */
369 sd_event* sd_event_ref(sd_event *e) {
370 assert_return(e, NULL);

372 assert(e->n_ref >= 1);

/* Drop a reference; frees the loop when it reaches zero (elided). */
378 sd_event* sd_event_unref(sd_event *e) {
379 assert_return(e, NULL);

381 assert(e->n_ref >= 1);
390 static bool event_pid_changed(sd_event *e) {
393 /* We don't support people creating am event loop and keeping
394 * it around over a fork(). Let's complain. */
396 return e->original_pid != getpid();
399 static int source_io_unregister(sd_event_source *s) {
403 assert(s->type == SOURCE_IO);
405 if (!s->io.registered)
408 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
412 s->io.registered = false;
416 static int source_io_register(
421 struct epoll_event ev = {};
425 assert(s->type == SOURCE_IO);
426 assert(enabled != SD_EVENT_OFF);
431 if (enabled == SD_EVENT_ONESHOT)
432 ev.events |= EPOLLONESHOT;
434 if (s->io.registered)
435 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
437 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
442 s->io.registered = true;
/* Detach an event source from all loop data structures and free it.
 * NOTE(review): the switch framing and several cases are elided in this
 * excerpt; the visible fragments are annotated below. */
447 static void source_free(sd_event_source *s) {

/* SOURCE_IO: drop the fd from the epoll set. */
455 source_io_unregister(s);

459 case SOURCE_MONOTONIC:
460 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
461 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);

464 case SOURCE_REALTIME:
465 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
466 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);

/* SOURCE_SIGNAL: drop the signal from the watched sigset, unless it is
 * SIGCHLD and still needed for enabled child sources. */
470 if (s->signal.sig > 0) {
471 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
472 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);

474 if (s->event->signal_sources)
475 s->event->signal_sources[s->signal.sig] = NULL;

/* SOURCE_CHILD: keep the enabled-children count accurate, drop SIGCHLD
 * from the sigset only if no explicit SIGCHLD source exists. */
481 if (s->child.pid > 0) {
482 if (s->enabled != SD_EVENT_OFF) {
483 assert(s->event->n_enabled_child_sources > 0);
484 s->event->n_enabled_child_sources--;

487 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
488 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);

490 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));

/* SOURCE_QUIT: remove from the quit prioq. */
496 prioq_remove(s->event->quit, s, &s->quit.prioq_index);

/* Common teardown: leave the pending/prepare queues, drop the loop ref. */
501 prioq_remove(s->event->pending, s, &s->pending_index);

504 prioq_remove(s->event->prepare, s, &s->prepare_index);

506 sd_event_unref(s->event);

/* Mark a source pending (queued for dispatch) or not.  Quit sources are
 * never "pending"; they live in their own queue.  NOTE(review): the
 * no-change early return and error paths are elided. */
512 static int source_set_pending(sd_event_source *s, bool b) {

516 assert(s->type != SOURCE_QUIT);

/* Record when the source became pending, for FIFO ordering among equal
 * priorities (see pending_prioq_compare). */
524 s->pending_iteration = s->event->iteration;

526 r = prioq_put(s->event->pending, s, &s->pending_index);

/* b == false: must already be queued, so removal cannot fail. */
532 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

/* Allocate a zeroed source of the given type holding a reference on the
 * loop.  Returns NULL on OOM (elided). */
537 static sd_event_source *source_new(sd_event *e, EventSourceType type) {

542 s = new0(sd_event_source, 1);

547 s->event = sd_event_ref(e);
549 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

/* Register an I/O event source for 'fd' with the given epoll event mask.
 * The new source starts enabled (SD_EVENT_ON).  NOTE(review): parameter
 * list head, error unwinding and *ret assignment are elided. */
558 sd_io_handler_t callback,
560 sd_event_source **ret) {

565 assert_return(e, -EINVAL);
566 assert_return(fd >= 0, -EINVAL);
567 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
568 assert_return(callback, -EINVAL);
569 assert_return(ret, -EINVAL);
570 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
571 assert_return(!event_pid_changed(e), -ECHILD);

573 s = source_new(e, SOURCE_IO);

578 s->io.events = events;
579 s->io.callback = callback;
580 s->userdata = userdata;
581 s->enabled = SD_EVENT_ON;

583 r = source_io_register(s, s->enabled, events);
/* Lazily create the per-clock timerfd and hook it into the epoll set,
 * tagging it with the source type so sd_event_run() can recognize it.
 * Also derives the per-boot wakeup perturbation.  NOTE(review): several
 * parameters and error paths are elided in this excerpt. */
593 static int event_setup_timer_fd(
595 EventSourceType type,

599 struct epoll_event ev = {};

/* Already created earlier: nothing to do. */
606 if (_likely_(*timer_fd >= 0))

609 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);

/* Tag the epoll data with the source type (not a source pointer) so the
 * dispatcher can distinguish timer fds from I/O sources. */
614 ev.data.ptr = INT_TO_PTR(type);

616 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
/* On failure, don't leak the freshly created timerfd. */
618 close_nointr_nofail(fd);

622 /* When we sleep for longer, we try to realign the wakeup to
623 the same time within each second, so that events all across
624 the system can be coalesced into a single CPU
625 wakeup. However, let's take some system-specific randomness
626 for this value, so that in a network of systems with synced
627 clocks timer events are distributed a bit. Here, we
628 calculate a perturbation usec offset from the boot ID. */

630 if (sd_id128_get_boot(&bootid) >= 0)
631 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;

/* Shared implementation of sd_event_add_monotonic()/_realtime():
 * ensures the per-clock prioqs and timerfd exist, then allocates and
 * queues a oneshot timer source.  NOTE(review): parameter list and
 * error unwinding are elided. */
637 static int event_add_time_internal(
639 EventSourceType type,
646 sd_time_handler_t callback,
648 sd_event_source **ret) {

653 assert_return(e, -EINVAL);
654 assert_return(callback, -EINVAL);
655 assert_return(ret, -EINVAL);
656 assert_return(usec != (uint64_t) -1, -EINVAL);
657 assert_return(accuracy != (uint64_t) -1, -EINVAL);
658 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
659 assert_return(!event_pid_changed(e), -ECHILD);

/* Lazily allocate the two ordering queues for this clock. */
666 *earliest = prioq_new(earliest_time_prioq_compare);

672 *latest = prioq_new(latest_time_prioq_compare);

678 r = event_setup_timer_fd(e, type, timer_fd, id);

683 s = source_new(e, type);

/* accuracy == 0 selects the default 250ms coalescing window. */
688 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
689 s->time.callback = callback;
690 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
691 s->userdata = userdata;
692 s->enabled = SD_EVENT_ONESHOT;

694 r = prioq_put(*earliest, s, &s->time.earliest_index);

698 r = prioq_put(*latest, s, &s->time.latest_index);
710 int sd_event_add_monotonic(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
711 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
714 int sd_event_add_realtime(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
715 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
/* (Re)create the signalfd for the loop's current sigset and make sure it
 * is in the epoll set (tagged SOURCE_SIGNAL).  NOTE(review): error paths
 * elided in this excerpt. */
718 static int event_update_signal_fd(sd_event *e) {
719 struct epoll_event ev = {};

/* signalfd(-1, ...) creates; signalfd(existing_fd, ...) just updates the
 * mask, in which case no epoll registration is needed. */
725 add_to_epoll = e->signal_fd < 0;

727 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);

737 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);

739 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
/* On failure, don't leave a dangling signalfd behind. */
741 close_nointr_nofail(e->signal_fd);

/* Register a handler for signal 'sig'.  Only one source per signal is
 * allowed.  NOTE(review): parameter list head and error unwinding are
 * elided. */
750 int sd_event_add_signal(
753 sd_signal_handler_t callback,
755 sd_event_source **ret) {

760 assert_return(e, -EINVAL);
761 assert_return(sig > 0, -EINVAL);
762 assert_return(sig < _NSIG, -EINVAL);
763 assert_return(callback, -EINVAL);
764 assert_return(ret, -EINVAL);
765 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
766 assert_return(!event_pid_changed(e), -ECHILD);

/* Lazily allocate the per-signal source table. */
768 if (!e->signal_sources) {
769 e->signal_sources = new0(sd_event_source*, _NSIG);
770 if (!e->signal_sources)
772 } else if (e->signal_sources[sig])

775 s = source_new(e, SOURCE_SIGNAL);

780 s->signal.callback = callback;
781 s->userdata = userdata;
782 s->enabled = SD_EVENT_ON;

784 e->signal_sources[sig] = s;
785 assert_se(sigaddset(&e->sigset, sig) == 0);

/* SIGCHLD may already be routed to the signalfd for child sources; only
 * refresh the fd when this actually changes the watched set. */
787 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
788 r = event_update_signal_fd(e);

/* Register a handler for a child process state change (waitid-style
 * options WEXITED/WSTOPPED/WCONTINUED).  One source per pid.
 * NOTE(review): parameter list head and error unwinding are elided. */
799 int sd_event_add_child(
803 sd_child_handler_t callback,
805 sd_event_source **ret) {

810 assert_return(e, -EINVAL);
811 assert_return(pid > 1, -EINVAL);
812 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
813 assert_return(options != 0, -EINVAL);
814 assert_return(callback, -EINVAL);
815 assert_return(ret, -EINVAL);
816 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
817 assert_return(!event_pid_changed(e), -ECHILD);

819 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);

823 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))

826 s = source_new(e, SOURCE_CHILD);

831 s->child.options = options;
832 s->child.callback = callback;
833 s->userdata = userdata;
834 s->enabled = SD_EVENT_ONESHOT;

836 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);

842 e->n_enabled_child_sources ++;

844 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);

/* Only refresh the signalfd if SIGCHLD isn't already watched by an
 * explicit signal source. */
846 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
847 r = event_update_signal_fd(e);

/* The child may already have exited; make sure the next iteration runs
 * waitid() for it. */
854 e->need_process_child = true;

/* Register a defer source: dispatched once (oneshot) as soon as nothing
 * of higher priority is pending.  NOTE(review): error unwinding elided. */
860 int sd_event_add_defer(
862 sd_defer_handler_t callback,
864 sd_event_source **ret) {

869 assert_return(e, -EINVAL);
870 assert_return(callback, -EINVAL);
871 assert_return(ret, -EINVAL);
872 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
873 assert_return(!event_pid_changed(e), -ECHILD);

875 s = source_new(e, SOURCE_DEFER);

879 s->defer.callback = callback;
880 s->userdata = userdata;
881 s->enabled = SD_EVENT_ONESHOT;

/* Defer sources are born pending. */
883 r = source_set_pending(s, true);

/* Register a quit handler, run during loop shutdown (highest-priority
 * enabled quit source first).  NOTE(review): error unwinding elided. */
893 int sd_event_add_quit(
895 sd_quit_handler_t callback,
897 sd_event_source **ret) {

902 assert_return(e, -EINVAL);
903 assert_return(callback, -EINVAL);
904 assert_return(ret, -EINVAL);
905 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
906 assert_return(!event_pid_changed(e), -ECHILD);

/* Lazily allocate the quit prioq. */
909 e->quit = prioq_new(quit_prioq_compare);

914 s = source_new(e, SOURCE_QUIT);

918 s->quit.callback = callback;
919 s->userdata = userdata;
920 s->quit.prioq_index = PRIOQ_IDX_NULL;
921 s->enabled = SD_EVENT_ONESHOT;

923 r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
/* Take a reference on a source; returns s.  NOTE(review): increment
 * elided in this excerpt. */
933 sd_event_source* sd_event_source_ref(sd_event_source *s) {
934 assert_return(s, NULL);

936 assert(s->n_ref >= 1);

/* Drop a reference; frees the source via source_free() when it reaches
 * zero (elided). */
942 sd_event_source* sd_event_source_unref(sd_event_source *s) {
943 assert_return(s, NULL);

945 assert(s->n_ref >= 1);

/* Return the loop this source belongs to (return statement elided). */
954 sd_event *sd_event_get(sd_event_source *s) {
955 assert_return(s, NULL);

/* Return whether the source is queued for dispatch.  Quit sources have
 * no pending state, hence -EDOM.  (Return statement elided.) */
960 int sd_event_source_get_pending(sd_event_source *s) {
961 assert_return(s, -EINVAL);
962 assert_return(s->type != SOURCE_QUIT, -EDOM);
963 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
964 assert_return(!event_pid_changed(s->event), -ECHILD);

/* Return the fd of an I/O source (return statement elided). */
969 int sd_event_source_get_io_fd(sd_event_source *s) {
970 assert_return(s, -EINVAL);
971 assert_return(s->type == SOURCE_IO, -EDOM);
972 assert_return(!event_pid_changed(s->event), -ECHILD);
977 int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
978 assert_return(s, -EINVAL);
979 assert_return(events, -EINVAL);
980 assert_return(s->type == SOURCE_IO, -EDOM);
981 assert_return(!event_pid_changed(s->event), -ECHILD);
983 *events = s->io.events;
987 int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
990 assert_return(s, -EINVAL);
991 assert_return(s->type == SOURCE_IO, -EDOM);
992 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
993 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
994 assert_return(!event_pid_changed(s->event), -ECHILD);
996 if (s->io.events == events)
999 if (s->enabled != SD_EVENT_OFF) {
1000 r = source_io_register(s, s->io.events, events);
1005 s->io.events = events;
1010 int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1011 assert_return(s, -EINVAL);
1012 assert_return(revents, -EINVAL);
1013 assert_return(s->type == SOURCE_IO, -EDOM);
1014 assert_return(s->pending, -ENODATA);
1015 assert_return(!event_pid_changed(s->event), -ECHILD);
1017 *revents = s->io.revents;
1021 int sd_event_source_get_signal(sd_event_source *s) {
1022 assert_return(s, -EINVAL);
1023 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1024 assert_return(!event_pid_changed(s->event), -ECHILD);
1026 return s->signal.sig;
/* Fetch the source's dispatch priority.  NOTE(review): the body that
 * stores into *priority and returns is elided in this excerpt. */
1029 int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1030 assert_return(s, -EINVAL);
1031 assert_return(!event_pid_changed(s->event), -ECHILD);

/* Change the source's dispatch priority and reshuffle every queue whose
 * ordering depends on it.  NOTE(review): 'return 0' paths elided. */
1036 int sd_event_source_set_priority(sd_event_source *s, int priority) {
1037 assert_return(s, -EINVAL);
1038 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1039 assert_return(!event_pid_changed(s->event), -ECHILD);

1041 if (s->priority == priority)

1044 s->priority = priority;

/* Priority participates in pending/prepare/quit ordering. */
1047 prioq_reshuffle(s->event->pending, s, &s->pending_index);

1050 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

1052 if (s->type == SOURCE_QUIT)
1053 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

/* Fetch the enable state (SD_EVENT_ON/OFF/ONESHOT) into *m.
 * NOTE(review): the store and return are elided. */
1058 int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1059 assert_return(s, -EINVAL);
1060 assert_return(m, -EINVAL);
1061 assert_return(!event_pid_changed(s->event), -ECHILD);

/* Switch a source between SD_EVENT_OFF / SD_EVENT_ON / SD_EVENT_ONESHOT.
 * Each source type needs different bookkeeping: epoll registration for
 * I/O, prioq reshuffles for timers, sigset/signalfd updates for signals,
 * the enabled-children counter for child sources.  NOTE(review): the
 * switch framing, s->enabled assignments and error handling are elided
 * in this excerpt. */
1067 int sd_event_source_set_enabled(sd_event_source *s, int m) {

1070 assert_return(s, -EINVAL);
1071 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1072 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1073 assert_return(!event_pid_changed(s->event), -ECHILD);

1075 if (s->enabled == m)

/* --- Disabling path --- */
1078 if (m == SD_EVENT_OFF) {

1083 r = source_io_unregister(s);

1090 case SOURCE_MONOTONIC:
/* Disabled timers sink to the end of both ordering queues. */
1092 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1093 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

1096 case SOURCE_REALTIME:
1098 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1099 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

/* Signal: stop watching, unless SIGCHLD is still needed for children. */
1104 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1105 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1106 event_update_signal_fd(s->event);

/* Child: decrement the enabled count; drop SIGCHLD only when no explicit
 * SIGCHLD source keeps it alive. */
1114 assert(s->event->n_enabled_child_sources > 0);
1115 s->event->n_enabled_child_sources--;

1117 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1118 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1119 event_update_signal_fd(s->event);

1126 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

/* --- Enabling path (ON or ONESHOT) --- */
1138 r = source_io_register(s, m, s->io.events);

1145 case SOURCE_MONOTONIC:
1147 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1148 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);

1151 case SOURCE_REALTIME:
1153 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1154 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);

1160 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1161 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1162 event_update_signal_fd(s->event);

/* Only count the transition from OFF, not ON<->ONESHOT changes. */
1169 if (s->enabled == SD_EVENT_OFF) {
1170 s->event->n_enabled_child_sources++;

1172 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1173 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1174 event_update_signal_fd(s->event);

1181 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);

/* Enabled state also affects pending/prepare ordering. */
1191 prioq_reshuffle(s->event->pending, s, &s->pending_index);

1194 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1199 int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1200 assert_return(s, -EINVAL);
1201 assert_return(usec, -EINVAL);
1202 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1203 assert_return(!event_pid_changed(s->event), -ECHILD);
1205 *usec = s->time.next;
1209 int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1210 assert_return(s, -EINVAL);
1211 assert_return(usec != (uint64_t) -1, -EINVAL);
1212 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1213 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1214 assert_return(!event_pid_changed(s->event), -ECHILD);
1216 if (s->time.next == usec)
1219 s->time.next = usec;
1221 if (s->type == SOURCE_REALTIME) {
1222 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1223 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1225 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1226 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1232 int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1233 assert_return(s, -EINVAL);
1234 assert_return(usec, -EINVAL);
1235 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1236 assert_return(!event_pid_changed(s->event), -ECHILD);
1238 *usec = s->time.accuracy;
1242 int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1243 assert_return(s, -EINVAL);
1244 assert_return(usec != (uint64_t) -1, -EINVAL);
1245 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1246 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1247 assert_return(!event_pid_changed(s->event), -ECHILD);
1250 usec = DEFAULT_ACCURACY_USEC;
1252 if (s->time.accuracy == usec)
1255 s->time.accuracy = usec;
1257 if (s->type == SOURCE_REALTIME)
1258 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1260 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1265 int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1266 assert_return(s, -EINVAL);
1267 assert_return(pid, -EINVAL);
1268 assert_return(s->type == SOURCE_CHILD, -EDOM);
1269 assert_return(!event_pid_changed(s->event), -ECHILD);
1271 *pid = s->child.pid;
/* Install, replace or (callback == NULL) remove the per-iteration
 * prepare callback of a source.  The source is tracked in the prepare
 * prioq only while a callback is set.  NOTE(review): several returns and
 * the NULL-callback branch framing are elided in this excerpt. */
1275 int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callback) {

1278 assert_return(s, -EINVAL);
1279 assert_return(s->type != SOURCE_QUIT, -EDOM);
1280 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1281 assert_return(!event_pid_changed(s->event), -ECHILD);

1283 if (s->prepare == callback)

/* Replacing one callback by another: no queue membership change needed. */
1286 if (callback && s->prepare) {
1287 s->prepare = callback;

/* First installation: make sure the prepare prioq exists. */
1291 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

1295 s->prepare = callback;

1298 r = prioq_put(s->event->prepare, s, &s->prepare_index);

/* callback == NULL: leave the prepare queue. */
1302 prioq_remove(s->event->prepare, s, &s->prepare_index);
1307 void* sd_event_source_get_userdata(sd_event_source *s) {
1308 assert_return(s, NULL);
/* Pick a wakeup time inside the window [a, b].  NOTE(review): several
 * arithmetic branches and the final clamping are elided in this
 * excerpt. */
1313 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {

1325 Find a good time to wake up again between times a and b. We
1326 have two goals here:

1328 a) We want to wake up as seldom as possible, hence prefer
1329 later times over earlier times.

1331 b) But if we have to wake up, then let's make sure to
1332 dispatch as much as possible on the entire system.

1334 We implement this by waking up everywhere at the same time
1335 within any given second if we can, synchronised via the
1336 perturbation value determined from the boot ID. If we can't,
1337 then we try to find the same spot in every a 250ms
1338 step. Otherwise, we pick the last possible time to wake up.

/* Candidate: the per-boot perturbation point within b's second. */
1341 c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;

/* Guard against underflow when subtracting a full second below. */
1343 if (_unlikely_(c < USEC_PER_SEC))

/* Fallback: same perturbation point on a 250ms grid. */
1352 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));

1354 if (_unlikely_(c < USEC_PER_MSEC*250))

1357 c -= USEC_PER_MSEC*250;

/* Program a clock's timerfd for the next wakeup derived from its
 * earliest/latest queues; disarm-by-proxy when nothing is enabled.
 * NOTE(review): parameter list and caching via *next are elided. */
1366 static int event_arm_timer(

1373 struct itimerspec its = {};
1374 sd_event_source *a, *b;

/* Head of the earliest queue is the next candidate; disabled head means
 * the whole queue is disabled (comparator sorts disabled last). */
1381 a = prioq_peek(earliest);
1382 if (!a || a->enabled == SD_EVENT_OFF)

/* If the earliest queue has an enabled entry, the latest queue must too. */
1385 b = prioq_peek(latest);
1386 assert_se(b && b->enabled != SD_EVENT_OFF);

/* Schedule inside the [earliest, latest deadline] window. */
1388 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);

1392 assert_se(timer_fd >= 0);

1395 /* We don't want to disarm here, just mean some time looooong ago. */
1396 its.it_value.tv_sec = 0;
1397 its.it_value.tv_nsec = 1;

1399 timespec_store(&its.it_value, t);

1401 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1409 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1412 assert(s->type == SOURCE_IO);
1414 s->io.revents = events;
1417 If this is a oneshot event source, then we added it to the
1418 epoll with EPOLLONESHOT, hence we know it's not registered
1419 anymore. We can save a syscall here...
1422 if (s->enabled == SD_EVENT_ONESHOT)
1423 s->io.registered = false;
1425 return source_set_pending(s, true);
/* Drain a timerfd after an epoll wakeup by reading its expiration
 * counter.  NOTE(review): returns and the *next reset are elided. */
1428 static int flush_timer(sd_event *e, int fd, uint32_t events) {

/* A timerfd only ever signals readability. */
1434 assert_return(events == EPOLLIN, -EIO);

1436 ss = read(fd, &x, sizeof(x));
/* Spurious wakeup / signal interruption: not an error. */
1438 if (errno == EAGAIN || errno == EINTR)

/* Short read of the 8-byte counter would indicate a kernel problem. */
1444 if (ss != sizeof(x))

/* Mark every timer source whose trigger time has been reached as
 * pending, reshuffling the queues as sources move to the end.
 * NOTE(review): parameter list and the loop framing are elided. */
1450 static int process_timer(

/* Head of the earliest queue is the next due timer; stop at the first
 * disabled/pending/not-yet-due entry (comparator ordering). */
1462 s = prioq_peek(earliest);
1465 s->enabled == SD_EVENT_OFF ||

1469 r = source_set_pending(s, true);

/* Becoming pending moves the source to the end of both queues. */
1473 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1474 prioq_reshuffle(latest, s, &s->time.latest_index);

/* Reap state changes for all watched children. */
1480 static int process_child(sd_event *e) {

1487 e->need_process_child = false;

1490 So, this is ugly. We iteratively invoke waitid() with P_PID
1491 + WNOHANG for each PID we wait for, instead of using
1492 P_ALL. This is because we only want to get child
1493 information of very specific child processes, and not all
1494 of them. We might not have processed the SIGCHLD even of a
1495 previous invocation and we don't want to maintain a
1496 unbounded *per-child* event queue, hence we really don't
1497 want anything flushed out of the kernel's queue that we
1498 don't care about. Since this is O(n) this means that if you
1499 have a lot of processes you probably want to handle SIGCHLD

1503 HASHMAP_FOREACH(s, e->child_sources, i) {
1504 assert(s->type == SOURCE_CHILD);

1509 if (s->enabled == SD_EVENT_OFF)

1512 zero(s->child.siginfo);
1513 r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);

/* si_pid != 0 means waitid() actually reported a state change. */
1517 if (s->child.siginfo.si_pid != 0) {
1518 r = source_set_pending(s, true);

/* Drain the signalfd and queue the matching signal sources; SIGCHLD
 * additionally triggers child processing.  NOTE(review): the read loop
 * framing and returns are elided. */
1527 static int process_signal(sd_event *e, uint32_t events) {
1528 struct signalfd_siginfo si;
1529 bool read_one = false;

1534 assert_return(events == EPOLLIN, -EIO);

1539 ss = read(e->signal_fd, &si, sizeof(si));
1541 if (errno == EAGAIN || errno == EINTR)

1547 if (ss != sizeof(si))

1552 if (si.ssi_signo == SIGCHLD) {
1553 r = process_child(e);
/* If child processing handled it and no explicit SIGCHLD source exists,
 * nothing more to do for this record. */
1556 if (r > 0 || !e->signal_sources[si.ssi_signo])

1559 s = e->signal_sources[si.ssi_signo];

/* Stash the siginfo for the callback, then queue the source. */
1564 s->signal.siginfo = si;
1565 r = source_set_pending(s, true);

/* Run one source's callback.  Oneshot sources are switched off before
 * dispatch.  NOTE(review): switch framing and the post-dispatch ref
 * handling are elided. */
1574 static int source_dispatch(sd_event_source *s) {

1578 assert(s->pending || s->type == SOURCE_QUIT);

/* Defer sources stay pending until disabled; quit sources have no
 * pending state at all. */
1580 if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1581 r = source_set_pending(s, false);

1586 if (s->enabled == SD_EVENT_ONESHOT) {
1587 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);

1595 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

1598 case SOURCE_MONOTONIC:
1599 r = s->time.callback(s, s->time.next, s->userdata);

1602 case SOURCE_REALTIME:
1603 r = s->time.callback(s, s->time.next, s->userdata);

1607 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

1611 r = s->child.callback(s, &s->child.siginfo, s->userdata);

1615 r = s->defer.callback(s, s->userdata);

1619 r = s->quit.callback(s, s->userdata);

/* Run prepare callbacks for sources not yet prepared this iteration,
 * in prepare-prioq order.  NOTE(review): loop framing elided. */
1626 static int event_prepare(sd_event *e) {

1634 s = prioq_peek(e->prepare);
/* Comparator puts already-prepared/disabled entries last, so the first
 * such head terminates the loop. */
1635 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)

1638 s->prepare_iteration = e->iteration;
1639 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

1644 r = s->prepare(s, s->userdata);

/* Dispatch the highest-priority enabled quit source; with none left the
 * loop is finished.  NOTE(review): returns elided. */
1653 static int dispatch_quit(sd_event *e) {

1659 p = prioq_peek(e->quit);
1660 if (!p || p->enabled == SD_EVENT_OFF) {
1661 e->state = SD_EVENT_FINISHED;

1667 e->state = SD_EVENT_QUITTING;

1669 r = source_dispatch(p);

1671 e->state = SD_EVENT_PASSIVE;

/* Peek the next dispatchable pending source, or NULL if the head is
 * disabled/absent.  NOTE(review): returns elided. */
1677 static sd_event_source* event_next_pending(sd_event *e) {

1682 p = prioq_peek(e->pending);

1686 if (p->enabled == SD_EVENT_OFF)
/* Run one iteration of the loop: prepare, poll (up to 'timeout' usec,
 * (uint64_t) -1 = infinite), process wakeups, dispatch one pending
 * source.  NOTE(review): several returns, error paths and the iteration
 * counter update are elided in this excerpt. */
1692 int sd_event_run(sd_event *e, uint64_t timeout) {
1693 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];

1698 assert_return(e, -EINVAL);
1699 assert_return(!event_pid_changed(e), -ECHILD);
1700 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1701 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

1703 if (e->quit_requested)
1704 return dispatch_quit(e);

1708 e->state = SD_EVENT_RUNNING;

1710 r = event_prepare(e);

/* Skip arming timers when something is dispatchable right away. */
1714 if (event_next_pending(e) || e->need_process_child)

1718 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);

1722 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);

/* Round the usec timeout up to whole milliseconds for epoll_wait(). */
1727 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1728 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));

1734 dual_timestamp_get(&n);

/* Route each wakeup by the tag stored in epoll data: timer fds and the
 * signalfd carry an enum value, everything else is a source pointer. */
1736 for (i = 0; i < m; i++) {

1738 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1739 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events);
1740 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1741 r = flush_timer(e, e->realtime_fd, ev_queue[i].events);
1742 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1743 r = process_signal(e, ev_queue[i].events);
1745 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

1751 r = process_timer(e, n.monotonic, e->monotonic_earliest, e->monotonic_latest);

1755 r = process_timer(e, n.realtime, e->realtime_earliest, e->realtime_latest);

1759 if (e->need_process_child) {
1760 r = process_child(e);

/* Dispatch exactly one pending source per iteration. */
1765 p = event_next_pending(e);

1771 r = source_dispatch(p);

1774 e->state = SD_EVENT_PASSIVE;

/* Run the loop until a quit source (or lack thereof) finishes it.
 * NOTE(review): error propagation and return are elided. */
1780 int sd_event_loop(sd_event *e) {

1783 assert_return(e, -EINVAL);
1784 assert_return(!event_pid_changed(e), -ECHILD);
1785 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

1789 while (e->state != SD_EVENT_FINISHED) {
1790 r = sd_event_run(e, (uint64_t) -1);

/* Return the loop state (PASSIVE/RUNNING/QUITTING/FINISHED); the return
 * statement is elided in this excerpt. */
1802 int sd_event_get_state(sd_event *e) {
1803 assert_return(e, -EINVAL);
1804 assert_return(!event_pid_changed(e), -ECHILD);
1809 int sd_event_get_quit(sd_event *e) {
1810 assert_return(e, -EINVAL);
1811 assert_return(!event_pid_changed(e), -ECHILD);
1813 return e->quit_requested;
1816 int sd_event_request_quit(sd_event *e) {
1817 assert_return(e, -EINVAL);
1818 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1819 assert_return(!event_pid_changed(e), -ECHILD);
1821 e->quit_requested = true;