1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
31 #include "time-util.h"
36 #define EPOLL_QUEUE_MAX 64
37 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
39 typedef enum EventSourceType {
49 struct sd_event_source {
54 sd_event_handler_t prepare;
56 EventSourceType type:4;
61 unsigned pending_index;
62 unsigned prepare_index;
63 unsigned pending_iteration;
64 unsigned prepare_iteration;
68 sd_event_io_handler_t callback;
75 sd_event_time_handler_t callback;
76 usec_t next, accuracy;
77 unsigned earliest_index;
78 unsigned latest_index;
81 sd_event_signal_handler_t callback;
82 struct signalfd_siginfo siginfo;
86 sd_event_child_handler_t callback;
92 sd_event_handler_t callback;
95 sd_event_handler_t callback;
112 /* For both clocks we maintain two priority queues each, one
113 * ordered for the earliest times the events may be
114 * dispatched, and one ordered by the latest times they must
115 * have been dispatched. The range between the top entries in
116 * the two prioqs is the time window we can freely schedule
118 Prioq *monotonic_earliest;
119 Prioq *monotonic_latest;
120 Prioq *realtime_earliest;
121 Prioq *realtime_latest;
123 usec_t realtime_next, monotonic_next;
127 sd_event_source **signal_sources;
129 Hashmap *child_sources;
130 unsigned n_enabled_child_sources;
137 dual_timestamp timestamp;
140 bool quit_requested:1;
141 bool need_process_child:1;
144 sd_event **default_event_ptr;
/* Prioq comparator for the "pending" queue: enabled sources sort before
 * disabled ones, then by ascending priority value, then by the iteration
 * in which they became pending (older first), with a stable tiebreaker.
 * NOTE(review): the -1/1 return statements are not visible in this excerpt. */
147 static int pending_prioq_compare(const void *a, const void *b) {
148 const sd_event_source *x = a, *y = b;
153 /* Enabled ones first */
154 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
156 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
159 /* Lower priority values first */
160 if (x->priority < y->priority)
162 if (x->priority > y->priority)
165 /* Older entries first */
166 if (x->pending_iteration < y->pending_iteration)
168 if (x->pending_iteration > y->pending_iteration)
171 /* Stability for the rest */
/* Prioq comparator for the "prepare" queue. Sources prepared in the current
 * iteration sort last so the prepare loop can stop at the first already-
 * prepared entry; otherwise enabled-first, then ascending priority. */
180 static int prepare_prioq_compare(const void *a, const void *b) {
181 const sd_event_source *x = a, *y = b;
186 /* Move most recently prepared ones last, so that we can stop
187 * preparing as soon as we hit one that has already been
188 * prepared in the current iteration */
189 if (x->prepare_iteration < y->prepare_iteration)
191 if (x->prepare_iteration > y->prepare_iteration)
194 /* Enabled ones first */
195 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
197 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
200 /* Lower priority values first */
201 if (x->priority < y->priority)
203 if (x->priority > y->priority)
206 /* Stability for the rest */
/* Comparator for the per-clock "earliest dispatch time" prioq: enabled and
 * non-pending sources float to the top, ordered by the earliest time
 * (time.next) the source may fire. Accepts either timer source type. */
215 static int earliest_time_prioq_compare(const void *a, const void *b) {
216 const sd_event_source *x = a, *y = b;
218 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
219 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
221 /* Enabled ones first */
222 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
224 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 /* Move the pending ones to the end */
228 if (!x->pending && y->pending)
230 if (x->pending && !y->pending)
234 if (x->time.next < y->time.next)
236 if (x->time.next > y->time.next)
239 /* Stability for the rest */
/* Comparator for the per-clock "latest dispatch time" prioq: orders by the
 * deadline time.next + time.accuracy by which the source must have fired.
 * NOTE(review): unlike earliest_time_prioq_compare() above, this assert
 * requires both entries to be of the SAME timer type — each latest queue
 * is strictly per-clock. The deadline sum could in principle wrap usec_t
 * for values near (usec_t) -1. */
248 static int latest_time_prioq_compare(const void *a, const void *b) {
249 const sd_event_source *x = a, *y = b;
251 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
252 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
254 /* Enabled ones first */
255 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
257 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260 /* Move the pending ones to the end */
261 if (!x->pending && y->pending)
263 if (x->pending && !y->pending)
267 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
269 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
272 /* Stability for the rest */
/* Comparator for the quit-handler prioq: enabled first, then ascending
 * priority, so dispatch_quit() can peek the most urgent quit handler. */
281 static int quit_prioq_compare(const void *a, const void *b) {
282 const sd_event_source *x = a, *y = b;
284 assert(x->type == SOURCE_QUIT);
285 assert(y->type == SOURCE_QUIT);
287 /* Enabled ones first */
288 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
290 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293 /* Lower priority values first */
294 if (x->priority < y->priority)
296 if (x->priority > y->priority)
299 /* Stability for the rest */
/* Release everything owned by the event loop: unhook the thread-default
 * pointer, close the epoll/signalfd/timerfd descriptors, free all six
 * priority queues, the per-signal source array and the child-source map. */
308 static void event_free(sd_event *e) {
311 if (e->default_event_ptr)
312 *(e->default_event_ptr) = NULL;
314 if (e->epoll_fd >= 0)
315 close_nointr_nofail(e->epoll_fd);
317 if (e->signal_fd >= 0)
318 close_nointr_nofail(e->signal_fd);
320 if (e->realtime_fd >= 0)
321 close_nointr_nofail(e->realtime_fd);
323 if (e->monotonic_fd >= 0)
324 close_nointr_nofail(e->monotonic_fd);
326 prioq_free(e->pending);
327 prioq_free(e->prepare);
328 prioq_free(e->monotonic_earliest);
329 prioq_free(e->monotonic_latest);
330 prioq_free(e->realtime_earliest);
331 prioq_free(e->realtime_latest);
334 free(e->signal_sources);
336 hashmap_free(e->child_sources);
/* Allocate a fresh event loop object: all fds start at -1, both timer
 * "next" caches at (usec_t) -1, the owning PID is recorded to detect
 * use across fork(), and the epoll instance is created CLOEXEC. */
340 _public_ int sd_event_new(sd_event** ret) {
344 assert_return(ret, -EINVAL);
346 e = new0(sd_event, 1);
351 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
352 e->realtime_next = e->monotonic_next = (usec_t) -1;
353 e->original_pid = getpid();
355 assert_se(sigemptyset(&e->sigset) == 0);
357 e->pending = prioq_new(pending_prioq_compare);
363 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
364 if (e->epoll_fd < 0) {
/* Take a reference on the event loop. */
377 _public_ sd_event* sd_event_ref(sd_event *e) {
378 assert_return(e, NULL);
380 assert(e->n_ref >= 1);
/* Drop a reference; NULL-tolerant callers rely on the assert_return above
 * the body (not visible here) in the original. */
386 _public_ sd_event* sd_event_unref(sd_event *e) {
391 assert(e->n_ref >= 1);
/* Returns true if the loop is being used from a different process than the
 * one that created it. */
400 static bool event_pid_changed(sd_event *e) {
403 /* We don't support people creating an event loop and keeping
404 * it around over a fork(). Let's complain. */
406 return e->original_pid != getpid();
/* Remove an IO source's fd from the epoll set, if currently registered. */
409 static int source_io_unregister(sd_event_source *s) {
413 assert(s->type == SOURCE_IO);
415 if (!s->io.registered)
418 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
422 s->io.registered = false;
/* (Re-)register an IO source's fd with epoll. ONESHOT enable mode maps to
 * EPOLLONESHOT; an already-registered fd is modified instead of added. */
426 static int source_io_register(
431 struct epoll_event ev = {};
435 assert(s->type == SOURCE_IO);
436 assert(enabled != SD_EVENT_OFF);
441 if (enabled == SD_EVENT_ONESHOT)
442 ev.events |= EPOLLONESHOT;
444 if (s->io.registered)
445 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
447 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
452 s->io.registered = true;
/* Tear down an event source: per-type cleanup (epoll/prioq/sigset/hashmap
 * membership), then removal from the generic pending and prepare queues,
 * and finally dropping the reference it holds on its event loop. */
457 static void source_free(sd_event_source *s) {
465 source_io_unregister(s);
469 case SOURCE_MONOTONIC:
470 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
471 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
474 case SOURCE_REALTIME:
475 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
476 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
/* Keep SIGCHLD blocked-set membership if child sources still need it. */
480 if (s->signal.sig > 0) {
481 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
482 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
484 if (s->event->signal_sources)
485 s->event->signal_sources[s->signal.sig] = NULL;
491 if (s->child.pid > 0) {
492 if (s->enabled != SD_EVENT_OFF) {
493 assert(s->event->n_enabled_child_sources > 0);
494 s->event->n_enabled_child_sources--;
/* Only drop SIGCHLD from the sigset if no explicit SIGCHLD source exists. */
497 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
498 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
500 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
510 prioq_remove(s->event->quit, s, &s->quit.prioq_index);
515 prioq_remove(s->event->pending, s, &s->pending_index);
518 prioq_remove(s->event->prepare, s, &s->prepare_index);
520 sd_event_unref(s->event);
/* Mark a source as pending (queue it, stamping the current iteration) or
 * clear pending (dequeue). Timer sources are reshuffled in their clock's
 * earliest/latest prioqs because pending-ness affects their ordering. */
526 static int source_set_pending(sd_event_source *s, bool b) {
530 assert(s->type != SOURCE_QUIT);
538 s->pending_iteration = s->event->iteration;
540 r = prioq_put(s->event->pending, s, &s->pending_index);
546 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
548 if (s->type == SOURCE_REALTIME) {
549 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
550 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
551 } else if (s->type == SOURCE_MONOTONIC) {
552 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
553 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Allocate a new event source of the given type; it references its loop
 * and starts with both generic prioq indices unset. */
559 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
564 s = new0(sd_event_source, 1);
569 s->event = sd_event_ref(e);
571 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
/* Public API: add an IO event source watching fd for the given epoll event
 * mask. The new source starts enabled (SD_EVENT_ON) and is registered with
 * epoll immediately. */
576 _public_ int sd_event_add_io(
580 sd_event_io_handler_t callback,
582 sd_event_source **ret) {
587 assert_return(e, -EINVAL);
588 assert_return(fd >= 0, -EINVAL);
589 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
590 assert_return(callback, -EINVAL);
591 assert_return(ret, -EINVAL);
592 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
593 assert_return(!event_pid_changed(e), -ECHILD);
595 s = source_new(e, SOURCE_IO);
600 s->io.events = events;
601 s->io.callback = callback;
602 s->userdata = userdata;
603 s->enabled = SD_EVENT_ON;
605 r = source_io_register(s, s->enabled, events);
/* Lazily create the per-clock timerfd, add it to epoll tagged with the
 * source type, and initialize the loop's wakeup "perturbation" offset
 * (derived from the boot ID) used by sleep_between() for coalescing. */
615 static int event_setup_timer_fd(
617 EventSourceType type,
621 struct epoll_event ev = {};
628 if (_likely_(*timer_fd >= 0))
631 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
636 ev.data.ptr = INT_TO_PTR(type);
638 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
640 close_nointr_nofail(fd);
644 /* When we sleep for longer, we try to realign the wakeup to
645 the same time within each minute/second/250ms, so that
646 events all across the system can be coalesced into a single
647 CPU wakeup. However, let's take some system-specific
648 randomness for this value, so that in a network of systems
649 with synced clocks timer events are distributed a
650 bit. Here, we calculate a perturbation usec offset from the
653 if (sd_id128_get_boot(&bootid) >= 0)
654 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
/* Common implementation for sd_event_add_monotonic/_realtime: ensures the
 * clock's earliest/latest prioqs and timerfd exist, then creates a ONESHOT
 * timer source and inserts it in both queues. An accuracy of 0 selects
 * DEFAULT_ACCURACY_USEC. */
660 static int event_add_time_internal(
662 EventSourceType type,
669 sd_event_time_handler_t callback,
671 sd_event_source **ret) {
676 assert_return(e, -EINVAL);
677 assert_return(callback, -EINVAL);
678 assert_return(ret, -EINVAL);
679 assert_return(usec != (uint64_t) -1, -EINVAL);
680 assert_return(accuracy != (uint64_t) -1, -EINVAL);
681 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
682 assert_return(!event_pid_changed(e), -ECHILD);
689 *earliest = prioq_new(earliest_time_prioq_compare);
695 *latest = prioq_new(latest_time_prioq_compare);
701 r = event_setup_timer_fd(e, type, timer_fd, id);
706 s = source_new(e, type);
711 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
712 s->time.callback = callback;
713 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
714 s->userdata = userdata;
715 s->enabled = SD_EVENT_ONESHOT;
717 r = prioq_put(*earliest, s, &s->time.earliest_index);
721 r = prioq_put(*latest, s, &s->time.latest_index);
/* Public API: add a CLOCK_MONOTONIC timer source (thin wrapper over
 * event_add_time_internal() with the monotonic fd and prioqs). */
733 _public_ int sd_event_add_monotonic(sd_event *e,
736 sd_event_time_handler_t callback,
738 sd_event_source **ret) {
740 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
/* Public API: add a CLOCK_REALTIME timer source (thin wrapper over
 * event_add_time_internal() with the realtime fd and prioqs).
 * Fix: the latest-times prioq must be &e->realtime_latest; the original
 * passed &e->monotonic_latest (copy/paste from sd_event_add_monotonic),
 * which would enqueue realtime sources on the monotonic latest queue —
 * tripping latest_time_prioq_compare()'s same-type assert and corrupting
 * the scheduling of both clocks. */
743 _public_ int sd_event_add_realtime(sd_event *e,
746 sd_event_time_handler_t callback,
748 sd_event_source **ret) {
750 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->realtime_latest, usec, accuracy, callback, userdata, ret);
/* (Re-)create or update the loop's signalfd from the current sigset; on
 * first creation the fd is also added to epoll tagged SOURCE_SIGNAL, and
 * closed again if that registration fails. */
753 static int event_update_signal_fd(sd_event *e) {
754 struct epoll_event ev = {};
760 add_to_epoll = e->signal_fd < 0;
762 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
772 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
774 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
776 close_nointr_nofail(e->signal_fd);
/* Public API: add a signal event source for sig. At most one source per
 * signal number is allowed (tracked in the signal_sources array). The
 * signalfd is only rebuilt when SIGCHLD handling isn't already active
 * via enabled child sources. */
785 _public_ int sd_event_add_signal(
788 sd_event_signal_handler_t callback,
790 sd_event_source **ret) {
795 assert_return(e, -EINVAL);
796 assert_return(sig > 0, -EINVAL);
797 assert_return(sig < _NSIG, -EINVAL);
798 assert_return(callback, -EINVAL);
799 assert_return(ret, -EINVAL);
800 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
801 assert_return(!event_pid_changed(e), -ECHILD);
803 if (!e->signal_sources) {
804 e->signal_sources = new0(sd_event_source*, _NSIG);
805 if (!e->signal_sources)
807 } else if (e->signal_sources[sig])
810 s = source_new(e, SOURCE_SIGNAL);
815 s->signal.callback = callback;
816 s->userdata = userdata;
817 s->enabled = SD_EVENT_ON;
819 e->signal_sources[sig] = s;
820 assert_se(sigaddset(&e->sigset, sig) == 0);
822 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
823 r = event_update_signal_fd(e);
/* Public API: add a child-process watch for pid with waitid() options.
 * One source per pid (tracked in child_sources). Adds SIGCHLD to the
 * sigset (rebuilding the signalfd unless an explicit SIGCHLD source
 * already covers it) and requests an immediate child sweep in case the
 * process already exited. */
834 _public_ int sd_event_add_child(
838 sd_event_child_handler_t callback,
840 sd_event_source **ret) {
845 assert_return(e, -EINVAL);
846 assert_return(pid > 1, -EINVAL);
847 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
848 assert_return(options != 0, -EINVAL);
849 assert_return(callback, -EINVAL);
850 assert_return(ret, -EINVAL);
851 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
852 assert_return(!event_pid_changed(e), -ECHILD);
854 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
858 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
861 s = source_new(e, SOURCE_CHILD);
866 s->child.options = options;
867 s->child.callback = callback;
868 s->userdata = userdata;
869 s->enabled = SD_EVENT_ONESHOT;
871 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
877 e->n_enabled_child_sources ++;
879 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
881 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
882 r = event_update_signal_fd(e);
889 e->need_process_child = true;
/* Public API: add a defer source — a ONESHOT callback that is immediately
 * marked pending and thus dispatched on the next loop iteration. */
895 _public_ int sd_event_add_defer(
897 sd_event_handler_t callback,
899 sd_event_source **ret) {
904 assert_return(e, -EINVAL);
905 assert_return(callback, -EINVAL);
906 assert_return(ret, -EINVAL);
907 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
908 assert_return(!event_pid_changed(e), -ECHILD);
910 s = source_new(e, SOURCE_DEFER);
914 s->defer.callback = callback;
915 s->userdata = userdata;
916 s->enabled = SD_EVENT_ONESHOT;
918 r = source_set_pending(s, true);
/* Public API: add a quit handler, invoked when the loop shuts down.
 * Lazily allocates the quit prioq and inserts the new ONESHOT source. */
928 _public_ int sd_event_add_quit(
930 sd_event_handler_t callback,
932 sd_event_source **ret) {
937 assert_return(e, -EINVAL);
938 assert_return(callback, -EINVAL);
939 assert_return(ret, -EINVAL);
940 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
941 assert_return(!event_pid_changed(e), -ECHILD);
944 e->quit = prioq_new(quit_prioq_compare);
949 s = source_new(e, SOURCE_QUIT);
953 s->quit.callback = callback;
954 s->userdata = userdata;
955 s->quit.prioq_index = PRIOQ_IDX_NULL;
956 s->enabled = SD_EVENT_ONESHOT;
958 r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
/* Take a reference on an event source. */
968 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
969 assert_return(s, NULL);
971 assert(s->n_ref >= 1);
/* Drop a reference on an event source. */
977 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
982 assert(s->n_ref >= 1);
/* Return the event loop a source belongs to. */
991 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
992 assert_return(s, NULL);
/* Return whether the source is currently pending; quit sources have no
 * pending state, hence -EDOM. */
997 _public_ int sd_event_source_get_pending(sd_event_source *s) {
998 assert_return(s, -EINVAL);
999 assert_return(s->type != SOURCE_QUIT, -EDOM);
1000 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1001 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Return the fd an IO source watches. */
1006 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1007 assert_return(s, -EINVAL);
1008 assert_return(s->type == SOURCE_IO, -EDOM);
1009 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Return the epoll event mask an IO source was configured with. */
1014 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1015 assert_return(s, -EINVAL);
1016 assert_return(events, -EINVAL);
1017 assert_return(s->type == SOURCE_IO, -EDOM);
1018 assert_return(!event_pid_changed(s->event), -ECHILD);
1020 *events = s->io.events;
/* Change the epoll event mask of an IO source. Re-registers with epoll
 * if the source is enabled, then clears any stale pending state. */
1024 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1027 assert_return(s, -EINVAL);
1028 assert_return(s->type == SOURCE_IO, -EDOM);
1029 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1030 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1031 assert_return(!event_pid_changed(s->event), -ECHILD);
1033 if (s->io.events == events)
1036 if (s->enabled != SD_EVENT_OFF) {
1037 r = source_io_register(s, s->enabled, events);
1042 s->io.events = events;
1043 source_set_pending(s, false);
/* Return the events actually seen for a pending IO source. */
1048 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1049 assert_return(s, -EINVAL);
1050 assert_return(revents, -EINVAL);
1051 assert_return(s->type == SOURCE_IO, -EDOM);
1052 assert_return(s->pending, -ENODATA);
1053 assert_return(!event_pid_changed(s->event), -ECHILD);
1055 *revents = s->io.revents;
/* Return the signal number a signal source watches. */
1059 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1060 assert_return(s, -EINVAL);
1061 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1062 assert_return(!event_pid_changed(s->event), -ECHILD);
1064 return s->signal.sig;
/* Read a source's dispatch priority. */
1067 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1068 assert_return(s, -EINVAL);
1069 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Change a source's priority; reshuffles every prioq whose ordering
 * depends on priority (pending, prepare, quit). */
1074 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1075 assert_return(s, -EINVAL);
1076 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1077 assert_return(!event_pid_changed(s->event), -ECHILD);
1079 if (s->priority == priority)
1082 s->priority = priority;
1085 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1088 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1090 if (s->type == SOURCE_QUIT)
1091 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
/* Read a source's enable state (SD_EVENT_ON/OFF/ONESHOT). */
1096 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1097 assert_return(s, -EINVAL);
1098 assert_return(m, -EINVAL);
1099 assert_return(!event_pid_changed(s->event), -ECHILD);
/* Change a source's enable state. Disabling unregisters IO sources from
 * epoll, reshuffles timer prioqs, and removes signals from the sigset /
 * decrements the enabled-child count (rebuilding the signalfd as needed);
 * enabling performs the mirror-image operations. Finally the generic
 * pending/prepare queues are reshuffled since ordering depends on the
 * enable state. */
1105 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1108 assert_return(s, -EINVAL);
1109 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1110 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1111 assert_return(!event_pid_changed(s->event), -ECHILD);
1113 if (s->enabled == m)
1116 if (m == SD_EVENT_OFF) {
/* --- disabling --- */
1121 r = source_io_unregister(s);
1128 case SOURCE_MONOTONIC:
1130 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1131 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1134 case SOURCE_REALTIME:
1136 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1137 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
/* Keep SIGCHLD in the sigset while enabled child sources still need it. */
1142 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1143 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1144 event_update_signal_fd(s->event);
1152 assert(s->event->n_enabled_child_sources > 0);
1153 s->event->n_enabled_child_sources--;
1155 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1156 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1157 event_update_signal_fd(s->event);
1164 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
/* --- enabling (ON or ONESHOT) --- */
1176 r = source_io_register(s, m, s->io.events);
1183 case SOURCE_MONOTONIC:
1185 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1186 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1189 case SOURCE_REALTIME:
1191 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1192 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1198 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1199 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1200 event_update_signal_fd(s->event);
1207 if (s->enabled == SD_EVENT_OFF) {
1208 s->event->n_enabled_child_sources++;
1210 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1211 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1212 event_update_signal_fd(s->event);
1219 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1229 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1232 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
/* Read a timer source's next elapse time. */
1237 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1238 assert_return(s, -EINVAL);
1239 assert_return(usec, -EINVAL);
1240 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1241 assert_return(!event_pid_changed(s->event), -ECHILD);
1243 *usec = s->time.next;
/* Re-arm a timer source to a new absolute time; clears pending state and
 * reshuffles both of the clock's prioqs since the ordering key changed. */
1247 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1248 assert_return(s, -EINVAL);
1249 assert_return(usec != (uint64_t) -1, -EINVAL);
1250 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1251 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1252 assert_return(!event_pid_changed(s->event), -ECHILD);
1254 s->time.next = usec;
1256 source_set_pending(s, false);
1258 if (s->type == SOURCE_REALTIME) {
1259 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1260 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1262 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1263 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Read a timer source's accuracy (coalescing window). */
1269 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1270 assert_return(s, -EINVAL);
1271 assert_return(usec, -EINVAL);
1272 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1273 assert_return(!event_pid_changed(s->event), -ECHILD);
1275 *usec = s->time.accuracy;
/* Change a timer source's accuracy (0 selects the default); only the
 * "latest" prioq depends on accuracy, so only it is reshuffled. */
1279 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1280 assert_return(s, -EINVAL);
1281 assert_return(usec != (uint64_t) -1, -EINVAL);
1282 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1283 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1284 assert_return(!event_pid_changed(s->event), -ECHILD);
1287 usec = DEFAULT_ACCURACY_USEC;
1289 s->time.accuracy = usec;
1291 source_set_pending(s, false);
1293 if (s->type == SOURCE_REALTIME)
1294 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1296 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
/* Read the PID a child source watches. */
1301 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1302 assert_return(s, -EINVAL);
1303 assert_return(pid, -EINVAL);
1304 assert_return(s->type == SOURCE_CHILD, -EDOM);
1305 assert_return(!event_pid_changed(s->event), -ECHILD);
1307 *pid = s->child.pid;
/* Install (or replace/remove) a per-source prepare callback, run before
 * each poll. First installation allocates the prepare prioq and inserts
 * the source; replacing an existing callback only swaps the pointer;
 * removal (visible as the prioq_remove below) drops it from the queue. */
1311 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1314 assert_return(s, -EINVAL);
1315 assert_return(s->type != SOURCE_QUIT, -EDOM);
1316 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1317 assert_return(!event_pid_changed(s->event), -ECHILD);
1319 if (s->prepare == callback)
1322 if (callback && s->prepare) {
1323 s->prepare = callback;
1327 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1331 s->prepare = callback;
1334 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1338 prioq_remove(s->event->prepare, s, &s->prepare_index);
/* Return the opaque userdata registered with the source. */
1343 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1344 assert_return(s, NULL);
/* Pick a wakeup time in [a, b]: prefer the per-minute, then per-second,
 * then per-250ms system-wide coalescing slot (offset by the boot-ID
 * perturbation), falling back to the latest permissible time b. */
1349 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1361 Find a good time to wake up again between times a and b. We
1362 have two goals here:
1364 a) We want to wake up as seldom as possible, hence prefer
1365 later times over earlier times.
1367 b) But if we have to wake up, then let's make sure to
1368 dispatch as much as possible on the entire system.
1370 We implement this by waking up everywhere at the same time
1371 within any given minute if we can, synchronised via the
1372 perturbation value determined from the boot ID. If we can't,
1373 then we try to find the same spot in every 1s and then 250ms
1374 step. Otherwise, we pick the last possible time to wake up.
1377 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1379 if (_unlikely_(c < USEC_PER_MINUTE))
1382 c -= USEC_PER_MINUTE;
1388 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1390 if (_unlikely_(c < USEC_PER_SEC))
1399 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1401 if (_unlikely_(c < USEC_PER_MSEC*250))
1404 c -= USEC_PER_MSEC*250;
/* Program a clock's timerfd from its earliest/latest prioqs: disarm (via
 * a zeroed itimerspec) when nothing is enabled, otherwise arm at the
 * coalesced time chosen by sleep_between(), caching it in *next to skip
 * redundant timerfd_settime() calls. */
1413 static int event_arm_timer(
1420 struct itimerspec its = {};
1421 sd_event_source *a, *b;
1428 a = prioq_peek(earliest);
1429 if (!a || a->enabled == SD_EVENT_OFF) {
1434 if (*next == (usec_t) -1)
1438 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1442 *next = (usec_t) -1;
1447 b = prioq_peek(latest);
1448 assert_se(b && b->enabled != SD_EVENT_OFF);
1450 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1454 assert_se(timer_fd >= 0);
1457 /* We don't want to disarm here, just mean some time looooong ago. */
1458 its.it_value.tv_sec = 0;
1459 its.it_value.tv_nsec = 1;
1461 timespec_store(&its.it_value, t);
1463 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
/* Record the epoll revents on an IO source and mark it pending. */
1471 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1474 assert(s->type == SOURCE_IO);
1476 s->io.revents = events;
1478 return source_set_pending(s, true);
/* Drain a fired timerfd (reading its expiration counter) and invalidate
 * the cached next-elapse time; EAGAIN/EINTR are tolerated. */
1481 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1489 assert_return(events == EPOLLIN, -EIO);
1491 ss = read(fd, &x, sizeof(x));
1493 if (errno == EAGAIN || errno == EINTR)
1499 if (ss != sizeof(x))
1502 *next = (usec_t) -1;
/* Mark every elapsed, enabled, non-pending timer source at the top of the
 * earliest queue as pending, reshuffling both prioqs as each is taken. */
1507 static int process_timer(
1519 s = prioq_peek(earliest);
1522 s->enabled == SD_EVENT_OFF ||
1526 r = source_set_pending(s, true);
1530 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1531 prioq_reshuffle(latest, s, &s->time.latest_index);
/* Sweep all registered child sources with non-reaping waitid(P_PID,
 * WNOHANG) calls, marking those whose child has state to report as
 * pending. Clears the need_process_child flag first. */
1537 static int process_child(sd_event *e) {
1544 e->need_process_child = false;
1547 So, this is ugly. We iteratively invoke waitid() with P_PID
1548 + WNOHANG for each PID we wait for, instead of using
1549 P_ALL. This is because we only want to get child
1550 information of very specific child processes, and not all
1551 of them. We might not have processed the SIGCHLD even of a
1552 previous invocation and we don't want to maintain a
1553 unbounded *per-child* event queue, hence we really don't
1554 want anything flushed out of the kernel's queue that we
1555 don't care about. Since this is O(n) this means that if you
1556 have a lot of processes you probably want to handle SIGCHLD
1560 HASHMAP_FOREACH(s, e->child_sources, i) {
1561 assert(s->type == SOURCE_CHILD);
1566 if (s->enabled == SD_EVENT_OFF)
1569 zero(s->child.siginfo);
1570 r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
/* si_pid stays 0 when the child had nothing to report. */
1574 if (s->child.siginfo.si_pid != 0) {
1575 r = source_set_pending(s, true);
/* Drain the signalfd: for each received siginfo, look up the matching
 * signal source; SIGCHLD additionally triggers a child sweep. Matching
 * sources get the siginfo stored and are marked pending. */
1584 static int process_signal(sd_event *e, uint32_t events) {
1585 bool read_one = false;
1589 assert(e->signal_sources);
1591 assert_return(events == EPOLLIN, -EIO);
1594 struct signalfd_siginfo si;
1598 ss = read(e->signal_fd, &si, sizeof(si));
1600 if (errno == EAGAIN || errno == EINTR)
1606 if (ss != sizeof(si))
1611 s = e->signal_sources[si.ssi_signo];
1612 if (si.ssi_signo == SIGCHLD) {
1613 r = process_child(e);
1622 s->signal.siginfo = si;
1623 r = source_set_pending(s, true);
/* Invoke a source's callback. Non-defer/non-quit sources are un-pended
 * first; ONESHOT sources are disabled before the callback runs. The
 * source is kept alive across the callback with a ref/unref pair. */
1632 static int source_dispatch(sd_event_source *s) {
1636 assert(s->pending || s->type == SOURCE_QUIT);
1638 if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1639 r = source_set_pending(s, false);
1644 if (s->enabled == SD_EVENT_ONESHOT) {
1645 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1650 sd_event_source_ref(s);
1655 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1658 case SOURCE_MONOTONIC:
1659 r = s->time.callback(s, s->time.next, s->userdata);
1662 case SOURCE_REALTIME:
1663 r = s->time.callback(s, s->time.next, s->userdata);
1667 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1671 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1675 r = s->defer.callback(s, s->userdata);
1679 r = s->quit.callback(s, s->userdata);
1683 sd_event_source_unref(s);
/* Run prepare callbacks for sources not yet prepared this iteration,
 * peeking from the prepare prioq (ordering guarantees we can stop at the
 * first already-prepared or disabled entry). */
1688 static int event_prepare(sd_event *e) {
1696 s = prioq_peek(e->prepare);
1697 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
1700 s->prepare_iteration = e->iteration;
1701 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1706 r = s->prepare(s, s->userdata);
/* Dispatch the top quit handler; with none enabled the loop transitions
 * straight to FINISHED, otherwise QUITTING during dispatch, then back to
 * PASSIVE. */
1715 static int dispatch_quit(sd_event *e) {
1721 p = prioq_peek(e->quit);
1722 if (!p || p->enabled == SD_EVENT_OFF) {
1723 e->state = SD_EVENT_FINISHED;
1729 e->state = SD_EVENT_QUITTING;
1731 r = source_dispatch(p);
1733 e->state = SD_EVENT_PASSIVE;
/* Peek the next enabled pending source, or NULL when nothing to do. */
1739 static sd_event_source* event_next_pending(sd_event *e) {
1744 p = prioq_peek(e->pending);
1748 if (p->enabled == SD_EVENT_OFF)
/* Run one loop iteration: honor a requested quit, run prepare callbacks,
 * arm both clock timerfds, poll epoll (0 timeout if work is already
 * pending), timestamp the wakeup, demultiplex events to timer/signal/IO
 * handlers, sweep children if flagged, and dispatch one pending source.
 * `timeout` is in usec; (uint64_t) -1 blocks indefinitely. */
1754 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1755 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1759 assert_return(e, -EINVAL);
1760 assert_return(!event_pid_changed(e), -ECHILD);
1761 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1762 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1764 if (e->quit_requested)
1765 return dispatch_quit(e);
1769 e->state = SD_EVENT_RUNNING;
1771 r = event_prepare(e);
1775 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1779 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
1783 if (event_next_pending(e) || e->need_process_child)
/* usec timeout is rounded UP to ms for epoll_wait(). */
1786 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1787 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
1789 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
1793 dual_timestamp_get(&e->timestamp);
1795 for (i = 0; i < m; i++) {
/* Sentinel data.ptr values mark the internal fds; anything else is an IO source. */
1797 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1798 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1799 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1800 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1801 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1802 r = process_signal(e, ev_queue[i].events);
1804 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
1810 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
1814 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
1818 if (e->need_process_child) {
1819 r = process_child(e);
1824 p = event_next_pending(e);
1830 r = source_dispatch(p);
1833 e->state = SD_EVENT_PASSIVE;
/* Iterate sd_event_run() with an infinite timeout until FINISHED. */
1839 _public_ int sd_event_loop(sd_event *e) {
1842 assert_return(e, -EINVAL);
1843 assert_return(!event_pid_changed(e), -ECHILD);
1844 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1848 while (e->state != SD_EVENT_FINISHED) {
1849 r = sd_event_run(e, (uint64_t) -1);
/* Return the loop's current state. */
1861 _public_ int sd_event_get_state(sd_event *e) {
1862 assert_return(e, -EINVAL);
1863 assert_return(!event_pid_changed(e), -ECHILD);
/* Return whether a quit has been requested. */
1868 _public_ int sd_event_get_quit(sd_event *e) {
1869 assert_return(e, -EINVAL);
1870 assert_return(!event_pid_changed(e), -ECHILD);
1872 return e->quit_requested;
/* Request loop termination; honored at the start of the next iteration. */
1875 _public_ int sd_event_request_quit(sd_event *e) {
1876 assert_return(e, -EINVAL);
1877 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1878 assert_return(!event_pid_changed(e), -ECHILD);
1880 e->quit_requested = true;
/* Return the CLOCK_REALTIME timestamp of the current loop wakeup;
 * -ENODATA before the first wakeup. */
1884 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
1885 assert_return(e, -EINVAL);
1886 assert_return(usec, -EINVAL);
1887 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1888 assert_return(!event_pid_changed(e), -ECHILD);
1890 *usec = e->timestamp.realtime;
/* Return the CLOCK_MONOTONIC timestamp of the current loop wakeup. */
1894 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
1895 assert_return(e, -EINVAL);
1896 assert_return(usec, -EINVAL);
1897 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1898 assert_return(!event_pid_changed(e), -ECHILD);
1900 *usec = e->timestamp.monotonic;
/* Return (creating on first use) the calling thread's default event loop;
 * the loop stores a back-pointer so event_free() can clear the slot. With
 * a NULL ret this just reports whether a default loop exists. */
1904 _public_ int sd_event_default(sd_event **ret) {
1906 static __thread sd_event *default_event = NULL;
1911 return !!default_event;
1913 if (default_event) {
1914 *ret = sd_event_ref(default_event);
1918 r = sd_event_new(&e);
1922 e->default_event_ptr = &default_event;
1930 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
1931 assert_return(e, -EINVAL);
1932 assert_return(tid, -EINVAL);
1933 assert_return(!event_pid_changed(e), -ECHILD);