1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
31 #include "time-util.h"
36 #define EPOLL_QUEUE_MAX 64
37 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
39 typedef enum EventSourceType {
49 struct sd_event_source {
54 sd_event_handler_t prepare;
56 EventSourceType type:4;
61 unsigned pending_index;
62 unsigned prepare_index;
63 unsigned pending_iteration;
64 unsigned prepare_iteration;
68 sd_event_io_handler_t callback;
75 sd_event_time_handler_t callback;
76 usec_t next, accuracy;
77 unsigned earliest_index;
78 unsigned latest_index;
81 sd_event_signal_handler_t callback;
82 struct signalfd_siginfo siginfo;
86 sd_event_child_handler_t callback;
92 sd_event_handler_t callback;
95 sd_event_handler_t callback;
112 /* For both clocks we maintain two priority queues each, one
113 * ordered for the earliest times the events may be
114 * dispatched, and one ordered by the latest times they must
115 * have been dispatched. The range between the top entries in
116 * the two prioqs is the time window we can freely schedule
118 Prioq *monotonic_earliest;
119 Prioq *monotonic_latest;
120 Prioq *realtime_earliest;
121 Prioq *realtime_latest;
123 usec_t realtime_next, monotonic_next;
127 sd_event_source **signal_sources;
129 Hashmap *child_sources;
130 unsigned n_enabled_child_sources;
137 dual_timestamp timestamp;
140 bool quit_requested:1;
141 bool need_process_child:1;
144 sd_event **default_event_ptr;
147 static int pending_prioq_compare(const void *a, const void *b) {
148 const sd_event_source *x = a, *y = b;
153 /* Enabled ones first */
154 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
156 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
159 /* Lower priority values first */
160 if (x->priority < y->priority)
162 if (x->priority > y->priority)
165 /* Older entries first */
166 if (x->pending_iteration < y->pending_iteration)
168 if (x->pending_iteration > y->pending_iteration)
171 /* Stability for the rest */
180 static int prepare_prioq_compare(const void *a, const void *b) {
181 const sd_event_source *x = a, *y = b;
186 /* Move most recently prepared ones last, so that we can stop
187 * preparing as soon as we hit one that has already been
188 * prepared in the current iteration */
189 if (x->prepare_iteration < y->prepare_iteration)
191 if (x->prepare_iteration > y->prepare_iteration)
194 /* Enabled ones first */
195 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
197 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
200 /* Lower priority values first */
201 if (x->priority < y->priority)
203 if (x->priority > y->priority)
206 /* Stability for the rest */
215 static int earliest_time_prioq_compare(const void *a, const void *b) {
216 const sd_event_source *x = a, *y = b;
218 assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
219 assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
221 /* Enabled ones first */
222 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
224 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 /* Move the pending ones to the end */
228 if (!x->pending && y->pending)
230 if (x->pending && !y->pending)
234 if (x->time.next < y->time.next)
236 if (x->time.next > y->time.next)
239 /* Stability for the rest */
248 static int latest_time_prioq_compare(const void *a, const void *b) {
249 const sd_event_source *x = a, *y = b;
251 assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
252 (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
254 /* Enabled ones first */
255 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
257 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
260 /* Move the pending ones to the end */
261 if (!x->pending && y->pending)
263 if (x->pending && !y->pending)
267 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
269 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
272 /* Stability for the rest */
281 static int quit_prioq_compare(const void *a, const void *b) {
282 const sd_event_source *x = a, *y = b;
284 assert(x->type == SOURCE_QUIT);
285 assert(y->type == SOURCE_QUIT);
287 /* Enabled ones first */
288 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
290 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
293 /* Lower priority values first */
294 if (x->priority < y->priority)
296 if (x->priority > y->priority)
299 /* Stability for the rest */
308 static void event_free(sd_event *e) {
311 if (e->default_event_ptr)
312 *(e->default_event_ptr) = NULL;
314 if (e->epoll_fd >= 0)
315 close_nointr_nofail(e->epoll_fd);
317 if (e->signal_fd >= 0)
318 close_nointr_nofail(e->signal_fd);
320 if (e->realtime_fd >= 0)
321 close_nointr_nofail(e->realtime_fd);
323 if (e->monotonic_fd >= 0)
324 close_nointr_nofail(e->monotonic_fd);
326 prioq_free(e->pending);
327 prioq_free(e->prepare);
328 prioq_free(e->monotonic_earliest);
329 prioq_free(e->monotonic_latest);
330 prioq_free(e->realtime_earliest);
331 prioq_free(e->realtime_latest);
334 free(e->signal_sources);
336 hashmap_free(e->child_sources);
340 _public_ int sd_event_new(sd_event** ret) {
344 assert_return(ret, -EINVAL);
346 e = new0(sd_event, 1);
351 e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
352 e->realtime_next = e->monotonic_next = (usec_t) -1;
353 e->original_pid = getpid();
355 assert_se(sigemptyset(&e->sigset) == 0);
357 e->pending = prioq_new(pending_prioq_compare);
363 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
364 if (e->epoll_fd < 0) {
377 _public_ sd_event* sd_event_ref(sd_event *e) {
378 assert_return(e, NULL);
380 assert(e->n_ref >= 1);
386 _public_ sd_event* sd_event_unref(sd_event *e) {
387 assert_return(e, NULL);
389 assert(e->n_ref >= 1);
398 static bool event_pid_changed(sd_event *e) {
401 /* We don't support people creating am event loop and keeping
402 * it around over a fork(). Let's complain. */
404 return e->original_pid != getpid();
407 static int source_io_unregister(sd_event_source *s) {
411 assert(s->type == SOURCE_IO);
413 if (!s->io.registered)
416 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
420 s->io.registered = false;
424 static int source_io_register(
429 struct epoll_event ev = {};
433 assert(s->type == SOURCE_IO);
434 assert(enabled != SD_EVENT_OFF);
439 if (enabled == SD_EVENT_ONESHOT)
440 ev.events |= EPOLLONESHOT;
442 if (s->io.registered)
443 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
445 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
450 s->io.registered = true;
455 static void source_free(sd_event_source *s) {
463 source_io_unregister(s);
467 case SOURCE_MONOTONIC:
468 prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
469 prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
472 case SOURCE_REALTIME:
473 prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
474 prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
478 if (s->signal.sig > 0) {
479 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
480 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
482 if (s->event->signal_sources)
483 s->event->signal_sources[s->signal.sig] = NULL;
489 if (s->child.pid > 0) {
490 if (s->enabled != SD_EVENT_OFF) {
491 assert(s->event->n_enabled_child_sources > 0);
492 s->event->n_enabled_child_sources--;
495 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
496 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
498 hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
508 prioq_remove(s->event->quit, s, &s->quit.prioq_index);
513 prioq_remove(s->event->pending, s, &s->pending_index);
516 prioq_remove(s->event->prepare, s, &s->prepare_index);
518 sd_event_unref(s->event);
524 static int source_set_pending(sd_event_source *s, bool b) {
528 assert(s->type != SOURCE_QUIT);
536 s->pending_iteration = s->event->iteration;
538 r = prioq_put(s->event->pending, s, &s->pending_index);
544 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
546 if (s->type == SOURCE_REALTIME) {
547 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
548 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
549 } else if (s->type == SOURCE_MONOTONIC) {
550 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
551 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
557 static sd_event_source *source_new(sd_event *e, EventSourceType type) {
562 s = new0(sd_event_source, 1);
567 s->event = sd_event_ref(e);
569 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
574 _public_ int sd_event_add_io(
578 sd_event_io_handler_t callback,
580 sd_event_source **ret) {
585 assert_return(e, -EINVAL);
586 assert_return(fd >= 0, -EINVAL);
587 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
588 assert_return(callback, -EINVAL);
589 assert_return(ret, -EINVAL);
590 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
591 assert_return(!event_pid_changed(e), -ECHILD);
593 s = source_new(e, SOURCE_IO);
598 s->io.events = events;
599 s->io.callback = callback;
600 s->userdata = userdata;
601 s->enabled = SD_EVENT_ON;
603 r = source_io_register(s, s->enabled, events);
613 static int event_setup_timer_fd(
615 EventSourceType type,
619 struct epoll_event ev = {};
626 if (_likely_(*timer_fd >= 0))
629 fd = timerfd_create(id, TFD_NONBLOCK|TFD_CLOEXEC);
634 ev.data.ptr = INT_TO_PTR(type);
636 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
638 close_nointr_nofail(fd);
642 /* When we sleep for longer, we try to realign the wakeup to
643 the same time wihtin each second, so that events all across
644 the system can be coalesced into a single CPU
645 wakeup. However, let's take some system-specific randomness
646 for this value, so that in a network of systems with synced
647 clocks timer events are distributed a bit. Here, we
648 calculate a perturbation usec offset from the boot ID. */
650 if (sd_id128_get_boot(&bootid) >= 0)
651 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;
657 static int event_add_time_internal(
659 EventSourceType type,
666 sd_event_time_handler_t callback,
668 sd_event_source **ret) {
673 assert_return(e, -EINVAL);
674 assert_return(callback, -EINVAL);
675 assert_return(ret, -EINVAL);
676 assert_return(usec != (uint64_t) -1, -EINVAL);
677 assert_return(accuracy != (uint64_t) -1, -EINVAL);
678 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
679 assert_return(!event_pid_changed(e), -ECHILD);
686 *earliest = prioq_new(earliest_time_prioq_compare);
692 *latest = prioq_new(latest_time_prioq_compare);
698 r = event_setup_timer_fd(e, type, timer_fd, id);
703 s = source_new(e, type);
708 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
709 s->time.callback = callback;
710 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
711 s->userdata = userdata;
712 s->enabled = SD_EVENT_ONESHOT;
714 r = prioq_put(*earliest, s, &s->time.earliest_index);
718 r = prioq_put(*latest, s, &s->time.latest_index);
730 _public_ int sd_event_add_monotonic(sd_event *e,
733 sd_event_time_handler_t callback,
735 sd_event_source **ret) {
737 return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
740 _public_ int sd_event_add_realtime(sd_event *e,
743 sd_event_time_handler_t callback,
745 sd_event_source **ret) {
747 return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
750 static int event_update_signal_fd(sd_event *e) {
751 struct epoll_event ev = {};
757 add_to_epoll = e->signal_fd < 0;
759 r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
769 ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
771 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
773 close_nointr_nofail(e->signal_fd);
782 _public_ int sd_event_add_signal(
785 sd_event_signal_handler_t callback,
787 sd_event_source **ret) {
792 assert_return(e, -EINVAL);
793 assert_return(sig > 0, -EINVAL);
794 assert_return(sig < _NSIG, -EINVAL);
795 assert_return(callback, -EINVAL);
796 assert_return(ret, -EINVAL);
797 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
798 assert_return(!event_pid_changed(e), -ECHILD);
800 if (!e->signal_sources) {
801 e->signal_sources = new0(sd_event_source*, _NSIG);
802 if (!e->signal_sources)
804 } else if (e->signal_sources[sig])
807 s = source_new(e, SOURCE_SIGNAL);
812 s->signal.callback = callback;
813 s->userdata = userdata;
814 s->enabled = SD_EVENT_ON;
816 e->signal_sources[sig] = s;
817 assert_se(sigaddset(&e->sigset, sig) == 0);
819 if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
820 r = event_update_signal_fd(e);
831 _public_ int sd_event_add_child(
835 sd_event_child_handler_t callback,
837 sd_event_source **ret) {
842 assert_return(e, -EINVAL);
843 assert_return(pid > 1, -EINVAL);
844 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
845 assert_return(options != 0, -EINVAL);
846 assert_return(callback, -EINVAL);
847 assert_return(ret, -EINVAL);
848 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
849 assert_return(!event_pid_changed(e), -ECHILD);
851 r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
855 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
858 s = source_new(e, SOURCE_CHILD);
863 s->child.options = options;
864 s->child.callback = callback;
865 s->userdata = userdata;
866 s->enabled = SD_EVENT_ONESHOT;
868 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
874 e->n_enabled_child_sources ++;
876 assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
878 if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
879 r = event_update_signal_fd(e);
886 e->need_process_child = true;
892 _public_ int sd_event_add_defer(
894 sd_event_handler_t callback,
896 sd_event_source **ret) {
901 assert_return(e, -EINVAL);
902 assert_return(callback, -EINVAL);
903 assert_return(ret, -EINVAL);
904 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
905 assert_return(!event_pid_changed(e), -ECHILD);
907 s = source_new(e, SOURCE_DEFER);
911 s->defer.callback = callback;
912 s->userdata = userdata;
913 s->enabled = SD_EVENT_ONESHOT;
915 r = source_set_pending(s, true);
925 _public_ int sd_event_add_quit(
927 sd_event_handler_t callback,
929 sd_event_source **ret) {
934 assert_return(e, -EINVAL);
935 assert_return(callback, -EINVAL);
936 assert_return(ret, -EINVAL);
937 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
938 assert_return(!event_pid_changed(e), -ECHILD);
941 e->quit = prioq_new(quit_prioq_compare);
946 s = source_new(e, SOURCE_QUIT);
950 s->quit.callback = callback;
951 s->userdata = userdata;
952 s->quit.prioq_index = PRIOQ_IDX_NULL;
953 s->enabled = SD_EVENT_ONESHOT;
955 r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
965 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
966 assert_return(s, NULL);
968 assert(s->n_ref >= 1);
974 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
975 assert_return(s, NULL);
977 assert(s->n_ref >= 1);
986 _public_ sd_event *sd_event_get(sd_event_source *s) {
987 assert_return(s, NULL);
992 _public_ int sd_event_source_get_pending(sd_event_source *s) {
993 assert_return(s, -EINVAL);
994 assert_return(s->type != SOURCE_QUIT, -EDOM);
995 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
996 assert_return(!event_pid_changed(s->event), -ECHILD);
1001 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1002 assert_return(s, -EINVAL);
1003 assert_return(s->type == SOURCE_IO, -EDOM);
1004 assert_return(!event_pid_changed(s->event), -ECHILD);
1009 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1010 assert_return(s, -EINVAL);
1011 assert_return(events, -EINVAL);
1012 assert_return(s->type == SOURCE_IO, -EDOM);
1013 assert_return(!event_pid_changed(s->event), -ECHILD);
1015 *events = s->io.events;
1019 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1022 assert_return(s, -EINVAL);
1023 assert_return(s->type == SOURCE_IO, -EDOM);
1024 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
1025 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1026 assert_return(!event_pid_changed(s->event), -ECHILD);
1028 if (s->io.events == events)
1031 if (s->enabled != SD_EVENT_OFF) {
1032 r = source_io_register(s, s->enabled, events);
1037 s->io.events = events;
1038 source_set_pending(s, false);
1043 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1044 assert_return(s, -EINVAL);
1045 assert_return(revents, -EINVAL);
1046 assert_return(s->type == SOURCE_IO, -EDOM);
1047 assert_return(s->pending, -ENODATA);
1048 assert_return(!event_pid_changed(s->event), -ECHILD);
1050 *revents = s->io.revents;
1054 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1055 assert_return(s, -EINVAL);
1056 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1057 assert_return(!event_pid_changed(s->event), -ECHILD);
1059 return s->signal.sig;
1062 _public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
1063 assert_return(s, -EINVAL);
1064 assert_return(!event_pid_changed(s->event), -ECHILD);
1069 _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
1070 assert_return(s, -EINVAL);
1071 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1072 assert_return(!event_pid_changed(s->event), -ECHILD);
1074 if (s->priority == priority)
1077 s->priority = priority;
1080 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1083 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1085 if (s->type == SOURCE_QUIT)
1086 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1091 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1092 assert_return(s, -EINVAL);
1093 assert_return(m, -EINVAL);
1094 assert_return(!event_pid_changed(s->event), -ECHILD);
1100 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1103 assert_return(s, -EINVAL);
1104 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1105 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1106 assert_return(!event_pid_changed(s->event), -ECHILD);
1108 if (s->enabled == m)
1111 if (m == SD_EVENT_OFF) {
1116 r = source_io_unregister(s);
1123 case SOURCE_MONOTONIC:
1125 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1126 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1129 case SOURCE_REALTIME:
1131 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1132 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1137 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1138 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
1139 event_update_signal_fd(s->event);
1147 assert(s->event->n_enabled_child_sources > 0);
1148 s->event->n_enabled_child_sources--;
1150 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1151 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
1152 event_update_signal_fd(s->event);
1159 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1171 r = source_io_register(s, m, s->io.events);
1178 case SOURCE_MONOTONIC:
1180 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1181 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1184 case SOURCE_REALTIME:
1186 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1187 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1193 if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
1194 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
1195 event_update_signal_fd(s->event);
1202 if (s->enabled == SD_EVENT_OFF) {
1203 s->event->n_enabled_child_sources++;
1205 if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
1206 assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
1207 event_update_signal_fd(s->event);
1214 prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
1224 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1227 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1232 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1233 assert_return(s, -EINVAL);
1234 assert_return(usec, -EINVAL);
1235 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1236 assert_return(!event_pid_changed(s->event), -ECHILD);
1238 *usec = s->time.next;
1242 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1243 assert_return(s, -EINVAL);
1244 assert_return(usec != (uint64_t) -1, -EINVAL);
1245 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1246 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1247 assert_return(!event_pid_changed(s->event), -ECHILD);
1249 s->time.next = usec;
1251 source_set_pending(s, false);
1253 if (s->type == SOURCE_REALTIME) {
1254 prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
1255 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1257 prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
1258 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1264 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1265 assert_return(s, -EINVAL);
1266 assert_return(usec, -EINVAL);
1267 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1268 assert_return(!event_pid_changed(s->event), -ECHILD);
1270 *usec = s->time.accuracy;
1274 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1275 assert_return(s, -EINVAL);
1276 assert_return(usec != (uint64_t) -1, -EINVAL);
1277 assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
1278 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1279 assert_return(!event_pid_changed(s->event), -ECHILD);
1282 usec = DEFAULT_ACCURACY_USEC;
1284 s->time.accuracy = usec;
1286 source_set_pending(s, false);
1288 if (s->type == SOURCE_REALTIME)
1289 prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
1291 prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
1296 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1297 assert_return(s, -EINVAL);
1298 assert_return(pid, -EINVAL);
1299 assert_return(s->type == SOURCE_CHILD, -EDOM);
1300 assert_return(!event_pid_changed(s->event), -ECHILD);
1302 *pid = s->child.pid;
1306 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1309 assert_return(s, -EINVAL);
1310 assert_return(s->type != SOURCE_QUIT, -EDOM);
1311 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1312 assert_return(!event_pid_changed(s->event), -ECHILD);
1314 if (s->prepare == callback)
1317 if (callback && s->prepare) {
1318 s->prepare = callback;
1322 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1326 s->prepare = callback;
1329 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1333 prioq_remove(s->event->prepare, s, &s->prepare_index);
1338 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1339 assert_return(s, NULL);
1344 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1356 Find a good time to wake up again between times a and b. We
1357 have two goals here:
1359 a) We want to wake up as seldom as possible, hence prefer
1360 later times over earlier times.
1362 b) But if we have to wake up, then let's make sure to
1363 dispatch as much as possible on the entire system.
1365 We implement this by waking up everywhere at the same time
1366 within any given second if we can, synchronised via the
1367 perturbation value determined from the boot ID. If we can't,
1368 then we try to find the same spot in every a 250ms
1369 step. Otherwise, we pick the last possible time to wake up.
1372 c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;
1374 if (_unlikely_(c < USEC_PER_SEC))
1383 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1385 if (_unlikely_(c < USEC_PER_MSEC*250))
1388 c -= USEC_PER_MSEC*250;
1397 static int event_arm_timer(
1404 struct itimerspec its = {};
1405 sd_event_source *a, *b;
1412 a = prioq_peek(earliest);
1413 if (!a || a->enabled == SD_EVENT_OFF) {
1415 if (*next == (usec_t) -1)
1419 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1423 *next = (usec_t) -1;
1428 b = prioq_peek(latest);
1429 assert_se(b && b->enabled != SD_EVENT_OFF);
1431 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1435 assert_se(timer_fd >= 0);
1438 /* We don' want to disarm here, just mean some time looooong ago. */
1439 its.it_value.tv_sec = 0;
1440 its.it_value.tv_nsec = 1;
1442 timespec_store(&its.it_value, t);
1444 r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
1452 static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
1455 assert(s->type == SOURCE_IO);
1457 s->io.revents = events;
1459 return source_set_pending(s, true);
1462 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
1470 assert_return(events == EPOLLIN, -EIO);
1472 ss = read(fd, &x, sizeof(x));
1474 if (errno == EAGAIN || errno == EINTR)
1480 if (ss != sizeof(x))
1483 *next = (usec_t) -1;
1488 static int process_timer(
1500 s = prioq_peek(earliest);
1503 s->enabled == SD_EVENT_OFF ||
1507 r = source_set_pending(s, true);
1511 prioq_reshuffle(earliest, s, &s->time.earliest_index);
1512 prioq_reshuffle(latest, s, &s->time.latest_index);
1518 static int process_child(sd_event *e) {
1525 e->need_process_child = false;
1528 So, this is ugly. We iteratively invoke waitid() with P_PID
1529 + WNOHANG for each PID we wait for, instead of using
1530 P_ALL. This is because we only want to get child
1531 information of very specific child processes, and not all
1532 of them. We might not have processed the SIGCHLD even of a
1533 previous invocation and we don't want to maintain a
1534 unbounded *per-child* event queue, hence we really don't
1535 want anything flushed out of the kernel's queue that we
1536 don't care about. Since this is O(n) this means that if you
1537 have a lot of processes you probably want to handle SIGCHLD
1541 HASHMAP_FOREACH(s, e->child_sources, i) {
1542 assert(s->type == SOURCE_CHILD);
1547 if (s->enabled == SD_EVENT_OFF)
1550 zero(s->child.siginfo);
1551 r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
1555 if (s->child.siginfo.si_pid != 0) {
1556 r = source_set_pending(s, true);
1565 static int process_signal(sd_event *e, uint32_t events) {
1566 bool read_one = false;
1570 assert(e->signal_sources);
1572 assert_return(events == EPOLLIN, -EIO);
1575 struct signalfd_siginfo si;
1579 ss = read(e->signal_fd, &si, sizeof(si));
1581 if (errno == EAGAIN || errno == EINTR)
1587 if (ss != sizeof(si))
1592 s = e->signal_sources[si.ssi_signo];
1593 if (si.ssi_signo == SIGCHLD) {
1594 r = process_child(e);
1603 s->signal.siginfo = si;
1604 r = source_set_pending(s, true);
1613 static int source_dispatch(sd_event_source *s) {
1617 assert(s->pending || s->type == SOURCE_QUIT);
1619 if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
1620 r = source_set_pending(s, false);
1625 if (s->enabled == SD_EVENT_ONESHOT) {
1626 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
1631 sd_event_source_ref(s);
1636 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
1639 case SOURCE_MONOTONIC:
1640 r = s->time.callback(s, s->time.next, s->userdata);
1643 case SOURCE_REALTIME:
1644 r = s->time.callback(s, s->time.next, s->userdata);
1648 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
1652 r = s->child.callback(s, &s->child.siginfo, s->userdata);
1656 r = s->defer.callback(s, s->userdata);
1660 r = s->quit.callback(s, s->userdata);
1664 sd_event_source_unref(s);
1669 static int event_prepare(sd_event *e) {
1677 s = prioq_peek(e->prepare);
1678 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
1681 s->prepare_iteration = e->iteration;
1682 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
1687 r = s->prepare(s, s->userdata);
1696 static int dispatch_quit(sd_event *e) {
1702 p = prioq_peek(e->quit);
1703 if (!p || p->enabled == SD_EVENT_OFF) {
1704 e->state = SD_EVENT_FINISHED;
1710 e->state = SD_EVENT_QUITTING;
1712 r = source_dispatch(p);
1714 e->state = SD_EVENT_PASSIVE;
1720 static sd_event_source* event_next_pending(sd_event *e) {
1725 p = prioq_peek(e->pending);
1729 if (p->enabled == SD_EVENT_OFF)
1735 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
1736 struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
1740 assert_return(e, -EINVAL);
1741 assert_return(!event_pid_changed(e), -ECHILD);
1742 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1743 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1745 if (e->quit_requested)
1746 return dispatch_quit(e);
1750 e->state = SD_EVENT_RUNNING;
1752 r = event_prepare(e);
1756 if (event_next_pending(e) || e->need_process_child)
1760 r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
1764 r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
1769 m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
1770 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
1772 r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
1776 dual_timestamp_get(&e->timestamp);
1778 for (i = 0; i < m; i++) {
1780 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
1781 r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
1782 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
1783 r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
1784 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
1785 r = process_signal(e, ev_queue[i].events);
1787 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
1793 r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
1797 r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
1801 if (e->need_process_child) {
1802 r = process_child(e);
1807 p = event_next_pending(e);
1813 r = source_dispatch(p);
1816 e->state = SD_EVENT_PASSIVE;
1822 _public_ int sd_event_loop(sd_event *e) {
1825 assert_return(e, -EINVAL);
1826 assert_return(!event_pid_changed(e), -ECHILD);
1827 assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
1831 while (e->state != SD_EVENT_FINISHED) {
1832 r = sd_event_run(e, (uint64_t) -1);
1844 _public_ int sd_event_get_state(sd_event *e) {
1845 assert_return(e, -EINVAL);
1846 assert_return(!event_pid_changed(e), -ECHILD);
1851 _public_ int sd_event_get_quit(sd_event *e) {
1852 assert_return(e, -EINVAL);
1853 assert_return(!event_pid_changed(e), -ECHILD);
1855 return e->quit_requested;
1858 _public_ int sd_event_request_quit(sd_event *e) {
1859 assert_return(e, -EINVAL);
1860 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1861 assert_return(!event_pid_changed(e), -ECHILD);
1863 e->quit_requested = true;
1867 _public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
1868 assert_return(e, -EINVAL);
1869 assert_return(usec, -EINVAL);
1870 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1871 assert_return(!event_pid_changed(e), -ECHILD);
1873 *usec = e->timestamp.realtime;
1877 _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
1878 assert_return(e, -EINVAL);
1879 assert_return(usec, -EINVAL);
1880 assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
1881 assert_return(!event_pid_changed(e), -ECHILD);
1883 *usec = e->timestamp.monotonic;
1887 _public_ int sd_event_default(sd_event **ret) {
1889 static __thread sd_event *default_event = NULL;
1894 return !!default_event;
1896 if (default_event) {
1897 *ret = sd_event_ref(default_event);
1901 r = sd_event_new(&e);
1905 e->default_event_ptr = &default_event;
1913 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
1914 assert_return(e, -EINVAL);
1915 assert_return(tid, -EINVAL);
1916 assert_return(!event_pid_changed(e), -ECHILD);