1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2013 Lennart Poettering
9 #include <sys/timerfd.h>
12 #include "sd-daemon.h"
16 #include "alloc-util.h"
18 //#include "fs-util.h"
24 #include "process-util.h"
26 #include "signal-util.h"
27 #include "string-table.h"
28 #include "string-util.h"
29 #include "time-util.h"
32 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
34 typedef enum EventSourceType {
38 SOURCE_TIME_MONOTONIC,
39 SOURCE_TIME_REALTIME_ALARM,
40 SOURCE_TIME_BOOTTIME_ALARM,
48 _SOURCE_EVENT_SOURCE_TYPE_MAX,
49 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
52 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
54 [SOURCE_TIME_REALTIME] = "realtime",
55 [SOURCE_TIME_BOOTTIME] = "boottime",
56 [SOURCE_TIME_MONOTONIC] = "monotonic",
57 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
58 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
59 [SOURCE_SIGNAL] = "signal",
60 [SOURCE_CHILD] = "child",
61 [SOURCE_DEFER] = "defer",
62 [SOURCE_POST] = "post",
63 [SOURCE_EXIT] = "exit",
64 [SOURCE_WATCHDOG] = "watchdog",
65 [SOURCE_INOTIFY] = "inotify",
68 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
70 /* All objects we use in epoll events start with this value, so that
71 * we know how to dispatch them */
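/*
 * A minimal sketch (illustrative, not code from this file) of how that dispatch
 * works: every structure we register with epoll embeds a WakeupType as its
 * first member, so the epoll data pointer can be cast to WakeupType* before
 * anything else is assumed about the object:
 *
 *     struct epoll_event ev;                // as returned by epoll_wait()
 *     switch (*(WakeupType*) ev.data.ptr) {
 *     case WAKEUP_EVENT_SOURCE:             // an sd_event_source, e.g. SOURCE_IO
 *     case WAKEUP_CLOCK_DATA:               // a struct clock_data
 *     case WAKEUP_SIGNAL_DATA:              // a struct signal_data
 *     case WAKEUP_INOTIFY_DATA:             // a struct inotify_data
 *             ...
 *     }
 */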
72 typedef enum WakeupType {
79 _WAKEUP_TYPE_INVALID = -1,
82 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
86 struct sd_event_source {
93 sd_event_handler_t prepare;
97 EventSourceType type:5;
104 unsigned pending_index;
105 unsigned prepare_index;
106 uint64_t pending_iteration;
107 uint64_t prepare_iteration;
109 LIST_FIELDS(sd_event_source, sources);
113 sd_event_io_handler_t callback;
121 sd_event_time_handler_t callback;
122 usec_t next, accuracy;
123 unsigned earliest_index;
124 unsigned latest_index;
127 sd_event_signal_handler_t callback;
128 struct signalfd_siginfo siginfo;
132 sd_event_child_handler_t callback;
138 sd_event_handler_t callback;
141 sd_event_handler_t callback;
144 sd_event_handler_t callback;
145 unsigned prioq_index;
148 sd_event_inotify_handler_t callback;
150 struct inode_data *inode_data;
151 LIST_FIELDS(sd_event_source, by_inode_data);
160 /* For all clocks we maintain two priority queues each, one
161 * ordered by the earliest times the events may be
162 * dispatched, and one ordered by the latest times they must
163 * have been dispatched. The range between the top entries in
164 * the two prioqs is the time window we can freely schedule
177 /* For each priority we maintain one signal fd, so that we
178 * only have to dequeue a single event per priority at a
184 sd_event_source *current;
187 /* A structure listing all event sources currently watching a specific inode */
189 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
193 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
194 * still rearrange the priority until then: changing the priority means adding a watch descriptor to the
195 * inotify object of the new priority, which we can only do if we have a handle to the
196 * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
197 * the sd-event object, so that it is efficient to close everything, before entering the next event loop
201 /* The inotify "watch descriptor" */
204 /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
205 * most recently been set on the watch descriptor. */
206 uint32_t combined_mask;
208 /* All event sources subscribed to this inode */
209 LIST_HEAD(sd_event_source, event_sources);
211 /* The inotify object we watch this inode with */
212 struct inotify_data *inotify_data;
214 /* A linked list of all inode data objects with fds to close (see above) */
215 LIST_FIELDS(struct inode_data, to_close);
218 /* A structure encapsulating an inotify fd */
219 struct inotify_data {
222 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
228 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
229 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
231 /* The buffer we read inotify events into */
232 union inotify_event_buffer buffer;
233 size_t buffer_filled; /* fill level of the buffer */
235 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
236 * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
237 * the events locally if they can't be coalesced). */
240 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
241 * to make it efficient to figure out what inotify objects to process data on next. */
242 LIST_FIELDS(struct inotify_data, buffered);
254 /* timerfd_create() only supports these five clocks so far. We
255 * can add support for more clocks when the kernel learns to
256 * deal with them, too. */
257 struct clock_data realtime;
258 struct clock_data boottime;
259 struct clock_data monotonic;
260 struct clock_data realtime_alarm;
261 struct clock_data boottime_alarm;
265 sd_event_source **signal_sources; /* indexed by signal number */
266 Hashmap *signal_data; /* indexed by priority */
268 Hashmap *child_sources;
269 unsigned n_enabled_child_sources;
275 Hashmap *inotify_data; /* indexed by priority */
277 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
278 LIST_HEAD(struct inode_data, inode_data_to_close);
280 /* A list of inotify objects that already have events buffered which aren't processed yet */
281 LIST_HEAD(struct inotify_data, inotify_data_buffered);
286 triple_timestamp timestamp;
289 bool exit_requested:1;
290 bool need_process_child:1;
292 bool profile_delays:1;
297 sd_event **default_event_ptr;
299 usec_t watchdog_last, watchdog_period;
303 LIST_HEAD(sd_event_source, sources);
305 usec_t last_run, last_log;
306 unsigned delays[sizeof(usec_t) * 8];
309 static thread_local sd_event *default_event = NULL;
311 static void source_disconnect(sd_event_source *s);
312 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
314 static sd_event *event_resolve(sd_event *e) {
315 return e == SD_EVENT_DEFAULT ? default_event : e;
318 static int pending_prioq_compare(const void *a, const void *b) {
319 const sd_event_source *x = a, *y = b;
324 /* Enabled ones first */
325 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
327 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
330 /* Lower priority values first */
331 if (x->priority < y->priority)
333 if (x->priority > y->priority)
336 /* Older entries first */
337 if (x->pending_iteration < y->pending_iteration)
339 if (x->pending_iteration > y->pending_iteration)
345 static int prepare_prioq_compare(const void *a, const void *b) {
346 const sd_event_source *x = a, *y = b;
351 /* Enabled ones first */
352 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
354 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
357 /* Move most recently prepared ones last, so that we can stop
358 * preparing as soon as we hit one that has already been
359 * prepared in the current iteration */
360 if (x->prepare_iteration < y->prepare_iteration)
362 if (x->prepare_iteration > y->prepare_iteration)
365 /* Lower priority values first */
366 if (x->priority < y->priority)
368 if (x->priority > y->priority)
374 static int earliest_time_prioq_compare(const void *a, const void *b) {
375 const sd_event_source *x = a, *y = b;
377 assert(EVENT_SOURCE_IS_TIME(x->type));
378 assert(x->type == y->type);
380 /* Enabled ones first */
381 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
383 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
386 /* Move the pending ones to the end */
387 if (!x->pending && y->pending)
389 if (x->pending && !y->pending)
393 if (x->time.next < y->time.next)
395 if (x->time.next > y->time.next)
401 static usec_t time_event_source_latest(const sd_event_source *s) {
402 return usec_add(s->time.next, s->time.accuracy);
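/* Illustrative example (hypothetical numbers): given time_event_source_latest()
 * above, a timer with next = 10s and accuracy = 250ms may be dispatched anywhere
 * within [10s, 10.25s]; the earliest prioq is keyed on time.next, the latest
 * prioq on this sum. */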
405 static int latest_time_prioq_compare(const void *a, const void *b) {
406 const sd_event_source *x = a, *y = b;
408 assert(EVENT_SOURCE_IS_TIME(x->type));
409 assert(x->type == y->type);
411 /* Enabled ones first */
412 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
414 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
417 /* Move the pending ones to the end */
418 if (!x->pending && y->pending)
420 if (x->pending && !y->pending)
424 if (time_event_source_latest(x) < time_event_source_latest(y))
426 if (time_event_source_latest(x) > time_event_source_latest(y))
432 static int exit_prioq_compare(const void *a, const void *b) {
433 const sd_event_source *x = a, *y = b;
435 assert(x->type == SOURCE_EXIT);
436 assert(y->type == SOURCE_EXIT);
438 /* Enabled ones first */
439 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
441 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
444 /* Lower priority values first */
445 if (x->priority < y->priority)
447 if (x->priority > y->priority)
453 static void free_clock_data(struct clock_data *d) {
455 assert(d->wakeup == WAKEUP_CLOCK_DATA);
458 prioq_free(d->earliest);
459 prioq_free(d->latest);
462 static void event_free(sd_event *e) {
467 while ((s = e->sources)) {
469 source_disconnect(s);
470 sd_event_source_unref(s);
473 assert(e->n_sources == 0);
475 if (e->default_event_ptr)
476 *(e->default_event_ptr) = NULL;
478 safe_close(e->epoll_fd);
479 safe_close(e->watchdog_fd);
481 free_clock_data(&e->realtime);
482 free_clock_data(&e->boottime);
483 free_clock_data(&e->monotonic);
484 free_clock_data(&e->realtime_alarm);
485 free_clock_data(&e->boottime_alarm);
487 prioq_free(e->pending);
488 prioq_free(e->prepare);
491 free(e->signal_sources);
492 hashmap_free(e->signal_data);
494 hashmap_free(e->inotify_data);
496 hashmap_free(e->child_sources);
497 set_free(e->post_sources);
501 _public_ int sd_event_new(sd_event** ret) {
505 assert_return(ret, -EINVAL);
507 e = new0(sd_event, 1);
512 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
513 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
514 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
515 e->original_pid = getpid_cached();
516 e->perturb = USEC_INFINITY;
518 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
522 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
523 if (e->epoll_fd < 0) {
528 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
530 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
531 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
532 e->profile_delays = true;
543 _public_ sd_event* sd_event_ref(sd_event *e) {
548 assert(e->n_ref >= 1);
554 _public_ sd_event* sd_event_unref(sd_event *e) {
559 assert(e->n_ref >= 1);
568 static bool event_pid_changed(sd_event *e) {
571 /* We don't support people creating an event loop and keeping
572 * it around over a fork(). Let's complain. */
574 return e->original_pid != getpid_cached();
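/* Illustrative consequence (sketch, not code from this file): in a child
 * created by fork(), event_pid_changed() becomes true, so the
 * assert_return(!event_pid_changed(e), -ECHILD) guards used throughout this
 * file make most entry points fail with -ECHILD. */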
577 static void source_io_unregister(sd_event_source *s) {
581 assert(s->type == SOURCE_IO);
583 if (event_pid_changed(s->event))
586 if (!s->io.registered)
589 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
591 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
592 strna(s->description), event_source_type_to_string(s->type));
594 s->io.registered = false;
597 static int source_io_register(
602 struct epoll_event ev;
606 assert(s->type == SOURCE_IO);
607 assert(enabled != SD_EVENT_OFF);
609 ev = (struct epoll_event) {
610 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
614 if (s->io.registered)
615 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
617 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
621 s->io.registered = true;
626 static clockid_t event_source_type_to_clock(EventSourceType t) {
630 case SOURCE_TIME_REALTIME:
631 return CLOCK_REALTIME;
633 case SOURCE_TIME_BOOTTIME:
634 return CLOCK_BOOTTIME;
636 case SOURCE_TIME_MONOTONIC:
637 return CLOCK_MONOTONIC;
639 case SOURCE_TIME_REALTIME_ALARM:
640 return CLOCK_REALTIME_ALARM;
642 case SOURCE_TIME_BOOTTIME_ALARM:
643 return CLOCK_BOOTTIME_ALARM;
646 return (clockid_t) -1;
650 static EventSourceType clock_to_event_source_type(clockid_t clock) {
655 return SOURCE_TIME_REALTIME;
658 return SOURCE_TIME_BOOTTIME;
660 case CLOCK_MONOTONIC:
661 return SOURCE_TIME_MONOTONIC;
663 case CLOCK_REALTIME_ALARM:
664 return SOURCE_TIME_REALTIME_ALARM;
666 case CLOCK_BOOTTIME_ALARM:
667 return SOURCE_TIME_BOOTTIME_ALARM;
670 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
674 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
679 case SOURCE_TIME_REALTIME:
682 case SOURCE_TIME_BOOTTIME:
685 case SOURCE_TIME_MONOTONIC:
686 return &e->monotonic;
688 case SOURCE_TIME_REALTIME_ALARM:
689 return &e->realtime_alarm;
691 case SOURCE_TIME_BOOTTIME_ALARM:
692 return &e->boottime_alarm;
699 static int event_make_signal_data(
702 struct signal_data **ret) {
704 struct epoll_event ev;
705 struct signal_data *d;
713 if (event_pid_changed(e))
716 if (e->signal_sources && e->signal_sources[sig])
717 priority = e->signal_sources[sig]->priority;
719 priority = SD_EVENT_PRIORITY_NORMAL;
721 d = hashmap_get(e->signal_data, &priority);
723 if (sigismember(&d->sigset, sig) > 0) {
729 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
733 d = new0(struct signal_data, 1);
737 d->wakeup = WAKEUP_SIGNAL_DATA;
739 d->priority = priority;
741 r = hashmap_put(e->signal_data, &d->priority, d);
751 assert_se(sigaddset(&ss_copy, sig) >= 0);
753 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
767 d->fd = fd_move_above_stdio(r);
769 ev = (struct epoll_event) {
774 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
787 d->fd = safe_close(d->fd);
788 hashmap_remove(e->signal_data, &d->priority);
795 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
799 /* Turns off the specified signal in the signal data
800 * object. If the signal mask of the object becomes empty that
803 if (sigismember(&d->sigset, sig) == 0)
806 assert_se(sigdelset(&d->sigset, sig) >= 0);
808 if (sigisemptyset(&d->sigset)) {
810 /* If the mask is now all-zero we can get rid of the structure */
811 hashmap_remove(e->signal_data, &d->priority);
819 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
820 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
823 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
824 struct signal_data *d;
825 static const int64_t zero_priority = 0;
829 /* Rechecks if the specified signal is still something we are
830 * interested in. If not, we'll unmask it, and possibly drop
831 * the signalfd for it. */
833 if (sig == SIGCHLD &&
834 e->n_enabled_child_sources > 0)
837 if (e->signal_sources &&
838 e->signal_sources[sig] &&
839 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
843 * The specified signal might be enabled in three different queues:
845 * 1) the one that belongs to the priority passed (if it is non-NULL)
846 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
847 * 3) the 0 priority (to cover the SIGCHLD case)
849 * Hence, let's remove it from all three here.
853 d = hashmap_get(e->signal_data, priority);
855 event_unmask_signal_data(e, d, sig);
858 if (e->signal_sources && e->signal_sources[sig]) {
859 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
861 event_unmask_signal_data(e, d, sig);
864 d = hashmap_get(e->signal_data, &zero_priority);
866 event_unmask_signal_data(e, d, sig);
869 static void source_disconnect(sd_event_source *s) {
877 assert(s->event->n_sources > 0);
883 source_io_unregister(s);
887 case SOURCE_TIME_REALTIME:
888 case SOURCE_TIME_BOOTTIME:
889 case SOURCE_TIME_MONOTONIC:
890 case SOURCE_TIME_REALTIME_ALARM:
891 case SOURCE_TIME_BOOTTIME_ALARM: {
892 struct clock_data *d;
894 d = event_get_clock_data(s->event, s->type);
897 prioq_remove(d->earliest, s, &s->time.earliest_index);
898 prioq_remove(d->latest, s, &s->time.latest_index);
899 d->needs_rearm = true;
904 if (s->signal.sig > 0) {
906 if (s->event->signal_sources)
907 s->event->signal_sources[s->signal.sig] = NULL;
909 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
915 if (s->child.pid > 0) {
916 if (s->enabled != SD_EVENT_OFF) {
917 assert(s->event->n_enabled_child_sources > 0);
918 s->event->n_enabled_child_sources--;
921 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
922 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
932 set_remove(s->event->post_sources, s);
936 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
939 case SOURCE_INOTIFY: {
940 struct inode_data *inode_data;
942 inode_data = s->inotify.inode_data;
944 struct inotify_data *inotify_data;
945 assert_se(inotify_data = inode_data->inotify_data);
947 /* Detach this event source from the inode object */
948 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
949 s->inotify.inode_data = NULL;
952 assert(inotify_data->n_pending > 0);
953 inotify_data->n_pending--;
956 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode continues
957 * to be watched. That's because inotify doesn't really have an API for that: we
958 * can only change watch masks with access to the original inode either by fd or by path. But
959 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
960 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
961 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
962 * there), but given the need for open_by_handle_at() which is privileged and not universally
963 * available this would be quite an incomplete solution. Hence we go the other way, leave the
964 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
965 * anymore after reception. Yes, this sucks, but … Linux … */
967 /* Maybe release the inode data (and its inotify) */
968 event_gc_inode_data(s->event, inode_data);
975 assert_not_reached("Wut? I shouldn't exist.");
979 prioq_remove(s->event->pending, s, &s->pending_index);
982 prioq_remove(s->event->prepare, s, &s->prepare_index);
986 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
988 LIST_REMOVE(sources, event->sources, s);
992 sd_event_unref(event);
995 static void source_free(sd_event_source *s) {
998 source_disconnect(s);
1000 if (s->type == SOURCE_IO && s->io.owned)
1001 safe_close(s->io.fd);
1003 free(s->description);
1007 static int source_set_pending(sd_event_source *s, bool b) {
1011 assert(s->type != SOURCE_EXIT);
1013 if (s->pending == b)
1019 s->pending_iteration = s->event->iteration;
1021 r = prioq_put(s->event->pending, s, &s->pending_index);
1027 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1029 if (EVENT_SOURCE_IS_TIME(s->type)) {
1030 struct clock_data *d;
1032 d = event_get_clock_data(s->event, s->type);
1035 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1036 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1037 d->needs_rearm = true;
1040 if (s->type == SOURCE_SIGNAL && !b) {
1041 struct signal_data *d;
1043 d = hashmap_get(s->event->signal_data, &s->priority);
1044 if (d && d->current == s)
1048 if (s->type == SOURCE_INOTIFY) {
1050 assert(s->inotify.inode_data);
1051 assert(s->inotify.inode_data->inotify_data);
1054 s->inotify.inode_data->inotify_data->n_pending++;
1056 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1057 s->inotify.inode_data->inotify_data->n_pending--;
1064 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1069 s = new0(sd_event_source, 1);
1075 s->floating = floating;
1077 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
1082 LIST_PREPEND(sources, e->sources, s);
1088 _public_ int sd_event_add_io(
1090 sd_event_source **ret,
1093 sd_event_io_handler_t callback,
1099 assert_return(e, -EINVAL);
1100 assert_return(e = event_resolve(e), -ENOPKG);
1101 assert_return(fd >= 0, -EBADF);
1102 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1103 assert_return(callback, -EINVAL);
1104 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1105 assert_return(!event_pid_changed(e), -ECHILD);
1107 s = source_new(e, !ret, SOURCE_IO);
1111 s->wakeup = WAKEUP_EVENT_SOURCE;
1113 s->io.events = events;
1114 s->io.callback = callback;
1115 s->userdata = userdata;
1116 s->enabled = SD_EVENT_ON;
1118 r = source_io_register(s, s->enabled, events);
1130 static void initialize_perturb(sd_event *e) {
1131 sd_id128_t bootid = {};
1133 /* When we sleep for longer, we try to realign the wakeup to
1134 the same time within each minute/second/250ms, so that
1135 events all across the system can be coalesced into a single
1136 CPU wakeup. However, let's take some system-specific
1137 randomness for this value, so that in a network of systems
1138 with synced clocks timer events are distributed a
1139 bit. Here, we calculate a perturbation usec offset from the
1142 if (_likely_(e->perturb != USEC_INFINITY))
1145 if (sd_id128_get_boot(&bootid) >= 0)
1146 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
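/* Worked example (hypothetical value): if the boot ID hashes to a perturb of
 * 17s, minute-aligned wakeups on this machine land at second 17 of each
 * minute, while a machine with a different boot ID lands elsewhere in the
 * minute, spreading wakeups across a fleet with synchronized clocks. */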
1149 static int event_setup_timer_fd(
1151 struct clock_data *d,
1154 struct epoll_event ev;
1160 if (_likely_(d->fd >= 0))
1163 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1167 fd = fd_move_above_stdio(fd);
1169 ev = (struct epoll_event) {
1174 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1184 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1187 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1190 _public_ int sd_event_add_time(
1192 sd_event_source **ret,
1196 sd_event_time_handler_t callback,
1199 EventSourceType type;
1201 struct clock_data *d;
1204 assert_return(e, -EINVAL);
1205 assert_return(e = event_resolve(e), -ENOPKG);
1206 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1207 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1208 assert_return(!event_pid_changed(e), -ECHILD);
1210 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1213 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1218 callback = time_exit_callback;
1220 d = event_get_clock_data(e, type);
1223 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1227 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1232 r = event_setup_timer_fd(e, d, clock);
1237 s = source_new(e, !ret, type);
1241 s->time.next = usec;
1242 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1243 s->time.callback = callback;
1244 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1245 s->userdata = userdata;
1246 s->enabled = SD_EVENT_ONESHOT;
1248 d->needs_rearm = true;
1250 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1254 r = prioq_put(d->latest, s, &s->time.latest_index);
1268 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1271 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1274 _public_ int sd_event_add_signal(
1276 sd_event_source **ret,
1278 sd_event_signal_handler_t callback,
1282 struct signal_data *d;
1286 assert_return(e, -EINVAL);
1287 assert_return(e = event_resolve(e), -ENOPKG);
1288 assert_return(SIGNAL_VALID(sig), -EINVAL);
1289 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1290 assert_return(!event_pid_changed(e), -ECHILD);
1293 callback = signal_exit_callback;
1295 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1299 if (!sigismember(&ss, sig))
1302 if (!e->signal_sources) {
1303 e->signal_sources = new0(sd_event_source*, _NSIG);
1304 if (!e->signal_sources)
1306 } else if (e->signal_sources[sig])
1309 s = source_new(e, !ret, SOURCE_SIGNAL);
1313 s->signal.sig = sig;
1314 s->signal.callback = callback;
1315 s->userdata = userdata;
1316 s->enabled = SD_EVENT_ON;
1318 e->signal_sources[sig] = s;
1320 r = event_make_signal_data(e, sig, &d);
1326 /* Use the signal name as description for the event source by default */
1327 (void) sd_event_source_set_description(s, signal_to_string(sig));
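/* Typical usage of sd_event_add_signal() above (a sketch: the signal must
 * already be blocked, which the sigismember() check above enforces;
 * sigprocmask_many() is the helper from signal-util.h, and on_sigterm is a
 * hypothetical handler):
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
 *     r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 */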
1335 _public_ int sd_event_add_child(
1337 sd_event_source **ret,
1340 sd_event_child_handler_t callback,
1346 assert_return(e, -EINVAL);
1347 assert_return(e = event_resolve(e), -ENOPKG);
1348 assert_return(pid > 1, -EINVAL);
1349 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1350 assert_return(options != 0, -EINVAL);
1351 assert_return(callback, -EINVAL);
1352 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1353 assert_return(!event_pid_changed(e), -ECHILD);
1355 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1359 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1362 s = source_new(e, !ret, SOURCE_CHILD);
1367 s->child.options = options;
1368 s->child.callback = callback;
1369 s->userdata = userdata;
1370 s->enabled = SD_EVENT_ONESHOT;
1372 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1378 e->n_enabled_child_sources++;
1380 r = event_make_signal_data(e, SIGCHLD, NULL);
1382 e->n_enabled_child_sources--;
1387 e->need_process_child = true;
1395 _public_ int sd_event_add_defer(
1397 sd_event_source **ret,
1398 sd_event_handler_t callback,
1404 assert_return(e, -EINVAL);
1405 assert_return(e = event_resolve(e), -ENOPKG);
1406 assert_return(callback, -EINVAL);
1407 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1408 assert_return(!event_pid_changed(e), -ECHILD);
1410 s = source_new(e, !ret, SOURCE_DEFER);
1414 s->defer.callback = callback;
1415 s->userdata = userdata;
1416 s->enabled = SD_EVENT_ONESHOT;
1418 r = source_set_pending(s, true);
1430 _public_ int sd_event_add_post(
1432 sd_event_source **ret,
1433 sd_event_handler_t callback,
1439 assert_return(e, -EINVAL);
1440 assert_return(e = event_resolve(e), -ENOPKG);
1441 assert_return(callback, -EINVAL);
1442 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1443 assert_return(!event_pid_changed(e), -ECHILD);
1445 r = set_ensure_allocated(&e->post_sources, NULL);
1449 s = source_new(e, !ret, SOURCE_POST);
1453 s->post.callback = callback;
1454 s->userdata = userdata;
1455 s->enabled = SD_EVENT_ON;
1457 r = set_put(e->post_sources, s);
1469 _public_ int sd_event_add_exit(
1471 sd_event_source **ret,
1472 sd_event_handler_t callback,
1478 assert_return(e, -EINVAL);
1479 assert_return(e = event_resolve(e), -ENOPKG);
1480 assert_return(callback, -EINVAL);
1481 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1482 assert_return(!event_pid_changed(e), -ECHILD);
1484 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1488 s = source_new(e, !ret, SOURCE_EXIT);
1492 s->exit.callback = callback;
1493 s->userdata = userdata;
1494 s->exit.prioq_index = PRIOQ_IDX_NULL;
1495 s->enabled = SD_EVENT_ONESHOT;
1497 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1509 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1515 assert(hashmap_isempty(d->inodes));
1516 assert(hashmap_isempty(d->wd));
1518 if (d->buffer_filled > 0)
1519 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1521 hashmap_free(d->inodes);
1522 hashmap_free(d->wd);
1524 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1527 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1528 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1535 static int event_make_inotify_data(
1538 struct inotify_data **ret) {
1540 _cleanup_close_ int fd = -1;
1541 struct inotify_data *d;
1542 struct epoll_event ev;
1547 d = hashmap_get(e->inotify_data, &priority);
1554 fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
1558 fd = fd_move_above_stdio(fd);
1560 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1564 d = new(struct inotify_data, 1);
1568 *d = (struct inotify_data) {
1569 .wakeup = WAKEUP_INOTIFY_DATA,
1571 .priority = priority,
1574 r = hashmap_put(e->inotify_data, &d->priority, d);
1576 d->fd = safe_close(d->fd);
1581 ev = (struct epoll_event) {
1586 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1588 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1589 * remove the fd from the epoll first, which we don't want as we couldn't
1590 * add it in the first place. */
1591 event_free_inotify_data(e, d);
1601 static int inode_data_compare(const void *a, const void *b) {
1602 const struct inode_data *x = a, *y = b;
1607 if (x->dev < y->dev)
1609 if (x->dev > y->dev)
1612 if (x->ino < y->ino)
1614 if (x->ino > y->ino)
1620 static void inode_data_hash_func(const void *p, struct siphash *state) {
1621 const struct inode_data *d = p;
1625 siphash24_compress(&d->dev, sizeof(d->dev), state);
1626 siphash24_compress(&d->ino, sizeof(d->ino), state);
1629 const struct hash_ops inode_data_hash_ops = {
1630 .hash = inode_data_hash_func,
1631 .compare = inode_data_compare
1634 static void event_free_inode_data(
1636 struct inode_data *d) {
1643 assert(!d->event_sources);
1646 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1650 if (d->inotify_data) {
1653 if (d->inotify_data->fd >= 0) {
1654 /* So here's a problem. At the time this runs the watch descriptor might already be
1655 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1656 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's very
1657 * likely to happen. */
1659 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1660 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1663 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1666 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1672 static void event_gc_inode_data(
1674 struct inode_data *d) {
1676 struct inotify_data *inotify_data;
1683 if (d->event_sources)
1686 inotify_data = d->inotify_data;
1687 event_free_inode_data(e, d);
1689 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1690 event_free_inotify_data(e, inotify_data);
1693 static int event_make_inode_data(
1695 struct inotify_data *inotify_data,
1698 struct inode_data **ret) {
1700 struct inode_data *d, key;
1704 assert(inotify_data);
1706 key = (struct inode_data) {
1711 d = hashmap_get(inotify_data->inodes, &key);
1719 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1723 d = new(struct inode_data, 1);
1727 *d = (struct inode_data) {
1732 .inotify_data = inotify_data,
1735 r = hashmap_put(inotify_data->inodes, d, d);
1747 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1748 bool excl_unlink = true;
1749 uint32_t combined = 0;
1754 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1755 * the IN_EXCL_UNLINK flag is ANDed instead.
1757 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1758 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1759 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1760 * events we don't care for client-side. */
1762 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1764 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1765 excl_unlink = false;
1767 combined |= s->inotify.mask;
1770 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
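/* Worked example for inode_data_determine_mask() above (hypothetical masks):
 * two sources watching the same inode with IN_CREATE|IN_EXCL_UNLINK and
 * IN_DELETE combine to IN_CREATE|IN_DELETE; IN_EXCL_UNLINK is dropped because
 * the AND rule requires every source to request it. */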
1773 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1774 uint32_t combined_mask;
1780 combined_mask = inode_data_determine_mask(d);
1782 if (d->wd >= 0 && combined_mask == d->combined_mask)
1785 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1789 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1794 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1796 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1802 } else if (d->wd != wd) {
1804 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1805 (void) inotify_rm_watch(d->fd, wd);
1809 d->combined_mask = combined_mask;
1813 _public_ int sd_event_add_inotify(
1815 sd_event_source **ret,
1818 sd_event_inotify_handler_t callback,
1821 bool rm_inotify = false, rm_inode = false;
1822 struct inotify_data *inotify_data = NULL;
1823 struct inode_data *inode_data = NULL;
1824 _cleanup_close_ int fd = -1;
1829 assert_return(e, -EINVAL);
1830 assert_return(e = event_resolve(e), -ENOPKG);
1831 assert_return(path, -EINVAL);
1832 assert_return(callback, -EINVAL);
1833 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1834 assert_return(!event_pid_changed(e), -ECHILD);
1836 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1837 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1838 * the user can't use them for us. */
1839 if (mask & IN_MASK_ADD)
1842 fd = open(path, O_PATH|O_CLOEXEC|
1843 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1844 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1848 if (fstat(fd, &st) < 0)
1851 s = source_new(e, !ret, SOURCE_INOTIFY);
1855 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1856 s->inotify.mask = mask;
1857 s->inotify.callback = callback;
1858 s->userdata = userdata;
1860 /* Allocate an inotify object for this priority, and an inode object within it */
1861 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1866 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1871 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1872 * the event source, until then, for which we need the original inode. */
1873 if (inode_data->fd < 0) {
1874 inode_data->fd = TAKE_FD(fd);
1875 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1878 /* Link our event source to the inode data object */
1879 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1880 s->inotify.inode_data = inode_data;
1882 rm_inode = rm_inotify = false;
1884 /* Actually realize the watch now */
1885 r = inode_data_realize_watch(e, inode_data);
1889 (void) sd_event_source_set_description(s, path);
1900 event_free_inode_data(e, inode_data);
1903 event_free_inotify_data(e, inotify_data);
1908 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1913 assert(s->n_ref >= 1);
1919 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1924 assert(s->n_ref >= 1);
1927 if (s->n_ref <= 0) {
1928 /* Here's a special hack: when we are called from a
1929 * dispatch handler we won't free the event source
1930 * immediately, but we will detach the fd from the
1931 * epoll. This way it is safe for the caller to unref
1932 * the event source and immediately close the fd, but
1933 * we still retain a valid event source object after
1936 if (s->dispatching) {
1937 if (s->type == SOURCE_IO)
1938 source_io_unregister(s);
1940 source_disconnect(s);
1948 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1949 assert_return(s, -EINVAL);
1950 assert_return(!event_pid_changed(s->event), -ECHILD);
1952 return free_and_strdup(&s->description, description);
1955 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1956 assert_return(s, -EINVAL);
1957 assert_return(description, -EINVAL);
1958 assert_return(s->description, -ENXIO);
1959 assert_return(!event_pid_changed(s->event), -ECHILD);
1961 *description = s->description;
1965 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1966 assert_return(s, NULL);
1971 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1972 assert_return(s, -EINVAL);
1973 assert_return(s->type != SOURCE_EXIT, -EDOM);
1974 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1975 assert_return(!event_pid_changed(s->event), -ECHILD);
1980 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1981 assert_return(s, -EINVAL);
1982 assert_return(s->type == SOURCE_IO, -EDOM);
1983 assert_return(!event_pid_changed(s->event), -ECHILD);
1988 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1991 assert_return(s, -EINVAL);
1992 assert_return(fd >= 0, -EBADF);
1993 assert_return(s->type == SOURCE_IO, -EDOM);
1994 assert_return(!event_pid_changed(s->event), -ECHILD);
1999 if (s->enabled == SD_EVENT_OFF) {
2001 s->io.registered = false;
2005 saved_fd = s->io.fd;
2006 assert(s->io.registered);
2009 s->io.registered = false;
2011 r = source_io_register(s, s->enabled, s->io.events);
2013 s->io.fd = saved_fd;
2014 s->io.registered = true;
2018 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2024 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2025 assert_return(s, -EINVAL);
2026 assert_return(s->type == SOURCE_IO, -EDOM);
2031 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2032 assert_return(s, -EINVAL);
2033 assert_return(s->type == SOURCE_IO, -EDOM);
2039 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2040 assert_return(s, -EINVAL);
2041 assert_return(events, -EINVAL);
2042 assert_return(s->type == SOURCE_IO, -EDOM);
2043 assert_return(!event_pid_changed(s->event), -ECHILD);
2045 *events = s->io.events;
2049 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2052 assert_return(s, -EINVAL);
2053 assert_return(s->type == SOURCE_IO, -EDOM);
2054 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2055 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2056 assert_return(!event_pid_changed(s->event), -ECHILD);
2058 /* edge-triggered updates are never skipped, so we can reset edges */
2059 if (s->io.events == events && !(events & EPOLLET))
2062 r = source_set_pending(s, false);
2066 if (s->enabled != SD_EVENT_OFF) {
2067 r = source_io_register(s, s->enabled, events);
2072 s->io.events = events;
2077 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2078 assert_return(s, -EINVAL);
2079 assert_return(revents, -EINVAL);
2080 assert_return(s->type == SOURCE_IO, -EDOM);
2081 assert_return(s->pending, -ENODATA);
2082 assert_return(!event_pid_changed(s->event), -ECHILD);
2084 *revents = s->io.revents;
2088 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2089 assert_return(s, -EINVAL);
2090 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2091 assert_return(!event_pid_changed(s->event), -ECHILD);
2093 return s->signal.sig;
2096 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2097 assert_return(s, -EINVAL);
2098 assert_return(!event_pid_changed(s->event), -ECHILD);
2100 *priority = s->priority;
2104 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2105 bool rm_inotify = false, rm_inode = false;
2106 struct inotify_data *new_inotify_data = NULL;
2107 struct inode_data *new_inode_data = NULL;
2110 assert_return(s, -EINVAL);
2111 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2112 assert_return(!event_pid_changed(s->event), -ECHILD);
2114 if (s->priority == priority)
2117 if (s->type == SOURCE_INOTIFY) {
2118 struct inode_data *old_inode_data;
2120 assert(s->inotify.inode_data);
2121 old_inode_data = s->inotify.inode_data;
2123 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2124 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2125 * events we allow priority changes only until the first following iteration. */
2126 if (old_inode_data->fd < 0)
2129 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2134 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2139 if (new_inode_data->fd < 0) {
2140 /* Duplicate the fd for the new inode object if we don't have any yet */
2141 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2142 if (new_inode_data->fd < 0) {
2147 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2150 /* Move the event source to the new inode data structure */
2151 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2152 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2153 s->inotify.inode_data = new_inode_data;
2155 /* Now create the new watch */
2156 r = inode_data_realize_watch(s->event, new_inode_data);
2159 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2160 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2161 s->inotify.inode_data = old_inode_data;
2165 s->priority = priority;
2167 event_gc_inode_data(s->event, old_inode_data);
2169 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2170 struct signal_data *old, *d;
2172 /* Move us from the signalfd belonging to the old
2173 * priority to the signalfd of the new priority */
2175 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2177 s->priority = priority;
2179 r = event_make_signal_data(s->event, s->signal.sig, &d);
2181 s->priority = old->priority;
2185 event_unmask_signal_data(s->event, old, s->signal.sig);
2187 s->priority = priority;
2190 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2193 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2195 if (s->type == SOURCE_EXIT)
2196 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2202 event_free_inode_data(s->event, new_inode_data);
2205 event_free_inotify_data(s->event, new_inotify_data);
2210 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2211 assert_return(s, -EINVAL);
2212 assert_return(m, -EINVAL);
2213 assert_return(!event_pid_changed(s->event), -ECHILD);
2219 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2222 assert_return(s, -EINVAL);
2223 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2224 assert_return(!event_pid_changed(s->event), -ECHILD);
2226 /* If we are dead anyway, we are fine with turning off
2227 * sources, but everything else needs to fail. */
2228 if (s->event->state == SD_EVENT_FINISHED)
2229 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2231 if (s->enabled == m)
2234 if (m == SD_EVENT_OFF) {
2236 /* Unset the pending flag when this event source is disabled */
2237 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2238 r = source_set_pending(s, false);
2246 source_io_unregister(s);
2250 case SOURCE_TIME_REALTIME:
2251 case SOURCE_TIME_BOOTTIME:
2252 case SOURCE_TIME_MONOTONIC:
2253 case SOURCE_TIME_REALTIME_ALARM:
2254 case SOURCE_TIME_BOOTTIME_ALARM: {
2255 struct clock_data *d;
2258 d = event_get_clock_data(s->event, s->type);
2261 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2262 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2263 d->needs_rearm = true;
2270 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2276 assert(s->event->n_enabled_child_sources > 0);
2277 s->event->n_enabled_child_sources--;
2279 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2284 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2289 case SOURCE_INOTIFY:
2294 assert_not_reached("Wut? I shouldn't exist.");
2299 /* Unset the pending flag when this event source is enabled */
2300 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2301 r = source_set_pending(s, false);
2309 r = source_io_register(s, m, s->io.events);
2316 case SOURCE_TIME_REALTIME:
2317 case SOURCE_TIME_BOOTTIME:
2318 case SOURCE_TIME_MONOTONIC:
2319 case SOURCE_TIME_REALTIME_ALARM:
2320 case SOURCE_TIME_BOOTTIME_ALARM: {
2321 struct clock_data *d;
2324 d = event_get_clock_data(s->event, s->type);
2327 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2328 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2329 d->needs_rearm = true;
2337 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2339 s->enabled = SD_EVENT_OFF;
2340 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2348 if (s->enabled == SD_EVENT_OFF)
2349 s->event->n_enabled_child_sources++;
2353 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2355 s->enabled = SD_EVENT_OFF;
2356 s->event->n_enabled_child_sources--;
2357 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2365 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2370 case SOURCE_INOTIFY:
2375 assert_not_reached("Wut? I shouldn't exist.");
2380 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2383 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2388 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2389 assert_return(s, -EINVAL);
2390 assert_return(usec, -EINVAL);
2391 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2392 assert_return(!event_pid_changed(s->event), -ECHILD);
2394 *usec = s->time.next;
2398 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2399 struct clock_data *d;
2402 assert_return(s, -EINVAL);
2403 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2404 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2405 assert_return(!event_pid_changed(s->event), -ECHILD);
2407 r = source_set_pending(s, false);
2411 s->time.next = usec;
2413 d = event_get_clock_data(s->event, s->type);
2416 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2417 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2418 d->needs_rearm = true;
2423 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2424 assert_return(s, -EINVAL);
2425 assert_return(usec, -EINVAL);
2426 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2427 assert_return(!event_pid_changed(s->event), -ECHILD);
2429 *usec = s->time.accuracy;
2433 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2434 struct clock_data *d;
2437 assert_return(s, -EINVAL);
2438 assert_return(usec != (uint64_t) -1, -EINVAL);
2439 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2440 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2441 assert_return(!event_pid_changed(s->event), -ECHILD);
2443 r = source_set_pending(s, false);
2448 usec = DEFAULT_ACCURACY_USEC;
2450 s->time.accuracy = usec;
2452 d = event_get_clock_data(s->event, s->type);
2455 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2456 d->needs_rearm = true;
2461 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2462 assert_return(s, -EINVAL);
2463 assert_return(clock, -EINVAL);
2464 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2465 assert_return(!event_pid_changed(s->event), -ECHILD);
2467 *clock = event_source_type_to_clock(s->type);
2471 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2472 assert_return(s, -EINVAL);
2473 assert_return(pid, -EINVAL);
2474 assert_return(s->type == SOURCE_CHILD, -EDOM);
2475 assert_return(!event_pid_changed(s->event), -ECHILD);
2477 *pid = s->child.pid;
2481 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2482 assert_return(s, -EINVAL);
2483 assert_return(mask, -EINVAL);
2484 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2485 assert_return(!event_pid_changed(s->event), -ECHILD);
2487 *mask = s->inotify.mask;
2491 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2494 assert_return(s, -EINVAL);
2495 assert_return(s->type != SOURCE_EXIT, -EDOM);
2496 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2497 assert_return(!event_pid_changed(s->event), -ECHILD);
2499 if (s->prepare == callback)
2502 if (callback && s->prepare) {
2503 s->prepare = callback;
2507 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2511 s->prepare = callback;
2514 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2518 prioq_remove(s->event->prepare, s, &s->prepare_index);
2523 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2524 assert_return(s, NULL);
2529 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2532 assert_return(s, NULL);
2535 s->userdata = userdata;
2540 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2547 if (a >= USEC_INFINITY)
2548 return USEC_INFINITY;
2553 initialize_perturb(e);
2556 Find a good time to wake up again between times a and b. We
2557 have two goals here:
2559 a) We want to wake up as seldom as possible, hence prefer
2560 later times over earlier times.
2562 b) But if we have to wake up, then let's make sure to
2563 dispatch as much as possible on the entire system.
2565 We implement this by waking up everywhere at the same time
2566 within any given minute if we can, synchronised via the
2567 perturbation value determined from the boot ID. If we can't,
2568 then we try to find the same spot in every 10s, then 1s and
2569 then 250ms step. Otherwise, we pick the last possible time
2573 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2575 if (_unlikely_(c < USEC_PER_MINUTE))
2578 c -= USEC_PER_MINUTE;
2584 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2586 if (_unlikely_(c < USEC_PER_SEC*10))
2589 c -= USEC_PER_SEC*10;
2595 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2597 if (_unlikely_(c < USEC_PER_SEC))
2606 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2608 if (_unlikely_(c < USEC_PER_MSEC*250))
2611 c -= USEC_PER_MSEC*250;
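/* Worked example for sleep_between() above (hypothetical values): with
 * perturb = 20s and a window [a = 13:00:05, b = 13:01:30], the minute pass
 * yields c = 13:01:20 (the minute floor of b plus the perturbation); since
 * a <= c <= b we wake at 13:01:20. Had c fallen outside the window, the 10s,
 * 1s and 250ms passes would be tried in turn, and failing all of those we'd
 * return b itself. */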
2620 static int event_arm_timer(
2622 struct clock_data *d) {
2624 struct itimerspec its = {};
2625 sd_event_source *a, *b;
2632 if (!d->needs_rearm)
2635 d->needs_rearm = false;
2637 a = prioq_peek(d->earliest);
2638 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2643 if (d->next == USEC_INFINITY)
2647 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2651 d->next = USEC_INFINITY;
2655 b = prioq_peek(d->latest);
2656 assert_se(b && b->enabled != SD_EVENT_OFF);
2658 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2662 assert_se(d->fd >= 0);
2665 /* We don't want to disarm here, just set some time in the long past. */
2666 its.it_value.tv_sec = 0;
2667 its.it_value.tv_nsec = 1;
2669 timespec_store(&its.it_value, t);
2671 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2679 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2682 assert(s->type == SOURCE_IO);
2684 /* If the event source was already pending, we just OR in the
2685 * new revents, otherwise we reset the value. The ORing is
2686 * necessary to handle EPOLLONESHOT events properly where
2687 * readability might happen independently of writability, and
2688 * we need to keep track of both */
2691 s->io.revents |= revents;
2693 s->io.revents = revents;
2695 return source_set_pending(s, true);
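/* Illustrative sequence for process_io() above (hypothetical): a source
 * already pending with EPOLLIN that later also reports EPOLLOUT ends up with
 * revents == EPOLLIN|EPOLLOUT, so the earlier readability indication is not
 * lost. */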
2698 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2705 assert_return(events == EPOLLIN, -EIO);
2707 ss = read(fd, &x, sizeof(x));
2709 if (IN_SET(errno, EAGAIN, EINTR))
2715 if (_unlikely_(ss != sizeof(x)))
2719 *next = USEC_INFINITY;
2724 static int process_timer(
2727 struct clock_data *d) {
2736 s = prioq_peek(d->earliest);
2739 s->enabled == SD_EVENT_OFF ||
2743 r = source_set_pending(s, true);
2747 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2748 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2749 d->needs_rearm = true;
2755 static int process_child(sd_event *e) {
2762 e->need_process_child = false;
2765 So, this is ugly. We iteratively invoke waitid() with P_PID
2766 + WNOHANG for each PID we wait for, instead of using
2767 P_ALL. This is because we only want to get child
2768 information of very specific child processes, and not all
2769 of them. We might not have processed the SIGCHLD event of a
2770 previous invocation and we don't want to maintain an
2771 unbounded *per-child* event queue, hence we really don't
2772 want anything flushed out of the kernel's queue that we
2773 don't care about. Since this is O(n), if you
2774 have a lot of processes you probably want to handle SIGCHLD
2777 We do not reap the children here (by using WNOWAIT), this
2778 is only done after the event source is dispatched so that
2779 the callback still sees the process as a zombie.
2782 HASHMAP_FOREACH(s, e->child_sources, i) {
2783 assert(s->type == SOURCE_CHILD);
2788 if (s->enabled == SD_EVENT_OFF)
2791 zero(s->child.siginfo);
2792 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2793 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2797 if (s->child.siginfo.si_pid != 0) {
2798 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2800 if (!zombie && (s->child.options & WEXITED)) {
2801 /* If the child isn't dead then let's
2802 * immediately remove the state change
2803 * from the queue, since there's no
2804 * benefit in leaving it queued */
2806 assert(s->child.options & (WSTOPPED|WCONTINUED));
2807 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2810 r = source_set_pending(s, true);
2819 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2820 bool read_one = false;
2825 assert_return(events == EPOLLIN, -EIO);
2827 /* If there's a signal queued on this priority and SIGCHLD is
2828 on this priority too, then make sure to recheck the
2829 children we watch. This is because we only ever dequeue
2830 the first signal per priority, and if we dequeue one and
2831 SIGCHLD is enqueued later, we wouldn't know. But we
2832 might have higher-priority children we care about, hence we
2833 need to check that explicitly. */
2835 if (sigismember(&d->sigset, SIGCHLD))
2836 e->need_process_child = true;
2838 /* If there's already an event source pending for this
2839 * priority we don't read another */
2844 struct signalfd_siginfo si;
2846 sd_event_source *s = NULL;
2848 n = read(d->fd, &si, sizeof(si));
2850 if (IN_SET(errno, EAGAIN, EINTR))
2856 if (_unlikely_(n != sizeof(si)))
2859 assert(SIGNAL_VALID(si.ssi_signo));
2863 if (e->signal_sources)
2864 s = e->signal_sources[si.ssi_signo];
2870 s->signal.siginfo = si;
2873 r = source_set_pending(s, true);
2881 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2887 assert_return(revents == EPOLLIN, -EIO);
2889 /* If there's already an event source pending for this priority, don't read another */
2890 if (d->n_pending > 0)
2893 /* Is the read buffer non-empty? If so, let's not read more */
2894 if (d->buffer_filled > 0)
2897 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2899 if (IN_SET(errno, EAGAIN, EINTR))
2906 d->buffer_filled = (size_t) n;
2907 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2912 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2915 assert(sz <= d->buffer_filled);
2920 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2921 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2922 d->buffer_filled -= sz;
2924 if (d->buffer_filled == 0)
2925 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
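/* Worked example for event_inotify_data_drop() above (hypothetical sizes):
 * with buffer_filled = 96 and an event of sz = 32 consumed, the remaining 64
 * bytes are shifted to offset 0, so the next struct inotify_event header
 * starts at the front of the buffer again. */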
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't process another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {
                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                /* No event source took this event? Then drop it from the buffer, so we make progress. */
                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}

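/* Illustrative sketch (not part of the original file): a client watching a directory. The struct
 * inotify_event passed to the callback points directly into the per-priority read buffer processed
 * above, and is only valid for the duration of the callback. "example_" names and the watched path
 * are hypothetical. */
#if 0
static int example_inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->mask & IN_Q_OVERFLOW)
                log_debug("inotify queue overflowed, directory state may be stale.");
        else if (ev->len > 0)
                log_debug("Event %#x on %s.", ev->mask, ev->name);
        return 0;
}

static int example_watch_directory(sd_event *e) {
        return sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_DELETE|IN_MOVED_TO,
                                    example_inotify_handler, NULL);
}
#endif
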
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which
         * might invalidate the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;
        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * inotify event buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }
        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;

        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p || p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}

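/* Worked example for the window above: with WatchdogSec=20s the service manager expects a ping at
 * least every 20s, i.e. watchdog_period == 20 * USEC_PER_SEC. arm_watchdog() then arms the timerfd
 * somewhere in [watchdog_last + 10s, watchdog_last + 15s], no earlier than half and no later than
 * three quarters of the period, which leaves at least a 5s margin before the deadline while still
 * letting sleep_between() batch the wakeup with other timers. */
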
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first event loop iteration after they
         * were added, as a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

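/* Note the state machine spanned by sd_event_prepare(), sd_event_wait() and sd_event_dispatch():
 * prepare() takes the loop from SD_EVENT_INITIAL through SD_EVENT_PREPARING to SD_EVENT_ARMED (or,
 * if sources are already pending, to SD_EVENT_PENDING via the internal zero-timeout wait above);
 * wait() moves SD_EVENT_ARMED to SD_EVENT_PENDING if there is work, or back to SD_EVENT_INITIAL if
 * there is none; dispatch() runs one source in SD_EVENT_RUNNING (or SD_EVENT_EXITING) state and
 * then returns to SD_EVENT_INITIAL. */
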
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

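/* Illustrative sketch (not part of the original file): driving the loop manually with
 * sd_event_run(), one iteration at a time, instead of handing control to sd_event_loop(). Useful
 * when the caller wants to interleave its own periodic work. "example_" names are hypothetical. */
#if 0
static int example_run_once(sd_event *e) {
        int r;

        /* One iteration, waiting at most 100ms: returns > 0 if a source was dispatched,
         * 0 if the timeout elapsed without work, < 0 on error. */
        r = sd_event_run(e, 100 * USEC_PER_MSEC);
        if (r < 0)
                return r;

        /* Report whether the loop is still alive. */
        return sd_event_get_state(e) != SD_EVENT_FINISHED;
}
#endif
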
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

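/* Illustrative sketch (not part of the original file): embedding this loop into a foreign poll
 * loop via the epoll fd returned above, using the prepare/wait/dispatch triple directly. Assumes
 * <poll.h>; "example_" names are hypothetical. */
#if 0
static int example_foreign_loop_iteration(sd_event *e) {
        struct pollfd pfd = { .fd = sd_event_get_fd(e), .events = POLLIN };
        int r;

        r = sd_event_prepare(e);
        if (r == 0) {
                /* Nothing pending yet, poll on the epoll fd like on any other fd. */
                if (poll(&pfd, 1, -1) < 0)
                        return -errno;

                r = sd_event_wait(e, 0);
        }
        if (r > 0)
                r = sd_event_dispatch(e);

        return r;
}
#endif
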
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

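/* Note: this only marks the loop for termination. The following iterations then run the enabled
 * SOURCE_EXIT sources in priority order (see dispatch_exit() above) before the loop enters
 * SD_EVENT_FINISHED and sd_event_loop() returns the code passed here. */
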
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

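/* Illustrative sketch (not part of the original file): using the cached per-iteration timestamp to
 * arm a relative timer, which is the pattern sd_event_now() exists for. "example_" names are
 * hypothetical; accuracy 0 selects the default accuracy. */
#if 0
static int example_add_timer(sd_event *e, sd_event_time_handler_t handler) {
        uint64_t usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
        if (r < 0)
                return r;

        /* Fire 5s from "now", where "now" is the timestamp of the current loop iteration. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC, usec + 5 * USEC_PER_SEC, 0, handler, NULL);
}
#endif
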
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

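/* Note: for the above to do anything the service needs WatchdogSec= set in its unit file, so that
 * the manager passes WATCHDOG_USEC= (and WATCHDOG_PID=) into the environment, which
 * sd_watchdog_enabled() reads. A single sd_event_set_watchdog(e, true) after creating the loop is
 * then enough to keep the service supervised for as long as the loop keeps iterating. */
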
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}