/* SPDX-License-Identifier: LGPL-2.1+ */
/*
  This file is part of systemd.

  Copyright 2013 Lennart Poettering
*/

#include <sys/timerfd.h>

#include "sd-daemon.h"

#include "alloc-util.h"
#include "fs-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        SOURCE_INOTIFY,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
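/* The macro above generates the usual string-table helper, i.e.
 * event_source_type_to_string(SOURCE_SIGNAL) yields "signal", which is what the
 * log messages further down rely on. */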
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch them */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        WAKEUP_INOTIFY_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        sd_event_destroy_t destroy_callback;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                        bool owned:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
                struct {
                        sd_event_inotify_handler_t callback;
                        uint32_t mask;
                        struct inode_data *inode_data;
                        LIST_FIELDS(sd_event_source, by_inode_data);
                } inotify;
        };
};
struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in. */
        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */
        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};
/* A structure listing all event sources currently watching a specific inode */
struct inode_data {
        /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
        dev_t dev;
        ino_t ino;

        /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that we can
         * still rearrange the priority until then. We need the original inode to change the priority, since we have
         * to add a watch descriptor to the inotify object of the right priority, which we can only do with a handle
         * to the original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see
         * below) of the sd-event object, so that it is efficient to close everything before entering the next event
         * loop iteration. */
        int fd;

        /* The inotify "watch descriptor" */
        int wd;

        /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
         * most recently been set on the watch descriptor. */
        uint32_t combined_mask;

        /* All event sources subscribed to this inode */
        LIST_HEAD(sd_event_source, event_sources);

        /* The inotify object we watch this inode with */
        struct inotify_data *inotify_data;

        /* A linked list of all inode data objects with fds to close (see above) */
        LIST_FIELDS(struct inode_data, to_close);
};
/* A structure encapsulating an inotify fd */
struct inotify_data {
        WakeupType wakeup;

        /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority
         * at a time. */
        int fd;
        int64_t priority;

        Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
        Hashmap *wd;     /* The inode_data structures keyed by the watch descriptor for each */

        /* The buffer we read inotify events into */
        union inotify_event_buffer buffer;
        size_t buffer_filled; /* fill level of the buffer */

        /* How many event sources are currently marked pending for this inotify. We won't read new events off the
         * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
         * the events locally if they can't be coalesced). */
        unsigned n_pending;

        /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
         * to make it efficient to figure out what inotify objects to process data on next. */
        LIST_FIELDS(struct inotify_data, buffered);
};
struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool profile_delays:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}
static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}
static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}
static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}
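/* In other words, every time event source may be dispatched anywhere in the
 * window [time.next, time.next + time.accuracy]: the "earliest" prioq is
 * ordered by the start of that window, the "latest" prioq by its end, and
 * event_arm_timer() below picks one wakeup that lies within the windows of the
 * top entries of both queues. */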
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}
static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e);
}
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
_public_ sd_event* sd_event_ref(sd_event *e) {
        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}

_public_ sd_event* sd_event_unref(sd_event *e) {
        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref--;
        if (e->n_ref <= 0)
                event_free(e);

        return NULL;
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */
        return e->original_pid != getpid_cached();
}
static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;
        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}
static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}
static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}
static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}
static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}
static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        d->fd = safe_close(d->fd);
        hashmap_remove(e->signal_data, &d->priority);
        free(d);

        return r;
}
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way, the object is dropped entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }
                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }
                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {
                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}
static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}
_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
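/* A minimal usage sketch (illustrative only, not part of this file): wiring up
 * a read-side IO source on an fd. The handler name and fd are made up here.
 *
 *         static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *                 char buf[4096];
 *
 *                 if (revents & EPOLLIN)
 *                         (void) read(fd, buf, sizeof(buf));
 *
 *                 return 0;
 *         }
 *
 *         sd_event *e = NULL;
 *         sd_event_source *src = NULL;
 *
 *         assert_se(sd_event_default(&e) >= 0);
 *         assert_se(sd_event_add_io(e, &src, some_fd, EPOLLIN, my_io_handler, NULL) >= 0);
 *
 * Passing NULL instead of &src makes the source "floating", i.e. owned by the
 * event loop itself (see the !ret argument passed to source_new() above). */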
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
         * the same time within each minute/second/250ms, so that
         * events all across the system can be coalesced into a single
         * CPU wakeup. However, let's take some system-specific
         * randomness for this value, so that in a network of systems
         * with synced clocks timer events are distributed a
         * bit. Here, we calculate a perturbation usec offset from the
         * boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
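/* For example: if hashing the boot ID yields a perturbation of 13.7s, then a
 * timer coalesced at minute granularity will always fire at second :13.7 of
 * each minute on this machine, while a machine with a different boot ID picks
 * a different, but equally stable, spot within the minute. */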
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int fd, r;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
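/* A usage sketch (illustrative, not part of this file): arm a one-shot timer
 * 5s from now on CLOCK_MONOTONIC, with 100ms of coalescing leeway. now() is
 * the helper from time-util.h.
 *
 *         static int my_time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
 *                 log_info("timer elapsed");
 *                 return 0;
 *         }
 *
 *         sd_event_source *t = NULL;
 *         assert_se(sd_event_add_time(e, &t, CLOCK_MONOTONIC,
 *                                     now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC,
 *                                     100 * USEC_PER_MSEC,
 *                                     my_time_handler, NULL) >= 0);
 *
 * The accuracy argument widens the dispatch window [next, next + accuracy],
 * which is what lets sleep_between() below coalesce wakeups. */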
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;

        return 0;
}
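/* A usage sketch (illustrative): the caller must block the signal first,
 * otherwise the check against the thread's signal mask above fails with
 * -EBUSY.
 *
 *         static int my_sigterm_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *                 return sd_event_exit(sd_event_source_get_event(s), 0);
 *         }
 *
 *         sigset_t ss;
 *         assert_se(sigemptyset(&ss) >= 0);
 *         assert_se(sigaddset(&ss, SIGTERM) >= 0);
 *         assert_se(pthread_sigmask(SIG_BLOCK, &ss, NULL) == 0);
 *
 *         assert_se(sd_event_add_signal(e, NULL, SIGTERM, my_sigterm_handler, NULL) >= 0);
 */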
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
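/* Note the ordering above: the source is counted in n_enabled_child_sources
 * before the SIGCHLD signalfd is (re)allocated (with a rollback on failure),
 * and need_process_child makes the next loop iteration call process_child().
 * That way a child that exited before this call returns is still picked up. */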
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }

        free(d);
}
static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static int inode_data_compare(const void *a, const void *b) {
        const struct inode_data *x = a, *y = b;

        assert(x);
        assert(y);

        if (x->dev < y->dev)
                return -1;
        if (x->dev > y->dev)
                return 1;

        if (x->ino < y->ino)
                return -1;
        if (x->ino > y->ino)
                return 1;

        return 0;
}

static void inode_data_hash_func(const void *p, struct siphash *state) {
        const struct inode_data *d = p;

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

const struct hash_ops inode_data_hash_ops = {
        .hash = inode_data_hash_func,
        .compare = inode_data_compare
};
static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}
static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}
static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}
static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}
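/* For example: one source watching with IN_CREATE|IN_EXCL_UNLINK and another
 * with plain IN_DELETE yields IN_CREATE|IN_DELETE without IN_EXCL_UNLINK: the
 * OR collects both interests, while the AND on IN_EXCL_UNLINK keeps events for
 * already-unlinked files flowing as long as at least one source wants them. */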
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {
                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 0;
}
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        sd_event_source *s;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                goto fail;
        rm_inotify = r > 0;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0)
                goto fail;
        rm_inode = r > 0;

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        rm_inode = rm_inotify = false;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                goto fail;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(e, inode_data);

        if (rm_inotify)
                event_free_inotify_data(e, inotify_data);

        source_free(s);
        return r;
}
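/* A usage sketch (illustrative): watch a directory for new files. The handler
 * name and the watched path are made up.
 *
 *         static int my_inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *                 if (ev->len > 0)
 *                         log_info("saw %s", ev->name);
 *                 return 0;
 *         }
 *
 *         assert_se(sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_MOVED_TO,
 *                                        my_inotify_handler, NULL) >= 0);
 *
 * Watches of equal priority on the same inode share one watch descriptor; the
 * combined-mask logic above is what reconciles their differing interests. */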
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int saved_fd, r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the priority
                 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
                 * events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }
                        break;

                case SOURCE_CHILD:
                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms steps. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
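/* Worked example of the laddering above, assuming a perturbation of 13.7s:
 * with a window a=12:00:05, b=12:00:50 the minute-granularity candidate is
 * 12:00:13.7, which lies inside [a, b], so every loop on this machine sleeping
 * over that span wakes at :13.7. With a narrower window a=12:00:05, b=12:00:10
 * the minute spot 12:00:13.7 is past b (and 11:59:13.7 is before a), the 10s
 * grid yields 12:00:03.7 (also before a), and the 1s grid finally yields
 * 12:00:09.7, inside the window. Only if all four grids miss do we give up and
 * return b. */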
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just set some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}
static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}
static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}
static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT), this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
         * on this priority too, then make sure to recheck the
         * children we watch. This is because we only ever dequeue the
         * first signal per priority: a SIGCHLD enqueued behind it
         * would go unnoticed, even though we might have
         * higher-priority children we care about, hence we need to
         * check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}
static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}
static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;
                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {
                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }
                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }
                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;

                /* Nothing became pending for this event, drop it from the read buffer and move on */
                event_inotify_data_drop(e, d, sz);
        }

        return 0;
}
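
/* Illustrative usage sketch (not part of this file): the machinery above backs
 * sd_event_add_inotify(), which a caller would use roughly like:
 *
 *     static int on_change(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_info("mask=%" PRIu32 " name=%s", ev->mask, ev->len > 0 ? ev->name : "-");
 *             return 0;
 *     }
 *
 *     // "/run/foo" is a made-up path, purely for illustration
 *     sd_event_add_inotify(e, NULL, "/run/foo", IN_CLOSE_WRITE|IN_MOVED_TO, on_change, NULL);
 */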
static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }
        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;
        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * read buffer */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }
        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }
        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));
                sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 1;
}
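
/* Illustrative note (not part of this file): a failing callback merely gets
 * disabled, not removed, so the loop keeps running. E.g. a defer callback
 * sketched as:
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             return -EINVAL;   // logged at debug level, source switched to SD_EVENT_OFF
 *     }
 */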
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
                }
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;

        return r;
}
static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        p = prioq_peek(e->pending);
        if (!p || p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}
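
/* Worked example (illustrative): with WATCHDOG_USEC=20000000 (20 s) the
 * watchdog_period is 20 s, so the next ping is scheduled into the window
 * [watchdog_last + 10 s, watchdog_last + 15 s], i.e. between half and three
 * quarters of the period, letting sleep_between() coalesce it with other
 * wakeups. */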
static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;
        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;
        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;
        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;
        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);
        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);
        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }

                if (r < 0)
                        goto finish;
        }
        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;
        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;
        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;
        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;
        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;
        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;

                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
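
/* Illustrative usage sketch (not part of this file): prepare(), wait() and
 * dispatch() can be driven by hand instead of via sd_event_run(), e.g. when
 * embedding the loop into a foreign poll loop around sd_event_get_fd():
 *
 *     for (;;) {
 *             r = sd_event_prepare(e);
 *             if (r < 0)
 *                     return r;
 *             if (r == 0) {
 *                     r = sd_event_wait(e, (uint64_t) -1);
 *                     if (r < 0)
 *                             return r;
 *             }
 *             if (r > 0) {
 *                     r = sd_event_dispatch(e);
 *                     if (r < 0)
 *                             return r;
 *             }
 *             if (sd_event_get_state(e) == SD_EVENT_FINISHED)
 *                     break;
 *     }
 */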
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }

        log_debug("Event loop iterations: %.*s", o, b);
}
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now; let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
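
/* Illustrative usage sketch (not part of this file): the typical top-level
 * pattern built on the functions above:
 *
 *     _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_default(&e);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_defer(e, NULL, on_defer, NULL);   // on_defer is caller-supplied
 *     if (r < 0)
 *             return r;
 *     return sd_event_loop(e);   // returns the code passed to sd_event_exit()
 */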
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}
_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}
_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
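
/* Illustrative usage sketch (not part of this file): requesting a clean
 * shutdown from a signal callback, which then lets the exit sources run:
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 */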
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
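
/* Illustrative usage sketch (not part of this file): the cached timestamp is
 * the canonical base for relative timers, e.g. firing 5 s from "now":
 *
 *     uint64_t usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 */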
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }
        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
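
/* Illustrative usage sketch (not part of this file): a service started with
 * WatchdogSec= (i.e. $WATCHDOG_USEC exported by the service manager) only
 * needs:
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;
 *     // r == 0 here means the manager requested no watchdog; pinging is
 *     // handled automatically by the loop from then on
 */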
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}
_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}
_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}
_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);
        assert_return(ret, -EINVAL);

        *ret = s->destroy_callback;
        return !!s->destroy_callback;
}