chiark / gitweb /
sd-event: minor fixups to delays profiling changes
[elogind.git] / src / libelogind / sd-event / sd-event.c
index a14ade221b3ae89b76a6eccc5e76e8f23f6586f1..5ca8c48c82db8d4b15c7617426cb57e140daa624 100644 (file)
 #include <sys/timerfd.h>
 #include <sys/wait.h>
 
-#include "sd-id128.h"
 #include "sd-daemon.h"
-#include "macro.h"
-#include "prioq.h"
+#include "sd-event.h"
+#include "sd-id128.h"
+
+#include "alloc-util.h"
+#include "fd-util.h"
 #include "hashmap.h"
-#include "util.h"
-#include "time-util.h"
+#include "list.h"
+#include "macro.h"
 #include "missing.h"
+#include "prioq.h"
+#include "process-util.h"
 #include "set.h"
-#include "list.h"
 #include "signal-util.h"
-
-#include "sd-event.h"
+#include "string-table.h"
+#include "string-util.h"
+#include "time-util.h"
+#include "util.h"
 
 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
 
@@ -56,9 +61,39 @@ typedef enum EventSourceType {
         _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
 } EventSourceType;
 
+static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
+        [SOURCE_IO] = "io",
+        [SOURCE_TIME_REALTIME] = "realtime",
+        [SOURCE_TIME_BOOTTIME] = "boottime",
+        [SOURCE_TIME_MONOTONIC] = "monotonic",
+        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
+        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
+        [SOURCE_SIGNAL] = "signal",
+        [SOURCE_CHILD] = "child",
+        [SOURCE_DEFER] = "defer",
+        [SOURCE_POST] = "post",
+        [SOURCE_EXIT] = "exit",
+        [SOURCE_WATCHDOG] = "watchdog",
+};
+
+DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
+
+/* All objects we use in epoll events start with this value, so that
+ * we know how to dispatch them */
+typedef enum WakeupType {
+        WAKEUP_NONE,
+        WAKEUP_EVENT_SOURCE,
+        WAKEUP_CLOCK_DATA,
+        WAKEUP_SIGNAL_DATA,
+        _WAKEUP_TYPE_MAX,
+        _WAKEUP_TYPE_INVALID = -1,
+} WakeupType;
+
 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
 
 struct sd_event_source {
+        WakeupType wakeup;
+
         unsigned n_ref;
 
         sd_event *event;
@@ -120,6 +155,7 @@ struct sd_event_source {
 };
 
 struct clock_data {
+        WakeupType wakeup;
         int fd;
 
         /* For all clocks we maintain two priority queues each, one
@@ -136,11 +172,23 @@ struct clock_data {
         bool needs_rearm:1;
 };
 
+struct signal_data {
+        WakeupType wakeup;
+
+        /* For each priority we maintain one signal fd, so that we
+         * only have to dequeue a single event per priority at a
+         * time. */
+
+        int fd;
+        int64_t priority;
+        sigset_t sigset;
+        sd_event_source *current;
+};
+
 struct sd_event {
         unsigned n_ref;
 
         int epoll_fd;
-        int signal_fd;
         int watchdog_fd;
 
         Prioq *pending;
@@ -157,8 +205,8 @@ struct sd_event {
 
         usec_t perturb;
 
-        sigset_t sigset;
-        sd_event_source **signal_sources;
+        sd_event_source **signal_sources; /* indexed by signal number */
+        Hashmap *signal_data; /* indexed by priority */
 
         Hashmap *child_sources;
         unsigned n_enabled_child_sources;
@@ -177,6 +225,7 @@ struct sd_event {
         bool exit_requested:1;
         bool need_process_child:1;
         bool watchdog:1;
+        bool profile_delays:1;
 
         int exit_code;
 
@@ -188,6 +237,9 @@ struct sd_event {
         unsigned n_sources;
 
         LIST_HEAD(sd_event_source, sources);
+
+        usec_t last_run, last_log;
+        unsigned delays[sizeof(usec_t) * 8];
 };
 
 static void source_disconnect(sd_event_source *s);
@@ -216,12 +268,6 @@ static int pending_prioq_compare(const void *a, const void *b) {
         if (x->pending_iteration > y->pending_iteration)
                 return 1;
 
-        /* Stability for the rest */
-        if (x < y)
-                return -1;
-        if (x > y)
-                return 1;
-
         return 0;
 }
 
@@ -231,6 +277,12 @@ static int prepare_prioq_compare(const void *a, const void *b) {
         assert(x->prepare);
         assert(y->prepare);
 
+        /* Enabled ones first */
+        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+                return -1;
+        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+                return 1;
+
         /* Move most recently prepared ones last, so that we can stop
          * preparing as soon as we hit one that has already been
          * prepared in the current iteration */
@@ -239,24 +291,12 @@ static int prepare_prioq_compare(const void *a, const void *b) {
         if (x->prepare_iteration > y->prepare_iteration)
                 return 1;
 
-        /* Enabled ones first */
-        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
-                return -1;
-        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
-                return 1;
-
         /* Lower priority values first */
         if (x->priority < y->priority)
                 return -1;
         if (x->priority > y->priority)
                 return 1;
 
-        /* Stability for the rest */
-        if (x < y)
-                return -1;
-        if (x > y)
-                return 1;
-
         return 0;
 }
 
@@ -284,12 +324,6 @@ static int earliest_time_prioq_compare(const void *a, const void *b) {
         if (x->time.next > y->time.next)
                 return 1;
 
-        /* Stability for the rest */
-        if (x < y)
-                return -1;
-        if (x > y)
-                return 1;
-
         return 0;
 }
 
@@ -317,12 +351,6 @@ static int latest_time_prioq_compare(const void *a, const void *b) {
         if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
                 return 1;
 
-        /* Stability for the rest */
-        if (x < y)
-                return -1;
-        if (x > y)
-                return 1;
-
         return 0;
 }
 
@@ -344,17 +372,12 @@ static int exit_prioq_compare(const void *a, const void *b) {
         if (x->priority > y->priority)
                 return 1;
 
-        /* Stability for the rest */
-        if (x < y)
-                return -1;
-        if (x > y)
-                return 1;
-
         return 0;
 }
 
 static void free_clock_data(struct clock_data *d) {
         assert(d);
+        assert(d->wakeup == WAKEUP_CLOCK_DATA);
 
         safe_close(d->fd);
         prioq_free(d->earliest);
@@ -378,7 +401,6 @@ static void event_free(sd_event *e) {
                 *(e->default_event_ptr) = NULL;
 
         safe_close(e->epoll_fd);
-        safe_close(e->signal_fd);
         safe_close(e->watchdog_fd);
 
         free_clock_data(&e->realtime);
@@ -392,6 +414,7 @@ static void event_free(sd_event *e) {
         prioq_free(e->exit);
 
         free(e->signal_sources);
+        hashmap_free(e->signal_data);
 
         hashmap_free(e->child_sources);
         set_free(e->post_sources);
@@ -409,18 +432,15 @@ _public_ int sd_event_new(sd_event** ret) {
                 return -ENOMEM;
 
         e->n_ref = 1;
-        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
+        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
         e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
+        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
         e->original_pid = getpid();
         e->perturb = USEC_INFINITY;
 
-        assert_se(sigemptyset(&e->sigset) == 0);
-
-        e->pending = prioq_new(pending_prioq_compare);
-        if (!e->pending) {
-                r = -ENOMEM;
+        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
+        if (r < 0)
                 goto fail;
-        }
 
         e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
         if (e->epoll_fd < 0) {
@@ -428,6 +448,11 @@ _public_ int sd_event_new(sd_event** ret) {
                 goto fail;
         }
 
+        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
+                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
+                e->profile_delays = true;
+        }
+
         *ret = e;
         return 0;
 
@@ -482,7 +507,8 @@ static void source_io_unregister(sd_event_source *s) {
 
         r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
         if (r < 0)
-                log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));
+                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
+                                strna(s->description), event_source_type_to_string(s->type));
 
         s->io.registered = false;
 }
@@ -509,7 +535,6 @@ static int source_io_register(
                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
         else
                 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
-
         if (r < 0)
                 return -errno;
 
@@ -518,8 +543,7 @@ static int source_io_register(
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 static clockid_t event_source_type_to_clock(EventSourceType t) {
 
         switch (t) {
@@ -594,45 +618,171 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
         }
 }
 
-static bool need_signal(sd_event *e, int signal) {
-        return (e->signal_sources && e->signal_sources[signal] &&
-                e->signal_sources[signal]->enabled != SD_EVENT_OFF)
-                ||
-               (signal == SIGCHLD &&
-                e->n_enabled_child_sources > 0);
-}
+/* Makes sure a signal_data object with a signalfd registered in the
+ * epoll exists for the priority of the event source watching "sig"
+ * (priority 0 if there is no such source) and that "sig" is part of
+ * its signal mask. On success returns 0 and, if "ret" is non-NULL,
+ * stores the object there. On failure returns a negative errno-style
+ * error (-ECHILD when event_pid_changed() triggers). */
+static int event_make_signal_data(
+                sd_event *e,
+                int sig,
+                struct signal_data **ret) {
 
-static int event_update_signal_fd(sd_event *e) {
         struct epoll_event ev = {};
-        bool add_to_epoll;
+        struct signal_data *d;
+        bool added = false;
+        sigset_t ss_copy;
+        int64_t priority;
         int r;
 
         assert(e);
 
         if (event_pid_changed(e))
+                return -ECHILD;
+
+        if (e->signal_sources && e->signal_sources[sig])
+                priority = e->signal_sources[sig]->priority;
+        else
+                priority = 0;
+
+        d = hashmap_get(e->signal_data, &priority);
+        if (d) {
+                if (sigismember(&d->sigset, sig) > 0) {
+                        if (ret)
+                                *ret = d;
                 return 0;
+                }
+        } else {
+                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
+                if (r < 0)
+                        return r;
 
-        add_to_epoll = e->signal_fd < 0;
+                d = new0(struct signal_data, 1);
+                if (!d)
+                        return -ENOMEM;
+
+                d->wakeup = WAKEUP_SIGNAL_DATA;
+                d->fd  = -1;
+                d->priority = priority;
 
-        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
+                r = hashmap_put(e->signal_data, &d->priority, d);
-        if (r < 0)
-                return -errno;
+                if (r < 0) {
+                        /* The hashmap did not take d, and "added" is
+                         * still false so the fail path would not free
+                         * it either: release it here. */
+                        free(d);
+                        return r;
+                }
 
-        e->signal_fd = r;
+                added = true;
+        }
+
+        ss_copy = d->sigset;
+        assert_se(sigaddset(&ss_copy, sig) >= 0);
 
-        if (!add_to_epoll)
+        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
+        if (r < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        d->sigset = ss_copy;
+
+        if (d->fd >= 0) {
+                if (ret)
+                        *ret = d;
                 return 0;
+        }
+
+        d->fd = r;
 
         ev.events = EPOLLIN;
-        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
+        ev.data.ptr = d;
 
-        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
+        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
         if (r < 0) {
-                e->signal_fd = safe_close(e->signal_fd);
-                return -errno;
+                r = -errno;
+                goto fail;
         }
 
+        if (ret)
+                *ret = d;
+
         return 0;
+
+fail:
+        if (added) {
+                d->fd = safe_close(d->fd);
+                hashmap_remove(e->signal_data, &d->priority);
+                free(d);
+        }
+
+        return r;
+}
+
+static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
+        assert(e);
+        assert(d);
+
+        /* Turns off the specified signal in the signal data
+         * object. If the signal mask of the object becomes empty as
+         * a result, the object is removed. */
+
+        if (sigismember(&d->sigset, sig) == 0)
+                return;
+
+        assert_se(sigdelset(&d->sigset, sig) >= 0);
+
+        if (sigisemptyset(&d->sigset)) {
+
+                /* If the mask is all-zero we can get rid of the structure */
+                hashmap_remove(e->signal_data, &d->priority);
+                assert(!d->current);
+                safe_close(d->fd);
+                free(d);
+                return;
+        }
+
+        assert(d->fd >= 0);
+
+        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
+                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
+}
+
+static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
+        struct signal_data *d;
+        static const int64_t zero_priority = 0;
+
+        assert(e);
+
+        /* Rechecks if the specified signal is still something we are
+         * interested in. If not, we'll unmask it, and possibly drop
+         * the signalfd for it. */
+
+        if (sig == SIGCHLD &&
+            e->n_enabled_child_sources > 0)
+                return;
+
+        if (e->signal_sources &&
+            e->signal_sources[sig] &&
+            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
+                return;
+
+        /*
+         * The specified signal might be enabled in three different queues:
+         *
+         * 1) the one that belongs to the priority passed (if it is non-NULL)
+         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
+         * 3) the 0 priority (to cover the SIGCHLD case)
+         *
+         * Hence, let's remove it from all three here.
+         */
+
+        if (priority) {
+                d = hashmap_get(e->signal_data, priority);
+                if (d)
+                        event_unmask_signal_data(e, d, sig);
+        }
+
+        if (e->signal_sources && e->signal_sources[sig]) {
+                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
+                if (d)
+                        event_unmask_signal_data(e, d, sig);
+        }
+
+        d = hashmap_get(e->signal_data, &zero_priority);
+        if (d)
+                event_unmask_signal_data(e, d, sig);
 }
 
 static void source_disconnect(sd_event_source *s) {
@@ -671,17 +821,11 @@ static void source_disconnect(sd_event_source *s) {
 
         case SOURCE_SIGNAL:
                 if (s->signal.sig > 0) {
+
                         if (s->event->signal_sources)
                                 s->event->signal_sources[s->signal.sig] = NULL;
 
-                        /* If the signal was on and now it is off... */
-                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
-                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
-
-                                (void) event_update_signal_fd(s->event);
-                                /* If disabling failed, we might get a spurious event,
-                                 * but otherwise nothing bad should happen. */
-                        }
+                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                 }
 
                 break;
@@ -691,18 +835,10 @@ static void source_disconnect(sd_event_source *s) {
                         if (s->enabled != SD_EVENT_OFF) {
                                 assert(s->event->n_enabled_child_sources > 0);
                                 s->event->n_enabled_child_sources--;
-
-                                /* We know the signal was on, if it is off now... */
-                                if (!need_signal(s->event, SIGCHLD)) {
-                                        assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
-
-                                        (void) event_update_signal_fd(s->event);
-                                        /* If disabling failed, we might get a spurious event,
-                                         * but otherwise nothing bad should happen. */
-                                }
                         }
 
-                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
+                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
+                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                 }
 
                 break;
@@ -781,6 +917,14 @@ static int source_set_pending(sd_event_source *s, bool b) {
                 d->needs_rearm = true;
         }
 
+        if (s->type == SOURCE_SIGNAL && !b) {
+                struct signal_data *d;
+
+                d = hashmap_get(s->event->signal_data, &s->priority);
+                if (d && d->current == s)
+                        d->current = NULL;
+        }
+
         return 0;
 }
 
@@ -830,6 +974,7 @@ _public_ int sd_event_add_io(
         if (!s)
                 return -ENOMEM;
 
+        s->wakeup = WAKEUP_EVENT_SOURCE;
         s->io.fd = fd;
         s->io.events = events;
         s->io.callback = callback;
@@ -886,7 +1031,7 @@ static int event_setup_timer_fd(
                 return -errno;
 
         ev.events = EPOLLIN;
-        ev.data.ptr = INT_TO_PTR(clock_to_event_source_type(clock));
+        ev.data.ptr = d;
 
         r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
         if (r < 0) {
@@ -933,17 +1078,13 @@ _public_ int sd_event_add_time(
         d = event_get_clock_data(e, type);
         assert(d);
 
-        if (!d->earliest) {
-                d->earliest = prioq_new(earliest_time_prioq_compare);
-                if (!d->earliest)
-                        return -ENOMEM;
-        }
+        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
+        if (r < 0)
+                return r;
 
-        if (!d->latest) {
-                d->latest = prioq_new(latest_time_prioq_compare);
-                if (!d->latest)
-                        return -ENOMEM;
-        }
+        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
+        if (r < 0)
+                return r;
 
         if (d->fd < 0) {
                 r = event_setup_timer_fd(e, d, clock);
@@ -996,9 +1137,9 @@ _public_ int sd_event_add_signal(
                 void *userdata) {
 
         sd_event_source *s;
+        struct signal_data *d;
         sigset_t ss;
         int r;
-        bool previous;
 
         assert_return(e, -EINVAL);
         assert_return(sig > 0, -EINVAL);
@@ -1010,8 +1151,8 @@ _public_ int sd_event_add_signal(
                 callback = signal_exit_callback;
 
         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
-        if (r < 0)
-                return -errno;
+        if (r != 0)
+                return -r;
 
         if (!sigismember(&ss, sig))
                 return -EBUSY;
@@ -1023,8 +1164,6 @@ _public_ int sd_event_add_signal(
         } else if (e->signal_sources[sig])
                 return -EBUSY;
 
-        previous = need_signal(e, sig);
-
         s = source_new(e, !ret, SOURCE_SIGNAL);
         if (!s)
                 return -ENOMEM;
@@ -1036,15 +1175,11 @@ _public_ int sd_event_add_signal(
 
         e->signal_sources[sig] = s;
 
-        if (!previous) {
-                assert_se(sigaddset(&e->sigset, sig) == 0);
-
-                r = event_update_signal_fd(e);
+        r = event_make_signal_data(e, sig, &d);
                 if (r < 0) {
                         source_free(s);
                         return r;
                 }
-        }
 
         /* Use the signal name as description for the event source by default */
         (void) sd_event_source_set_description(s, signal_to_string(sig));
@@ -1055,6 +1190,7 @@ _public_ int sd_event_add_signal(
         return 0;
 }
 
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_add_child(
                 sd_event *e,
                 sd_event_source **ret,
@@ -1065,7 +1201,6 @@ _public_ int sd_event_add_child(
 
         sd_event_source *s;
         int r;
-        bool previous;
 
         assert_return(e, -EINVAL);
         assert_return(pid > 1, -EINVAL);
@@ -1079,11 +1214,9 @@ _public_ int sd_event_add_child(
         if (r < 0)
                 return r;
 
-        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
+        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                 return -EBUSY;
 
-        previous = need_signal(e, SIGCHLD);
-
         s = source_new(e, !ret, SOURCE_CHILD);
         if (!s)
                 return -ENOMEM;
@@ -1094,7 +1227,7 @@ _public_ int sd_event_add_child(
         s->userdata = userdata;
         s->enabled = SD_EVENT_ONESHOT;
 
-        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
+        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
         if (r < 0) {
                 source_free(s);
                 return r;
@@ -1102,15 +1235,12 @@ _public_ int sd_event_add_child(
 
         e->n_enabled_child_sources ++;
 
-        if (!previous) {
-                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
-
-                r = event_update_signal_fd(e);
+        r = event_make_signal_data(e, SIGCHLD, NULL);
                 if (r < 0) {
+                e->n_enabled_child_sources--;
                         source_free(s);
                         return r;
                 }
-        }
 
         e->need_process_child = true;
 
@@ -1153,6 +1283,7 @@ _public_ int sd_event_add_defer(
 
         return 0;
 }
+#endif // 0
 
 _public_ int sd_event_add_post(
                 sd_event *e,
@@ -1206,11 +1337,9 @@ _public_ int sd_event_add_exit(
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        if (!e->exit) {
-                e->exit = prioq_new(exit_prioq_compare);
-                if (!e->exit)
-                        return -ENOMEM;
-        }
+        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
+        if (r < 0)
+                return r;
 
         s = source_new(e, !ret, SOURCE_EXIT);
         if (!s)
@@ -1233,8 +1362,7 @@ _public_ int sd_event_add_exit(
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
         assert_return(s, NULL);
 
@@ -1281,8 +1409,7 @@ _public_ int sd_event_source_set_description(sd_event_source *s, const char *des
         return free_and_strdup(&s->description, description);
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
         assert_return(s, -EINVAL);
         assert_return(description, -EINVAL);
@@ -1300,8 +1427,7 @@ _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
         return s->event;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_source_get_pending(sd_event_source *s) {
         assert_return(s, -EINVAL);
         assert_return(s->type != SOURCE_EXIT, -EDOM);
@@ -1356,8 +1482,7 @@ _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
         assert_return(s, -EINVAL);
         assert_return(events, -EINVAL);
@@ -1394,8 +1519,7 @@ _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events)
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
         assert_return(s, -EINVAL);
         assert_return(revents, -EINVAL);
@@ -1424,6 +1548,8 @@ _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority)
 #endif // 0
 
 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
+        int r;
+
         assert_return(s, -EINVAL);
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
@@ -1431,6 +1557,24 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
         if (s->priority == priority)
                 return 0;
 
+        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
+                struct signal_data *old, *d;
+
+                /* Move us from the signalfd belonging to the old
+                 * priority to the signalfd of the new priority */
+
+                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
+
+                s->priority = priority;
+
+                r = event_make_signal_data(s->event, s->signal.sig, &d);
+                if (r < 0) {
+                        s->priority = old->priority;
+                        return r;
+                }
+
+                event_unmask_signal_data(s->event, old, s->signal.sig);
+        } else
         s->priority = priority;
 
         if (s->pending)
@@ -1445,8 +1589,7 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority)
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
         assert_return(s, -EINVAL);
         assert_return(m, -EINVAL);
@@ -1499,34 +1642,18 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
                 }
 
                 case SOURCE_SIGNAL:
-                        assert(need_signal(s->event, s->signal.sig));
-
                         s->enabled = m;
 
-                        if (!need_signal(s->event, s->signal.sig)) {
-                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
-
-                                (void) event_update_signal_fd(s->event);
-                                /* If disabling failed, we might get a spurious event,
-                                 * but otherwise nothing bad should happen. */
-                        }
-
+                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                         break;
 
                 case SOURCE_CHILD:
-                        assert(need_signal(s->event, SIGCHLD));
-
                         s->enabled = m;
 
                         assert(s->event->n_enabled_child_sources > 0);
                         s->event->n_enabled_child_sources--;
 
-                        if (!need_signal(s->event, SIGCHLD)) {
-                                assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
-
-                                (void) event_update_signal_fd(s->event);
-                        }
-
+                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                         break;
 
                 case SOURCE_EXIT:
@@ -1572,37 +1699,33 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
                 }
 
                 case SOURCE_SIGNAL:
-                        /* Check status before enabling. */
-                        if (!need_signal(s->event, s->signal.sig)) {
-                                assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
 
-                                r = event_update_signal_fd(s->event);
+                        s->enabled = m;
+
+                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                                 if (r < 0) {
                                         s->enabled = SD_EVENT_OFF;
+                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                         return r;
                                 }
-                        }
 
-                        s->enabled = m;
                         break;
 
                 case SOURCE_CHILD:
-                        /* Check status before enabling. */
-                        if (s->enabled == SD_EVENT_OFF) {
-                                if (!need_signal(s->event, SIGCHLD)) {
-                                        assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
 
-                                        r = event_update_signal_fd(s->event);
+                        if (s->enabled == SD_EVENT_OFF)
+                                s->event->n_enabled_child_sources++;
+
+                        s->enabled = m;
+
+                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                                         if (r < 0) {
                                                 s->enabled = SD_EVENT_OFF;
+                                s->event->n_enabled_child_sources--;
+                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                                 return r;
                                         }
-                                }
-
-                                s->event->n_enabled_child_sources++;
-                        }
 
-                        s->enabled = m;
                         break;
 
                 case SOURCE_EXIT:
@@ -1662,8 +1785,7 @@ _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
         assert_return(s, -EINVAL);
         assert_return(usec, -EINVAL);
@@ -1752,8 +1874,7 @@ _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
         assert_return(s, NULL);
 
@@ -2052,20 +2173,35 @@ static int process_child(sd_event *e) {
         return 0;
 }
 
-static int process_signal(sd_event *e, uint32_t events) {
+static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
         bool read_one = false;
         int r;
 
         assert(e);
-
         assert_return(events == EPOLLIN, -EIO);
 
+        /* If there's a signal queued on this priority and SIGCHLD is
+           on this priority too, then make sure to recheck the
+           children we watch. This is because we only ever dequeue
+           the first signal per priority, so if we dequeue one and
+           SIGCHLD is enqueued behind it we wouldn't know; but we
+           might have higher priority children we care about, hence
+           we need to check that explicitly. */
+
+        if (sigismember(&d->sigset, SIGCHLD))
+                e->need_process_child = true;
+
+        /* If there's already an event source pending for this
+         * priority we don't read another */
+        if (d->current)
+                return 0;
+
         for (;;) {
                 struct signalfd_siginfo si;
                 ssize_t n;
                 sd_event_source *s = NULL;
 
-                n = read(e->signal_fd, &si, sizeof(si));
+                n = read(d->fd, &si, sizeof(si));
                 if (n < 0) {
                         if (errno == EAGAIN || errno == EINTR)
                                 return read_one;
@@ -2080,24 +2216,21 @@ static int process_signal(sd_event *e, uint32_t events) {
 
                 read_one = true;
 
-                if (si.ssi_signo == SIGCHLD) {
-                        r = process_child(e);
-                        if (r < 0)
-                                return r;
-                        if (r > 0)
-                                continue;
-                }
-
                 if (e->signal_sources)
                         s = e->signal_sources[si.ssi_signo];
-
                 if (!s)
                         continue;
+                if (s->pending)
+                        continue;
 
                 s->signal.siginfo = si;
+                d->current = s;
+
                 r = source_set_pending(s, true);
                 if (r < 0)
                         return r;
+
+                return 1;
         }
 }
 
@@ -2192,12 +2325,9 @@ static int source_dispatch(sd_event_source *s) {
 
         s->dispatching = false;
 
-        if (r < 0) {
-                if (s->description)
-                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
-                else
-                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
-        }
+        if (r < 0)
+                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
+                                strna(s->description), event_source_type_to_string(s->type));
 
         if (s->n_ref == 0)
                 source_free(s);
@@ -2230,12 +2360,9 @@ static int event_prepare(sd_event *e) {
                 r = s->prepare(s, s->userdata);
                 s->dispatching = false;
 
-                if (r < 0) {
-                        if (s->description)
-                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
-                        else
-                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
-                }
+                if (r < 0)
+                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
+                                        strna(s->description), event_source_type_to_string(s->type));
 
                 if (s->n_ref == 0)
                         source_free(s);
@@ -2340,7 +2467,9 @@ _public_ int sd_event_prepare(sd_event *e) {
 
         e->iteration++;
 
+        e->state = SD_EVENT_PREPARING;
         r = event_prepare(e);
+        e->state = SD_EVENT_INITIAL;
         if (r < 0)
                 return r;
 
@@ -2415,23 +2544,31 @@ _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
 
         for (i = 0; i < m; i++) {
 
-                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
-                        r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
-                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
-                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
-                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
-                        r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
-                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
-                        r = flush_timer(e, e->realtime_alarm.fd, ev_queue[i].events, &e->realtime_alarm.next);
-                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME_ALARM))
-                        r = flush_timer(e, e->boottime_alarm.fd, ev_queue[i].events, &e->boottime_alarm.next);
-                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
-                        r = process_signal(e, ev_queue[i].events);
-                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                         r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
-                else
+                else {
+                        WakeupType *t = ev_queue[i].data.ptr;
+
+                        switch (*t) {
+
+                        case WAKEUP_EVENT_SOURCE:
                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
+                                break;
 
+                        case WAKEUP_CLOCK_DATA: {
+                                struct clock_data *d = ev_queue[i].data.ptr;
+                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
+                                break;
+                        }
+
+                        case WAKEUP_SIGNAL_DATA:
+                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
+                                break;
+
+                        default:
+                                assert_not_reached("Invalid wake-up pointer");
+                        }
+                }
                 if (r < 0)
                         goto finish;
         }
@@ -2510,6 +2647,18 @@ _public_ int sd_event_dispatch(sd_event *e) {
         return 1;
 }
 
+/* Log all e->delays[] counters on one debug line, zeroing each; o never passes b's last byte. */
+static void event_log_delays(sd_event *e) {
+        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
+        size_t i, o = 0;
+        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
+                int k = snprintf(b + o, sizeof(b) - o, "%u ", e->delays[i]);
+                o += k < 0 ? 0 : (size_t) k < sizeof(b) - o ? (size_t) k : sizeof(b) - 1 - o;
+                e->delays[i] = 0;
+        }
+        log_debug("Event loop iterations: %.*s", (int) o, b);
+}
+
 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         int r;
 
@@ -2518,11 +2667,30 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
 
+        if (e->profile_delays && e->last_run) {
+                usec_t this_run;
+                unsigned l;
+
+                this_run = now(CLOCK_MONOTONIC);
+                /* Count the time elapsed since e->last_run in the bucket picked by u64log2(). */
+                l = u64log2(this_run - e->last_run);
+                assert(l < ELEMENTSOF(e->delays)); /* element count, not sizeof()'s byte size */
+                e->delays[l]++;
+                /* Log the counters once at least 5s have passed since the previous log. */
+                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
+                        event_log_delays(e);
+                        e->last_log = this_run;
+                }
+        }
+
         r = sd_event_prepare(e);
         if (r == 0)
                 /* There was nothing? Then wait... */
                 r = sd_event_wait(e, timeout);
 
+        if (e->profile_delays)
+                e->last_run = now(CLOCK_MONOTONIC);
+
         if (r > 0) {
                 /* There's something now, then let's dispatch it */
                 r = sd_event_dispatch(e);
@@ -2535,8 +2703,7 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         return r;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_loop(sd_event *e) {
         int r;
 
@@ -2575,6 +2742,7 @@ _public_ int sd_event_get_state(sd_event *e) {
         return e->state;
 }
 
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
         assert_return(e, -EINVAL);
         assert_return(code, -EINVAL);
@@ -2586,6 +2754,7 @@ _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
         *code = e->exit_code;
         return 0;
 }
+#endif // 0
 
 _public_ int sd_event_exit(sd_event *e, int code) {
         assert_return(e, -EINVAL);
@@ -2598,12 +2767,17 @@ _public_ int sd_event_exit(sd_event *e, int code) {
         return 0;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
         assert_return(e, -EINVAL);
         assert_return(usec, -EINVAL);
         assert_return(!event_pid_changed(e), -ECHILD);
+        assert_return(IN_SET(clock,
+                             CLOCK_REALTIME,
+                             CLOCK_REALTIME_ALARM,
+                             CLOCK_MONOTONIC,
+                             CLOCK_BOOTTIME,
+                             CLOCK_BOOTTIME_ALARM), -EOPNOTSUPP);
 
         if (!dual_timestamp_is_set(&e->timestamp)) {
                 /* Implicitly fall back to now() if we never ran
@@ -2623,8 +2797,7 @@ _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
                 *usec = e->timestamp.monotonic;
                 break;
 
-        case CLOCK_BOOTTIME:
-        case CLOCK_BOOTTIME_ALARM:
+        default:
                 *usec = e->timestamp_boottime;
                 break;
         }
@@ -2659,6 +2832,7 @@ _public_ int sd_event_default(sd_event **ret) {
         return 1;
 }
 
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
         assert_return(e, -EINVAL);
         assert_return(tid, -EINVAL);
@@ -2671,6 +2845,7 @@ _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
 
         return -ENXIO;
 }
+#endif // 0
 
 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
         int r;
@@ -2724,8 +2899,7 @@ fail:
         return r;
 }
 
-/// UNNEEDED by elogind
-#if 0
+#if 0 /// UNNEEDED by elogind
 _public_ int sd_event_get_watchdog(sd_event *e) {
         assert_return(e, -EINVAL);
         assert_return(!event_pid_changed(e), -ECHILD);