chiark / gitweb /
sd-event: fix typo
[elogind.git] / src / libsystemd / sd-event / sd-event.c
index a4b67431ef66ffb97cb17a6afb065d6aa898ebf1..a270849cd7d914a829bcd5076130a4937d8cf389 100644 (file)
@@ -22,7 +22,6 @@
 #include <sys/epoll.h>
 #include <sys/timerfd.h>
 #include <sys/wait.h>
-#include <pthread.h>
 
 #include "sd-id128.h"
 #include "sd-daemon.h"
 #include "time-util.h"
 #include "missing.h"
 #include "set.h"
+#include "list.h"
 
 #include "sd-event.h"
 
-#define EPOLL_QUEUE_MAX 512U
 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
 
 typedef enum EventSourceType {
         SOURCE_IO,
         SOURCE_TIME_REALTIME,
+        SOURCE_TIME_BOOTTIME,
         SOURCE_TIME_MONOTONIC,
         SOURCE_TIME_REALTIME_ALARM,
         SOURCE_TIME_BOOTTIME_ALARM,
@@ -51,11 +51,11 @@ typedef enum EventSourceType {
         SOURCE_POST,
         SOURCE_EXIT,
         SOURCE_WATCHDOG,
-        _SOUFCE_EVENT_SOURCE_TYPE_MAX,
+        _SOURCE_EVENT_SOURCE_TYPE_MAX,
         _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
 } EventSourceType;
 
-#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
+#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
 
 struct sd_event_source {
         unsigned n_ref;
@@ -64,10 +64,13 @@ struct sd_event_source {
         void *userdata;
         sd_event_handler_t prepare;
 
+        char *description;
+
         EventSourceType type:5;
         int enabled:3;
         bool pending:1;
         bool dispatching:1;
+        bool floating:1;
 
         int64_t priority;
         unsigned pending_index;
@@ -75,6 +78,8 @@ struct sd_event_source {
         unsigned pending_iteration;
         unsigned prepare_iteration;
 
+        LIST_FIELDS(sd_event_source, sources);
+
         union {
                 struct {
                         sd_event_io_handler_t callback;
@@ -126,6 +131,8 @@ struct clock_data {
         Prioq *earliest;
         Prioq *latest;
         usec_t next;
+
+        bool needs_rearm:1;
 };
 
 struct sd_event {
@@ -138,10 +145,11 @@ struct sd_event {
         Prioq *pending;
         Prioq *prepare;
 
-        /* timerfd_create() only supports these four clocks so far. We
+        /* timerfd_create() only supports these five clocks so far. We
          * can add support for more clocks when the kernel learns to
          * deal with them, too. */
         struct clock_data realtime;
+        struct clock_data boottime;
         struct clock_data monotonic;
         struct clock_data realtime_alarm;
         struct clock_data boottime_alarm;
@@ -177,8 +185,12 @@ struct sd_event {
         usec_t watchdog_last, watchdog_period;
 
         unsigned n_sources;
+
+        LIST_HEAD(sd_event_source, sources);
 };
 
+static void source_disconnect(sd_event_source *s);
+
 static int pending_prioq_compare(const void *a, const void *b) {
         const sd_event_source *x = a, *y = b;
 
@@ -349,7 +361,16 @@ static void free_clock_data(struct clock_data *d) {
 }
 
 static void event_free(sd_event *e) {
+        sd_event_source *s;
+
         assert(e);
+
+        while ((s = e->sources)) {
+                assert(s->floating);
+                source_disconnect(s);
+                sd_event_source_unref(s);
+        }
+
         assert(e->n_sources == 0);
 
         if (e->default_event_ptr)
@@ -360,6 +381,7 @@ static void event_free(sd_event *e) {
         safe_close(e->watchdog_fd);
 
         free_clock_data(&e->realtime);
+        free_clock_data(&e->boottime);
         free_clock_data(&e->monotonic);
         free_clock_data(&e->realtime_alarm);
         free_clock_data(&e->boottime_alarm);
@@ -386,10 +408,10 @@ _public_ int sd_event_new(sd_event** ret) {
                 return -ENOMEM;
 
         e->n_ref = 1;
-        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
-        e->realtime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = (usec_t) -1;
+        e->signal_fd = e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
+        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
         e->original_pid = getpid();
-        e->perturb = (usec_t) -1;
+        e->perturb = USEC_INFINITY;
 
         assert_se(sigemptyset(&e->sigset) == 0);
 
@@ -439,7 +461,7 @@ _public_ sd_event* sd_event_unref(sd_event *e) {
 static bool event_pid_changed(sd_event *e) {
         assert(e);
 
-        /* We don't support people creating am event loop and keeping
+        /* We don't support people creating an event loop and keeping
          * it around over a fork(). Let's complain. */
 
         return e->original_pid != getpid();
@@ -500,6 +522,9 @@ static clockid_t event_source_type_to_clock(EventSourceType t) {
         case SOURCE_TIME_REALTIME:
                 return CLOCK_REALTIME;
 
+        case SOURCE_TIME_BOOTTIME:
+                return CLOCK_BOOTTIME;
+
         case SOURCE_TIME_MONOTONIC:
                 return CLOCK_MONOTONIC;
 
@@ -521,6 +546,9 @@ static EventSourceType clock_to_event_source_type(clockid_t clock) {
         case CLOCK_REALTIME:
                 return SOURCE_TIME_REALTIME;
 
+        case CLOCK_BOOTTIME:
+                return SOURCE_TIME_BOOTTIME;
+
         case CLOCK_MONOTONIC:
                 return SOURCE_TIME_MONOTONIC;
 
@@ -543,6 +571,9 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
         case SOURCE_TIME_REALTIME:
                 return &e->realtime;
 
+        case SOURCE_TIME_BOOTTIME:
+                return &e->boottime;
+
         case SOURCE_TIME_MONOTONIC:
                 return &e->monotonic;
 
@@ -557,86 +588,154 @@ static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
         }
 }
 
-static void source_free(sd_event_source *s) {
+static bool need_signal(sd_event *e, int signal) {
+        return (e->signal_sources && e->signal_sources[signal] &&
+                e->signal_sources[signal]->enabled != SD_EVENT_OFF)
+                ||
+               (signal == SIGCHLD &&
+                e->n_enabled_child_sources > 0);
+}
+
+static int event_update_signal_fd(sd_event *e) {
+        struct epoll_event ev = {};
+        bool add_to_epoll;
+        int r;
+
+        assert(e);
+
+        add_to_epoll = e->signal_fd < 0;
+
+        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
+        if (r < 0)
+                return -errno;
+
+        e->signal_fd = r;
+
+        if (!add_to_epoll)
+                return 0;
+
+        ev.events = EPOLLIN;
+        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
+
+        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
+        if (r < 0) {
+                e->signal_fd = safe_close(e->signal_fd);
+                return -errno;
+        }
+
+        return 0;
+}
+
+static void source_disconnect(sd_event_source *s) {
+        sd_event *event;
+
         assert(s);
 
-        if (s->event) {
-                assert(s->event->n_sources > 0);
+        if (!s->event)
+                return;
 
-                switch (s->type) {
+        assert(s->event->n_sources > 0);
 
-                case SOURCE_IO:
-                        if (s->io.fd >= 0)
-                                source_io_unregister(s);
+        switch (s->type) {
 
-                        break;
+        case SOURCE_IO:
+                if (s->io.fd >= 0)
+                        source_io_unregister(s);
 
-                case SOURCE_TIME_REALTIME:
-                case SOURCE_TIME_MONOTONIC:
-                case SOURCE_TIME_REALTIME_ALARM:
-                case SOURCE_TIME_BOOTTIME_ALARM: {
-                        struct clock_data *d;
+                break;
 
-                        d = event_get_clock_data(s->event, s->type);
-                        assert(d);
+        case SOURCE_TIME_REALTIME:
+        case SOURCE_TIME_BOOTTIME:
+        case SOURCE_TIME_MONOTONIC:
+        case SOURCE_TIME_REALTIME_ALARM:
+        case SOURCE_TIME_BOOTTIME_ALARM: {
+                struct clock_data *d;
 
-                        prioq_remove(d->earliest, s, &s->time.earliest_index);
-                        prioq_remove(d->latest, s, &s->time.latest_index);
-                        break;
-                }
+                d = event_get_clock_data(s->event, s->type);
+                assert(d);
 
-                case SOURCE_SIGNAL:
-                        if (s->signal.sig > 0) {
-                                if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
-                                        assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
+                prioq_remove(d->earliest, s, &s->time.earliest_index);
+                prioq_remove(d->latest, s, &s->time.latest_index);
+                d->needs_rearm = true;
+                break;
+        }
 
-                                if (s->event->signal_sources)
-                                        s->event->signal_sources[s->signal.sig] = NULL;
+        case SOURCE_SIGNAL:
+                if (s->signal.sig > 0) {
+                        if (s->event->signal_sources)
+                                s->event->signal_sources[s->signal.sig] = NULL;
+
+                        /* If the signal was on and now it is off... */
+                        if (s->enabled != SD_EVENT_OFF && !need_signal(s->event, s->signal.sig)) {
+                                assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
+
+                                (void) event_update_signal_fd(s->event);
+                                /* If disabling failed, we might get a spurious event,
+                                 * but otherwise nothing bad should happen. */
                         }
+                }
 
-                        break;
+                break;
 
-                case SOURCE_CHILD:
-                        if (s->child.pid > 0) {
-                                if (s->enabled != SD_EVENT_OFF) {
-                                        assert(s->event->n_enabled_child_sources > 0);
-                                        s->event->n_enabled_child_sources--;
-                                }
+        case SOURCE_CHILD:
+                if (s->child.pid > 0) {
+                        if (s->enabled != SD_EVENT_OFF) {
+                                assert(s->event->n_enabled_child_sources > 0);
+                                s->event->n_enabled_child_sources--;
 
-                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
+                                /* We know the signal was on, if it is off now... */
+                                if (!need_signal(s->event, SIGCHLD)) {
                                         assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
 
-                                hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
+                                        (void) event_update_signal_fd(s->event);
+                                        /* If disabling failed, we might get a spurious event,
+                                         * but otherwise nothing bad should happen. */
+                                }
                         }
 
-                        break;
+                        hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
+                }
 
-                case SOURCE_DEFER:
-                        /* nothing */
-                        break;
+                break;
 
-                case SOURCE_POST:
-                        set_remove(s->event->post_sources, s);
-                        break;
+        case SOURCE_DEFER:
+                /* nothing */
+                break;
 
-                case SOURCE_EXIT:
-                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
-                        break;
+        case SOURCE_POST:
+                set_remove(s->event->post_sources, s);
+                break;
 
-                default:
-                        assert_not_reached("Wut? I shouldn't exist.");
-                }
+        case SOURCE_EXIT:
+                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
+                break;
 
-                if (s->pending)
-                        prioq_remove(s->event->pending, s, &s->pending_index);
+        default:
+                assert_not_reached("Wut? I shouldn't exist.");
+        }
 
-                if (s->prepare)
-                        prioq_remove(s->event->prepare, s, &s->prepare_index);
+        if (s->pending)
+                prioq_remove(s->event->pending, s, &s->pending_index);
 
-                s->event->n_sources--;
-                sd_event_unref(s->event);
-        }
+        if (s->prepare)
+                prioq_remove(s->event->prepare, s, &s->prepare_index);
+
+        event = s->event;
+
+        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
+        s->event = NULL;
+        LIST_REMOVE(sources, event->sources, s);
+        event->n_sources--;
 
+        if (!s->floating)
+                sd_event_unref(event);
+}
+
+static void source_free(sd_event_source *s) {
+        assert(s);
+
+        source_disconnect(s);
+        free(s->description);
         free(s);
 }
 
@@ -670,12 +769,13 @@ static int source_set_pending(sd_event_source *s, bool b) {
 
                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
+                d->needs_rearm = true;
         }
 
         return 0;
 }
 
-static sd_event_source *source_new(sd_event *e, EventSourceType type) {
+static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
         sd_event_source *s;
 
         assert(e);
@@ -685,10 +785,15 @@ static sd_event_source *source_new(sd_event *e, EventSourceType type) {
                 return NULL;
 
         s->n_ref = 1;
-        s->event = sd_event_ref(e);
+        s->event = e;
+        s->floating = floating;
         s->type = type;
         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
 
+        if (!floating)
+                sd_event_ref(e);
+
+        LIST_PREPEND(sources, e->sources, s);
         e->n_sources ++;
 
         return s;
@@ -709,11 +814,10 @@ _public_ int sd_event_add_io(
         assert_return(fd >= 0, -EINVAL);
         assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
         assert_return(callback, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        s = source_new(e, SOURCE_IO);
+        s = source_new(e, !ret, SOURCE_IO);
         if (!s)
                 return -ENOMEM;
 
@@ -726,19 +830,39 @@ _public_ int sd_event_add_io(
         r = source_io_register(s, s->enabled, events);
         if (r < 0) {
                 source_free(s);
-                return -errno;
+                return r;
         }
 
-        *ret = s;
+        if (ret)
+                *ret = s;
+
         return 0;
 }
 
+static void initialize_perturb(sd_event *e) {
+        sd_id128_t bootid = {};
+
+        /* When we sleep for longer, we try to realign the wakeup to
+           the same time wihtin each minute/second/250ms, so that
+           events all across the system can be coalesced into a single
+           CPU wakeup. However, let's take some system-specific
+           randomness for this value, so that in a network of systems
+           with synced clocks timer events are distributed a
+           bit. Here, we calculate a perturbation usec offset from the
+           boot ID. */
+
+        if (_likely_(e->perturb != USEC_INFINITY))
+                return;
+
+        if (sd_id128_get_boot(&bootid) >= 0)
+                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
+}
+
 static int event_setup_timer_fd(
                 sd_event *e,
                 struct clock_data *d,
                 clockid_t clock) {
 
-        sd_id128_t bootid = {};
         struct epoll_event ev = {};
         int r, fd;
 
@@ -762,21 +886,13 @@ static int event_setup_timer_fd(
         }
 
         d->fd = fd;
+        return 0;
+}
 
-        /* When we sleep for longer, we try to realign the wakeup to
-           the same time wihtin each minute/second/250ms, so that
-           events all across the system can be coalesced into a single
-           CPU wakeup. However, let's take some system-specific
-           randomness for this value, so that in a network of systems
-           with synced clocks timer events are distributed a
-           bit. Here, we calculate a perturbation usec offset from the
-           boot ID. */
-
-        if (e->perturb == (usec_t) -1)
-                if (sd_id128_get_boot(&bootid) >= 0)
-                        e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
+static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
+        assert(s);
 
-        return 0;
+        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
 }
 
 _public_ int sd_event_add_time(
@@ -794,13 +910,14 @@ _public_ int sd_event_add_time(
         int r;
 
         assert_return(e, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(usec != (uint64_t) -1, -EINVAL);
         assert_return(accuracy != (uint64_t) -1, -EINVAL);
-        assert_return(callback, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
+        if (!callback)
+                callback = time_exit_callback;
+
         type = clock_to_event_source_type(clock);
         assert_return(type >= 0, -ENOTSUP);
 
@@ -825,7 +942,7 @@ _public_ int sd_event_add_time(
                         return r;
         }
 
-        s = source_new(e, type);
+        s = source_new(e, !ret, type);
         if (!s)
                 return -ENOMEM;
 
@@ -836,6 +953,8 @@ _public_ int sd_event_add_time(
         s->userdata = userdata;
         s->enabled = SD_EVENT_ONESHOT;
 
+        d->needs_rearm = true;
+
         r = prioq_put(d->earliest, s, &s->time.earliest_index);
         if (r < 0)
                 goto fail;
@@ -844,7 +963,9 @@ _public_ int sd_event_add_time(
         if (r < 0)
                 goto fail;
 
-        *ret = s;
+        if (ret)
+                *ret = s;
+
         return 0;
 
 fail:
@@ -852,34 +973,10 @@ fail:
         return r;
 }
 
-static int event_update_signal_fd(sd_event *e) {
-        struct epoll_event ev = {};
-        bool add_to_epoll;
-        int r;
-
-        assert(e);
-
-        add_to_epoll = e->signal_fd < 0;
-
-        r = signalfd(e->signal_fd, &e->sigset, SFD_NONBLOCK|SFD_CLOEXEC);
-        if (r < 0)
-                return -errno;
-
-        e->signal_fd = r;
-
-        if (!add_to_epoll)
-                return 0;
-
-        ev.events = EPOLLIN;
-        ev.data.ptr = INT_TO_PTR(SOURCE_SIGNAL);
-
-        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->signal_fd, &ev);
-        if (r < 0) {
-                e->signal_fd = safe_close(e->signal_fd);
-                return -errno;
-        }
+static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+        assert(s);
 
-        return 0;
+        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
 }
 
 _public_ int sd_event_add_signal(
@@ -892,15 +989,17 @@ _public_ int sd_event_add_signal(
         sd_event_source *s;
         sigset_t ss;
         int r;
+        bool previous;
 
         assert_return(e, -EINVAL);
         assert_return(sig > 0, -EINVAL);
         assert_return(sig < _NSIG, -EINVAL);
-        assert_return(callback, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
+        if (!callback)
+                callback = signal_exit_callback;
+
         r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
         if (r < 0)
                 return -errno;
@@ -915,7 +1014,9 @@ _public_ int sd_event_add_signal(
         } else if (e->signal_sources[sig])
                 return -EBUSY;
 
-        s = source_new(e, SOURCE_SIGNAL);
+        previous = need_signal(e, sig);
+
+        s = source_new(e, !ret, SOURCE_SIGNAL);
         if (!s)
                 return -ENOMEM;
 
@@ -925,9 +1026,10 @@ _public_ int sd_event_add_signal(
         s->enabled = SD_EVENT_ON;
 
         e->signal_sources[sig] = s;
-        assert_se(sigaddset(&e->sigset, sig) == 0);
 
-        if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
+        if (!previous) {
+                assert_se(sigaddset(&e->sigset, sig) == 0);
+
                 r = event_update_signal_fd(e);
                 if (r < 0) {
                         source_free(s);
@@ -935,7 +1037,12 @@ _public_ int sd_event_add_signal(
                 }
         }
 
-        *ret = s;
+        /* Use the signal name as description for the event source by default */
+        (void) sd_event_source_set_description(s, signal_to_string(sig));
+
+        if (ret)
+                *ret = s;
+
         return 0;
 }
 
@@ -949,24 +1056,26 @@ _public_ int sd_event_add_child(
 
         sd_event_source *s;
         int r;
+        bool previous;
 
         assert_return(e, -EINVAL);
         assert_return(pid > 1, -EINVAL);
         assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
         assert_return(options != 0, -EINVAL);
         assert_return(callback, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
+        r = hashmap_ensure_allocated(&e->child_sources, NULL);
         if (r < 0)
                 return r;
 
         if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                 return -EBUSY;
 
-        s = source_new(e, SOURCE_CHILD);
+        previous = need_signal(e, SIGCHLD);
+
+        s = source_new(e, !ret, SOURCE_CHILD);
         if (!s)
                 return -ENOMEM;
 
@@ -984,19 +1093,21 @@ _public_ int sd_event_add_child(
 
         e->n_enabled_child_sources ++;
 
-        assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
+        if (!previous) {
+                assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
 
-        if (!e->signal_sources || !e->signal_sources[SIGCHLD]) {
                 r = event_update_signal_fd(e);
                 if (r < 0) {
                         source_free(s);
-                        return -errno;
+                        return r;
                 }
         }
 
         e->need_process_child = true;
 
-        *ret = s;
+        if (ret)
+                *ret = s;
+
         return 0;
 }
 
@@ -1011,11 +1122,10 @@ _public_ int sd_event_add_defer(
 
         assert_return(e, -EINVAL);
         assert_return(callback, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        s = source_new(e, SOURCE_DEFER);
+        s = source_new(e, !ret, SOURCE_DEFER);
         if (!s)
                 return -ENOMEM;
 
@@ -1029,7 +1139,9 @@ _public_ int sd_event_add_defer(
                 return r;
         }
 
-        *ret = s;
+        if (ret)
+                *ret = s;
+
         return 0;
 }
 
@@ -1044,15 +1156,14 @@ _public_ int sd_event_add_post(
 
         assert_return(e, -EINVAL);
         assert_return(callback, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        r = set_ensure_allocated(&e->post_sources, trivial_hash_func, trivial_compare_func);
+        r = set_ensure_allocated(&e->post_sources, NULL);
         if (r < 0)
                 return r;
 
-        s = source_new(e, SOURCE_POST);
+        s = source_new(e, !ret, SOURCE_POST);
         if (!s)
                 return -ENOMEM;
 
@@ -1066,7 +1177,9 @@ _public_ int sd_event_add_post(
                 return r;
         }
 
-        *ret = s;
+        if (ret)
+                *ret = s;
+
         return 0;
 }
 
@@ -1081,7 +1194,6 @@ _public_ int sd_event_add_exit(
 
         assert_return(e, -EINVAL);
         assert_return(callback, -EINVAL);
-        assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
@@ -1091,7 +1203,7 @@ _public_ int sd_event_add_exit(
                         return -ENOMEM;
         }
 
-        s = source_new(e, SOURCE_EXIT);
+        s = source_new(e, !ret, SOURCE_EXIT);
         if (!s)
                 return -ENOMEM;
 
@@ -1106,7 +1218,9 @@ _public_ int sd_event_add_exit(
                 return r;
         }
 
-        *ret = s;
+        if (ret)
+                *ret = s;
+
         return 0;
 }
 
@@ -1139,6 +1253,8 @@ _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
                 if (s->dispatching) {
                         if (s->type == SOURCE_IO)
                                 source_io_unregister(s);
+
+                        source_disconnect(s);
                 } else
                         source_free(s);
         }
@@ -1146,6 +1262,23 @@ _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
         return NULL;
 }
 
+_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
+        assert_return(s, -EINVAL);
+        assert_return(!event_pid_changed(s->event), -ECHILD);
+
+        return free_and_strdup(&s->description, description);
+}
+
+_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
+        assert_return(s, -EINVAL);
+        assert_return(description, -EINVAL);
+        assert_return(s->description, -ENXIO);
+        assert_return(!event_pid_changed(s->event), -ECHILD);
+
+        *description = s->description;
+        return 0;
+}
+
 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
         assert_return(s, NULL);
 
@@ -1224,7 +1357,8 @@ _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events)
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
-        if (s->io.events == events)
+        /* edge-triggered updates are never skipped, so we can reset edges */
+        if (s->io.events == events && !(events & EPOLLET))
                 return 0;
 
         if (s->enabled != SD_EVENT_OFF) {
@@ -1301,9 +1435,13 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
 
         assert_return(s, -EINVAL);
         assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
-        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
+        /* If we are dead anyway, we are fine with turning off
+         * sources, but everything else needs to fail. */
+        if (s->event->state == SD_EVENT_FINISHED)
+                return m == SD_EVENT_OFF ? 0 : -ESTALE;
+
         if (s->enabled == m)
                 return 0;
 
@@ -1320,6 +1458,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
                         break;
 
                 case SOURCE_TIME_REALTIME:
+                case SOURCE_TIME_BOOTTIME:
                 case SOURCE_TIME_MONOTONIC:
                 case SOURCE_TIME_REALTIME_ALARM:
                 case SOURCE_TIME_BOOTTIME_ALARM: {
@@ -1331,27 +1470,37 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
 
                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
+                        d->needs_rearm = true;
                         break;
                 }
 
                 case SOURCE_SIGNAL:
+                        assert(need_signal(s->event, s->signal.sig));
+
                         s->enabled = m;
-                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
+
+                        if (!need_signal(s->event, s->signal.sig)) {
                                 assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
-                                event_update_signal_fd(s->event);
+
+                                (void) event_update_signal_fd(s->event);
+                                /* If disabling failed, we might get a spurious event,
+                                 * but otherwise nothing bad should happen. */
                         }
 
                         break;
 
                 case SOURCE_CHILD:
+                        assert(need_signal(s->event, SIGCHLD));
+
                         s->enabled = m;
 
                         assert(s->event->n_enabled_child_sources > 0);
                         s->event->n_enabled_child_sources--;
 
-                        if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
+                        if (!need_signal(s->event, SIGCHLD)) {
                                 assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
-                                event_update_signal_fd(s->event);
+
+                                (void) event_update_signal_fd(s->event);
                         }
 
                         break;
@@ -1382,6 +1531,7 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
                         break;
 
                 case SOURCE_TIME_REALTIME:
+                case SOURCE_TIME_BOOTTIME:
                 case SOURCE_TIME_MONOTONIC:
                 case SOURCE_TIME_REALTIME_ALARM:
                 case SOURCE_TIME_BOOTTIME_ALARM: {
@@ -1393,26 +1543,39 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
 
                         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                         prioq_reshuffle(d->latest, s, &s->time.latest_index);
+                        d->needs_rearm = true;
                         break;
                 }
 
                 case SOURCE_SIGNAL:
-                        s->enabled = m;
-
-                        if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)  {
+                        /* Check status before enabling. */
+                        if (!need_signal(s->event, s->signal.sig)) {
                                 assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
-                                event_update_signal_fd(s->event);
+
+                                r = event_update_signal_fd(s->event);
+                                if (r < 0) {
+                                        s->enabled = SD_EVENT_OFF;
+                                        return r;
+                                }
                         }
+
+                        s->enabled = m;
                         break;
 
                 case SOURCE_CHILD:
+                        /* Check status before enabling. */
                         if (s->enabled == SD_EVENT_OFF) {
-                                s->event->n_enabled_child_sources++;
-
-                                if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
-                                        assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
-                                        event_update_signal_fd(s->event);
+                                if (!need_signal(s->event, SIGCHLD)) {
+                                        assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
+
+                                        r = event_update_signal_fd(s->event);
+                                        if (r < 0) {
+                                                s->enabled = SD_EVENT_OFF;
+                                                return r;
+                                        }
                                 }
+
+                                s->event->n_enabled_child_sources++;
                         }
 
                         s->enabled = m;
@@ -1470,6 +1633,7 @@ _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
 
         prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
         prioq_reshuffle(d->latest, s, &s->time.latest_index);
+        d->needs_rearm = true;
 
         return 0;
 }
@@ -1504,6 +1668,7 @@ _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec
         assert(d);
 
         prioq_reshuffle(d->latest, s, &s->time.latest_index);
+        d->needs_rearm = true;
 
         return 0;
 }
@@ -1588,6 +1753,8 @@ static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
         if (b <= a + 1)
                 return a;
 
+        initialize_perturb(e);
+
         /*
           Find a good time to wake up again between times a and b. We
           have two goals here:
@@ -1665,13 +1832,18 @@ static int event_arm_timer(
         assert(e);
         assert(d);
 
+        if (!d->needs_rearm)
+                return 0;
+        else
+                d->needs_rearm = false;
+
         a = prioq_peek(d->earliest);
         if (!a || a->enabled == SD_EVENT_OFF) {
 
                 if (d->fd < 0)
                         return 0;
 
-                if (d->next == (usec_t) -1)
+                if (d->next == USEC_INFINITY)
                         return 0;
 
                 /* disarm */
@@ -1679,7 +1851,7 @@ static int event_arm_timer(
                 if (r < 0)
                         return r;
 
-                d->next = (usec_t) -1;
+                d->next = USEC_INFINITY;
                 return 0;
         }
 
@@ -1747,7 +1919,7 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
                 return -EIO;
 
         if (next)
-                *next = (usec_t) -1;
+                *next = USEC_INFINITY;
 
         return 0;
 }
@@ -1777,6 +1949,7 @@ static int process_timer(
 
                 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                 prioq_reshuffle(d->latest, s, &s->time.latest_index);
+                d->needs_rearm = true;
         }
 
         return 0;
@@ -1854,38 +2027,42 @@ static int process_signal(sd_event *e, uint32_t events) {
         int r;
 
         assert(e);
-        assert(e->signal_sources);
 
         assert_return(events == EPOLLIN, -EIO);
 
         for (;;) {
                 struct signalfd_siginfo si;
-                ssize_t ss;
-                sd_event_source *s;
+                ssize_t n;
+                sd_event_source *s = NULL;
 
-                ss = read(e->signal_fd, &si, sizeof(si));
-                if (ss < 0) {
+                n = read(e->signal_fd, &si, sizeof(si));
+                if (n < 0) {
                         if (errno == EAGAIN || errno == EINTR)
                                 return read_one;
 
                         return -errno;
                 }
 
-                if (_unlikely_(ss != sizeof(si)))
+                if (_unlikely_(n != sizeof(si)))
                         return -EIO;
 
+                assert(si.ssi_signo < _NSIG);
+
                 read_one = true;
 
-                s = e->signal_sources[si.ssi_signo];
                 if (si.ssi_signo == SIGCHLD) {
                         r = process_child(e);
                         if (r < 0)
                                 return r;
-                        if (r > 0 || !s)
+                        if (r > 0)
                                 continue;
-                } else
-                        if (!s)
-                                return -EIO;
+                }
+
+                if (e->signal_sources)
+                        s = e->signal_sources[si.ssi_signo];
+
+                if (!s)
+                        continue;
 
                 s->signal.siginfo = si;
                 r = source_set_pending(s, true);
@@ -1938,6 +2115,7 @@ static int source_dispatch(sd_event_source *s) {
                 break;
 
         case SOURCE_TIME_REALTIME:
+        case SOURCE_TIME_BOOTTIME:
         case SOURCE_TIME_MONOTONIC:
         case SOURCE_TIME_REALTIME_ALARM:
         case SOURCE_TIME_BOOTTIME_ALARM:
@@ -1977,13 +2155,19 @@ static int source_dispatch(sd_event_source *s) {
                 break;
 
         case SOURCE_WATCHDOG:
+        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
+        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                 assert_not_reached("Wut? I shouldn't exist.");
         }
 
         s->dispatching = false;
 
-        if (r < 0)
-                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
+        if (r < 0) {
+                if (s->description)
+                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
+                else
+                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
+        }
 
         if (s->n_ref == 0)
                 source_free(s);
@@ -2016,8 +2200,12 @@ static int event_prepare(sd_event *e) {
                 r = s->prepare(s, s->userdata);
                 s->dispatching = false;
 
-                if (r < 0)
-                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
+                if (r < 0) {
+                        if (s->description)
+                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
+                        else
+                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
+                }
 
                 if (s->n_ref == 0)
                         source_free(s);
@@ -2081,6 +2269,11 @@ static int arm_watchdog(sd_event *e) {
 
         timespec_store(&its.it_value, t);
 
+        /* Make sure we never set the watchdog to 0, which tells the
+         * kernel to disable it. */
+        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
+                its.it_value.tv_nsec = 1;
+
         r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
         if (r < 0)
                 return -errno;
@@ -2104,11 +2297,8 @@ static int process_watchdog(sd_event *e) {
         return arm_watchdog(e);
 }
 
-_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
-        struct epoll_event *ev_queue;
-        unsigned ev_queue_max;
-        sd_event_source *p;
-        int r, i, m;
+_public_ int sd_event_prepare(sd_event *e) {
+        int r;
 
         assert_return(e, -EINVAL);
         assert_return(!event_pid_changed(e), -ECHILD);
@@ -2116,42 +2306,78 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
 
         if (e->exit_requested)
-                return dispatch_exit(e);
+                goto pending;
 
-        sd_event_ref(e);
         e->iteration++;
-        e->state = SD_EVENT_RUNNING;
 
         r = event_prepare(e);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = event_arm_timer(e, &e->realtime);
         if (r < 0)
-                goto finish;
+                return r;
+
+        r = event_arm_timer(e, &e->boottime);
+        if (r < 0)
+                return r;
 
         r = event_arm_timer(e, &e->monotonic);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = event_arm_timer(e, &e->realtime_alarm);
         if (r < 0)
-                goto finish;
+                return r;
 
         r = event_arm_timer(e, &e->boottime_alarm);
         if (r < 0)
-                goto finish;
+                return r;
 
         if (event_next_pending(e) || e->need_process_child)
-                timeout = 0;
+                goto pending;
+
+        e->state = SD_EVENT_PREPARED;
+
+        return 0;
+
+pending:
+        e->state = SD_EVENT_PREPARED;
+        r = sd_event_wait(e, 0);
+        if (r == 0)
+                e->state = SD_EVENT_PREPARED;
+
+        return r;
+}
+
+_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
+        struct epoll_event *ev_queue;
+        unsigned ev_queue_max;
+        int r, m, i;
 
-        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
+        assert_return(e, -EINVAL);
+        assert_return(!event_pid_changed(e), -ECHILD);
+        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+        assert_return(e->state == SD_EVENT_PREPARED, -EBUSY);
+
+        if (e->exit_requested) {
+                e->state = SD_EVENT_PENDING;
+                return 1;
+        }
+
+        ev_queue_max = MAX(e->n_sources, 1u);
         ev_queue = newa(struct epoll_event, ev_queue_max);
 
         m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
         if (m < 0) {
-                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
+                if (errno == EINTR) {
+                        e->state = SD_EVENT_PENDING;
+                        return 1;
+                }
+
+                r = -errno;
+
                 goto finish;
         }
 
@@ -2162,6 +2388,8 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
 
                 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME))
                         r = flush_timer(e, e->realtime.fd, ev_queue[i].events, &e->realtime.next);
+                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_BOOTTIME))
+                        r = flush_timer(e, e->boottime.fd, ev_queue[i].events, &e->boottime.next);
                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_MONOTONIC))
                         r = flush_timer(e, e->monotonic.fd, ev_queue[i].events, &e->monotonic.next);
                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_TIME_REALTIME_ALARM))
@@ -2187,6 +2415,10 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         if (r < 0)
                 goto finish;
 
+        r = process_timer(e, e->timestamp_boottime, &e->boottime);
+        if (r < 0)
+                goto finish;
+
         r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
         if (r < 0)
                 goto finish;
@@ -2205,21 +2437,71 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
                         goto finish;
         }
 
-        p = event_next_pending(e);
-        if (!p) {
-                r = 1;
-                goto finish;
+        if (event_next_pending(e)) {
+                e->state = SD_EVENT_PENDING;
+
+                return 1;
         }
 
-        r = source_dispatch(p);
+        r = 0;
 
 finish:
         e->state = SD_EVENT_PASSIVE;
-        sd_event_unref(e);
 
         return r;
 }
 
+_public_ int sd_event_dispatch(sd_event *e) {
+        sd_event_source *p;
+        int r;
+
+        assert_return(e, -EINVAL);
+        assert_return(!event_pid_changed(e), -ECHILD);
+        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
+
+        if (e->exit_requested)
+                return dispatch_exit(e);
+
+        p = event_next_pending(e);
+        if (p) {
+                sd_event_ref(e);
+
+                e->state = SD_EVENT_RUNNING;
+                r = source_dispatch(p);
+                e->state = SD_EVENT_PASSIVE;
+
+                sd_event_unref(e);
+
+                return r;
+        }
+
+        e->state = SD_EVENT_PASSIVE;
+
+        return 1;
+}
+
+_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
+        int r;
+
+        assert_return(e, -EINVAL);
+        assert_return(!event_pid_changed(e), -ECHILD);
+        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
+
+        r = sd_event_prepare(e);
+        if (r > 0)
+                return sd_event_dispatch(e);
+        else if (r < 0)
+                return r;
+
+        r = sd_event_wait(e, timeout);
+        if (r > 0)
+                return sd_event_dispatch(e);
+        else
+                return r;
+}
+
 _public_ int sd_event_loop(sd_event *e) {
         int r;
 
@@ -2242,6 +2524,14 @@ finish:
         return r;
 }
 
+_public_ int sd_event_get_fd(sd_event *e) {
+
+        assert_return(e, -EINVAL);
+        assert_return(!event_pid_changed(e), -ECHILD);
+
+        return e->epoll_fd;
+}
+
 _public_ int sd_event_get_state(sd_event *e) {
         assert_return(e, -EINVAL);
         assert_return(!event_pid_changed(e), -ECHILD);
@@ -2292,6 +2582,7 @@ _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
                 *usec = e->timestamp.monotonic;
                 break;
 
+        case CLOCK_BOOTTIME:
         case CLOCK_BOOTTIME_ALARM:
                 *usec = e->timestamp_boottime;
                 break;