chiark / gitweb /
sd-event: fix return code of sd_event_run()
[elogind.git] / src / libsystemd-bus / sd-event.c
index 2a8eac6df2fdd3cb9401250f7affb41ec45f80d5..0317088af31b73b59d84345968df5518f206faf0 100644 (file)
@@ -24,6 +24,7 @@
 #include <sys/wait.h>
 
 #include "sd-id128.h"
 #include <sys/wait.h>
 
 #include "sd-id128.h"
+#include "sd-daemon.h"
 #include "macro.h"
 #include "prioq.h"
 #include "hashmap.h"
 #include "macro.h"
 #include "prioq.h"
 #include "hashmap.h"
@@ -33,7 +34,7 @@
 
 #include "sd-event.h"
 
 
 #include "sd-event.h"
 
-#define EPOLL_QUEUE_MAX 64
+#define EPOLL_QUEUE_MAX 512U
 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
 
 typedef enum EventSourceType {
 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
 
 typedef enum EventSourceType {
@@ -43,7 +44,8 @@ typedef enum EventSourceType {
         SOURCE_SIGNAL,
         SOURCE_CHILD,
         SOURCE_DEFER,
         SOURCE_SIGNAL,
         SOURCE_CHILD,
         SOURCE_DEFER,
-        SOURCE_QUIT
+        SOURCE_EXIT,
+        SOURCE_WATCHDOG
 } EventSourceType;
 
 struct sd_event_source {
 } EventSourceType;
 
 struct sd_event_source {
@@ -56,6 +58,7 @@ struct sd_event_source {
         EventSourceType type:4;
         int enabled:3;
         bool pending:1;
         EventSourceType type:4;
         int enabled:3;
         bool pending:1;
+        bool dispatching:1;
 
         int priority;
         unsigned pending_index;
 
         int priority;
         unsigned pending_index;
@@ -94,7 +97,7 @@ struct sd_event_source {
                 struct {
                         sd_event_handler_t callback;
                         unsigned prioq_index;
                 struct {
                         sd_event_handler_t callback;
                         unsigned prioq_index;
-                } quit;
+                } exit;
         };
 };
 
         };
 };
 
@@ -105,6 +108,7 @@ struct sd_event {
         int signal_fd;
         int realtime_fd;
         int monotonic_fd;
         int signal_fd;
         int realtime_fd;
         int monotonic_fd;
+        int watchdog_fd;
 
         Prioq *pending;
         Prioq *prepare;
 
         Prioq *pending;
         Prioq *prepare;
@@ -129,7 +133,7 @@ struct sd_event {
         Hashmap *child_sources;
         unsigned n_enabled_child_sources;
 
         Hashmap *child_sources;
         unsigned n_enabled_child_sources;
 
-        Prioq *quit;
+        Prioq *exit;
 
         pid_t original_pid;
 
 
         pid_t original_pid;
 
@@ -137,11 +141,18 @@ struct sd_event {
         dual_timestamp timestamp;
         int state;
 
         dual_timestamp timestamp;
         int state;
 
-        bool quit_requested:1;
+        bool exit_requested:1;
         bool need_process_child:1;
         bool need_process_child:1;
+        bool watchdog:1;
+
+        int exit_code;
 
         pid_t tid;
         sd_event **default_event_ptr;
 
         pid_t tid;
         sd_event **default_event_ptr;
+
+        usec_t watchdog_last, watchdog_period;
+
+        unsigned n_sources;
 };
 
 static int pending_prioq_compare(const void *a, const void *b) {
 };
 
 static int pending_prioq_compare(const void *a, const void *b) {
@@ -278,11 +289,11 @@ static int latest_time_prioq_compare(const void *a, const void *b) {
         return 0;
 }
 
         return 0;
 }
 
-static int quit_prioq_compare(const void *a, const void *b) {
+static int exit_prioq_compare(const void *a, const void *b) {
         const sd_event_source *x = a, *y = b;
 
         const sd_event_source *x = a, *y = b;
 
-        assert(x->type == SOURCE_QUIT);
-        assert(y->type == SOURCE_QUIT);
+        assert(x->type == SOURCE_EXIT);
+        assert(y->type == SOURCE_EXIT);
 
         /* Enabled ones first */
         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
 
         /* Enabled ones first */
         if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
@@ -307,6 +318,7 @@ static int quit_prioq_compare(const void *a, const void *b) {
 
 static void event_free(sd_event *e) {
         assert(e);
 
 static void event_free(sd_event *e) {
         assert(e);
+        assert(e->n_sources == 0);
 
         if (e->default_event_ptr)
                 *(e->default_event_ptr) = NULL;
 
         if (e->default_event_ptr)
                 *(e->default_event_ptr) = NULL;
@@ -323,13 +335,16 @@ static void event_free(sd_event *e) {
         if (e->monotonic_fd >= 0)
                 close_nointr_nofail(e->monotonic_fd);
 
         if (e->monotonic_fd >= 0)
                 close_nointr_nofail(e->monotonic_fd);
 
+        if (e->watchdog_fd >= 0)
+                close_nointr_nofail(e->watchdog_fd);
+
         prioq_free(e->pending);
         prioq_free(e->prepare);
         prioq_free(e->monotonic_earliest);
         prioq_free(e->monotonic_latest);
         prioq_free(e->realtime_earliest);
         prioq_free(e->realtime_latest);
         prioq_free(e->pending);
         prioq_free(e->prepare);
         prioq_free(e->monotonic_earliest);
         prioq_free(e->monotonic_latest);
         prioq_free(e->realtime_earliest);
         prioq_free(e->realtime_latest);
-        prioq_free(e->quit);
+        prioq_free(e->exit);
 
         free(e->signal_sources);
 
 
         free(e->signal_sources);
 
@@ -348,7 +363,7 @@ _public_ int sd_event_new(sd_event** ret) {
                 return -ENOMEM;
 
         e->n_ref = 1;
                 return -ENOMEM;
 
         e->n_ref = 1;
-        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
+        e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
         e->realtime_next = e->monotonic_next = (usec_t) -1;
         e->original_pid = getpid();
 
         e->realtime_next = e->monotonic_next = (usec_t) -1;
         e->original_pid = getpid();
 
@@ -384,7 +399,9 @@ _public_ sd_event* sd_event_ref(sd_event *e) {
 }
 
 _public_ sd_event* sd_event_unref(sd_event *e) {
 }
 
 _public_ sd_event* sd_event_unref(sd_event *e) {
-        assert_return(e, NULL);
+
+        if (!e)
+                return NULL;
 
         assert(e->n_ref >= 1);
         e->n_ref--;
 
         assert(e->n_ref >= 1);
         e->n_ref--;
@@ -456,6 +473,8 @@ static void source_free(sd_event_source *s) {
         assert(s);
 
         if (s->event) {
         assert(s);
 
         if (s->event) {
+                assert(s->event->n_sources > 0);
+
                 switch (s->type) {
 
                 case SOURCE_IO:
                 switch (s->type) {
 
                 case SOURCE_IO:
@@ -504,9 +523,12 @@ static void source_free(sd_event_source *s) {
                         /* nothing */
                         break;
 
                         /* nothing */
                         break;
 
-                case SOURCE_QUIT:
-                        prioq_remove(s->event->quit, s, &s->quit.prioq_index);
+                case SOURCE_EXIT:
+                        prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                         break;
                         break;
+
+                case SOURCE_WATCHDOG:
+                        assert_not_reached("Wut? I shouldn't exist.");
                 }
 
                 if (s->pending)
                 }
 
                 if (s->pending)
@@ -515,6 +537,7 @@ static void source_free(sd_event_source *s) {
                 if (s->prepare)
                         prioq_remove(s->event->prepare, s, &s->prepare_index);
 
                 if (s->prepare)
                         prioq_remove(s->event->prepare, s, &s->prepare_index);
 
+                s->event->n_sources--;
                 sd_event_unref(s->event);
         }
 
                 sd_event_unref(s->event);
         }
 
@@ -525,7 +548,7 @@ static int source_set_pending(sd_event_source *s, bool b) {
         int r;
 
         assert(s);
         int r;
 
         assert(s);
-        assert(s->type != SOURCE_QUIT);
+        assert(s->type != SOURCE_EXIT);
 
         if (s->pending == b)
                 return 0;
 
         if (s->pending == b)
                 return 0;
@@ -568,6 +591,8 @@ static sd_event_source *source_new(sd_event *e, EventSourceType type) {
         s->type = type;
         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
 
         s->type = type;
         s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
 
+        e->n_sources ++;
+
         return s;
 }
 
         return s;
 }
 
@@ -584,7 +609,7 @@ _public_ int sd_event_add_io(
 
         assert_return(e, -EINVAL);
         assert_return(fd >= 0, -EINVAL);
 
         assert_return(e, -EINVAL);
         assert_return(fd >= 0, -EINVAL);
-        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
+        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
         assert_return(callback, -EINVAL);
         assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(callback, -EINVAL);
         assert_return(ret, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
@@ -923,7 +948,7 @@ _public_ int sd_event_add_defer(
         return 0;
 }
 
         return 0;
 }
 
-_public_ int sd_event_add_quit(
+_public_ int sd_event_add_exit(
                 sd_event *e,
                 sd_event_handler_t callback,
                 void *userdata,
                 sd_event *e,
                 sd_event_handler_t callback,
                 void *userdata,
@@ -938,22 +963,22 @@ _public_ int sd_event_add_quit(
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        if (!e->quit) {
-                e->quit = prioq_new(quit_prioq_compare);
-                if (!e->quit)
+        if (!e->exit) {
+                e->exit = prioq_new(exit_prioq_compare);
+                if (!e->exit)
                         return -ENOMEM;
         }
 
                         return -ENOMEM;
         }
 
-        s = source_new(e, SOURCE_QUIT);
+        s = source_new(e, SOURCE_EXIT);
         if (!s)
                 return -ENOMEM;
 
         if (!s)
                 return -ENOMEM;
 
-        s->quit.callback = callback;
+        s->exit.callback = callback;
         s->userdata = userdata;
         s->userdata = userdata;
-        s->quit.prioq_index = PRIOQ_IDX_NULL;
+        s->exit.prioq_index = PRIOQ_IDX_NULL;
         s->enabled = SD_EVENT_ONESHOT;
 
         s->enabled = SD_EVENT_ONESHOT;
 
-        r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
+        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
         if (r < 0) {
                 source_free(s);
                 return r;
         if (r < 0) {
                 source_free(s);
                 return r;
@@ -973,18 +998,33 @@ _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
 }
 
 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
 }
 
 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
-        assert_return(s, NULL);
+
+        if (!s)
+                return NULL;
 
         assert(s->n_ref >= 1);
         s->n_ref--;
 
 
         assert(s->n_ref >= 1);
         s->n_ref--;
 
-        if (s->n_ref <= 0)
-                source_free(s);
+        if (s->n_ref <= 0) {
+                /* Here's a special hack: when we are called from a
+                 * dispatch handler we won't free the event source
+                 * immediately, but we will detach the fd from the
+                 * epoll. This way it is safe for the caller to unref
+                 * the event source and immediately close the fd, but
+                 * we still retain a valid event source object after
+                 * the callback. */
+
+                if (s->dispatching) {
+                        if (s->type == SOURCE_IO)
+                                source_io_unregister(s);
+                } else
+                        source_free(s);
+        }
 
         return NULL;
 }
 
 
         return NULL;
 }
 
-_public_ sd_event *sd_event_get(sd_event_source *s) {
+_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
         assert_return(s, NULL);
 
         return s->event;
         assert_return(s, NULL);
 
         return s->event;
@@ -992,7 +1032,7 @@ _public_ sd_event *sd_event_get(sd_event_source *s) {
 
 _public_ int sd_event_source_get_pending(sd_event_source *s) {
         assert_return(s, -EINVAL);
 
 _public_ int sd_event_source_get_pending(sd_event_source *s) {
         assert_return(s, -EINVAL);
-        assert_return(s->type != SOURCE_QUIT, -EDOM);
+        assert_return(s->type != SOURCE_EXIT, -EDOM);
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
@@ -1007,6 +1047,42 @@ _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
         return s->io.fd;
 }
 
         return s->io.fd;
 }
 
+_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
+        int r;
+
+        assert_return(s, -EINVAL);
+        assert_return(fd >= 0, -EINVAL);
+        assert_return(s->type == SOURCE_IO, -EDOM);
+        assert_return(!event_pid_changed(s->event), -ECHILD);
+
+        if (s->io.fd == fd)
+                return 0;
+
+        if (s->enabled == SD_EVENT_OFF) {
+                s->io.fd = fd;
+                s->io.registered = false;
+        } else {
+                int saved_fd;
+
+                saved_fd = s->io.fd;
+                assert(s->io.registered);
+
+                s->io.fd = fd;
+                s->io.registered = false;
+
+                r = source_io_register(s, s->enabled, s->io.events);
+                if (r < 0) {
+                        s->io.fd = saved_fd;
+                        s->io.registered = true;
+                        return r;
+                }
+
+                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
+        }
+
+        return 0;
+}
+
 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
         assert_return(s, -EINVAL);
         assert_return(events, -EINVAL);
 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
         assert_return(s, -EINVAL);
         assert_return(events, -EINVAL);
@@ -1022,7 +1098,7 @@ _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events)
 
         assert_return(s, -EINVAL);
         assert_return(s->type == SOURCE_IO, -EDOM);
 
         assert_return(s, -EINVAL);
         assert_return(s->type == SOURCE_IO, -EDOM);
-        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
+        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
@@ -1083,8 +1159,8 @@ _public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
         if (s->prepare)
                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
 
         if (s->prepare)
                 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
 
-        if (s->type == SOURCE_QUIT)
-                prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
+        if (s->type == SOURCE_EXIT)
+                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
 
         return 0;
 }
 
         return 0;
 }
@@ -1155,14 +1231,17 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
 
                         break;
 
 
                         break;
 
-                case SOURCE_QUIT:
+                case SOURCE_EXIT:
                         s->enabled = m;
                         s->enabled = m;
-                        prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
+                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                         break;
 
                 case SOURCE_DEFER:
                         s->enabled = m;
                         break;
                         break;
 
                 case SOURCE_DEFER:
                         s->enabled = m;
                         break;
+
+                case SOURCE_WATCHDOG:
+                        assert_not_reached("Wut? I shouldn't exist.");
                 }
 
         } else {
                 }
 
         } else {
@@ -1210,14 +1289,17 @@ _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
                         }
                         break;
 
                         }
                         break;
 
-                case SOURCE_QUIT:
+                case SOURCE_EXIT:
                         s->enabled = m;
                         s->enabled = m;
-                        prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
+                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                         break;
 
                 case SOURCE_DEFER:
                         s->enabled = m;
                         break;
                         break;
 
                 case SOURCE_DEFER:
                         s->enabled = m;
                         break;
+
+                case SOURCE_WATCHDOG:
+                        assert_not_reached("Wut? I shouldn't exist.");
                 }
         }
 
                 }
         }
 
@@ -1308,7 +1390,7 @@ _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t
         int r;
 
         assert_return(s, -EINVAL);
         int r;
 
         assert_return(s, -EINVAL);
-        assert_return(s->type != SOURCE_QUIT, -EDOM);
+        assert_return(s->type != SOURCE_EXIT, -EDOM);
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
         assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(s->event), -ECHILD);
 
@@ -1342,6 +1424,17 @@ _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
         return s->userdata;
 }
 
         return s->userdata;
 }
 
+_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
+        void *ret;
+
+        assert_return(s, NULL);
+
+        ret = s->userdata;
+        s->userdata = userdata;
+
+        return ret;
+}
+
 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
         usec_t c;
         assert(e);
 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
         usec_t c;
         assert(e);
@@ -1366,8 +1459,9 @@ static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
           We implement this by waking up everywhere at the same time
           within any given minute if we can, synchronised via the
           perturbation value determined from the boot ID. If we can't,
           We implement this by waking up everywhere at the same time
           within any given minute if we can, synchronised via the
           perturbation value determined from the boot ID. If we can't,
-          then we try to find the same spot in every 1s and then 250ms
-          step. Otherwise, we pick the last possible time to wake up.
+          then we try to find the same spot in every 10s, then 1s and
+          then 250ms step. Otherwise, we pick the last possible time
+          to wake up.
         */
 
         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
         */
 
         c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
@@ -1378,6 +1472,17 @@ static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
                 c -= USEC_PER_MINUTE;
         }
 
                 c -= USEC_PER_MINUTE;
         }
 
+        if (c >= a)
+                return c;
+
+        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
+        if (c >= b) {
+                if (_unlikely_(c < USEC_PER_SEC*10))
+                        return b;
+
+                c -= USEC_PER_SEC*10;
+        }
+
         if (c >= a)
                 return c;
 
         if (c >= a)
                 return c;
 
@@ -1418,8 +1523,8 @@ static int event_arm_timer(
         usec_t t;
         int r;
 
         usec_t t;
         int r;
 
-        assert_se(e);
-        assert_se(next);
+        assert(e);
+        assert(next);
 
         a = prioq_peek(earliest);
         if (!a || a->enabled == SD_EVENT_OFF) {
 
         a = prioq_peek(earliest);
         if (!a || a->enabled == SD_EVENT_OFF) {
@@ -1458,18 +1563,27 @@ static int event_arm_timer(
 
         r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
         if (r < 0)
 
         r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
         if (r < 0)
-                return r;
+                return -errno;
 
         *next = t;
         return 0;
 }
 
 
         *next = t;
         return 0;
 }
 
-static int process_io(sd_event *e, sd_event_source *s, uint32_t events) {
+static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
         assert(e);
         assert(s);
         assert(s->type == SOURCE_IO);
 
         assert(e);
         assert(s);
         assert(s->type == SOURCE_IO);
 
-        s->io.revents = events;
+        /* If the event source was already pending, we just OR in the
+         * new revents, otherwise we reset the value. The ORing is
+         * necessary to handle EPOLLONESHOT events properly where
+         * readability might happen independently of writability, and
+         * we need to keep track of both */
+
+        if (s->pending)
+                s->io.revents |= revents;
+        else
+                s->io.revents = revents;
 
         return source_set_pending(s, true);
 }
 
         return source_set_pending(s, true);
 }
@@ -1480,7 +1594,6 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
 
         assert(e);
         assert(fd >= 0);
 
         assert(e);
         assert(fd >= 0);
-        assert(next);
 
         assert_return(events == EPOLLIN, -EIO);
 
 
         assert_return(events == EPOLLIN, -EIO);
 
@@ -1492,10 +1605,11 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
                 return -errno;
         }
 
                 return -errno;
         }
 
-        if (ss != sizeof(x))
+        if (_unlikely_(ss != sizeof(x)))
                 return -EIO;
 
                 return -EIO;
 
-        *next = (usec_t) -1;
+        if (next)
+                *next = (usec_t) -1;
 
         return 0;
 }
 
         return 0;
 }
@@ -1551,6 +1665,10 @@ static int process_child(sd_event *e) {
            don't care about. Since this is O(n) this means that if you
            have a lot of processes you probably want to handle SIGCHLD
            yourself.
            don't care about. Since this is O(n) this means that if you
            have a lot of processes you probably want to handle SIGCHLD
            yourself.
+
+           We do not reap the children here (by using WNOWAIT), this
+           is only done after the event source is dispatched so that
+           the callback still sees the process as a zombie.
         */
 
         HASHMAP_FOREACH(s, e->child_sources, i) {
         */
 
         HASHMAP_FOREACH(s, e->child_sources, i) {
@@ -1563,11 +1681,27 @@ static int process_child(sd_event *e) {
                         continue;
 
                 zero(s->child.siginfo);
                         continue;
 
                 zero(s->child.siginfo);
-                r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
+                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
+                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                 if (r < 0)
                         return -errno;
 
                 if (s->child.siginfo.si_pid != 0) {
                 if (r < 0)
                         return -errno;
 
                 if (s->child.siginfo.si_pid != 0) {
+                        bool zombie =
+                                s->child.siginfo.si_code == CLD_EXITED ||
+                                s->child.siginfo.si_code == CLD_KILLED ||
+                                s->child.siginfo.si_code == CLD_DUMPED;
+
+                        if (!zombie && (s->child.options & WEXITED)) {
+                                /* If the child isn't dead then let's
+                                 * immediately remove the state change
+                                 * from the queue, since there's no
+                                 * benefit in leaving it queued */
+
+                                assert(s->child.options & (WSTOPPED|WCONTINUED));
+                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+                        }
+
                         r = source_set_pending(s, true);
                         if (r < 0)
                                 return r;
                         r = source_set_pending(s, true);
                         if (r < 0)
                                 return r;
@@ -1599,7 +1733,7 @@ static int process_signal(sd_event *e, uint32_t events) {
                         return -errno;
                 }
 
                         return -errno;
                 }
 
-                if (ss != sizeof(si))
+                if (_unlikely_(ss != sizeof(si)))
                         return -EIO;
 
                 read_one = true;
                         return -EIO;
 
                 read_one = true;
@@ -1621,7 +1755,6 @@ static int process_signal(sd_event *e, uint32_t events) {
                         return r;
         }
 
                         return r;
         }
 
-
         return 0;
 }
 
         return 0;
 }
 
@@ -1629,9 +1762,9 @@ static int source_dispatch(sd_event_source *s) {
         int r = 0;
 
         assert(s);
         int r = 0;
 
         assert(s);
-        assert(s->pending || s->type == SOURCE_QUIT);
+        assert(s->pending || s->type == SOURCE_EXIT);
 
 
-        if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
+        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                 r = source_set_pending(s, false);
                 if (r < 0)
                         return r;
                 r = source_set_pending(s, false);
                 if (r < 0)
                         return r;
@@ -1643,7 +1776,7 @@ static int source_dispatch(sd_event_source *s) {
                         return r;
         }
 
                         return r;
         }
 
-        sd_event_source_ref(s);
+        s->dispatching = true;
 
         switch (s->type) {
 
 
         switch (s->type) {
 
@@ -1663,22 +1796,45 @@ static int source_dispatch(sd_event_source *s) {
                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                 break;
 
                 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                 break;
 
-        case SOURCE_CHILD:
+        case SOURCE_CHILD: {
+                bool zombie;
+
+                zombie = s->child.siginfo.si_code == CLD_EXITED ||
+                         s->child.siginfo.si_code == CLD_KILLED ||
+                         s->child.siginfo.si_code == CLD_DUMPED;
+
                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
                 r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+                /* Now, reap the PID for good. */
+                if (zombie)
+                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+
                 break;
                 break;
+        }
 
         case SOURCE_DEFER:
                 r = s->defer.callback(s, s->userdata);
                 break;
 
 
         case SOURCE_DEFER:
                 r = s->defer.callback(s, s->userdata);
                 break;
 
-        case SOURCE_QUIT:
-                r = s->quit.callback(s, s->userdata);
+        case SOURCE_EXIT:
+                r = s->exit.callback(s, s->userdata);
                 break;
                 break;
+
+        case SOURCE_WATCHDOG:
+                assert_not_reached("Wut? I shouldn't exist.");
         }
 
         }
 
-        sd_event_source_unref(s);
+        s->dispatching = false;
 
 
-        return r;
+        if (r < 0)
+                log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
+
+        if (s->n_ref == 0)
+                source_free(s);
+        else if (r < 0)
+                sd_event_source_set_enabled(s, SD_EVENT_OFF);
+
+        return 1;
 }
 
 static int event_prepare(sd_event *e) {
 }
 
 static int event_prepare(sd_event *e) {
@@ -1699,22 +1855,30 @@ static int event_prepare(sd_event *e) {
                         return r;
 
                 assert(s->prepare);
                         return r;
 
                 assert(s->prepare);
+
+                s->dispatching = true;
                 r = s->prepare(s, s->userdata);
                 r = s->prepare(s, s->userdata);
+                s->dispatching = false;
+
                 if (r < 0)
                 if (r < 0)
-                        return r;
+                        log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
 
 
+                if (s->n_ref == 0)
+                        source_free(s);
+                else if (r < 0)
+                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
         }
 
         return 0;
 }
 
         }
 
         return 0;
 }
 
-static int dispatch_quit(sd_event *e) {
+static int dispatch_exit(sd_event *e) {
         sd_event_source *p;
         int r;
 
         assert(e);
 
         sd_event_source *p;
         int r;
 
         assert(e);
 
-        p = prioq_peek(e->quit);
+        p = prioq_peek(e->exit);
         if (!p || p->enabled == SD_EVENT_OFF) {
                 e->state = SD_EVENT_FINISHED;
                 return 0;
         if (!p || p->enabled == SD_EVENT_OFF) {
                 e->state = SD_EVENT_FINISHED;
                 return 0;
@@ -1722,7 +1886,7 @@ static int dispatch_quit(sd_event *e) {
 
         sd_event_ref(e);
         e->iteration++;
 
         sd_event_ref(e);
         e->iteration++;
-        e->state = SD_EVENT_QUITTING;
+        e->state = SD_EVENT_EXITING;
 
         r = source_dispatch(p);
 
 
         r = source_dispatch(p);
 
@@ -1747,8 +1911,46 @@ static sd_event_source* event_next_pending(sd_event *e) {
         return p;
 }
 
         return p;
 }
 
+static int arm_watchdog(sd_event *e) {
+        struct itimerspec its = {};
+        usec_t t;
+        int r;
+
+        assert(e);
+        assert(e->watchdog_fd >= 0);
+
+        t = sleep_between(e,
+                          e->watchdog_last + (e->watchdog_period / 2),
+                          e->watchdog_last + (e->watchdog_period * 3 / 4));
+
+        timespec_store(&its.it_value, t);
+
+        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
+        if (r < 0)
+                return -errno;
+
+        return 0;
+}
+
+static int process_watchdog(sd_event *e) {
+        assert(e);
+
+        if (!e->watchdog)
+                return 0;
+
+        /* Don't notify watchdog too often */
+        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+                return 0;
+
+        sd_notify(false, "WATCHDOG=1");
+        e->watchdog_last = e->timestamp.monotonic;
+
+        return arm_watchdog(e);
+}
+
 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
-        struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
+        struct epoll_event *ev_queue;
+        unsigned ev_queue_max;
         sd_event_source *p;
         int r, i, m;
 
         sd_event_source *p;
         int r, i, m;
 
@@ -1757,8 +1959,8 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
 
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
 
-        if (e->quit_requested)
-                return dispatch_quit(e);
+        if (e->exit_requested)
+                return dispatch_exit(e);
 
         sd_event_ref(e);
         e->iteration++;
 
         sd_event_ref(e);
         e->iteration++;
@@ -1778,11 +1980,13 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
 
         if (event_next_pending(e) || e->need_process_child)
                 timeout = 0;
 
         if (event_next_pending(e) || e->need_process_child)
                 timeout = 0;
+        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
+        ev_queue = newa(struct epoll_event, ev_queue_max);
 
 
-        m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
+        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
         if (m < 0) {
                        timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
         if (m < 0) {
-                r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
+                r = errno == EAGAIN || errno == EINTR ? 1 : -errno;
                 goto finish;
         }
 
                 goto finish;
         }
 
@@ -1796,6 +2000,8 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
                         r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                         r = process_signal(e, ev_queue[i].events);
                         r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                 else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                         r = process_signal(e, ev_queue[i].events);
+                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                 else
                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
 
                 else
                         r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
 
@@ -1803,6 +2009,10 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
                         goto finish;
         }
 
                         goto finish;
         }
 
+        r = process_watchdog(e);
+        if (r < 0)
+                goto finish;
+
         r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
         if (r < 0)
                 goto finish;
         r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
         if (r < 0)
                 goto finish;
@@ -1819,7 +2029,7 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
 
         p = event_next_pending(e);
         if (!p) {
 
         p = event_next_pending(e);
         if (!p) {
-                r = 0;
+                r = 1;
                 goto finish;
         }
 
                 goto finish;
         }
 
@@ -1847,7 +2057,7 @@ _public_ int sd_event_loop(sd_event *e) {
                         goto finish;
         }
 
                         goto finish;
         }
 
-        r = 0;
+        r = e->exit_code;
 
 finish:
         sd_event_unref(e);
 
 finish:
         sd_event_unref(e);
@@ -1861,19 +2071,26 @@ _public_ int sd_event_get_state(sd_event *e) {
         return e->state;
 }
 
         return e->state;
 }
 
-_public_ int sd_event_get_quit(sd_event *e) {
+_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
         assert_return(e, -EINVAL);
         assert_return(e, -EINVAL);
+        assert_return(code, -EINVAL);
         assert_return(!event_pid_changed(e), -ECHILD);
 
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        return e->quit_requested;
+        if (!e->exit_requested)
+                return -ENODATA;
+
+        *code = e->exit_code;
+        return 0;
 }
 
 }
 
-_public_ int sd_event_request_quit(sd_event *e) {
+_public_ int sd_event_exit(sd_event *e, int code) {
         assert_return(e, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
         assert_return(e, -EINVAL);
         assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
         assert_return(!event_pid_changed(e), -ECHILD);
 
-        e->quit_requested = true;
+        e->exit_requested = true;
+        e->exit_code = code;
+
         return 0;
 }
 
         return 0;
 }
 
@@ -1899,7 +2116,7 @@ _public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
 
 _public_ int sd_event_default(sd_event **ret) {
 
 
 _public_ int sd_event_default(sd_event **ret) {
 
-        static __thread sd_event *default_event = NULL;
+        static thread_local sd_event *default_event = NULL;
         sd_event *e;
         int r;
 
         sd_event *e;
         int r;
 
@@ -1935,3 +2152,64 @@ _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
 
         return -ENXIO;
 }
 
         return -ENXIO;
 }
+
+_public_ int sd_event_set_watchdog(sd_event *e, int b) {
+        int r;
+
+        assert_return(e, -EINVAL);
+        assert_return(!event_pid_changed(e), -ECHILD);
+
+        if (e->watchdog == !!b)
+                return e->watchdog;
+
+        if (b) {
+                struct epoll_event ev = {};
+
+                r = sd_watchdog_enabled(false, &e->watchdog_period);
+                if (r <= 0)
+                        return r;
+
+                /* Issue first ping immediately */
+                sd_notify(false, "WATCHDOG=1");
+                e->watchdog_last = now(CLOCK_MONOTONIC);
+
+                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+                if (e->watchdog_fd < 0)
+                        return -errno;
+
+                r = arm_watchdog(e);
+                if (r < 0)
+                        goto fail;
+
+                ev.events = EPOLLIN;
+                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
+
+                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
+                if (r < 0) {
+                        r = -errno;
+                        goto fail;
+                }
+
+        } else {
+                if (e->watchdog_fd >= 0) {
+                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
+                        close_nointr_nofail(e->watchdog_fd);
+                        e->watchdog_fd = -1;
+                }
+        }
+
+        e->watchdog = !!b;
+        return e->watchdog;
+
+fail:
+        close_nointr_nofail(e->watchdog_fd);
+        e->watchdog_fd = -1;
+        return r;
+}
+
+_public_ int sd_event_get_watchdog(sd_event *e) {
+        assert_return(e, -EINVAL);
+        assert_return(!event_pid_changed(e), -ECHILD);
+
+        return e->watchdog;
+}