X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Flibsystemd-bus%2Fsd-event.c;h=0317088af31b73b59d84345968df5518f206faf0;hp=96ba2ad269cc10cc02f4d3ff20704ba619c15952;hb=f98a58fe894d34e4d9675757180f34a8523c936e;hpb=46e8c8257f5dd0bcf964eb1f5cbb9ce43aac5201 diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c index 96ba2ad26..0317088af 100644 --- a/src/libsystemd-bus/sd-event.c +++ b/src/libsystemd-bus/sd-event.c @@ -23,16 +23,18 @@ #include #include +#include "sd-id128.h" +#include "sd-daemon.h" #include "macro.h" #include "prioq.h" #include "hashmap.h" #include "util.h" #include "time-util.h" -#include "sd-id128.h" +#include "missing.h" #include "sd-event.h" -#define EPOLL_QUEUE_MAX 64 +#define EPOLL_QUEUE_MAX 512U #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC) typedef enum EventSourceType { @@ -42,7 +44,8 @@ typedef enum EventSourceType { SOURCE_SIGNAL, SOURCE_CHILD, SOURCE_DEFER, - SOURCE_QUIT + SOURCE_EXIT, + SOURCE_WATCHDOG } EventSourceType; struct sd_event_source { @@ -50,11 +53,12 @@ struct sd_event_source { sd_event *event; void *userdata; - sd_prepare_handler_t prepare; + sd_event_handler_t prepare; EventSourceType type:4; int enabled:3; bool pending:1; + bool dispatching:1; int priority; unsigned pending_index; @@ -64,36 +68,36 @@ struct sd_event_source { union { struct { - sd_io_handler_t callback; + sd_event_io_handler_t callback; int fd; uint32_t events; uint32_t revents; bool registered:1; } io; struct { - sd_time_handler_t callback; + sd_event_time_handler_t callback; usec_t next, accuracy; unsigned earliest_index; unsigned latest_index; } time; struct { - sd_signal_handler_t callback; + sd_event_signal_handler_t callback; struct signalfd_siginfo siginfo; int sig; } signal; struct { - sd_child_handler_t callback; + sd_event_child_handler_t callback; siginfo_t siginfo; pid_t pid; int options; } child; struct { - sd_defer_handler_t callback; + sd_event_handler_t callback; } defer; struct { - sd_quit_handler_t callback; + sd_event_handler_t callback; unsigned prioq_index; - } quit; + } exit; }; }; @@ -104,6 +108,7 @@ struct sd_event { int signal_fd; int realtime_fd; int monotonic_fd; + int watchdog_fd; Prioq *pending; Prioq *prepare; @@ -128,7 +133,7 @@ struct sd_event { Hashmap *child_sources; unsigned n_enabled_child_sources; - Prioq *quit; + Prioq *exit; pid_t original_pid; @@ -136,8 +141,18 @@ struct sd_event { dual_timestamp timestamp; int state; - bool quit_requested:1; + bool exit_requested:1; bool need_process_child:1; + bool watchdog:1; + + int exit_code; + + pid_t tid; + sd_event **default_event_ptr; + + usec_t watchdog_last, watchdog_period; + + unsigned n_sources; }; static int pending_prioq_compare(const void *a, const void *b) { @@ -230,7 +245,7 @@ static int earliest_time_prioq_compare(const void *a, const void *b) { if (x->time.next < y->time.next) return -1; if (x->time.next > y->time.next) - return -1; + return 1; /* Stability for the rest */ if (x < y) @@ -263,7 +278,7 @@ static int latest_time_prioq_compare(const void *a, const void *b) { if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy) return -1; if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy) - return -1; + return 1; /* Stability for the rest */ if (x < y) @@ -274,11 +289,11 @@ static int latest_time_prioq_compare(const void *a, const void *b) { return 0; } -static int quit_prioq_compare(const void *a, const void *b) { +static int exit_prioq_compare(const void *a, const void *b) { const sd_event_source *x = a, *y = b; - assert(x->type == SOURCE_QUIT); - assert(y->type == SOURCE_QUIT); + assert(x->type == SOURCE_EXIT); + assert(y->type == SOURCE_EXIT); /* Enabled ones first */ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF) @@ -303,6 +318,10 @@ static int quit_prioq_compare(const void *a, const void *b) { static void event_free(sd_event *e) { assert(e); + assert(e->n_sources == 0); + + if (e->default_event_ptr) + *(e->default_event_ptr) = NULL; if (e->epoll_fd >= 0) close_nointr_nofail(e->epoll_fd); @@ -316,13 +335,16 @@ static void event_free(sd_event *e) { if (e->monotonic_fd >= 0) close_nointr_nofail(e->monotonic_fd); + if (e->watchdog_fd >= 0) + close_nointr_nofail(e->watchdog_fd); + prioq_free(e->pending); prioq_free(e->prepare); prioq_free(e->monotonic_earliest); prioq_free(e->monotonic_latest); prioq_free(e->realtime_earliest); prioq_free(e->realtime_latest); - prioq_free(e->quit); + prioq_free(e->exit); free(e->signal_sources); @@ -330,7 +352,7 @@ static void event_free(sd_event *e) { free(e); } -int sd_event_new(sd_event** ret) { +_public_ int sd_event_new(sd_event** ret) { sd_event *e; int r; @@ -341,7 +363,7 @@ int sd_event_new(sd_event** ret) { return -ENOMEM; e->n_ref = 1; - e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1; + e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1; e->realtime_next = e->monotonic_next = (usec_t) -1; e->original_pid = getpid(); @@ -367,7 +389,7 @@ fail: return r; } -sd_event* sd_event_ref(sd_event *e) { +_public_ sd_event* sd_event_ref(sd_event *e) { assert_return(e, NULL); assert(e->n_ref >= 1); @@ -376,8 +398,10 @@ sd_event* sd_event_ref(sd_event *e) { return e; } -sd_event* sd_event_unref(sd_event *e) { - assert_return(e, NULL); +_public_ sd_event* sd_event_unref(sd_event *e) { + + if (!e) + return NULL; assert(e->n_ref >= 1); e->n_ref--; @@ -449,6 +473,8 @@ static void source_free(sd_event_source *s) { assert(s); if (s->event) { + assert(s->event->n_sources > 0); + switch (s->type) { case SOURCE_IO: @@ -493,9 +519,16 @@ static void source_free(sd_event_source *s) { break; - case SOURCE_QUIT: - prioq_remove(s->event->quit, s, &s->quit.prioq_index); + case SOURCE_DEFER: + /* nothing */ + break; + + case SOURCE_EXIT: + prioq_remove(s->event->exit, s, &s->exit.prioq_index); break; + + case SOURCE_WATCHDOG: + assert_not_reached("Wut? I shouldn't exist."); } if (s->pending) @@ -504,6 +537,7 @@ static void source_free(sd_event_source *s) { if (s->prepare) prioq_remove(s->event->prepare, s, &s->prepare_index); + s->event->n_sources--; sd_event_unref(s->event); } @@ -514,7 +548,7 @@ static int source_set_pending(sd_event_source *s, bool b) { int r; assert(s); - assert(s->type != SOURCE_QUIT); + assert(s->type != SOURCE_EXIT); if (s->pending == b) return 0; @@ -532,6 +566,14 @@ static int source_set_pending(sd_event_source *s, bool b) { } else assert_se(prioq_remove(s->event->pending, s, &s->pending_index)); + if (s->type == SOURCE_REALTIME) { + prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index); + prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index); + } else if (s->type == SOURCE_MONOTONIC) { + prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index); + prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index); + } + return 0; } @@ -549,14 +591,16 @@ static sd_event_source *source_new(sd_event *e, EventSourceType type) { s->type = type; s->pending_index = s->prepare_index = PRIOQ_IDX_NULL; + e->n_sources ++; + return s; } -int sd_event_add_io( +_public_ int sd_event_add_io( sd_event *e, int fd, uint32_t events, - sd_io_handler_t callback, + sd_event_io_handler_t callback, void *userdata, sd_event_source **ret) { @@ -565,7 +609,7 @@ int sd_event_add_io( assert_return(e, -EINVAL); assert_return(fd >= 0, -EINVAL); - assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL); + assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); assert_return(callback, -EINVAL); assert_return(ret, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); @@ -621,15 +665,16 @@ static int event_setup_timer_fd( } /* When we sleep for longer, we try to realign the wakeup to - the same time wihtin each second, so that events all across - the system can be coalesced into a single CPU - wakeup. However, let's take some system-specific randomness - for this value, so that in a network of systems with synced - clocks timer events are distributed a bit. Here, we - calculate a perturbation usec offset from the boot ID. */ + the same time wihtin each minute/second/250ms, so that + events all across the system can be coalesced into a single + CPU wakeup. However, let's take some system-specific + randomness for this value, so that in a network of systems + with synced clocks timer events are distributed a + bit. Here, we calculate a perturbation usec offset from the + boot ID. */ if (sd_id128_get_boot(&bootid) >= 0) - e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC; + e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE; *timer_fd = fd; return 0; @@ -644,7 +689,7 @@ static int event_add_time_internal( Prioq **latest, uint64_t usec, uint64_t accuracy, - sd_time_handler_t callback, + sd_event_time_handler_t callback, void *userdata, sd_event_source **ret) { @@ -708,11 +753,23 @@ fail: return r; } -int sd_event_add_monotonic(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) { +_public_ int sd_event_add_monotonic(sd_event *e, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata, + sd_event_source **ret) { + return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret); } -int sd_event_add_realtime(sd_event *e, uint64_t usec, uint64_t accuracy, sd_time_handler_t callback, void *userdata, sd_event_source **ret) { +_public_ int sd_event_add_realtime(sd_event *e, + uint64_t usec, + uint64_t accuracy, + sd_event_time_handler_t callback, + void *userdata, + sd_event_source **ret) { + return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret); } @@ -748,10 +805,10 @@ static int event_update_signal_fd(sd_event *e) { return 0; } -int sd_event_add_signal( +_public_ int sd_event_add_signal( sd_event *e, int sig, - sd_signal_handler_t callback, + sd_event_signal_handler_t callback, void *userdata, sd_event_source **ret) { @@ -797,11 +854,11 @@ int sd_event_add_signal( return 0; } -int sd_event_add_child( +_public_ int sd_event_add_child( sd_event *e, pid_t pid, int options, - sd_child_handler_t callback, + sd_event_child_handler_t callback, void *userdata, sd_event_source **ret) { @@ -858,9 +915,9 @@ int sd_event_add_child( return 0; } -int sd_event_add_defer( +_public_ int sd_event_add_defer( sd_event *e, - sd_defer_handler_t callback, + sd_event_handler_t callback, void *userdata, sd_event_source **ret) { @@ -891,9 +948,9 @@ int sd_event_add_defer( return 0; } -int sd_event_add_quit( +_public_ int sd_event_add_exit( sd_event *e, - sd_quit_handler_t callback, + sd_event_handler_t callback, void *userdata, sd_event_source **ret) { @@ -906,22 +963,22 @@ int sd_event_add_quit( assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(e), -ECHILD); - if (!e->quit) { - e->quit = prioq_new(quit_prioq_compare); - if (!e->quit) + if (!e->exit) { + e->exit = prioq_new(exit_prioq_compare); + if (!e->exit) return -ENOMEM; } - s = source_new(e, SOURCE_QUIT); + s = source_new(e, SOURCE_EXIT); if (!s) return -ENOMEM; - s->quit.callback = callback; + s->exit.callback = callback; s->userdata = userdata; - s->quit.prioq_index = PRIOQ_IDX_NULL; + s->exit.prioq_index = PRIOQ_IDX_NULL; s->enabled = SD_EVENT_ONESHOT; - r = prioq_put(s->event->quit, s, &s->quit.prioq_index); + r = prioq_put(s->event->exit, s, &s->exit.prioq_index); if (r < 0) { source_free(s); return r; @@ -931,7 +988,7 @@ int sd_event_add_quit( return 0; } -sd_event_source* sd_event_source_ref(sd_event_source *s) { +_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) { assert_return(s, NULL); assert(s->n_ref >= 1); @@ -940,34 +997,49 @@ sd_event_source* sd_event_source_ref(sd_event_source *s) { return s; } -sd_event_source* sd_event_source_unref(sd_event_source *s) { - assert_return(s, NULL); +_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) { + + if (!s) + return NULL; assert(s->n_ref >= 1); s->n_ref--; - if (s->n_ref <= 0) - source_free(s); + if (s->n_ref <= 0) { + /* Here's a special hack: when we are called from a + * dispatch handler we won't free the event source + * immediately, but we will detach the fd from the + * epoll. This way it is safe for the caller to unref + * the event source and immediately close the fd, but + * we still retain a valid event source object after + * the callback. */ + + if (s->dispatching) { + if (s->type == SOURCE_IO) + source_io_unregister(s); + } else + source_free(s); + } return NULL; } -sd_event *sd_event_get(sd_event_source *s) { +_public_ sd_event *sd_event_source_get_event(sd_event_source *s) { assert_return(s, NULL); return s->event; } -int sd_event_source_get_pending(sd_event_source *s) { +_public_ int sd_event_source_get_pending(sd_event_source *s) { assert_return(s, -EINVAL); - assert_return(s->type != SOURCE_QUIT, -EDOM); + assert_return(s->type != SOURCE_EXIT, -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); return s->pending; } -int sd_event_source_get_io_fd(sd_event_source *s) { +_public_ int sd_event_source_get_io_fd(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -975,7 +1047,43 @@ int sd_event_source_get_io_fd(sd_event_source *s) { return s->io.fd; } -int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) { +_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) { + int r; + + assert_return(s, -EINVAL); + assert_return(fd >= 0, -EINVAL); + assert_return(s->type == SOURCE_IO, -EDOM); + assert_return(!event_pid_changed(s->event), -ECHILD); + + if (s->io.fd == fd) + return 0; + + if (s->enabled == SD_EVENT_OFF) { + s->io.fd = fd; + s->io.registered = false; + } else { + int saved_fd; + + saved_fd = s->io.fd; + assert(s->io.registered); + + s->io.fd = fd; + s->io.registered = false; + + r = source_io_register(s, s->enabled, s->io.events); + if (r < 0) { + s->io.fd = saved_fd; + s->io.registered = true; + return r; + } + + epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL); + } + + return 0; +} + +_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) { assert_return(s, -EINVAL); assert_return(events, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); @@ -985,12 +1093,12 @@ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) { return 0; } -int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) { +_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) { int r; assert_return(s, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); - assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL); + assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -998,17 +1106,18 @@ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) { return 0; if (s->enabled != SD_EVENT_OFF) { - r = source_io_register(s, s->io.events, events); + r = source_io_register(s, s->enabled, events); if (r < 0) return r; } s->io.events = events; + source_set_pending(s, false); return 0; } -int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) { +_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) { assert_return(s, -EINVAL); assert_return(revents, -EINVAL); assert_return(s->type == SOURCE_IO, -EDOM); @@ -1019,7 +1128,7 @@ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) { return 0; } -int sd_event_source_get_signal(sd_event_source *s) { +_public_ int sd_event_source_get_signal(sd_event_source *s) { assert_return(s, -EINVAL); assert_return(s->type == SOURCE_SIGNAL, -EDOM); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1027,14 +1136,14 @@ int sd_event_source_get_signal(sd_event_source *s) { return s->signal.sig; } -int sd_event_source_get_priority(sd_event_source *s, int *priority) { +_public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) { assert_return(s, -EINVAL); assert_return(!event_pid_changed(s->event), -ECHILD); return s->priority; } -int sd_event_source_set_priority(sd_event_source *s, int priority) { +_public_ int sd_event_source_set_priority(sd_event_source *s, int priority) { assert_return(s, -EINVAL); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1050,13 +1159,13 @@ int sd_event_source_set_priority(sd_event_source *s, int priority) { if (s->prepare) prioq_reshuffle(s->event->prepare, s, &s->prepare_index); - if (s->type == SOURCE_QUIT) - prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index); + if (s->type == SOURCE_EXIT) + prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); return 0; } -int sd_event_source_get_enabled(sd_event_source *s, int *m) { +_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) { assert_return(s, -EINVAL); assert_return(m, -EINVAL); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1065,7 +1174,7 @@ int sd_event_source_get_enabled(sd_event_source *s, int *m) { return 0; } -int sd_event_source_set_enabled(sd_event_source *s, int m) { +_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) { int r; assert_return(s, -EINVAL); @@ -1122,14 +1231,17 @@ int sd_event_source_set_enabled(sd_event_source *s, int m) { break; - case SOURCE_QUIT: + case SOURCE_EXIT: s->enabled = m; - prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index); + prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); break; case SOURCE_DEFER: s->enabled = m; break; + + case SOURCE_WATCHDOG: + assert_not_reached("Wut? I shouldn't exist."); } } else { @@ -1177,14 +1289,17 @@ int sd_event_source_set_enabled(sd_event_source *s, int m) { } break; - case SOURCE_QUIT: + case SOURCE_EXIT: s->enabled = m; - prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index); + prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index); break; case SOURCE_DEFER: s->enabled = m; break; + + case SOURCE_WATCHDOG: + assert_not_reached("Wut? I shouldn't exist."); } } @@ -1197,7 +1312,7 @@ int sd_event_source_set_enabled(sd_event_source *s, int m) { return 0; } -int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) { +_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) { assert_return(s, -EINVAL); assert_return(usec, -EINVAL); assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM); @@ -1207,18 +1322,17 @@ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) { return 0; } -int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { +_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { assert_return(s, -EINVAL); assert_return(usec != (uint64_t) -1, -EINVAL); assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); - if (s->time.next == usec) - return 0; - s->time.next = usec; + source_set_pending(s, false); + if (s->type == SOURCE_REALTIME) { prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index); prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index); @@ -1230,7 +1344,7 @@ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) { return 0; } -int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) { +_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) { assert_return(s, -EINVAL); assert_return(usec, -EINVAL); assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM); @@ -1240,7 +1354,7 @@ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) { return 0; } -int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) { +_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) { assert_return(s, -EINVAL); assert_return(usec != (uint64_t) -1, -EINVAL); assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM); @@ -1250,11 +1364,10 @@ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) { if (usec == 0) usec = DEFAULT_ACCURACY_USEC; - if (s->time.accuracy == usec) - return 0; - s->time.accuracy = usec; + source_set_pending(s, false); + if (s->type == SOURCE_REALTIME) prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index); else @@ -1263,7 +1376,7 @@ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) { return 0; } -int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { +_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { assert_return(s, -EINVAL); assert_return(pid, -EINVAL); assert_return(s->type == SOURCE_CHILD, -EDOM); @@ -1273,11 +1386,11 @@ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) { return 0; } -int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callback) { +_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) { int r; assert_return(s, -EINVAL); - assert_return(s->type != SOURCE_QUIT, -EDOM); + assert_return(s->type != SOURCE_EXIT, -EDOM); assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(s->event), -ECHILD); @@ -1305,12 +1418,23 @@ int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callbac return 0; } -void* sd_event_source_get_userdata(sd_event_source *s) { +_public_ void* sd_event_source_get_userdata(sd_event_source *s) { assert_return(s, NULL); return s->userdata; } +_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) { + void *ret; + + assert_return(s, NULL); + + ret = s->userdata; + s->userdata = userdata; + + return ret; +} + static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) { usec_t c; assert(e); @@ -1333,13 +1457,36 @@ static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) { dispatch as much as possible on the entire system. We implement this by waking up everywhere at the same time - within any given second if we can, synchronised via the + within any given minute if we can, synchronised via the perturbation value determined from the boot ID. If we can't, - then we try to find the same spot in every a 250ms - step. Otherwise, we pick the last possible time to wake up. + then we try to find the same spot in every 10s, then 1s and + then 250ms step. Otherwise, we pick the last possible time + to wake up. */ - c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb; + c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb; + if (c >= b) { + if (_unlikely_(c < USEC_PER_MINUTE)) + return b; + + c -= USEC_PER_MINUTE; + } + + if (c >= a) + return c; + + c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10)); + if (c >= b) { + if (_unlikely_(c < USEC_PER_SEC*10)) + return b; + + c -= USEC_PER_SEC*10; + } + + if (c >= a) + return c; + + c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC); if (c >= b) { if (_unlikely_(c < USEC_PER_SEC)) return b; @@ -1376,12 +1523,27 @@ static int event_arm_timer( usec_t t; int r; - assert_se(e); - assert_se(next); + assert(e); + assert(next); a = prioq_peek(earliest); - if (!a || a->enabled == SD_EVENT_OFF) + if (!a || a->enabled == SD_EVENT_OFF) { + + if (timer_fd < 0) + return 0; + + if (*next == (usec_t) -1) + return 0; + + /* disarm */ + r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL); + if (r < 0) + return r; + + *next = (usec_t) -1; + return 0; + } b = prioq_peek(latest); assert_se(b && b->enabled != SD_EVENT_OFF); @@ -1401,37 +1563,38 @@ static int event_arm_timer( r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL); if (r < 0) - return r; + return -errno; *next = t; return 0; } -static int process_io(sd_event *e, sd_event_source *s, uint32_t events) { +static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) { assert(e); assert(s); assert(s->type == SOURCE_IO); - s->io.revents = events; + /* If the event source was already pending, we just OR in the + * new revents, otherwise we reset the value. The ORing is + * necessary to handle EPOLLONESHOT events properly where + * readability might happen independently of writability, and + * we need to keep track of both */ - /* - If this is a oneshot event source, then we added it to the - epoll with EPOLLONESHOT, hence we know it's not registered - anymore. We can save a syscall here... - */ - - if (s->enabled == SD_EVENT_ONESHOT) - s->io.registered = false; + if (s->pending) + s->io.revents |= revents; + else + s->io.revents = revents; return source_set_pending(s, true); } -static int flush_timer(sd_event *e, int fd, uint32_t events) { +static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) { uint64_t x; ssize_t ss; assert(e); assert(fd >= 0); + assert_return(events == EPOLLIN, -EIO); ss = read(fd, &x, sizeof(x)); @@ -1442,9 +1605,12 @@ static int flush_timer(sd_event *e, int fd, uint32_t events) { return -errno; } - if (ss != sizeof(x)) + if (_unlikely_(ss != sizeof(x))) return -EIO; + if (next) + *next = (usec_t) -1; + return 0; } @@ -1499,6 +1665,10 @@ static int process_child(sd_event *e) { don't care about. Since this is O(n) this means that if you have a lot of processes you probably want to handle SIGCHLD yourself. + + We do not reap the children here (by using WNOWAIT), this + is only done after the event source is dispatched so that + the callback still sees the process as a zombie. */ HASHMAP_FOREACH(s, e->child_sources, i) { @@ -1511,11 +1681,27 @@ static int process_child(sd_event *e) { continue; zero(s->child.siginfo); - r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options); + r = waitid(P_PID, s->child.pid, &s->child.siginfo, + WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options); if (r < 0) return -errno; if (s->child.siginfo.si_pid != 0) { + bool zombie = + s->child.siginfo.si_code == CLD_EXITED || + s->child.siginfo.si_code == CLD_KILLED || + s->child.siginfo.si_code == CLD_DUMPED; + + if (!zombie && (s->child.options & WEXITED)) { + /* If the child isn't dead then let's + * immediately remove the state change + * from the queue, since there's no + * benefit in leaving it queued */ + + assert(s->child.options & (WSTOPPED|WCONTINUED)); + waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED))); + } + r = source_set_pending(s, true); if (r < 0) return r; @@ -1526,15 +1712,17 @@ static int process_child(sd_event *e) { } static int process_signal(sd_event *e, uint32_t events) { - struct signalfd_siginfo si; bool read_one = false; - ssize_t ss; int r; assert(e); + assert(e->signal_sources); + assert_return(events == EPOLLIN, -EIO); for (;;) { + struct signalfd_siginfo si; + ssize_t ss; sd_event_source *s; ss = read(e->signal_fd, &si, sizeof(si)); @@ -1545,22 +1733,21 @@ static int process_signal(sd_event *e, uint32_t events) { return -errno; } - if (ss != sizeof(si)) + if (_unlikely_(ss != sizeof(si))) return -EIO; read_one = true; + s = e->signal_sources[si.ssi_signo]; if (si.ssi_signo == SIGCHLD) { r = process_child(e); if (r < 0) return r; - if (r > 0 || !e->signal_sources[si.ssi_signo]) + if (r > 0 || !s) continue; - } else { - s = e->signal_sources[si.ssi_signo]; + } else if (!s) return -EIO; - } s->signal.siginfo = si; r = source_set_pending(s, true); @@ -1568,17 +1755,16 @@ static int process_signal(sd_event *e, uint32_t events) { return r; } - return 0; } static int source_dispatch(sd_event_source *s) { - int r; + int r = 0; assert(s); - assert(s->pending || s->type == SOURCE_QUIT); + assert(s->pending || s->type == SOURCE_EXIT); - if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) { + if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) { r = source_set_pending(s, false); if (r < 0) return r; @@ -1590,6 +1776,8 @@ static int source_dispatch(sd_event_source *s) { return r; } + s->dispatching = true; + switch (s->type) { case SOURCE_IO: @@ -1608,20 +1796,45 @@ static int source_dispatch(sd_event_source *s) { r = s->signal.callback(s, &s->signal.siginfo, s->userdata); break; - case SOURCE_CHILD: + case SOURCE_CHILD: { + bool zombie; + + zombie = s->child.siginfo.si_code == CLD_EXITED || + s->child.siginfo.si_code == CLD_KILLED || + s->child.siginfo.si_code == CLD_DUMPED; + r = s->child.callback(s, &s->child.siginfo, s->userdata); + + /* Now, reap the PID for good. */ + if (zombie) + waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED); + break; + } case SOURCE_DEFER: r = s->defer.callback(s, s->userdata); break; - case SOURCE_QUIT: - r = s->quit.callback(s, s->userdata); + case SOURCE_EXIT: + r = s->exit.callback(s, s->userdata); break; + + case SOURCE_WATCHDOG: + assert_not_reached("Wut? I shouldn't exist."); } - return r; + s->dispatching = false; + + if (r < 0) + log_debug("Event source %p returned error, disabling: %s", s, strerror(-r)); + + if (s->n_ref == 0) + source_free(s); + else if (r < 0) + sd_event_source_set_enabled(s, SD_EVENT_OFF); + + return 1; } static int event_prepare(sd_event *e) { @@ -1642,22 +1855,30 @@ static int event_prepare(sd_event *e) { return r; assert(s->prepare); + + s->dispatching = true; r = s->prepare(s, s->userdata); + s->dispatching = false; + if (r < 0) - return r; + log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r)); + if (s->n_ref == 0) + source_free(s); + else if (r < 0) + sd_event_source_set_enabled(s, SD_EVENT_OFF); } return 0; } -static int dispatch_quit(sd_event *e) { +static int dispatch_exit(sd_event *e) { sd_event_source *p; int r; assert(e); - p = prioq_peek(e->quit); + p = prioq_peek(e->exit); if (!p || p->enabled == SD_EVENT_OFF) { e->state = SD_EVENT_FINISHED; return 0; @@ -1665,7 +1886,7 @@ static int dispatch_quit(sd_event *e) { sd_event_ref(e); e->iteration++; - e->state = SD_EVENT_QUITTING; + e->state = SD_EVENT_EXITING; r = source_dispatch(p); @@ -1690,8 +1911,46 @@ static sd_event_source* event_next_pending(sd_event *e) { return p; } -int sd_event_run(sd_event *e, uint64_t timeout) { - struct epoll_event ev_queue[EPOLL_QUEUE_MAX]; +static int arm_watchdog(sd_event *e) { + struct itimerspec its = {}; + usec_t t; + int r; + + assert(e); + assert(e->watchdog_fd >= 0); + + t = sleep_between(e, + e->watchdog_last + (e->watchdog_period / 2), + e->watchdog_last + (e->watchdog_period * 3 / 4)); + + timespec_store(&its.it_value, t); + + r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL); + if (r < 0) + return -errno; + + return 0; +} + +static int process_watchdog(sd_event *e) { + assert(e); + + if (!e->watchdog) + return 0; + + /* Don't notify watchdog too often */ + if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic) + return 0; + + sd_notify(false, "WATCHDOG=1"); + e->watchdog_last = e->timestamp.monotonic; + + return arm_watchdog(e); +} + +_public_ int sd_event_run(sd_event *e, uint64_t timeout) { + struct epoll_event *ev_queue; + unsigned ev_queue_max; sd_event_source *p; int r, i, m; @@ -1700,8 +1959,8 @@ int sd_event_run(sd_event *e, uint64_t timeout) { assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY); - if (e->quit_requested) - return dispatch_quit(e); + if (e->exit_requested) + return dispatch_exit(e); sd_event_ref(e); e->iteration++; @@ -1711,23 +1970,23 @@ int sd_event_run(sd_event *e, uint64_t timeout) { if (r < 0) goto finish; - if (event_next_pending(e) || e->need_process_child) - timeout = 0; + r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next); + if (r < 0) + goto finish; - if (timeout > 0) { - r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next); - if (r < 0) - goto finish; + r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next); + if (r < 0) + goto finish; - r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next); - if (r < 0) - goto finish; - } + if (event_next_pending(e) || e->need_process_child) + timeout = 0; + ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX); + ev_queue = newa(struct epoll_event, ev_queue_max); - m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX, + m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max, timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC)); if (m < 0) { - r = m; + r = errno == EAGAIN || errno == EINTR ? 1 : -errno; goto finish; } @@ -1736,11 +1995,13 @@ int sd_event_run(sd_event *e, uint64_t timeout) { for (i = 0; i < m; i++) { if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC)) - r = flush_timer(e, e->monotonic_fd, ev_queue[i].events); + r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next); else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME)) - r = flush_timer(e, e->realtime_fd, ev_queue[i].events); + r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next); else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL)) r = process_signal(e, ev_queue[i].events); + else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) + r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL); else r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); @@ -1748,6 +2009,10 @@ int sd_event_run(sd_event *e, uint64_t timeout) { goto finish; } + r = process_watchdog(e); + if (r < 0) + goto finish; + r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest); if (r < 0) goto finish; @@ -1764,7 +2029,7 @@ int sd_event_run(sd_event *e, uint64_t timeout) { p = event_next_pending(e); if (!p) { - r = 0; + r = 1; goto finish; } @@ -1777,7 +2042,7 @@ finish: return r; } -int sd_event_loop(sd_event *e) { +_public_ int sd_event_loop(sd_event *e) { int r; assert_return(e, -EINVAL); @@ -1792,37 +2057,44 @@ int sd_event_loop(sd_event *e) { goto finish; } - r = 0; + r = e->exit_code; finish: sd_event_unref(e); return r; } -int sd_event_get_state(sd_event *e) { +_public_ int sd_event_get_state(sd_event *e) { assert_return(e, -EINVAL); assert_return(!event_pid_changed(e), -ECHILD); return e->state; } -int sd_event_get_quit(sd_event *e) { +_public_ int sd_event_get_exit_code(sd_event *e, int *code) { assert_return(e, -EINVAL); + assert_return(code, -EINVAL); assert_return(!event_pid_changed(e), -ECHILD); - return e->quit_requested; + if (!e->exit_requested) + return -ENODATA; + + *code = e->exit_code; + return 0; } -int sd_event_request_quit(sd_event *e) { +_public_ int sd_event_exit(sd_event *e, int code) { assert_return(e, -EINVAL); assert_return(e->state != SD_EVENT_FINISHED, -ESTALE); assert_return(!event_pid_changed(e), -ECHILD); - e->quit_requested = true; + e->exit_requested = true; + e->exit_code = code; + return 0; } -int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) { +_public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) { assert_return(e, -EINVAL); assert_return(usec, -EINVAL); assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA); @@ -1832,7 +2104,7 @@ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) { return 0; } -int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) { +_public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) { assert_return(e, -EINVAL); assert_return(usec, -EINVAL); assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA); @@ -1841,3 +2113,103 @@ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) { *usec = e->timestamp.monotonic; return 0; } + +_public_ int sd_event_default(sd_event **ret) { + + static thread_local sd_event *default_event = NULL; + sd_event *e; + int r; + + if (!ret) + return !!default_event; + + if (default_event) { + *ret = sd_event_ref(default_event); + return 0; + } + + r = sd_event_new(&e); + if (r < 0) + return r; + + e->default_event_ptr = &default_event; + e->tid = gettid(); + default_event = e; + + *ret = e; + return 1; +} + +_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) { + assert_return(e, -EINVAL); + assert_return(tid, -EINVAL); + assert_return(!event_pid_changed(e), -ECHILD); + + if (e->tid != 0) { + *tid = e->tid; + return 0; + } + + return -ENXIO; +} + +_public_ int sd_event_set_watchdog(sd_event *e, int b) { + int r; + + assert_return(e, -EINVAL); + assert_return(!event_pid_changed(e), -ECHILD); + + if (e->watchdog == !!b) + return e->watchdog; + + if (b) { + struct epoll_event ev = {}; + + r = sd_watchdog_enabled(false, &e->watchdog_period); + if (r <= 0) + return r; + + /* Issue first ping immediately */ + sd_notify(false, "WATCHDOG=1"); + e->watchdog_last = now(CLOCK_MONOTONIC); + + e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC); + if (e->watchdog_fd < 0) + return -errno; + + r = arm_watchdog(e); + if (r < 0) + goto fail; + + ev.events = EPOLLIN; + ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG); + + r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev); + if (r < 0) { + r = -errno; + goto fail; + } + + } else { + if (e->watchdog_fd >= 0) { + epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL); + close_nointr_nofail(e->watchdog_fd); + e->watchdog_fd = -1; + } + } + + e->watchdog = !!b; + return e->watchdog; + +fail: + close_nointr_nofail(e->watchdog_fd); + e->watchdog_fd = -1; + return r; +} + +_public_ int sd_event_get_watchdog(sd_event *e) { + assert_return(e, -EINVAL); + assert_return(!event_pid_changed(e), -ECHILD); + + return e->watchdog; +}