#include <sys/wait.h>
#include "sd-id128.h"
+#include "sd-daemon.h"
#include "macro.h"
#include "prioq.h"
#include "hashmap.h"
SOURCE_SIGNAL,
SOURCE_CHILD,
SOURCE_DEFER,
- SOURCE_QUIT
+ SOURCE_EXIT,
+ SOURCE_WATCHDOG
} EventSourceType;
struct sd_event_source {
EventSourceType type:4;
int enabled:3;
bool pending:1;
+ bool dispatching:1;
int priority;
unsigned pending_index;
struct {
sd_event_handler_t callback;
unsigned prioq_index;
- } quit;
+ } exit;
};
};
int signal_fd;
int realtime_fd;
int monotonic_fd;
+ int watchdog_fd;
Prioq *pending;
Prioq *prepare;
Hashmap *child_sources;
unsigned n_enabled_child_sources;
- Prioq *quit;
+ Prioq *exit;
pid_t original_pid;
dual_timestamp timestamp;
int state;
- bool quit_requested:1;
+ bool exit_requested:1;
bool need_process_child:1;
+ bool watchdog:1;
+
+ int exit_code;
pid_t tid;
sd_event **default_event_ptr;
+
+ usec_t watchdog_last, watchdog_period;
};
static int pending_prioq_compare(const void *a, const void *b) {
return 0;
}
-static int quit_prioq_compare(const void *a, const void *b) {
+static int exit_prioq_compare(const void *a, const void *b) {
const sd_event_source *x = a, *y = b;
- assert(x->type == SOURCE_QUIT);
- assert(y->type == SOURCE_QUIT);
+ assert(x->type == SOURCE_EXIT);
+ assert(y->type == SOURCE_EXIT);
/* Enabled ones first */
if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
if (e->monotonic_fd >= 0)
close_nointr_nofail(e->monotonic_fd);
+ if (e->watchdog_fd >= 0)
+ close_nointr_nofail(e->watchdog_fd);
+
prioq_free(e->pending);
prioq_free(e->prepare);
prioq_free(e->monotonic_earliest);
prioq_free(e->monotonic_latest);
prioq_free(e->realtime_earliest);
prioq_free(e->realtime_latest);
- prioq_free(e->quit);
+ prioq_free(e->exit);
free(e->signal_sources);
return -ENOMEM;
e->n_ref = 1;
- e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
+ e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
e->realtime_next = e->monotonic_next = (usec_t) -1;
e->original_pid = getpid();
}
_public_ sd_event* sd_event_unref(sd_event *e) {
- assert_return(e, NULL);
+
+ if (!e)
+ return NULL;
assert(e->n_ref >= 1);
e->n_ref--;
/* nothing */
break;
- case SOURCE_QUIT:
- prioq_remove(s->event->quit, s, &s->quit.prioq_index);
+ case SOURCE_EXIT:
+ prioq_remove(s->event->exit, s, &s->exit.prioq_index);
break;
}
int r;
assert(s);
- assert(s->type != SOURCE_QUIT);
+ assert(s->type != SOURCE_EXIT);
if (s->pending == b)
return 0;
assert_return(e, -EINVAL);
assert_return(fd >= 0, -EINVAL);
- assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
assert_return(callback, -EINVAL);
assert_return(ret, -EINVAL);
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
}
/* When we sleep for longer, we try to realign the wakeup to
- the same time wihtin each second, so that events all across
- the system can be coalesced into a single CPU
- wakeup. However, let's take some system-specific randomness
- for this value, so that in a network of systems with synced
- clocks timer events are distributed a bit. Here, we
- calculate a perturbation usec offset from the boot ID. */
+ the same time within each minute/second/250ms, so that
+ events all across the system can be coalesced into a single
+ CPU wakeup. However, let's take some system-specific
+ randomness for this value, so that in a network of systems
+ with synced clocks timer events are distributed a
+ bit. Here, we calculate a perturbation usec offset from the
+ boot ID. */
if (sd_id128_get_boot(&bootid) >= 0)
- e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_SEC;
+ e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
*timer_fd = fd;
return 0;
return 0;
}
-_public_ int sd_event_add_quit(
+_public_ int sd_event_add_exit(
sd_event *e,
sd_event_handler_t callback,
void *userdata,
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(!event_pid_changed(e), -ECHILD);
- if (!e->quit) {
- e->quit = prioq_new(quit_prioq_compare);
- if (!e->quit)
+ if (!e->exit) {
+ e->exit = prioq_new(exit_prioq_compare);
+ if (!e->exit)
return -ENOMEM;
}
- s = source_new(e, SOURCE_QUIT);
+ s = source_new(e, SOURCE_EXIT);
if (!s)
return -ENOMEM;
- s->quit.callback = callback;
+ s->exit.callback = callback;
s->userdata = userdata;
- s->quit.prioq_index = PRIOQ_IDX_NULL;
+ s->exit.prioq_index = PRIOQ_IDX_NULL;
s->enabled = SD_EVENT_ONESHOT;
- r = prioq_put(s->event->quit, s, &s->quit.prioq_index);
+ r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
if (r < 0) {
source_free(s);
return r;
}
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
- assert_return(s, NULL);
+
+ if (!s)
+ return NULL;
assert(s->n_ref >= 1);
s->n_ref--;
- if (s->n_ref <= 0)
- source_free(s);
+ if (s->n_ref <= 0) {
+ /* Here's a special hack: when we are called from a
+ * dispatch handler we won't free the event source
+ * immediately, but we will detach the fd from the
+ * epoll. This way it is safe for the caller to unref
+ * the event source and immediately close the fd, but
+ * we still retain a valid event source object after
+ * the callback. */
+
+ if (s->dispatching) {
+ if (s->type == SOURCE_IO)
+ source_io_unregister(s);
+ } else
+ source_free(s);
+ }
return NULL;
}
-_public_ sd_event *sd_event_get(sd_event_source *s) {
+_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
assert_return(s, NULL);
return s->event;
_public_ int sd_event_source_get_pending(sd_event_source *s) {
assert_return(s, -EINVAL);
- assert_return(s->type != SOURCE_QUIT, -EDOM);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(!event_pid_changed(s->event), -ECHILD);
assert_return(s, -EINVAL);
assert_return(s->type == SOURCE_IO, -EDOM);
- assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP)), -EINVAL);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(!event_pid_changed(s->event), -ECHILD);
if (s->prepare)
prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
- if (s->type == SOURCE_QUIT)
- prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
+ if (s->type == SOURCE_EXIT)
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
return 0;
}
break;
- case SOURCE_QUIT:
+ case SOURCE_EXIT:
s->enabled = m;
- prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
break;
case SOURCE_DEFER:
}
break;
- case SOURCE_QUIT:
+ case SOURCE_EXIT:
s->enabled = m;
- prioq_reshuffle(s->event->quit, s, &s->quit.prioq_index);
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
break;
case SOURCE_DEFER:
int r;
assert_return(s, -EINVAL);
- assert_return(s->type != SOURCE_QUIT, -EDOM);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(!event_pid_changed(s->event), -ECHILD);
return s->userdata;
}
+/* Swap the event source's userdata pointer, returning the previous
+ * value so the caller can free or restore it. */
+_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
usec_t c;
assert(e);
dispatch as much as possible on the entire system.
We implement this by waking up everywhere at the same time
- within any given second if we can, synchronised via the
+ within any given minute if we can, synchronised via the
perturbation value determined from the boot ID. If we can't,
- then we try to find the same spot in every a 250ms
- step. Otherwise, we pick the last possible time to wake up.
+ then we try to find the same spot in every 10s, then 1s and
+ then 250ms step. Otherwise, we pick the last possible time
+ to wake up.
*/
- c = (b / USEC_PER_SEC) * USEC_PER_SEC + e->perturb;
+ c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MINUTE))
+ return b;
+
+ c -= USEC_PER_MINUTE;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC*10))
+ return b;
+
+ c -= USEC_PER_SEC*10;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
if (c >= b) {
if (_unlikely_(c < USEC_PER_SEC))
return b;
usec_t t;
int r;
- assert_se(e);
- assert_se(next);
+ assert(e);
+ assert(next);
a = prioq_peek(earliest);
if (!a || a->enabled == SD_EVENT_OFF) {
+ if (timer_fd < 0)
+ return 0;
+
if (*next == (usec_t) -1)
return 0;
r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
if (r < 0)
- return r;
+ return -errno;
*next = t;
return 0;
assert(e);
assert(fd >= 0);
- assert(next);
assert_return(events == EPOLLIN, -EIO);
if (ss != sizeof(x))
return -EIO;
- *next = (usec_t) -1;
+ if (next)
+ *next = (usec_t) -1;
return 0;
}
don't care about. Since this is O(n) this means that if you
have a lot of processes you probably want to handle SIGCHLD
yourself.
+
+ We do not reap the children here (by using WNOWAIT), this
+ is only done after the event source is dispatched so that
+ the callback still sees the process as a zombie.
*/
HASHMAP_FOREACH(s, e->child_sources, i) {
continue;
zero(s->child.siginfo);
- r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
+ r = waitid(P_PID, s->child.pid, &s->child.siginfo,
+ WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
if (r < 0)
return -errno;
if (s->child.siginfo.si_pid != 0) {
+ bool zombie =
+ s->child.siginfo.si_code == CLD_EXITED ||
+ s->child.siginfo.si_code == CLD_KILLED ||
+ s->child.siginfo.si_code == CLD_DUMPED;
+
+ if (!zombie && (s->child.options & WEXITED)) {
+ /* If the child isn't dead then let's
+ * immediately remove the state change
+ * from the queue, since there's no
+ * benefit in leaving it queued */
+
+ assert(s->child.options & (WSTOPPED|WCONTINUED));
+ waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+ }
+
r = source_set_pending(s, true);
if (r < 0)
return r;
return r;
}
-
return 0;
}
int r = 0;
assert(s);
- assert(s->pending || s->type == SOURCE_QUIT);
+ assert(s->pending || s->type == SOURCE_EXIT);
- if (s->type != SOURCE_DEFER && s->type != SOURCE_QUIT) {
+ if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
r = source_set_pending(s, false);
if (r < 0)
return r;
return r;
}
- sd_event_source_ref(s);
+ s->dispatching = true;
switch (s->type) {
r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
break;
- case SOURCE_CHILD:
+ case SOURCE_CHILD: {
+ bool zombie;
+
+ zombie = s->child.siginfo.si_code == CLD_EXITED ||
+ s->child.siginfo.si_code == CLD_KILLED ||
+ s->child.siginfo.si_code == CLD_DUMPED;
+
r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+ /* Now, reap the PID for good. */
+ if (zombie)
+ waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+
break;
+ }
case SOURCE_DEFER:
r = s->defer.callback(s, s->userdata);
break;
- case SOURCE_QUIT:
- r = s->quit.callback(s, s->userdata);
+ case SOURCE_EXIT:
+ r = s->exit.callback(s, s->userdata);
break;
}
- sd_event_source_unref(s);
+ s->dispatching = false;
- return r;
+ if (r < 0)
+ log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
+
+ return 1;
}
static int event_prepare(sd_event *e) {
return r;
assert(s->prepare);
+
+ s->dispatching = true;
r = s->prepare(s, s->userdata);
+ s->dispatching = false;
+
if (r < 0)
- return r;
+ log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
}
return 0;
}
-static int dispatch_quit(sd_event *e) {
+static int dispatch_exit(sd_event *e) {
sd_event_source *p;
int r;
assert(e);
- p = prioq_peek(e->quit);
+ p = prioq_peek(e->exit);
if (!p || p->enabled == SD_EVENT_OFF) {
e->state = SD_EVENT_FINISHED;
return 0;
sd_event_ref(e);
e->iteration++;
- e->state = SD_EVENT_QUITTING;
+ e->state = SD_EVENT_EXITING;
r = source_dispatch(p);
return p;
}
+/* Program the watchdog timerfd for the next keep-alive wakeup: some
+ * point between 1/2 and 3/4 of the watchdog period after the last
+ * notification, letting sleep_between() coalesce the wakeup with other
+ * timers system-wide. Returns 0 on success, negative errno on failure. */
+static int arm_watchdog(sd_event *e) {
+ struct itimerspec its = {};
+ usec_t t;
+ int r;
+
+ assert(e);
+ assert(e->watchdog_fd >= 0);
+
+ t = sleep_between(e,
+ e->watchdog_last + (e->watchdog_period / 2),
+ e->watchdog_last + (e->watchdog_period * 3 / 4));
+
+ timespec_store(&its.it_value, t);
+
+ /* Absolute CLOCK_MONOTONIC expiry, matching the timerfd's clock */
+ r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+/* Called from the event loop iteration: send a WATCHDOG=1 ping to the
+ * service manager if watchdog mode is on and at least a quarter period
+ * has elapsed since the last ping, then re-arm the watchdog timer. */
+static int process_watchdog(sd_event *e) {
+ assert(e);
+
+ if (!e->watchdog)
+ return 0;
+
+ /* Don't notify watchdog too often */
+ if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+ return 0;
+
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = e->timestamp.monotonic;
+
+ return arm_watchdog(e);
+}
+
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
sd_event_source *p;
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
- if (e->quit_requested)
- return dispatch_quit(e);
+ if (e->exit_requested)
+ return dispatch_exit(e);
sd_event_ref(e);
e->iteration++;
if (r < 0)
goto finish;
- if (event_next_pending(e) || e->need_process_child)
- timeout = 0;
+ r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
+ if (r < 0)
+ goto finish;
- if (timeout > 0) {
- r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
- if (r < 0)
- goto finish;
+ r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
+ if (r < 0)
+ goto finish;
- r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
- if (r < 0)
- goto finish;
- }
+ if (event_next_pending(e) || e->need_process_child)
+ timeout = 0;
m = epoll_wait(e->epoll_fd, ev_queue, EPOLL_QUEUE_MAX,
timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
r = process_signal(e, ev_queue[i].events);
+ else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
+ r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
else
r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
goto finish;
}
+ r = process_watchdog(e);
+ if (r < 0)
+ goto finish;
+
r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
if (r < 0)
goto finish;
goto finish;
}
- r = 0;
+ r = e->exit_code;
finish:
sd_event_unref(e);
return e->state;
}
-_public_ int sd_event_get_quit(sd_event *e) {
+/* Retrieve the exit code previously set via sd_event_exit(). Returns
+ * -ENODATA if no exit was requested yet. */
+_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
assert_return(e, -EINVAL);
+ assert_return(code, -EINVAL);
assert_return(!event_pid_changed(e), -ECHILD);
- return e->quit_requested;
+ if (!e->exit_requested)
+ return -ENODATA;
+
+ *code = e->exit_code;
+ return 0;
}
-_public_ int sd_event_request_quit(sd_event *e) {
+/* Request that the event loop exits, storing the code that the loop
+ * will eventually return after dispatching the exit sources. */
+_public_ int sd_event_exit(sd_event *e, int code) {
assert_return(e, -EINVAL);
assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
assert_return(!event_pid_changed(e), -ECHILD);
- e->quit_requested = true;
+ e->exit_requested = true;
+ e->exit_code = code;
+
return 0;
}
return -ENXIO;
}
+
+/* Enable or disable automatic watchdog pinging for this event loop.
+ * When enabled, the period is read from $WATCHDOG_USEC (as set by the
+ * service manager) and keep-alive notifications are sent via
+ * sd_notify(). Returns the new watchdog state (0/1), 0 when no
+ * watchdog was requested in the environment, or negative errno. */
+_public_ int sd_event_set_watchdog(sd_event *e, int b) {
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ /* Already in the requested state, nothing to do */
+ if (e->watchdog == !!b)
+ return e->watchdog;
+
+ if (b) {
+ struct epoll_event ev = {};
+ const char *env;
+
+ /* No watchdog configured by the service manager */
+ env = getenv("WATCHDOG_USEC");
+ if (!env)
+ return false;
+
+ r = safe_atou64(env, &e->watchdog_period);
+ if (r < 0)
+ return r;
+ if (e->watchdog_period <= 0)
+ return -EIO;
+
+ /* Issue first ping immediately */
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = now(CLOCK_MONOTONIC);
+
+ e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
+ if (e->watchdog_fd < 0)
+ return -errno;
+
+ r = arm_watchdog(e);
+ if (r < 0)
+ goto fail;
+
+ /* Hook the timerfd into the loop's epoll, tagged so the
+ * dispatcher recognizes watchdog wakeups */
+ ev.events = EPOLLIN;
+ ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
+
+ r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ } else {
+ if (e->watchdog_fd >= 0) {
+ epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
+ close_nointr_nofail(e->watchdog_fd);
+ e->watchdog_fd = -1;
+ }
+ }
+
+ e->watchdog = !!b;
+ return e->watchdog;
+
+fail:
+ close_nointr_nofail(e->watchdog_fd);
+ e->watchdog_fd = -1;
+ return r;
+}
+
+/* Query whether watchdog pinging is currently enabled for this loop. */
+_public_ int sd_event_get_watchdog(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->watchdog;
+}