#include <sys/timerfd.h>
#include <sys/wait.h>
+#include "sd-id128.h"
+#include "sd-daemon.h"
#include "macro.h"
-#include "refcnt.h"
#include "prioq.h"
#include "hashmap.h"
#include "util.h"
#include "time-util.h"
+#include "missing.h"
#include "sd-event.h"
-#define EPOLL_QUEUE_MAX 64
+#define EPOLL_QUEUE_MAX 512U
+#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
typedef enum EventSourceType {
SOURCE_IO,
SOURCE_REALTIME,
SOURCE_SIGNAL,
SOURCE_CHILD,
- SOURCE_DEFER
+ SOURCE_DEFER,
+ SOURCE_EXIT,
+ SOURCE_WATCHDOG
} EventSourceType;
struct sd_event_source {
- RefCount n_ref;
+ unsigned n_ref;
sd_event *event;
void *userdata;
- sd_prepare_handler_t prepare;
+ sd_event_handler_t prepare;
EventSourceType type:4;
- sd_event_mute_t mute:3;
+ int enabled:3;
bool pending:1;
+ bool dispatching:1;
int priority;
unsigned pending_index;
union {
struct {
- sd_io_handler_t callback;
+ sd_event_io_handler_t callback;
int fd;
uint32_t events;
uint32_t revents;
bool registered:1;
} io;
struct {
- sd_time_handler_t callback;
- usec_t next;
- unsigned prioq_index;
+ sd_event_time_handler_t callback;
+ usec_t next, accuracy;
+ unsigned earliest_index;
+ unsigned latest_index;
} time;
struct {
- sd_signal_handler_t callback;
+ sd_event_signal_handler_t callback;
struct signalfd_siginfo siginfo;
int sig;
} signal;
struct {
- sd_child_handler_t callback;
+ sd_event_child_handler_t callback;
siginfo_t siginfo;
pid_t pid;
int options;
} child;
struct {
- sd_defer_handler_t callback;
+ sd_event_handler_t callback;
} defer;
+ struct {
+ sd_event_handler_t callback;
+ unsigned prioq_index;
+ } exit;
};
};
struct sd_event {
- RefCount n_ref;
+ unsigned n_ref;
int epoll_fd;
int signal_fd;
int realtime_fd;
int monotonic_fd;
+ int watchdog_fd;
Prioq *pending;
Prioq *prepare;
- Prioq *monotonic;
- Prioq *realtime;
+
+ /* For both clocks we maintain two priority queues each, one
+ * ordered for the earliest times the events may be
+ * dispatched, and one ordered by the latest times they must
+ * have been dispatched. The range between the top entries in
+ * the two prioqs is the time window we can freely schedule
+ * wakeups in */
+ Prioq *monotonic_earliest;
+ Prioq *monotonic_latest;
+ Prioq *realtime_earliest;
+ Prioq *realtime_latest;
+
+ usec_t realtime_next, monotonic_next;
+ usec_t perturb;
sigset_t sigset;
sd_event_source **signal_sources;
Hashmap *child_sources;
- unsigned n_unmuted_child_sources;
+ unsigned n_enabled_child_sources;
+
+ Prioq *exit;
+
+ pid_t original_pid;
unsigned iteration;
- unsigned processed_children;
+ dual_timestamp timestamp;
+ int state;
- usec_t realtime_next, monotonic_next;
+ bool exit_requested:1;
+ bool need_process_child:1;
+ bool watchdog:1;
+
+ int exit_code;
+
+ pid_t tid;
+ sd_event **default_event_ptr;
+
+ usec_t watchdog_last, watchdog_period;
- bool quit;
+ unsigned n_sources;
};
static int pending_prioq_compare(const void *a, const void *b) {
assert(x->pending);
assert(y->pending);
- /* Unmuted ones first */
- if (x->mute != SD_EVENT_MUTED && y->mute == SD_EVENT_MUTED)
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
return -1;
- if (x->mute == SD_EVENT_MUTED && y->mute != SD_EVENT_MUTED)
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
return 1;
/* Lower priority values first */
if (x->prepare_iteration > y->prepare_iteration)
return 1;
- /* Unmuted ones first */
- if (x->mute != SD_EVENT_MUTED && y->mute == SD_EVENT_MUTED)
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
return -1;
- if (x->mute == SD_EVENT_MUTED && y->mute != SD_EVENT_MUTED)
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
return 1;
/* Lower priority values first */
return 0;
}
-static int time_prioq_compare(const void *a, const void *b) {
+static int earliest_time_prioq_compare(const void *a, const void *b) {
const sd_event_source *x = a, *y = b;
assert(x->type == SOURCE_MONOTONIC || x->type == SOURCE_REALTIME);
assert(y->type == SOURCE_MONOTONIC || y->type == SOURCE_REALTIME);
- /* Unmuted ones first */
- if (x->mute != SD_EVENT_MUTED && y->mute == SD_EVENT_MUTED)
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
return -1;
- if (x->mute == SD_EVENT_MUTED && y->mute != SD_EVENT_MUTED)
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
return 1;
/* Move the pending ones to the end */
if (x->time.next < y->time.next)
return -1;
if (x->time.next > y->time.next)
+ return 1;
+
+ /* Stability for the rest */
+ if (x < y)
+ return -1;
+ if (x > y)
+ return 1;
+
+ return 0;
+}
+
+static int latest_time_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert((x->type == SOURCE_MONOTONIC && y->type == SOURCE_MONOTONIC) ||
+ (x->type == SOURCE_REALTIME && y->type == SOURCE_REALTIME));
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Move the pending ones to the end */
+ if (!x->pending && y->pending)
+ return -1;
+ if (x->pending && !y->pending)
+ return 1;
+
+ /* Order by time */
+ if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
return -1;
+ if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
+ return 1;
+
+ /* Stability for the rest */
+ if (x < y)
+ return -1;
+ if (x > y)
+ return 1;
+
+ return 0;
+}
+
+static int exit_prioq_compare(const void *a, const void *b) {
+ const sd_event_source *x = a, *y = b;
+
+ assert(x->type == SOURCE_EXIT);
+ assert(y->type == SOURCE_EXIT);
+
+ /* Enabled ones first */
+ if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
+ return -1;
+ if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
+ return 1;
+
+ /* Lower priority values first */
+ if (x->priority < y->priority)
+ return -1;
+ if (x->priority > y->priority)
+ return 1;
/* Stability for the rest */
if (x < y)
static void event_free(sd_event *e) {
assert(e);
+ assert(e->n_sources == 0);
+
+ if (e->default_event_ptr)
+ *(e->default_event_ptr) = NULL;
if (e->epoll_fd >= 0)
close_nointr_nofail(e->epoll_fd);
if (e->monotonic_fd >= 0)
close_nointr_nofail(e->monotonic_fd);
+ if (e->watchdog_fd >= 0)
+ close_nointr_nofail(e->watchdog_fd);
+
prioq_free(e->pending);
prioq_free(e->prepare);
- prioq_free(e->monotonic);
- prioq_free(e->realtime);
+ prioq_free(e->monotonic_earliest);
+ prioq_free(e->monotonic_latest);
+ prioq_free(e->realtime_earliest);
+ prioq_free(e->realtime_latest);
+ prioq_free(e->exit);
free(e->signal_sources);
free(e);
}
-int sd_event_new(sd_event** ret) {
+_public_ int sd_event_new(sd_event** ret) {
sd_event *e;
int r;
- if (!ret)
- return -EINVAL;
+ assert_return(ret, -EINVAL);
e = new0(sd_event, 1);
if (!e)
return -ENOMEM;
- e->n_ref = REFCNT_INIT;
- e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1;
+ e->n_ref = 1;
+ e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1;
e->realtime_next = e->monotonic_next = (usec_t) -1;
+ e->original_pid = getpid();
assert_se(sigemptyset(&e->sigset) == 0);
return r;
}
-sd_event* sd_event_ref(sd_event *e) {
- if (!e)
- return NULL;
+_public_ sd_event* sd_event_ref(sd_event *e) {
+ assert_return(e, NULL);
- assert_se(REFCNT_INC(e->n_ref) >= 2);
+ assert(e->n_ref >= 1);
+ e->n_ref++;
return e;
}
-sd_event* sd_event_unref(sd_event *e) {
+_public_ sd_event* sd_event_unref(sd_event *e) {
+
if (!e)
return NULL;
- if (REFCNT_DEC(e->n_ref) <= 0)
+ assert(e->n_ref >= 1);
+ e->n_ref--;
+
+ if (e->n_ref <= 0)
event_free(e);
return NULL;
}
+static bool event_pid_changed(sd_event *e) {
+ assert(e);
+
+ /* We don't support people creating am event loop and keeping
+ * it around over a fork(). Let's complain. */
+
+ return e->original_pid != getpid();
+}
+
static int source_io_unregister(sd_event_source *s) {
int r;
return 0;
}
-static int source_io_register(sd_event_source *s, sd_event_mute_t m, uint32_t events) {
+static int source_io_register(
+ sd_event_source *s,
+ int enabled,
+ uint32_t events) {
+
struct epoll_event ev = {};
int r;
assert(s);
assert(s->type == SOURCE_IO);
- assert(m != SD_EVENT_MUTED);
+ assert(enabled != SD_EVENT_OFF);
ev.events = events;
ev.data.ptr = s;
- if (m == SD_EVENT_ONESHOT)
+ if (enabled == SD_EVENT_ONESHOT)
ev.events |= EPOLLONESHOT;
if (s->io.registered)
assert(s);
if (s->event) {
+ assert(s->event->n_sources > 0);
+
switch (s->type) {
case SOURCE_IO:
break;
case SOURCE_MONOTONIC:
- prioq_remove(s->event->monotonic, s, &s->time.prioq_index);
+ prioq_remove(s->event->monotonic_earliest, s, &s->time.earliest_index);
+ prioq_remove(s->event->monotonic_latest, s, &s->time.latest_index);
break;
case SOURCE_REALTIME:
- prioq_remove(s->event->realtime, s, &s->time.prioq_index);
+ prioq_remove(s->event->realtime_earliest, s, &s->time.earliest_index);
+ prioq_remove(s->event->realtime_latest, s, &s->time.latest_index);
break;
case SOURCE_SIGNAL:
if (s->signal.sig > 0) {
- if (s->signal.sig != SIGCHLD || s->event->n_unmuted_child_sources == 0)
+ if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0)
assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
if (s->event->signal_sources)
case SOURCE_CHILD:
if (s->child.pid > 0) {
- if (s->mute != SD_EVENT_MUTED) {
- assert(s->event->n_unmuted_child_sources > 0);
- s->event->n_unmuted_child_sources--;
+ if (s->enabled != SD_EVENT_OFF) {
+ assert(s->event->n_enabled_child_sources > 0);
+ s->event->n_enabled_child_sources--;
}
if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD])
}
break;
+
+ case SOURCE_DEFER:
+ /* nothing */
+ break;
+
+ case SOURCE_EXIT:
+ prioq_remove(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
+ case SOURCE_WATCHDOG:
+ assert_not_reached("Wut? I shouldn't exist.");
}
if (s->pending)
if (s->prepare)
prioq_remove(s->event->prepare, s, &s->prepare_index);
+ s->event->n_sources--;
sd_event_unref(s->event);
}
int r;
assert(s);
+ assert(s->type != SOURCE_EXIT);
if (s->pending == b)
return 0;
} else
assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
+ if (s->type == SOURCE_REALTIME) {
+ prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
+ } else if (s->type == SOURCE_MONOTONIC) {
+ prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
+ }
+
return 0;
}
if (!s)
return NULL;
- s->n_ref = REFCNT_INIT;
+ s->n_ref = 1;
s->event = sd_event_ref(e);
s->type = type;
- s->mute = SD_EVENT_UNMUTED;
s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
+ e->n_sources ++;
+
return s;
}
-int sd_event_add_io(
+_public_ int sd_event_add_io(
sd_event *e,
int fd,
uint32_t events,
- sd_io_handler_t callback,
+ sd_event_io_handler_t callback,
void *userdata,
sd_event_source **ret) {
sd_event_source *s;
int r;
- if (!e)
- return -EINVAL;
- if (fd < 0)
- return -EINVAL;
- if (events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP))
- return -EINVAL;
- if (!callback)
- return -EINVAL;
- if (!ret)
- return -EINVAL;
+ assert_return(e, -EINVAL);
+ assert_return(fd >= 0, -EINVAL);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
s = source_new(e, SOURCE_IO);
if (!s)
s->io.events = events;
s->io.callback = callback;
s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
- r = source_io_register(s, s->mute, events);
+ r = source_io_register(s, s->enabled, events);
if (r < 0) {
source_free(s);
return -errno;
struct epoll_event ev = {};
int r, fd;
+ sd_id128_t bootid;
assert(e);
assert(timer_fd);
return -errno;
}
+ /* When we sleep for longer, we try to realign the wakeup to
+           the same time within each minute/second/250ms, so that
+ events all across the system can be coalesced into a single
+ CPU wakeup. However, let's take some system-specific
+ randomness for this value, so that in a network of systems
+ with synced clocks timer events are distributed a
+ bit. Here, we calculate a perturbation usec offset from the
+ boot ID. */
+
+ if (sd_id128_get_boot(&bootid) >= 0)
+ e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
+
*timer_fd = fd;
return 0;
}
EventSourceType type,
int *timer_fd,
clockid_t id,
- Prioq **prioq,
+ Prioq **earliest,
+ Prioq **latest,
uint64_t usec,
- sd_time_handler_t callback,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
void *userdata,
sd_event_source **ret) {
sd_event_source *s;
int r;
- if (!e)
- return -EINVAL;
- if (!callback)
- return -EINVAL;
- if (!ret)
- return -EINVAL;
+ assert_return(e, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(usec != (uint64_t) -1, -EINVAL);
+ assert_return(accuracy != (uint64_t) -1, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
assert(timer_fd);
- assert(prioq);
+ assert(earliest);
+ assert(latest);
+
+ if (!*earliest) {
+ *earliest = prioq_new(earliest_time_prioq_compare);
+ if (!*earliest)
+ return -ENOMEM;
+ }
- if (!*prioq) {
- *prioq = prioq_new(time_prioq_compare);
- if (!*prioq)
+ if (!*latest) {
+ *latest = prioq_new(latest_time_prioq_compare);
+ if (!*latest)
return -ENOMEM;
}
return -ENOMEM;
s->time.next = usec;
+ s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
s->time.callback = callback;
- s->time.prioq_index = PRIOQ_IDX_NULL;
+ s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
- r = prioq_put(*prioq, s, &s->time.prioq_index);
- if (r < 0) {
- source_free(s);
- return r;
- }
+ r = prioq_put(*earliest, s, &s->time.earliest_index);
+ if (r < 0)
+ goto fail;
+
+ r = prioq_put(*latest, s, &s->time.latest_index);
+ if (r < 0)
+ goto fail;
*ret = s;
return 0;
+
+fail:
+ source_free(s);
+ return r;
}
-int sd_event_add_monotonic(sd_event *e, uint64_t usec, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
- return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic, usec, callback, userdata, ret);
+_public_ int sd_event_add_monotonic(sd_event *e,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ sd_event_source **ret) {
+
+ return event_add_time_internal(e, SOURCE_MONOTONIC, &e->monotonic_fd, CLOCK_MONOTONIC, &e->monotonic_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
}
-int sd_event_add_realtime(sd_event *e, uint64_t usec, sd_time_handler_t callback, void *userdata, sd_event_source **ret) {
- return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime, usec, callback, userdata, ret);
+_public_ int sd_event_add_realtime(sd_event *e,
+ uint64_t usec,
+ uint64_t accuracy,
+ sd_event_time_handler_t callback,
+ void *userdata,
+ sd_event_source **ret) {
+
+ return event_add_time_internal(e, SOURCE_REALTIME, &e->realtime_fd, CLOCK_REALTIME, &e->realtime_earliest, &e->monotonic_latest, usec, accuracy, callback, userdata, ret);
}
static int event_update_signal_fd(sd_event *e) {
return 0;
}
-int sd_event_add_signal(sd_event *e, int sig, sd_signal_handler_t callback, void *userdata, sd_event_source **ret) {
+_public_ int sd_event_add_signal(
+ sd_event *e,
+ int sig,
+ sd_event_signal_handler_t callback,
+ void *userdata,
+ sd_event_source **ret) {
+
sd_event_source *s;
int r;
- if (!e)
- return -EINVAL;
- if (sig <= 0)
- return -EINVAL;
- if (sig >= _NSIG)
- return -EINVAL;
- if (!callback)
- return -EINVAL;
- if (!ret)
- return -EINVAL;
+ assert_return(e, -EINVAL);
+ assert_return(sig > 0, -EINVAL);
+ assert_return(sig < _NSIG, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
if (!e->signal_sources) {
e->signal_sources = new0(sd_event_source*, _NSIG);
s->signal.sig = sig;
s->signal.callback = callback;
s->userdata = userdata;
+ s->enabled = SD_EVENT_ON;
e->signal_sources[sig] = s;
assert_se(sigaddset(&e->sigset, sig) == 0);
- if (sig != SIGCHLD || e->n_unmuted_child_sources == 0) {
+ if (sig != SIGCHLD || e->n_enabled_child_sources == 0) {
r = event_update_signal_fd(e);
if (r < 0) {
source_free(s);
return 0;
}
-int sd_event_add_child(sd_event *e, pid_t pid, int options, sd_child_handler_t callback, void *userdata, sd_event_source **ret) {
+_public_ int sd_event_add_child(
+ sd_event *e,
+ pid_t pid,
+ int options,
+ sd_event_child_handler_t callback,
+ void *userdata,
+ sd_event_source **ret) {
+
sd_event_source *s;
int r;
- if (!e)
- return -EINVAL;
- if (pid <= 1)
- return -EINVAL;
- if (options & ~(WEXITED|WSTOPPED|WCONTINUED))
- return -EINVAL;
- if (!callback)
- return -EINVAL;
- if (!ret)
- return -EINVAL;
+ assert_return(e, -EINVAL);
+ assert_return(pid > 1, -EINVAL);
+ assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
+ assert_return(options != 0, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
r = hashmap_ensure_allocated(&e->child_sources, trivial_hash_func, trivial_compare_func);
if (r < 0)
s->child.options = options;
s->child.callback = callback;
s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
if (r < 0) {
return r;
}
- e->n_unmuted_child_sources ++;
+ e->n_enabled_child_sources ++;
assert_se(sigaddset(&e->sigset, SIGCHLD) == 0);
}
}
+ e->need_process_child = true;
+
*ret = s;
return 0;
}
-int sd_event_add_defer(sd_event *e, sd_defer_handler_t callback, void *userdata, sd_event_source **ret) {
+_public_ int sd_event_add_defer(
+ sd_event *e,
+ sd_event_handler_t callback,
+ void *userdata,
+ sd_event_source **ret) {
+
sd_event_source *s;
int r;
- if (!e)
- return -EINVAL;
- if (!ret)
- return -EINVAL;
+ assert_return(e, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
s = source_new(e, SOURCE_DEFER);
if (!s)
s->defer.callback = callback;
s->userdata = userdata;
+ s->enabled = SD_EVENT_ONESHOT;
r = source_set_pending(s, true);
if (r < 0) {
return 0;
}
-sd_event_source* sd_event_source_ref(sd_event_source *s) {
+_public_ int sd_event_add_exit(
+ sd_event *e,
+ sd_event_handler_t callback,
+ void *userdata,
+ sd_event_source **ret) {
+
+ sd_event_source *s;
+ int r;
+
+ assert_return(e, -EINVAL);
+ assert_return(callback, -EINVAL);
+ assert_return(ret, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!e->exit) {
+ e->exit = prioq_new(exit_prioq_compare);
+ if (!e->exit)
+ return -ENOMEM;
+ }
+
+ s = source_new(e, SOURCE_EXIT);
if (!s)
- return NULL;
+ return -ENOMEM;
+
+ s->exit.callback = callback;
+ s->userdata = userdata;
+ s->exit.prioq_index = PRIOQ_IDX_NULL;
+ s->enabled = SD_EVENT_ONESHOT;
+
+ r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
+ if (r < 0) {
+ source_free(s);
+ return r;
+ }
+
+ *ret = s;
+ return 0;
+}
+
+_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
+ assert_return(s, NULL);
- assert_se(REFCNT_INC(s->n_ref) >= 2);
+ assert(s->n_ref >= 1);
+ s->n_ref++;
return s;
}
-sd_event_source* sd_event_source_unref(sd_event_source *s) {
+_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
+
if (!s)
return NULL;
- if (REFCNT_DEC(s->n_ref) <= 0)
- source_free(s);
+ assert(s->n_ref >= 1);
+ s->n_ref--;
+
+ if (s->n_ref <= 0) {
+ /* Here's a special hack: when we are called from a
+ * dispatch handler we won't free the event source
+ * immediately, but we will detach the fd from the
+ * epoll. This way it is safe for the caller to unref
+ * the event source and immediately close the fd, but
+ * we still retain a valid event source object after
+ * the callback. */
+
+ if (s->dispatching) {
+ if (s->type == SOURCE_IO)
+ source_io_unregister(s);
+ } else
+ source_free(s);
+ }
return NULL;
}
-int sd_event_source_get_pending(sd_event_source *s) {
- if (!s)
- return -EINVAL;
+_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
+ assert_return(s, NULL);
+
+ return s->event;
+}
+
+_public_ int sd_event_source_get_pending(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
return s->pending;
}
-int sd_event_source_get_io_fd(sd_event_source *s) {
- if (!s)
- return -EINVAL;
- if (s->type != SOURCE_IO)
- return -EDOM;
+_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
return s->io.fd;
}
-int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
- if (!s)
- return -EINVAL;
- if (s->type != SOURCE_IO)
- return -EDOM;
- if (!events)
- return -EINVAL;
+_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
+ int r;
+
+ assert_return(s, -EINVAL);
+ assert_return(fd >= 0, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (s->io.fd == fd)
+ return 0;
+
+ if (s->enabled == SD_EVENT_OFF) {
+ s->io.fd = fd;
+ s->io.registered = false;
+ } else {
+ int saved_fd;
+
+ saved_fd = s->io.fd;
+ assert(s->io.registered);
+
+ s->io.fd = fd;
+ s->io.registered = false;
+
+ r = source_io_register(s, s->enabled, s->io.events);
+ if (r < 0) {
+ s->io.fd = saved_fd;
+ s->io.registered = true;
+ return r;
+ }
+
+ epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
+ assert_return(s, -EINVAL);
+ assert_return(events, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
*events = s->io.events;
return 0;
}
-int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
+_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
int r;
- if (!s)
- return -EINVAL;
- if (!s->type != SOURCE_IO)
- return -EDOM;
- if (events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP))
- return -EINVAL;
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
if (s->io.events == events)
return 0;
- if (s->mute != SD_EVENT_MUTED) {
- r = source_io_register(s, s->io.events, events);
+ if (s->enabled != SD_EVENT_OFF) {
+ r = source_io_register(s, s->enabled, events);
if (r < 0)
return r;
}
s->io.events = events;
+ source_set_pending(s, false);
return 0;
}
-int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
- if (!s)
- return -EINVAL;
- if (s->type != SOURCE_IO)
- return -EDOM;
- if (!revents)
- return -EINVAL;
- if (!s->pending)
- return -ENODATA;
+_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
+ assert_return(s, -EINVAL);
+ assert_return(revents, -EINVAL);
+ assert_return(s->type == SOURCE_IO, -EDOM);
+ assert_return(s->pending, -ENODATA);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
*revents = s->io.revents;
return 0;
}
-int sd_event_source_get_signal(sd_event_source *s) {
- if (!s)
- return -EINVAL;
- if (s->type != SOURCE_SIGNAL)
- return -EDOM;
+_public_ int sd_event_source_get_signal(sd_event_source *s) {
+ assert_return(s, -EINVAL);
+ assert_return(s->type == SOURCE_SIGNAL, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
return s->signal.sig;
}
-int sd_event_source_get_priority(sd_event_source *s, int *priority) {
- if (!s)
- return -EINVAL;
+_public_ int sd_event_source_get_priority(sd_event_source *s, int *priority) {
+ assert_return(s, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
return s->priority;
}
-int sd_event_source_set_priority(sd_event_source *s, int priority) {
- if (!s)
- return -EINVAL;
+_public_ int sd_event_source_set_priority(sd_event_source *s, int priority) {
+ assert_return(s, -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
if (s->priority == priority)
return 0;
s->priority = priority;
if (s->pending)
- assert_se(prioq_reshuffle(s->event->pending, s, &s->pending_index) == 0);
+ prioq_reshuffle(s->event->pending, s, &s->pending_index);
if (s->prepare)
- assert_se(prioq_reshuffle(s->event->prepare, s, &s->prepare_index) == 0);
+ prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
+
+ if (s->type == SOURCE_EXIT)
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
return 0;
}
-int sd_event_source_get_mute(sd_event_source *s, sd_event_mute_t *m) {
- if (!s)
- return -EINVAL;
- if (!m)
- return -EINVAL;
+_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
+ assert_return(s, -EINVAL);
+ assert_return(m, -EINVAL);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
- *m = s->mute;
+ *m = s->enabled;
return 0;
}
-int sd_event_source_set_mute(sd_event_source *s, sd_event_mute_t m) {
+_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
int r;
- if (!s)
- return -EINVAL;
- if (m != SD_EVENT_MUTED && m != SD_EVENT_UNMUTED && !SD_EVENT_ONESHOT)
- return -EINVAL;
+ assert_return(s, -EINVAL);
+ assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
- if (s->mute == m)
+ if (s->enabled == m)
return 0;
- if (m == SD_EVENT_MUTED) {
+ if (m == SD_EVENT_OFF) {
switch (s->type) {
if (r < 0)
return r;
- s->mute = m;
+ s->enabled = m;
break;
case SOURCE_MONOTONIC:
- s->mute = m;
- prioq_reshuffle(s->event->monotonic, s, &s->time.prioq_index);
+ s->enabled = m;
+ prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
break;
case SOURCE_REALTIME:
- s->mute = m;
- prioq_reshuffle(s->event->realtime, s, &s->time.prioq_index);
+ s->enabled = m;
+ prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
break;
case SOURCE_SIGNAL:
- s->mute = m;
- if (s->signal.sig != SIGCHLD || s->event->n_unmuted_child_sources == 0) {
+ s->enabled = m;
+ if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
assert_se(sigdelset(&s->event->sigset, s->signal.sig) == 0);
event_update_signal_fd(s->event);
}
break;
case SOURCE_CHILD:
- s->mute = m;
+ s->enabled = m;
- assert(s->event->n_unmuted_child_sources > 0);
- s->event->n_unmuted_child_sources--;
+ assert(s->event->n_enabled_child_sources > 0);
+ s->event->n_enabled_child_sources--;
if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
assert_se(sigdelset(&s->event->sigset, SIGCHLD) == 0);
break;
+ case SOURCE_EXIT:
+ s->enabled = m;
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
case SOURCE_DEFER:
- s->mute = m;
+ s->enabled = m;
break;
+
+ case SOURCE_WATCHDOG:
+ assert_not_reached("Wut? I shouldn't exist.");
}
} else {
if (r < 0)
return r;
- s->mute = m;
+ s->enabled = m;
break;
case SOURCE_MONOTONIC:
- s->mute = m;
- prioq_reshuffle(s->event->monotonic, s, &s->time.prioq_index);
+ s->enabled = m;
+ prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
break;
case SOURCE_REALTIME:
- s->mute = m;
- prioq_reshuffle(s->event->realtime, s, &s->time.prioq_index);
+ s->enabled = m;
+ prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
break;
case SOURCE_SIGNAL:
- s->mute = m;
+ s->enabled = m;
- if (s->signal.sig != SIGCHLD || s->event->n_unmuted_child_sources == 0) {
+ if (s->signal.sig != SIGCHLD || s->event->n_enabled_child_sources == 0) {
assert_se(sigaddset(&s->event->sigset, s->signal.sig) == 0);
event_update_signal_fd(s->event);
}
break;
case SOURCE_CHILD:
- s->mute = m;
+ s->enabled = m;
- if (s->mute == SD_EVENT_MUTED) {
- s->event->n_unmuted_child_sources++;
+ if (s->enabled == SD_EVENT_OFF) {
+ s->event->n_enabled_child_sources++;
if (!s->event->signal_sources || !s->event->signal_sources[SIGCHLD]) {
assert_se(sigaddset(&s->event->sigset, SIGCHLD) == 0);
}
break;
+ case SOURCE_EXIT:
+ s->enabled = m;
+ prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
+ break;
+
case SOURCE_DEFER:
- s->mute = m;
+ s->enabled = m;
break;
+
+ case SOURCE_WATCHDOG:
+ assert_not_reached("Wut? I shouldn't exist.");
}
}
return 0;
}
-int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
- if (!s)
- return -EINVAL;
- if (!usec)
- return -EINVAL;
- if (s->type != SOURCE_REALTIME && s->type != SOURCE_MONOTONIC)
- return -EDOM;
+_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
*usec = s->time.next;
return 0;
}
-int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
- if (!s)
- return -EINVAL;
- if (s->type != SOURCE_REALTIME && s->type != SOURCE_MONOTONIC)
- return -EDOM;
-
- if (s->time.next == usec)
- return 0;
+_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec != (uint64_t) -1, -EINVAL);
+ assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
s->time.next = usec;
+ source_set_pending(s, false);
+
+ if (s->type == SOURCE_REALTIME) {
+ prioq_reshuffle(s->event->realtime_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
+ } else {
+ prioq_reshuffle(s->event->monotonic_earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
+ }
+
+ return 0;
+}
+
+_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *usec = s->time.accuracy;
+ return 0;
+}
+
+_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
+ assert_return(s, -EINVAL);
+ assert_return(usec != (uint64_t) -1, -EINVAL);
+ assert_return(s->type == SOURCE_REALTIME || s->type == SOURCE_MONOTONIC, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ if (usec == 0)
+ usec = DEFAULT_ACCURACY_USEC;
+
+ s->time.accuracy = usec;
+
+ source_set_pending(s, false);
+
if (s->type == SOURCE_REALTIME)
- prioq_reshuffle(s->event->realtime, s, &s->time.prioq_index);
+ prioq_reshuffle(s->event->realtime_latest, s, &s->time.latest_index);
else
- prioq_reshuffle(s->event->monotonic, s, &s->time.prioq_index);
+ prioq_reshuffle(s->event->monotonic_latest, s, &s->time.latest_index);
return 0;
}
-int sd_event_source_set_prepare(sd_event_source *s, sd_prepare_handler_t callback) {
+_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
+ assert_return(s, -EINVAL);
+ assert_return(pid, -EINVAL);
+ assert_return(s->type == SOURCE_CHILD, -EDOM);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
+
+ *pid = s->child.pid;
+ return 0;
+}
+
+_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
int r;
- if (!s)
- return -EINVAL;
+ assert_return(s, -EINVAL);
+ assert_return(s->type != SOURCE_EXIT, -EDOM);
+ assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(s->event), -ECHILD);
if (s->prepare == callback)
return 0;
return 0;
}
-void* sd_event_source_get_userdata(sd_event_source *s) {
- if (!s)
- return NULL;
+_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
+ assert_return(s, NULL);
return s->userdata;
}
+_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
+ void *ret;
+
+ assert_return(s, NULL);
+
+ ret = s->userdata;
+ s->userdata = userdata;
+
+ return ret;
+}
+
+static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
+ usec_t c;
+ assert(e);
+ assert(a <= b);
+
+ if (a <= 0)
+ return 0;
+
+ if (b <= a + 1)
+ return a;
+
+ /*
+ Find a good time to wake up again between times a and b. We
+ have two goals here:
+
+ a) We want to wake up as seldom as possible, hence prefer
+ later times over earlier times.
+
+ b) But if we have to wake up, then let's make sure to
+ dispatch as much as possible on the entire system.
+
+ We implement this by waking up everywhere at the same time
+ within any given minute if we can, synchronised via the
+ perturbation value determined from the boot ID. If we can't,
+ then we try to find the same spot in every 10s, then 1s and
+ then 250ms step. Otherwise, we pick the last possible time
+ to wake up.
+ */
+
+ c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MINUTE))
+ return b;
+
+ c -= USEC_PER_MINUTE;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC*10))
+ return b;
+
+ c -= USEC_PER_SEC*10;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_SEC))
+ return b;
+
+ c -= USEC_PER_SEC;
+ }
+
+ if (c >= a)
+ return c;
+
+ c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
+ if (c >= b) {
+ if (_unlikely_(c < USEC_PER_MSEC*250))
+ return b;
+
+ c -= USEC_PER_MSEC*250;
+ }
+
+ if (c >= a)
+ return c;
+
+ return b;
+}
+
static int event_arm_timer(
sd_event *e,
int timer_fd,
- Prioq *prioq,
+ Prioq *earliest,
+ Prioq *latest,
usec_t *next) {
struct itimerspec its = {};
- sd_event_source *s;
+ sd_event_source *a, *b;
+ usec_t t;
int r;
- assert_se(e);
- assert_se(next);
+ assert(e);
+ assert(next);
+
+ a = prioq_peek(earliest);
+ if (!a || a->enabled == SD_EVENT_OFF) {
+
+ if (timer_fd < 0)
+ return 0;
+
+ if (*next == (usec_t) -1)
+ return 0;
+
+ /* disarm */
+ r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
+ if (r < 0)
+ return r;
+
+ *next = (usec_t) -1;
- s = prioq_peek(prioq);
- if (!s || s->mute == SD_EVENT_MUTED)
return 0;
+ }
- if (*next == s->time.next)
+ b = prioq_peek(latest);
+ assert_se(b && b->enabled != SD_EVENT_OFF);
+
+ t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
+ if (*next == t)
return 0;
assert_se(timer_fd >= 0);
- if (s->time.next == 0) {
+ if (t == 0) {
/* We don' want to disarm here, just mean some time looooong ago. */
its.it_value.tv_sec = 0;
its.it_value.tv_nsec = 1;
} else
- timespec_store(&its.it_value, s->time.next);
+ timespec_store(&its.it_value, t);
r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL);
if (r < 0)
- return r;
+ return -errno;
- *next = s->time.next;
+ *next = t;
return 0;
}
s->io.revents = events;
- /*
- If this is a oneshot event source, then we added it to the
- epoll with EPOLLONESHOT, hence we know it's not registered
- anymore. We can save a syscall here...
- */
-
- if (s->mute == SD_EVENT_ONESHOT)
- s->io.registered = false;
-
return source_set_pending(s, true);
}
-static int flush_timer(sd_event *e, int fd, uint32_t events) {
+static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
uint64_t x;
ssize_t ss;
assert(e);
+ assert(fd >= 0);
- if (events != EPOLLIN)
- return -EIO;
+ assert_return(events == EPOLLIN, -EIO);
ss = read(fd, &x, sizeof(x));
if (ss < 0) {
if (ss != sizeof(x))
return -EIO;
+ if (next)
+ *next = (usec_t) -1;
+
return 0;
}
-static int process_timer(sd_event *e, usec_t n, Prioq *prioq) {
+static int process_timer(
+ sd_event *e,
+ usec_t n,
+ Prioq *earliest,
+ Prioq *latest) {
+
sd_event_source *s;
int r;
assert(e);
for (;;) {
- s = prioq_peek(prioq);
+ s = prioq_peek(earliest);
if (!s ||
s->time.next > n ||
- s->mute == SD_EVENT_MUTED ||
+ s->enabled == SD_EVENT_OFF ||
s->pending)
break;
if (r < 0)
return r;
- r = prioq_reshuffle(prioq, s, &s->time.prioq_index);
- if (r < 0)
- return r;
+ prioq_reshuffle(earliest, s, &s->time.earliest_index);
+ prioq_reshuffle(latest, s, &s->time.latest_index);
}
return 0;
assert(e);
+ e->need_process_child = false;
+
/*
So, this is ugly. We iteratively invoke waitid() with P_PID
+ WNOHANG for each PID we wait for, instead of using
don't care about. Since this is O(n) this means that if you
have a lot of processes you probably want to handle SIGCHLD
yourself.
+
+ We do not reap the children here (by using WNOWAIT), this
+ is only done after the event source is dispatched so that
+ the callback still sees the process as a zombie.
*/
HASHMAP_FOREACH(s, e->child_sources, i) {
if (s->pending)
continue;
- if (s->mute == SD_EVENT_MUTED)
+ if (s->enabled == SD_EVENT_OFF)
continue;
zero(s->child.siginfo);
- r = waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|s->child.options);
+ r = waitid(P_PID, s->child.pid, &s->child.siginfo,
+ WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
if (r < 0)
return -errno;
if (s->child.siginfo.si_pid != 0) {
+ bool zombie =
+ s->child.siginfo.si_code == CLD_EXITED ||
+ s->child.siginfo.si_code == CLD_KILLED ||
+ s->child.siginfo.si_code == CLD_DUMPED;
+
+ if (!zombie && (s->child.options & WEXITED)) {
+ /* If the child isn't dead then let's
+ * immediately remove the state change
+ * from the queue, since there's no
+ * benefit in leaving it queued */
+
+ assert(s->child.options & (WSTOPPED|WCONTINUED));
+ waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
+ }
+
r = source_set_pending(s, true);
if (r < 0)
return r;
}
}
- e->processed_children = e->iteration;
return 0;
}
static int process_signal(sd_event *e, uint32_t events) {
- struct signalfd_siginfo si;
bool read_one = false;
- ssize_t ss;
int r;
- if (events != EPOLLIN)
- return -EIO;
+ assert(e);
+ assert(e->signal_sources);
+
+ assert_return(events == EPOLLIN, -EIO);
for (;;) {
+ struct signalfd_siginfo si;
+ ssize_t ss;
sd_event_source *s;
ss = read(e->signal_fd, &si, sizeof(si));
read_one = true;
+ s = e->signal_sources[si.ssi_signo];
if (si.ssi_signo == SIGCHLD) {
r = process_child(e);
if (r < 0)
return r;
- if (r > 0 || !e->signal_sources[si.ssi_signo])
+ if (r > 0 || !s)
continue;
- } else {
- s = e->signal_sources[si.ssi_signo];
+ } else
if (!s)
return -EIO;
- }
s->signal.siginfo = si;
r = source_set_pending(s, true);
return r;
}
-
return 0;
}
static int source_dispatch(sd_event_source *s) {
- int r;
+ int r = 0;
assert(s);
- assert(s->pending);
+ assert(s->pending || s->type == SOURCE_EXIT);
- r = source_set_pending(s, false);
- if (r < 0)
- return r;
+ if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
+ r = source_set_pending(s, false);
+ if (r < 0)
+ return r;
+ }
- if (s->mute == SD_EVENT_ONESHOT) {
- r = sd_event_source_set_mute(s, SD_EVENT_MUTED);
+ if (s->enabled == SD_EVENT_ONESHOT) {
+ r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
if (r < 0)
return r;
}
+ s->dispatching = true;
+
switch (s->type) {
case SOURCE_IO:
r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
break;
- case SOURCE_CHILD:
+ case SOURCE_CHILD: {
+ bool zombie;
+
+ zombie = s->child.siginfo.si_code == CLD_EXITED ||
+ s->child.siginfo.si_code == CLD_KILLED ||
+ s->child.siginfo.si_code == CLD_DUMPED;
+
r = s->child.callback(s, &s->child.siginfo, s->userdata);
+
+ /* Now, reap the PID for good. */
+ if (zombie)
+ waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
+
break;
+ }
case SOURCE_DEFER:
r = s->defer.callback(s, s->userdata);
break;
+
+ case SOURCE_EXIT:
+ r = s->exit.callback(s, s->userdata);
+ break;
+
+ case SOURCE_WATCHDOG:
+ assert_not_reached("Wut? I shouldn't exist.");
}
- return r;
+ s->dispatching = false;
+
+ if (r < 0)
+ log_debug("Event source %p returned error, disabling: %s", s, strerror(-r));
+
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
+
+ return 1;
}
static int event_prepare(sd_event *e) {
sd_event_source *s;
s = prioq_peek(e->prepare);
- if (!s || s->prepare_iteration == e->iteration || s->mute == SD_EVENT_MUTED)
+ if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
break;
s->prepare_iteration = e->iteration;
return r;
assert(s->prepare);
+
+ s->dispatching = true;
r = s->prepare(s, s->userdata);
+ s->dispatching = false;
+
if (r < 0)
- return r;
+ log_debug("Prepare callback of event source %p returned error, disabling: %s", s, strerror(-r));
+ if (s->n_ref == 0)
+ source_free(s);
+ else if (r < 0)
+ sd_event_source_set_enabled(s, SD_EVENT_OFF);
}
return 0;
}
-int sd_event_run(sd_event *e, uint64_t timeout) {
- struct epoll_event ev_queue[EPOLL_QUEUE_MAX];
+static int dispatch_exit(sd_event *e) {
+ sd_event_source *p;
+ int r;
+
+ assert(e);
+
+ p = prioq_peek(e->exit);
+ if (!p || p->enabled == SD_EVENT_OFF) {
+ e->state = SD_EVENT_FINISHED;
+ return 0;
+ }
+
+ sd_event_ref(e);
+ e->iteration++;
+ e->state = SD_EVENT_EXITING;
+
+ r = source_dispatch(p);
+
+ e->state = SD_EVENT_PASSIVE;
+ sd_event_unref(e);
+
+ return r;
+}
+
+static sd_event_source* event_next_pending(sd_event *e) {
+ sd_event_source *p;
+
+ assert(e);
+
+ p = prioq_peek(e->pending);
+ if (!p)
+ return NULL;
+
+ if (p->enabled == SD_EVENT_OFF)
+ return NULL;
+
+ return p;
+}
+
+static int arm_watchdog(sd_event *e) {
+ struct itimerspec its = {};
+ usec_t t;
+ int r;
+
+ assert(e);
+ assert(e->watchdog_fd >= 0);
+
+ t = sleep_between(e,
+ e->watchdog_last + (e->watchdog_period / 2),
+ e->watchdog_last + (e->watchdog_period * 3 / 4));
+
+ timespec_store(&its.it_value, t);
+
+ r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
+ if (r < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int process_watchdog(sd_event *e) {
+ assert(e);
+
+ if (!e->watchdog)
+ return 0;
+
+ /* Don't notify watchdog too often */
+ if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
+ return 0;
+
+ sd_notify(false, "WATCHDOG=1");
+ e->watchdog_last = e->timestamp.monotonic;
+
+ return arm_watchdog(e);
+}
+
/* Run one iteration of the event loop: prepare sources, arm the per-clock
 * timerfds, poll with epoll for up to `timeout` µs ((uint64_t) -1 = forever),
 * flush/queue whatever fired, and dispatch at most ONE pending event source.
 * Returns the dispatch result (>= 0) or a negative errno; -ESTALE once the
 * loop has finished. Must only be called from the owning process while the
 * loop is idle (SD_EVENT_PASSIVE). */
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        sd_event_source *p;
        int r, i, m;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);

        /* Once an exit was requested, only the exit-source queue runs. */
        if (e->exit_requested)
                return dispatch_exit(e);

        /* Pin the event object: a callback may drop the caller's reference. */
        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_RUNNING;

        r = event_prepare(e);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, e->monotonic_fd, e->monotonic_earliest, e->monotonic_latest, &e->monotonic_next);
        if (r < 0)
                goto finish;

        r = event_arm_timer(e, e->realtime_fd, e->realtime_earliest, e->realtime_latest, &e->realtime_next);
        if (r < 0)
                goto finish;

        /* If something is already dispatchable (or SIGCHLD state needs a
         * recheck), don't block in epoll_wait() at all. */
        if (event_next_pending(e) || e->need_process_child)
                timeout = 0;

        /* Size the event buffer to the number of sources, bounded; newa()
         * allocates it on the stack. */
        ev_queue_max = CLAMP(e->n_sources, 1U, EPOLL_QUEUE_MAX);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the µs timeout up to whole ms for epoll. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                /* A signal-interrupted or spurious wakeup is not an error. */
                r = errno == EAGAIN || errno == EINTR ? 0 : -errno;
                goto finish;
        }

        /* Timestamp this iteration; process_timer()/process_watchdog() below
         * compare against it. */
        dual_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                /* Internal fds are tagged with small sentinel pointers; real
                 * IO sources carry their sd_event_source pointer. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_MONOTONIC))
                        r = flush_timer(e, e->monotonic_fd, ev_queue[i].events, &e->monotonic_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_REALTIME))
                        r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL))
                        r = process_signal(e, ev_queue[i].events);
                else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else
                        r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);

                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, e->realtime_earliest, e->realtime_latest);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        p = event_next_pending(e);
        if (!p) {
                r = 0;
                goto finish;
        }

        /* Dispatch exactly one source per iteration. */
        r = source_dispatch(p);

finish:
        e->state = SD_EVENT_PASSIVE;
        sd_event_unref(e);

        return r;
}
-int sd_event_loop(sd_event *e) {
+_public_ int sd_event_loop(sd_event *e) {
int r;
- if (!e)
- return -EINVAL;
+ assert_return(e, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+ assert_return(e->state == SD_EVENT_PASSIVE, -EBUSY);
- while (!e->quit) {
+ sd_event_ref(e);
+
+ while (e->state != SD_EVENT_FINISHED) {
r = sd_event_run(e, (uint64_t) -1);
if (r < 0)
- return r;
+ goto finish;
}
+ r = e->exit_code;
+
+finish:
+ sd_event_unref(e);
+ return r;
+}
+
+_public_ int sd_event_get_state(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->state;
+}
+
+_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
+ assert_return(e, -EINVAL);
+ assert_return(code, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (!e->exit_requested)
+ return -ENODATA;
+
+ *code = e->exit_code;
return 0;
}
-int sd_event_quit(sd_event *e) {
- if (!e)
- return EINVAL;
+_public_ int sd_event_exit(sd_event *e, int code) {
+ assert_return(e, -EINVAL);
+ assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ e->exit_requested = true;
+ e->exit_code = code;
- return e->quit;
+ return 0;
}
-int sd_event_request_quit(sd_event *e) {
- if (!e)
- return -EINVAL;
+_public_ int sd_event_get_now_realtime(sd_event *e, uint64_t *usec) {
+ assert_return(e, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
+ assert_return(!event_pid_changed(e), -ECHILD);
- e->quit = true;
+ *usec = e->timestamp.realtime;
return 0;
}
-sd_event *sd_event_get(sd_event_source *s) {
- if (!s)
- return NULL;
+_public_ int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec) {
+ assert_return(e, -EINVAL);
+ assert_return(usec, -EINVAL);
+ assert_return(dual_timestamp_is_set(&e->timestamp), -ENODATA);
+ assert_return(!event_pid_changed(e), -ECHILD);
- return s->event;
+ *usec = e->timestamp.monotonic;
+ return 0;
+}
+
+_public_ int sd_event_default(sd_event **ret) {
+
+ static __thread sd_event *default_event = NULL;
+ sd_event *e;
+ int r;
+
+ if (!ret)
+ return !!default_event;
+
+ if (default_event) {
+ *ret = sd_event_ref(default_event);
+ return 0;
+ }
+
+ r = sd_event_new(&e);
+ if (r < 0)
+ return r;
+
+ e->default_event_ptr = &default_event;
+ e->tid = gettid();
+ default_event = e;
+
+ *ret = e;
+ return 1;
+}
+
+_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
+ assert_return(e, -EINVAL);
+ assert_return(tid, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ if (e->tid != 0) {
+ *tid = e->tid;
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
/* Enable/disable automatic sd_notify() watchdog pings driven by the event
 * loop. When enabling, the period is read from $WATCHDOG_USEC (presumably set
 * by the service manager — the caller gets `false` back if it is absent, i.e.
 * the watchdog simply stays off). A dedicated timerfd is created, armed and
 * registered with epoll under the SOURCE_WATCHDOG sentinel. Returns the new
 * enablement state (0/1) or a negative errno. */
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Already in the requested state? */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};
                const char *env;

                env = getenv("WATCHDOG_USEC");
                if (!env)
                        return false;

                r = safe_atou64(env, &e->watchdog_period);
                if (r < 0)
                        return r;
                if (e->watchdog_period <= 0)
                        return -EIO;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Tag the fd with the SOURCE_WATCHDOG sentinel so the poll
                 * loop can tell it apart from real IO sources. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disabling: deregister and close the timerfd, if any. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        close_nointr_nofail(e->watchdog_fd);
                        e->watchdog_fd = -1;
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Undo the fd acquisition on any failure after timerfd_create(). */
        close_nointr_nofail(e->watchdog_fd);
        e->watchdog_fd = -1;
        return r;
}
+
+_public_ int sd_event_get_watchdog(sd_event *e) {
+ assert_return(e, -EINVAL);
+ assert_return(!event_pid_changed(e), -ECHILD);
+
+ return e->watchdog;
}