From cde93897cdefdd7c7f66c400a61e42ceee5f6a46 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 11 Dec 2013 18:14:52 +0100 Subject: [PATCH] event: hook up sd-event with the service watchdog logic Adds a new call sd_event_set_watchdog() that can be used to hook up the event loop with the watchdog supervision logic of systemd. If enabled and $WATCHDOG_USEC is set the event loop will ping the invoking systemd daemon right after coming back from epoll_wait() but not more often than $WATCHDOG_USEC/4. The epoll_wait() will sleep no longer than $WATCHDOG_USEC/4*3, to make sure the service manager is called in time. This means that setting WatchdogSec= in a .service file and calling sd_event_set_watchdog() in your daemon is enough to hook it up with the watchdog logic. --- Makefile.am | 8 +- TODO | 2 +- src/hostname/hostnamed.c | 2 + src/libsystemd-bus/libsystemd-bus.sym | 1 + src/libsystemd-bus/sd-event.c | 126 ++++++++++++++++++++++++-- src/libsystemd-bus/test-event.c | 2 + src/locale/localed.c | 2 + src/login/logind.c | 2 + src/machine/machined.c | 2 + src/network/networkd-manager.c | 2 + src/socket-proxy/socket-proxyd.c | 2 + src/systemd/sd-event.h | 1 + src/timedate/timedated.c | 2 + units/systemd-hostnamed.service.in | 1 + units/systemd-localed.service.in | 1 + units/systemd-logind.service.in | 1 + units/systemd-machined.service.in | 1 + units/systemd-networkd.service.in | 1 + units/systemd-timedated.service.in | 1 + 19 files changed, 150 insertions(+), 10 deletions(-) diff --git a/Makefile.am b/Makefile.am index 19da6eab9..9e4b1363c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -652,10 +652,12 @@ test_rtnl_SOURCES = \ test_rtnl_LDADD = \ libsystemd-rtnl.la \ libsystemd-bus-internal.la \ + libsystemd-daemon-internal.la \ libsystemd-id128-internal.la \ libsystemd-shared.la -tests += test-rtnl +tests += \ + test-rtnl # ------------------------------------------------------------------------------ noinst_LTLIBRARIES += \ @@ -3966,10 +3968,12 @@ test_network_LDADD = \ libudev-internal.la \ libsystemd-bus-internal.la \ libsystemd-id128-internal.la \ + libsystemd-daemon-internal.la \ libsystemd-rtnl.la \ libsystemd-shared.la -tests += test-network +tests += \ + test-network EXTRA_DIST += \ src/network/networkd-gperf.gperf \ diff --git a/TODO b/TODO index 8dc8f63a1..d909ab9d8 100644 --- a/TODO +++ b/TODO @@ -137,7 +137,6 @@ Features: but do not return anything up to the event loop caller. Instead add parameter to sd_event_request_quit() to take retval. This way errors rippling upwards are the option, not the default - - native support for watchdog stuff * in the final killing spree, detect processes from the root directory, and complain loudly if they have argv[0][0] == '@' set. @@ -311,6 +310,7 @@ Features: boot, and causes the journal to be moved back to /run on shutdown, so that we don't keep /var busy. This needs to happen synchronously, hence doing this via signals is not going to work. + - port to sd-event, enable watchdog from event loop * document: - document that deps in [Unit] sections ignore Alias= fileds in diff --git a/src/hostname/hostnamed.c b/src/hostname/hostnamed.c index f7ae50dcd..ece2b4167 100644 --- a/src/hostname/hostnamed.c +++ b/src/hostname/hostnamed.c @@ -627,6 +627,8 @@ int main(int argc, char *argv[]) { goto finish; } + sd_event_set_watchdog(event, true); + r = connect_bus(&context, event, &bus); if (r < 0) goto finish; diff --git a/src/libsystemd-bus/libsystemd-bus.sym b/src/libsystemd-bus/libsystemd-bus.sym index 7bc1ef9ad..4a849b382 100644 --- a/src/libsystemd-bus/libsystemd-bus.sym +++ b/src/libsystemd-bus/libsystemd-bus.sym @@ -238,6 +238,7 @@ global: sd_event_request_quit; sd_event_get_now_realtime; sd_event_get_now_monotonic; + sd_event_set_watchdog; sd_event_source_ref; sd_event_source_unref; diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c index eb0392300..9fceb7b13 100644 --- a/src/libsystemd-bus/sd-event.c +++ b/src/libsystemd-bus/sd-event.c @@ -24,6 +24,7 @@ #include #include "sd-id128.h" +#include "sd-daemon.h" #include "macro.h" #include "prioq.h" #include "hashmap.h" @@ -43,7 +44,8 @@ typedef enum EventSourceType { SOURCE_SIGNAL, SOURCE_CHILD, SOURCE_DEFER, - SOURCE_QUIT + SOURCE_QUIT, + SOURCE_WATCHDOG } EventSourceType; struct sd_event_source { @@ -105,6 +107,7 @@ struct sd_event { int signal_fd; int realtime_fd; int monotonic_fd; + int watchdog_fd; Prioq *pending; Prioq *prepare; @@ -139,9 +142,12 @@ struct sd_event { bool quit_requested:1; bool need_process_child:1; + bool watchdog:1; pid_t tid; sd_event **default_event_ptr; + + usec_t watchdog_last, watchdog_period; }; static int pending_prioq_compare(const void *a, const void *b) { @@ -323,6 +329,9 @@ static void event_free(sd_event *e) { if (e->monotonic_fd >= 0) close_nointr_nofail(e->monotonic_fd); + if (e->watchdog_fd >= 0) + close_nointr_nofail(e->watchdog_fd); + prioq_free(e->pending); prioq_free(e->prepare); prioq_free(e->monotonic_earliest); @@ -348,7 +357,7 @@ _public_ int sd_event_new(sd_event** ret) { return -ENOMEM; e->n_ref = 1; - e->signal_fd = e->realtime_fd = e->monotonic_fd = e->epoll_fd = -1; + e->signal_fd = e->realtime_fd = e->monotonic_fd = e->watchdog_fd = e->epoll_fd = -1; e->realtime_next = e->monotonic_next = (usec_t) -1; e->original_pid = getpid(); @@ -1422,8 +1431,8 @@ static int event_arm_timer( usec_t t; int r; - assert_se(e); - assert_se(next); + assert(e); + assert(next); a = prioq_peek(earliest); if (!a || a->enabled == SD_EVENT_OFF) { @@ -1462,7 +1471,7 @@ static int event_arm_timer( r = timerfd_settime(timer_fd, TFD_TIMER_ABSTIME, &its, NULL); if (r < 0) - return r; + return -errno; *next = t; return 0; @@ -1484,7 +1493,6 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) { assert(e); assert(fd >= 0); - assert(next); assert_return(events == EPOLLIN, -EIO); @@ -1499,7 +1507,8 @@ static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) { if (ss != sizeof(x)) return -EIO; - *next = (usec_t) -1; + if (next) + *next = (usec_t) -1; return 0; } @@ -1782,6 +1791,43 @@ static sd_event_source* event_next_pending(sd_event *e) { return p; } +static int arm_watchdog(sd_event *e) { + struct itimerspec its = {}; + usec_t t; + int r; + + assert(e); + assert(e->watchdog_fd >= 0); + + t = sleep_between(e, + e->watchdog_last + (e->watchdog_period / 2), + e->watchdog_last + (e->watchdog_period * 3 / 4)); + + timespec_store(&its.it_value, t); + + r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL); + if (r < 0) + return -errno; + + return 0; +} + +static int process_watchdog(sd_event *e) { + assert(e); + + if (!e->watchdog) + return 0; + + /* Don't notify watchdog too often */ + if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic) + return 0; + + sd_notify(false, "WATCHDOG=1"); + e->watchdog_last = e->timestamp.monotonic; + + return arm_watchdog(e); +} + _public_ int sd_event_run(sd_event *e, uint64_t timeout) { struct epoll_event ev_queue[EPOLL_QUEUE_MAX]; sd_event_source *p; @@ -1831,6 +1877,8 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) { r = flush_timer(e, e->realtime_fd, ev_queue[i].events, &e->realtime_next); else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_SIGNAL)) r = process_signal(e, ev_queue[i].events); + else if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG)) + r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL); else r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events); @@ -1838,6 +1886,10 @@ _public_ int sd_event_run(sd_event *e, uint64_t timeout) { goto finish; } + r = process_watchdog(e); + if (r < 0) + goto finish; + r = process_timer(e, e->timestamp.monotonic, e->monotonic_earliest, e->monotonic_latest); if (r < 0) goto finish; @@ -1970,3 +2022,63 @@ _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) { return -ENXIO; } + +_public_ int sd_event_set_watchdog(sd_event *e, int b) { + int r; + + assert_return(e, -EINVAL); + + if (e->watchdog == !!b) + return e->watchdog; + + if (b) { + struct epoll_event ev = {}; + const char *env; + + env = getenv("WATCHDOG_USEC"); + if (!env) + return false; + + r = safe_atou64(env, &e->watchdog_period); + if (r < 0) + return r; + if (e->watchdog_period <= 0) + return -EIO; + + /* Issue first ping immediately */ + sd_notify(false, "WATCHDOG=1"); + e->watchdog_last = now(CLOCK_MONOTONIC); + + e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC); + if (e->watchdog_fd < 0) + return -errno; + + r = arm_watchdog(e); + if (r < 0) + goto fail; + + ev.events = EPOLLIN; + ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG); + + r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev); + if (r < 0) { + r = -errno; + goto fail; + } + + } else { + if (e->watchdog_fd >= 0) { + epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL); + close_nointr_nofail(e->watchdog_fd); + e->watchdog_fd = -1; + } + } + + e->watchdog = !!b; + return e->watchdog; + +fail: + close_nointr_nofail(e->watchdog_fd); + e->watchdog_fd = -1; + return r; +} diff --git a/src/libsystemd-bus/test-event.c b/src/libsystemd-bus/test-event.c index 5360d8758..2b91eb0a7 100644 --- a/src/libsystemd-bus/test-event.c +++ b/src/libsystemd-bus/test-event.c @@ -165,6 +165,8 @@ int main(int argc, char *argv[]) { assert_se(sd_event_default(&e) >= 0); + assert_se(sd_event_set_watchdog(e, true) >= 0); + got_a = false, got_b = false, got_c = false, got_d = 0; /* Add a oneshot handler, trigger it, re-enable it, and trigger diff --git a/src/locale/localed.c b/src/locale/localed.c index abb610d33..7c41822a2 100644 --- a/src/locale/localed.c +++ b/src/locale/localed.c @@ -1137,6 +1137,8 @@ int main(int argc, char *argv[]) { goto finish; } + sd_event_set_watchdog(event, true); + r = connect_bus(&context, event, &bus); if (r < 0) goto finish; diff --git a/src/login/logind.c b/src/login/logind.c index 87d46ee60..5ce79b23c 100644 --- a/src/login/logind.c +++ b/src/login/logind.c @@ -96,6 +96,8 @@ Manager *manager_new(void) { return NULL; } + sd_event_set_watchdog(m->event, true); + return m; } diff --git a/src/machine/machined.c b/src/machine/machined.c index 35b33c301..d63960352 100644 --- a/src/machine/machined.c +++ b/src/machine/machined.c @@ -59,6 +59,8 @@ Manager *manager_new(void) { return NULL; } + sd_event_set_watchdog(m->event, true); + return m; } diff --git a/src/network/networkd-manager.c b/src/network/networkd-manager.c index c8ec23970..d59b9139a 100644 --- a/src/network/networkd-manager.c +++ b/src/network/networkd-manager.c @@ -35,6 +35,8 @@ int manager_new(Manager **ret) { if (r < 0) return r; + sd_event_set_watchdog(m->event, true); + r = sd_rtnl_open(RTMGRP_LINK | RTMGRP_IPV4_IFADDR, &m->rtnl); if (r < 0) return r; diff --git a/src/socket-proxy/socket-proxyd.c b/src/socket-proxy/socket-proxyd.c index 432558d19..c6f56be60 100644 --- a/src/socket-proxy/socket-proxyd.c +++ b/src/socket-proxy/socket-proxyd.c @@ -632,6 +632,8 @@ int main(int argc, char *argv[]) { goto finish; } + sd_event_set_watchdog(event, true); + n = sd_listen_fds(1); if (n < 0) { log_error("Failed to receive sockets from parent."); diff --git a/src/systemd/sd-event.h b/src/systemd/sd-event.h index 63b223c85..3551077f3 100644 --- a/src/systemd/sd-event.h +++ b/src/systemd/sd-event.h @@ -93,6 +93,7 @@ int sd_event_get_quit(sd_event *e); int sd_event_request_quit(sd_event *e); int sd_event_get_now_realtime(sd_event *e, uint64_t *usec); int sd_event_get_now_monotonic(sd_event *e, uint64_t *usec); +int sd_event_set_watchdog(sd_event *e, int b); sd_event_source* sd_event_source_ref(sd_event_source *s); sd_event_source* sd_event_source_unref(sd_event_source *s); diff --git a/src/timedate/timedated.c b/src/timedate/timedated.c index 6d4388c70..af2b0785c 100644 --- a/src/timedate/timedated.c +++ b/src/timedate/timedated.c @@ -836,6 +836,8 @@ int main(int argc, char *argv[]) { goto finish; } + sd_event_set_watchdog(event, true); + r = connect_bus(&context, event, &bus); if (r < 0) goto finish; diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in index 874f6c274..3f5ef75c0 100644 --- a/units/systemd-hostnamed.service.in +++ b/units/systemd-hostnamed.service.in @@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/hostnamed ExecStart=@rootlibexecdir@/systemd-hostnamed BusName=org.freedesktop.hostname1 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE +WatchdogSec=1min diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in index 6818a4c5c..1951123a0 100644 --- a/units/systemd-localed.service.in +++ b/units/systemd-localed.service.in @@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/localed ExecStart=@rootlibexecdir@/systemd-localed BusName=org.freedesktop.locale1 CapabilityBoundingSet= +WatchdogSec=1min diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in index 31b5cd011..90196681f 100644 --- a/units/systemd-logind.service.in +++ b/units/systemd-logind.service.in @@ -19,6 +19,7 @@ Restart=always RestartSec=0 BusName=org.freedesktop.login1 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG +WatchdogSec=1min # Increase the default a bit in order to allow many simultaneous # logins since we keep one fd open per session. diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in index 26bfe0353..a23dca92b 100644 --- a/units/systemd-machined.service.in +++ b/units/systemd-machined.service.in @@ -18,3 +18,4 @@ Restart=always RestartSec=0 BusName=org.freedesktop.machine1 CapabilityBoundingSet=CAP_KILL +WatchdogSec=1min diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in index 066d852e3..95205cdee 100644 --- a/units/systemd-networkd.service.in +++ b/units/systemd-networkd.service.in @@ -17,3 +17,4 @@ Type=notify Restart=always RestartSec=0 ExecStart=@rootlibexecdir@/systemd-networkd +WatchdogSec=1min diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in index dd3eb1b33..f7fb6577c 100644 --- a/units/systemd-timedated.service.in +++ b/units/systemd-timedated.service.in @@ -14,3 +14,4 @@ Documentation=http://www.freedesktop.org/wiki/Software/systemd/timedated ExecStart=@rootlibexecdir@/systemd-timedated BusName=org.freedesktop.timedate1 CapabilityBoundingSet=CAP_SYS_TIME +WatchdogSec=1min -- 2.30.2