From 09812eb764b440651f3ff4cb5d37bd343f800560 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 22 Dec 2013 22:14:05 +0100 Subject: [PATCH 1/1] sd-daemon: introduce sd_watchdog_enabled() for parsing $WATCHDOG_USEC Also, introduce a new environment variable named $WATCHDOG_PID which contains the PID of the process that is supposed to send the keep-alive events. This is similar to how $LISTEN_FDS and $LISTEN_PID work together, and protects against confusing processes further down the process tree due to inherited environment. --- Makefile-man.am | 2 + man/sd-daemon.xml | 1 + man/sd_notify.xml | 17 +- man/sd_watchdog_enabled.xml | 198 ++++++++++++++++++++ src/core/execute.c | 18 +- src/core/execute.h | 1 + src/core/mount.c | 1 + src/core/service.c | 9 +- src/core/socket.c | 1 + src/core/swap.c | 1 + src/libsystemd-bus/sd-event.c | 11 +- src/libsystemd-daemon/libsystemd-daemon.sym | 5 + src/libsystemd-daemon/sd-daemon.c | 66 +++++++ src/systemd/sd-daemon.h | 21 ++- 14 files changed, 326 insertions(+), 26 deletions(-) create mode 100644 man/sd_watchdog_enabled.xml diff --git a/Makefile-man.am b/Makefile-man.am index 968de7da6..df8860a3f 100644 --- a/Makefile-man.am +++ b/Makefile-man.am @@ -41,6 +41,7 @@ MANPAGES += \ man/sd_journal_stream_fd.3 \ man/sd_listen_fds.3 \ man/sd_notify.3 \ + man/sd_watchdog_enabled.3 \ man/shutdown.8 \ man/sysctl.d.5 \ man/systemctl.1 \ @@ -1133,6 +1134,7 @@ EXTRA_DIST += \ man/sd_seat_get_active.xml \ man/sd_session_is_active.xml \ man/sd_uid_get_state.xml \ + man/sd_watchdog_enabled.xml \ man/shutdown.xml \ man/sysctl.d.xml \ man/systemctl.xml \ diff --git a/man/sd-daemon.xml b/man/sd-daemon.xml index 6e804e1a6..74011123b 100644 --- a/man/sd-daemon.xml +++ b/man/sd-daemon.xml @@ -167,6 +167,7 @@ sd_notify3, sd_booted3, sd_is_fifo3, + sd_watchdog_enabled3, daemon7, systemd.service5, systemd.socket5, diff --git a/man/sd_notify.xml b/man/sd_notify.xml index 55965ffce..683967cd4 100644 --- a/man/sd_notify.xml +++ b/man/sd_notify.xml 
@@ -164,11 +164,15 @@ systemd.service5 for details. It is recommended to send this message if the - WATCHDOG_USEC= - environment variable has been set for - the service process, in every half the - time interval that is specified in the - variable. + $WATCHDOG_PID + environment variable has been set to + the PID of the service process, in + every half the time interval that is + specified in the + $WATCHDOG_USEC + environment variable. See + sd_watchdog_enabled3 + for details. @@ -311,7 +315,8 @@ systemd1, sd-daemon3, daemon7, - systemd.service5 + systemd.service5, + sd_watchdog_enabled3 diff --git a/man/sd_watchdog_enabled.xml b/man/sd_watchdog_enabled.xml new file mode 100644 index 000000000..e42ae430e --- /dev/null +++ b/man/sd_watchdog_enabled.xml @@ -0,0 +1,198 @@ + + + + + + + + + sd_watchdog_enabled + systemd + + + + Developer + Lennart + Poettering + lennart@poettering.net + + + + + + sd_watchdog_enabled + 3 + + + + sd_watchdog_enabled + Check whether the service manager expects watchdog keep-alive notifications from a service + + + + + #include <systemd/sd-daemon.h> + + + int sd_watchdog_enabled + int unset_environment + uint64_t *usec + + + + + + Description + sd_watchdog_enabled() may + be called by a service to detect whether the service + manager expects regular keep-alive watchdog + notification events from it, and the timeout after + which the manager will act on the service if it did + not get such a notification. + + If the unset_environment + parameter is non-zero, + sd_watchdog_enabled() will unset + the $WATCHDOG_USEC and + $WATCHDOG_PID environment variables + before returning (regardless of whether the function call + itself succeeded or not). Further calls to + sd_watchdog_enabled() will then + return with zero, and the variables are no longer + inherited by child processes. + + If the usec parameter is + non-NULL sd_watchdog_enabled() + will return the timeout in µs for the watchdog + logic. 
The service manager will usually terminate a + service when it did not get a notification message + within the specified time after startup and after each + previous message. It is recommended that a daemon + sends a keep-alive notification message to the service + manager every half of the time returned + here. Notification messages may be sent with + sd_notify3 + with a message string of + WATCHDOG=1. + + To enable service supervision with the watchdog + logic use WatchdogSec= in service + files. See + systemd.service5 + for details. + + + + Return Value + + On failure, this call returns a negative + errno-style error code. If the service manager expects + watchdog keep-alive notification messages to be sent, + > 0 is returned, otherwise 0 is returned. Only if + the return value is > 0 the + usec parameter is valid after + the call. + + + + Notes + + This function is provided by the reference + implementation of APIs for new-style daemons and + distributed with the systemd package. The algorithm + it implements is simple, and can easily be + reimplemented in daemons if it is important to support + this interface without using the reference + implementation. + + Internally, this function parses the + $WATCHDOG_PID and + $WATCHDOG_USEC environment + variables. The call will ignore these variables if + $WATCHDOG_PID does not contain the PID + of the current process, under the assumption that in + that case the variables were set for a different + process further up the process tree. + + For details about the algorithm check the + liberally licensed reference implementation sources: + + and + + sd_watchdog_enabled() is + implemented in the reference implementation's + sd-daemon.c and + sd-daemon.h files. These + interfaces are available as a shared library, which can + be compiled and linked to with the + libsystemd-daemon pkg-config1 + file. Alternatively, applications consuming these APIs + may copy the implementation into their source + tree. 
For more details about the reference + implementation see + sd-daemon3. + + If the reference implementation is used as + drop-in files and -DDISABLE_SYSTEMD is set during + compilation, these functions will always return 0 and + otherwise become a NOP. + + + + Environment + + + + $WATCHDOG_PID + + Set by the system + manager for supervised process for + which watchdog support is enabled, and + contains the PID of that process. See + above for details. + + + + $WATCHDOG_USEC + + Set by the system + manager for supervised process for + which watchdog support is enabled, and + contains the watchdog timeout in µs. + See above for + details. + + + + + + See Also + + systemd1, + sd-daemon3, + daemon7, + systemd.service5, + sd_notify3 + + + + diff --git a/src/core/execute.c b/src/core/execute.c index 426571789..9b33ec0ec 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1020,6 +1020,7 @@ static void do_idle_pipe_dance(int idle_pipe[4]) { static int build_environment( ExecContext *c, unsigned n_fds, + usec_t watchdog_usec, const char *home, const char *username, const char *shell, @@ -1032,7 +1033,7 @@ static int build_environment( assert(c); assert(ret); - our_env = new(char*, 8); + our_env = new0(char*, 10); if (!our_env) return -ENOMEM; @@ -1046,6 +1047,16 @@ static int build_environment( our_env[n_env++] = x; } + if (watchdog_usec > 0) { + if (asprintf(&x, "WATCHDOG_PID=%lu", (unsigned long) getpid()) < 0) + return -ENOMEM; + our_env[n_env++] = x; + + if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0) + return -ENOMEM; + our_env[n_env++] = x; + } + if (home) { x = strappend("HOME=", home); if (!x) @@ -1084,7 +1095,7 @@ static int build_environment( } our_env[n_env++] = NULL; - assert(n_env <= 8); + assert(n_env <= 10); *ret = our_env; our_env = NULL; @@ -1104,6 +1115,7 @@ int exec_spawn(ExecCommand *command, CGroupControllerMask cgroup_supported, const char *cgroup_path, const char *unit_id, + usec_t watchdog_usec, int 
idle_pipe[4], ExecRuntime *runtime, pid_t *ret) { @@ -1560,7 +1572,7 @@ int exec_spawn(ExecCommand *command, } } - err = build_environment(context, n_fds, home, username, shell, &our_env); + err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env); if (r < 0) { r = EXIT_MEMORY; goto fail_child; diff --git a/src/core/execute.h b/src/core/execute.h index bd3db0b63..989373f48 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -181,6 +181,7 @@ int exec_spawn(ExecCommand *command, CGroupControllerMask cgroup_mask, const char *cgroup_path, const char *unit_id, + usec_t watchdog_usec, int pipe_fd[2], ExecRuntime *runtime, pid_t *ret); diff --git a/src/core/mount.c b/src/core/mount.c index 2b7934e40..09efa1b6e 100644 --- a/src/core/mount.c +++ b/src/core/mount.c @@ -787,6 +787,7 @@ static int mount_spawn(Mount *m, ExecCommand *c, pid_t *_pid) { UNIT(m)->manager->cgroup_supported, UNIT(m)->cgroup_path, UNIT(m)->id, + 0, NULL, m->exec_runtime, &pid); diff --git a/src/core/service.c b/src/core/service.c index 87eaa2937..4eb3d9e66 100644 --- a/src/core/service.c +++ b/src/core/service.c @@ -1750,7 +1750,7 @@ static int service_spawn( if (r < 0) goto fail; - our_env = new0(char*, 5); + our_env = new0(char*, 4); if (!our_env) { r = -ENOMEM; goto fail; @@ -1768,12 +1768,6 @@ static int service_spawn( goto fail; } - if (s->watchdog_usec > 0) - if (asprintf(our_env + n_env++, "WATCHDOG_USEC=%llu", (unsigned long long) s->watchdog_usec) < 0) { - r = -ENOMEM; - goto fail; - } - if (UNIT(s)->manager->running_as != SYSTEMD_SYSTEM) if (asprintf(our_env + n_env++, "MANAGERPID=%lu", (unsigned long) getpid()) < 0) { r = -ENOMEM; @@ -1804,6 +1798,7 @@ static int service_spawn( UNIT(s)->manager->cgroup_supported, path, UNIT(s)->id, + s->watchdog_usec, s->type == SERVICE_IDLE ? 
UNIT(s)->manager->idle_pipe : NULL, s->exec_runtime, &pid); diff --git a/src/core/socket.c b/src/core/socket.c index 244222135..31fc2a252 100644 --- a/src/core/socket.c +++ b/src/core/socket.c @@ -1254,6 +1254,7 @@ static int socket_spawn(Socket *s, ExecCommand *c, pid_t *_pid) { UNIT(s)->manager->cgroup_supported, UNIT(s)->cgroup_path, UNIT(s)->id, + 0, NULL, s->exec_runtime, &pid); diff --git a/src/core/swap.c b/src/core/swap.c index 79862e5b8..e0627db96 100644 --- a/src/core/swap.c +++ b/src/core/swap.c @@ -645,6 +645,7 @@ static int swap_spawn(Swap *s, ExecCommand *c, pid_t *_pid) { UNIT(s)->manager->cgroup_supported, UNIT(s)->cgroup_path, UNIT(s)->id, + 0, NULL, s->exec_runtime, &pid); diff --git a/src/libsystemd-bus/sd-event.c b/src/libsystemd-bus/sd-event.c index a1baac52a..bfc798ca6 100644 --- a/src/libsystemd-bus/sd-event.c +++ b/src/libsystemd-bus/sd-event.c @@ -2164,17 +2164,10 @@ _public_ int sd_event_set_watchdog(sd_event *e, int b) { if (b) { struct epoll_event ev = {}; - const char *env; - env = getenv("WATCHDOG_USEC"); - if (!env) - return false; - - r = safe_atou64(env, &e->watchdog_period); - if (r < 0) + r = sd_watchdog_enabled(false, &e->watchdog_period); + if (r <= 0) return r; - if (e->watchdog_period <= 0) - return -EIO; /* Issue first ping immediately */ sd_notify(false, "WATCHDOG=1"); diff --git a/src/libsystemd-daemon/libsystemd-daemon.sym b/src/libsystemd-daemon/libsystemd-daemon.sym index f44023893..aa9be51c6 100644 --- a/src/libsystemd-daemon/libsystemd-daemon.sym +++ b/src/libsystemd-daemon/libsystemd-daemon.sym @@ -25,3 +25,8 @@ global: local: *; }; + +LIBSYSTEMD_DAEMON_209 { +global: + sd_watchdog_enabled; +} LIBSYSTEMD_DAEMON_31; diff --git a/src/libsystemd-daemon/sd-daemon.c b/src/libsystemd-daemon/sd-daemon.c index 485b30102..94230c9ed 100644 --- a/src/libsystemd-daemon/sd-daemon.c +++ b/src/libsystemd-daemon/sd-daemon.c @@ -518,3 +518,69 @@ _sd_export_ int sd_booted(void) { return !!S_ISDIR(st.st_mode); #endif } + +_sd_export_ 
int sd_watchdog_enabled(int unset_environment, uint64_t *usec) { + +#if defined(DISABLE_SYSTEMD) || !defined(__linux__) + return 0; +#else + unsigned long long ll; + unsigned long l; + const char *e; + char *p = NULL; + int r; + + e = getenv("WATCHDOG_PID"); + if (!e) { + r = 0; + goto finish; + } + + errno = 0; + l = strtoul(e, &p, 10); + if (errno > 0) { + r = -errno; + goto finish; + } + if (!p || p == e || *p || l <= 0) { + r = -EINVAL; + goto finish; + } + + /* Is this for us? */ + if (getpid() != (pid_t) l) { + r = 0; + goto finish; + } + + e = getenv("WATCHDOG_USEC"); + if (!e) { + r = -EINVAL; + goto finish; + } + + errno = 0; + ll = strtoull(e, &p, 10); + if (errno > 0) { + r = -errno; + goto finish; + } + if (!p || p == e || *p || ll <= 0) { /* validate the timeout just parsed (ll), not the PID (l) */ + r = -EINVAL; + goto finish; + } + + if (usec) + *usec = ll; + + r = 1; + +finish: + if (unset_environment) { + unsetenv("WATCHDOG_PID"); + unsetenv("WATCHDOG_USEC"); + } + + return r; +#endif +} diff --git a/src/systemd/sd-daemon.h b/src/systemd/sd-daemon.h index daa3f4c85..43deb8cc7 100644 --- a/src/systemd/sd-daemon.h +++ b/src/systemd/sd-daemon.h @@ -186,6 +186,8 @@ int sd_is_socket_unix(int fd, int type, int listening, const char *path, size_t the file descriptor is a POSIX Message Queue of the specified name, 0 otherwise. If path is NULL a message queue name check is not done. Returns a negative errno style error code on failure. + + See sd_is_mq(3) for more information. */ int sd_is_mq(int fd, const char *path); @@ -220,7 +222,8 @@ int sd_is_mq(int fd, const char *path); WATCHDOG=1 Tells systemd to update the watchdog timestamp. Services using this feature should do this in regular intervals. A watchdog framework can use the - timestamps to detect failed services. + timestamps to detect failed services. Also see + sd_watchdog_enabled() below. Daemons can choose to send additional variables. However, it is recommended to prefix variable names not listed above with X_. 
@@ -275,6 +278,22 @@ int sd_notifyf(int unset_environment, const char *format, ...) _sd_printf_attr_( */ int sd_booted(void); +/* + Returns > 0 if the service manager expects watchdog keep-alive + events to be sent regularly via sd_notify(0, "WATCHDOG=1"). Returns + 0 if it does not expect this. If the usec argument is non-NULL + returns the watchdog timeout in µs after which the service manager + will act on a process that has not sent a watchdog keep alive + message. This function is useful to implement services that + recognize automatically if they are being run under supervision of + systemd with WatchdogSec= set. It is recommended for clients to + generate keep-alive pings via sd_notify(0, "WATCHDOG=1") every half + of the returned time. + + See sd_watchdog_enabled(3) for more information. +*/ +int sd_watchdog_enabled(int unset_environment, uint64_t *usec); + #ifdef __cplusplus } #endif -- 2.30.2