chiark / gitweb /
service: process watchdog timeouts with lowest priority
[elogind.git] / src / core / service.c
index e408338b10de04adca84fe22b1e4069cc5274256..3b3f9563808a43b78035e53568cdc7c07b852975 100644 (file)
@@ -123,6 +123,8 @@ static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events,
 static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
 static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void *userdata);
 
+static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
+
 static void service_init(Unit *u) {
         Service *s = SERVICE(u);
 
@@ -242,27 +244,17 @@ static void service_stop_watchdog(Service *s) {
         assert(s);
 
         s->watchdog_event_source = sd_event_source_unref(s->watchdog_event_source);
-        s->watchdog_timestamp = (struct dual_timestamp) { 0, 0 };
+        s->watchdog_timestamp = DUAL_TIMESTAMP_NULL;
 }
 
-static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
-
-static void service_handle_watchdog(Service *s) {
-        usec_t nw;
+static void service_start_watchdog(Service *s) {
         int r;
 
         assert(s);
 
-        if (s->watchdog_usec == 0)
+        if (s->watchdog_usec <= 0)
                 return;
 
-        nw = now(CLOCK_MONOTONIC);
-        if (nw >=  s->watchdog_timestamp.monotonic + s->watchdog_usec) {
-                log_error_unit(UNIT(s)->id, "%s watchdog timeout!", UNIT(s)->id);
-                service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_WATCHDOG);
-                return;
-        }
-
         if (s->watchdog_event_source) {
                 r = sd_event_source_set_time(s->watchdog_event_source, s->watchdog_timestamp.monotonic + s->watchdog_usec);
                 if (r < 0) {
@@ -270,21 +262,28 @@ static void service_handle_watchdog(Service *s) {
                         return;
                 }
 
-                r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
-        } else
+                r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ONESHOT);
+        } else {
                 r = sd_event_add_monotonic(UNIT(s)->manager->event, s->watchdog_timestamp.monotonic + s->watchdog_usec, 0, service_dispatch_watchdog, s, &s->watchdog_event_source);
+                if (r < 0) {
+                        log_warning_unit(UNIT(s)->id, "%s failed to add watchdog timer: %s", UNIT(s)->id, strerror(-r));
+                        return;
+                }
+
+                /* Let's process everything else which might be a sign
+                 * of life before we consider a service dead. */
+                r = sd_event_source_set_priority(s->watchdog_event_source, SD_EVENT_PRIORITY_IDLE);
+        }
 
         if (r < 0)
-                log_warning_unit(UNIT(s)->id,
-                                 "%s failed to install watchdog timer: %s",
-                                 UNIT(s)->id, strerror(-r));
+                log_warning_unit(UNIT(s)->id, "%s failed to install watchdog timer: %s", UNIT(s)->id, strerror(-r));
 }
 
 static void service_reset_watchdog(Service *s) {
         assert(s);
 
         dual_timestamp_get(&s->watchdog_timestamp);
-        service_handle_watchdog(s);
+        service_start_watchdog(s);
 }
 
 static void service_done(Unit *u) {
@@ -304,7 +303,8 @@ static void service_done(Unit *u) {
         s->status_text = NULL;
 
         cgroup_context_done(&s->cgroup_context);
-        exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager));
+        exec_context_done(&s->exec_context);
+        s->exec_runtime = exec_runtime_unref(s->exec_runtime);
         exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
         s->control_command = NULL;
         s->main_command = NULL;
@@ -789,7 +789,7 @@ static int service_load_sysv_path(Service *s, const char *path) {
                                                 goto finish;
                                         }
 
-                                        r = sysv_translate_facility(n, path_get_file_name(path), &m);
+                                        r = sysv_translate_facility(n, basename(path), &m);
                                         free(n);
 
                                         if (r < 0)
@@ -841,7 +841,7 @@ static int service_load_sysv_path(Service *s, const char *path) {
                                                 goto finish;
                                         }
 
-                                        r = sysv_translate_facility(n, path_get_file_name(path), &m);
+                                        r = sysv_translate_facility(n, basename(path), &m);
                                         if (r < 0) {
                                                 log_error_unit(u->id,
                                                                "[%s:%u] Failed to translate LSB dependency %s, ignoring: %s",
@@ -1476,6 +1476,7 @@ static int service_search_main_pid(Service *s) {
 static void service_set_state(Service *s, ServiceState state) {
         ServiceState old_state;
         const UnitActiveState *table;
+
         assert(s);
 
         table = s->type == SERVICE_IDLE ? state_translation_table_idle : state_translation_table;
@@ -1485,62 +1486,43 @@ static void service_set_state(Service *s, ServiceState state) {
 
         service_unwatch_pid_file(s);
 
-        if (state != SERVICE_START_PRE &&
-            state != SERVICE_START &&
-            state != SERVICE_START_POST &&
-            state != SERVICE_RELOAD &&
-            state != SERVICE_STOP &&
-            state != SERVICE_STOP_SIGTERM &&
-            state != SERVICE_STOP_SIGKILL &&
-            state != SERVICE_STOP_POST &&
-            state != SERVICE_FINAL_SIGTERM &&
-            state != SERVICE_FINAL_SIGKILL &&
-            state != SERVICE_AUTO_RESTART)
+        if (!IN_SET(state,
+                    SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+                    SERVICE_RELOAD,
+                    SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL,
+                    SERVICE_AUTO_RESTART))
                 s->timer_event_source = sd_event_source_unref(s->timer_event_source);
 
-        if (state != SERVICE_START &&
-            state != SERVICE_START_POST &&
-            state != SERVICE_RUNNING &&
-            state != SERVICE_RELOAD &&
-            state != SERVICE_STOP &&
-            state != SERVICE_STOP_SIGTERM &&
-            state != SERVICE_STOP_SIGKILL) {
+        if (!IN_SET(state,
+                    SERVICE_START, SERVICE_START_POST,
+                    SERVICE_RUNNING, SERVICE_RELOAD,
+                    SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL)) {
                 service_unwatch_main_pid(s);
                 s->main_command = NULL;
         }
 
-        if (state != SERVICE_START_PRE &&
-            state != SERVICE_START &&
-            state != SERVICE_START_POST &&
-            state != SERVICE_RELOAD &&
-            state != SERVICE_STOP &&
-            state != SERVICE_STOP_SIGTERM &&
-            state != SERVICE_STOP_SIGKILL &&
-            state != SERVICE_STOP_POST &&
-            state != SERVICE_FINAL_SIGTERM &&
-            state != SERVICE_FINAL_SIGKILL) {
+        if (!IN_SET(state,
+                    SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+                    SERVICE_RELOAD,
+                    SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL)) {
                 service_unwatch_control_pid(s);
                 s->control_command = NULL;
                 s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
         }
 
-        if (state != SERVICE_START_PRE &&
-            state != SERVICE_START &&
-            state != SERVICE_START_POST &&
-            state != SERVICE_RUNNING &&
-            state != SERVICE_RELOAD &&
-            state != SERVICE_STOP &&
-            state != SERVICE_STOP_SIGTERM &&
-            state != SERVICE_STOP_SIGKILL &&
-            state != SERVICE_STOP_POST &&
-            state != SERVICE_FINAL_SIGTERM &&
-            state != SERVICE_FINAL_SIGKILL &&
+        if (!IN_SET(state,
+                    SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
+                    SERVICE_RUNNING, SERVICE_RELOAD,
+                    SERVICE_STOP, SERVICE_STOP_SIGTERM, SERVICE_STOP_SIGKILL, SERVICE_STOP_POST,
+                    SERVICE_FINAL_SIGTERM, SERVICE_FINAL_SIGKILL) &&
             !(state == SERVICE_DEAD && UNIT(s)->job)) {
                 service_close_socket_fd(s);
                 service_connection_unref(s);
         }
 
-        if (state == SERVICE_STOP || state == SERVICE_STOP_SIGTERM)
+        if (!IN_SET(state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
                 service_stop_watchdog(s);
 
         /* For the inactive states unit_notify() will trim the cgroup,
@@ -1565,10 +1547,7 @@ static void service_set_state(Service *s, ServiceState state) {
         }
 
         if (old_state != state)
-                log_debug_unit(UNIT(s)->id,
-                               "%s changed %s -> %s", UNIT(s)->id,
-                               service_state_to_string(old_state),
-                               service_state_to_string(state));
+                log_debug_unit(UNIT(s)->id, "%s changed %s -> %s", UNIT(s)->id, service_state_to_string(old_state), service_state_to_string(state));
 
         unit_notify(UNIT(s), table[old_state], table[state], s->reload_result == SERVICE_SUCCESS);
         s->reload_result = SERVICE_SUCCESS;
@@ -1649,9 +1628,8 @@ static int service_coldplug(Unit *u) {
                                         return r;
                         }
 
-                if (s->deserialized_state == SERVICE_START_POST ||
-                    s->deserialized_state == SERVICE_RUNNING)
-                        service_handle_watchdog(s);
+                if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
+                        service_start_watchdog(s);
 
                 service_set_state(s, s->deserialized_state);
         }
@@ -1751,6 +1729,10 @@ static int service_spawn(
 
         unit_realize_cgroup(UNIT(s));
 
+        r = unit_setup_exec_runtime(UNIT(s));
+        if (r < 0)
+                goto fail;
+
         if (pass_fds ||
             s->exec_context.std_input == EXEC_INPUT_SOCKET ||
             s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
@@ -1834,6 +1816,7 @@ static int service_spawn(
                        path,
                        UNIT(s)->id,
                        s->type == SERVICE_IDLE ? UNIT(s)->manager->idle_pipe : NULL,
+                       s->exec_runtime,
                        &pid);
         if (r < 0)
                 goto fail;
@@ -1932,7 +1915,8 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         s->forbid_restart = false;
 
         /* we want fresh tmpdirs in case service is started again immediately */
-        exec_context_tmp_dirs_done(&s->exec_context);
+        exec_runtime_destroy(s->exec_runtime);
+        s->exec_runtime = exec_runtime_unref(s->exec_runtime);
 
         /* Try to delete the pid file. At this point it will be
          * out-of-date, and some software might be confused by it, so
@@ -2095,9 +2079,7 @@ static void service_enter_start_post(Service *s) {
         assert(s);
 
         service_unwatch_control_pid(s);
-
-        if (s->watchdog_usec > 0)
-                service_reset_watchdog(s);
+        service_reset_watchdog(s);
 
         s->control_command = s->exec_command[SERVICE_EXEC_START_POST];
         if (s->control_command) {
@@ -2639,14 +2621,7 @@ static int service_serialize(Unit *u, FILE *f, FDSet *fds) {
                 }
         }
         if (dual_timestamp_is_set(&s->watchdog_timestamp))
-                dual_timestamp_serialize(f, "watchdog-timestamp",
-                                         &s->watchdog_timestamp);
-
-        if (s->exec_context.tmp_dir)
-                unit_serialize_item(u, f, "tmp-dir", s->exec_context.tmp_dir);
-
-        if (s->exec_context.var_tmp_dir)
-                unit_serialize_item(u, f, "var-tmp-dir", s->exec_context.var_tmp_dir);
+                dual_timestamp_serialize(f, "watchdog-timestamp", &s->watchdog_timestamp);
 
         if (s->forbid_restart)
                 unit_serialize_item(u, f, "forbid-restart", yes_no(s->forbid_restart));
@@ -2771,23 +2746,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
                 dual_timestamp_deserialize(value, &s->main_exec_status.exit_timestamp);
         else if (streq(key, "watchdog-timestamp"))
                 dual_timestamp_deserialize(value, &s->watchdog_timestamp);
-        else if (streq(key, "tmp-dir")) {
-                char *t;
-
-                t = strdup(value);
-                if (!t)
-                        return log_oom();
-
-                s->exec_context.tmp_dir = t;
-        } else if (streq(key, "var-tmp-dir")) {
-                char *t;
-
-                t = strdup(value);
-                if (!t)
-                        return log_oom();
-
-                s->exec_context.var_tmp_dir = t;
-        } else if (streq(key, "forbid-restart")) {
+        else if (streq(key, "forbid-restart")) {
                 int b;
 
                 b = parse_boolean(value);
@@ -2842,7 +2801,7 @@ _pure_ static bool service_check_snapshot(Unit *u) {
 
         assert(s);
 
-        return !s->got_socket_fd;
+        return (s->socket_fd < 0);
 }
 
 static int service_retry_pid_file(Service *s) {
@@ -2916,7 +2875,12 @@ static int service_demand_pid_file(Service *s) {
 }
 
 static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events, void *userdata) {
-        Service *s = SERVICE(userdata);
+        PathSpec *p = userdata;
+        Service *s;
+
+        assert(p);
+
+        s = SERVICE(p->unit);
 
         assert(s);
         assert(fd >= 0);
@@ -2926,7 +2890,7 @@ static int service_dispatch_io(sd_event_source *source, int fd, uint32_t events,
 
         log_debug_unit(UNIT(s)->id, "inotify event for %s", UNIT(s)->id);
 
-        if (path_spec_fd_event(s->pid_file_pathspec, events) < 0)
+        if (path_spec_fd_event(p, events) < 0)
                 goto fail;
 
         if (service_retry_pid_file(s) == 0)
@@ -3315,7 +3279,9 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
         assert(s);
         assert(source == s->watchdog_event_source);
 
-        service_handle_watchdog(s);
+        log_error_unit(UNIT(s)->id, "%s watchdog timeout!", UNIT(s)->id);
+        service_enter_signal(s, SERVICE_STOP_SIGTERM, SERVICE_FAILURE_WATCHDOG);
+
         return 0;
 }
 
@@ -3456,11 +3422,10 @@ static void service_notify_message(Unit *u, pid_t pid, char **tags) {
                 }
 
         }
+
         if (strv_find(tags, "WATCHDOG=1")) {
-                log_debug_unit(u->id,
-                               "%s: got WATCHDOG=1", u->id);
-                if (dual_timestamp_is_set(&s->watchdog_timestamp))
-                        service_reset_watchdog(s);
+                log_debug_unit(u->id, "%s: got WATCHDOG=1", u->id);
+                service_reset_watchdog(s);
         }
 
         /* Notify clients about changed status or main pid */
@@ -3502,7 +3467,7 @@ static int service_enumerate(Manager *m) {
                         d = opendir(path);
                         if (!d) {
                                 if (errno != ENOENT)
-                                        log_warning("opendir(%s) failed: %s", path, strerror(errno));
+                                        log_warning("opendir(%s) failed: %m", path);
 
                                 continue;
                         }
@@ -3535,7 +3500,7 @@ static int service_enumerate(Manager *m) {
                                 if (access(fpath, X_OK) < 0) {
 
                                         if (errno != ENOENT)
-                                                log_warning("access() failed on %s: %s", fpath, strerror(errno));
+                                                log_warning("access() failed on %s: %m", fpath);
 
                                         continue;
                                 }
@@ -3683,11 +3648,14 @@ static void service_bus_name_owner_change(
                     s->state == SERVICE_RUNNING ||
                     s->state == SERVICE_RELOAD)) {
 
+                _cleanup_bus_creds_unref_ sd_bus_creds *creds = NULL;
                 pid_t pid;
 
                 /* Try to acquire PID from bus service */
 
-                r = sd_bus_get_owner_pid(u->manager->api_bus, name, &pid);
+                r = sd_bus_get_owner(u->manager->api_bus, name, SD_BUS_CREDS_PID, &creds);
+                if (r >= 0)
+                        r = sd_bus_creds_get_pid(creds, &pid);
                 if (r >= 0) {
                         log_debug_unit(u->id, "%s's D-Bus name %s is now owned by process %u", u->id, name, (unsigned) pid);
 
@@ -3716,7 +3684,6 @@ int service_set_socket_fd(Service *s, int fd, Socket *sock) {
                 return -EAGAIN;
 
         s->socket_fd = fd;
-        s->got_socket_fd = true;
 
         unit_ref_set(&s->accept_socket, UNIT(sock));
 
@@ -3830,6 +3797,7 @@ const UnitVTable service_vtable = {
         .exec_context_offset = offsetof(Service, exec_context),
         .cgroup_context_offset = offsetof(Service, cgroup_context),
         .kill_context_offset = offsetof(Service, kill_context),
+        .exec_runtime_offset = offsetof(Service, exec_runtime),
 
         .sections =
                 "Unit\0"