chiark / gitweb /
service: Implement 'on-watchdog' restart option
[elogind.git] / src / core / service.c
index 5803f798e0f702ca463d77029cec4d75585d25cb..08b929e4fe7af4b5ff4763c7177b9e13ef9f0460 100644 (file)
@@ -141,6 +141,7 @@ static void service_init(Unit *u) {
 
         exec_context_init(&s->exec_context);
         kill_context_init(&s->kill_context);
+        cgroup_context_init(&s->cgroup_context);
 
         RATELIMIT_INIT(s->start_limit, 10*USEC_PER_SEC, 5);
 
@@ -190,6 +191,8 @@ static int service_set_main_pid(Service *s, pid_t pid) {
         if (pid == getpid())
                 return -EINVAL;
 
+        service_unwatch_main_pid(s);
+
         s->main_pid = pid;
         s->main_pid_known = true;
 
@@ -220,7 +223,7 @@ static void service_close_socket_fd(Service *s) {
 static void service_connection_unref(Service *s) {
         assert(s);
 
-        if (!UNIT_DEREF(s->accept_socket))
+        if (!UNIT_ISSET(s->accept_socket))
                 return;
 
         socket_connection_unref(SOCKET(UNIT_DEREF(s->accept_socket)));
@@ -235,7 +238,7 @@ static void service_stop_watchdog(Service *s) {
         s->watchdog_timestamp.monotonic = 0;
 }
 
-static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart);
+static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
 
 static void service_handle_watchdog(Service *s) {
         usec_t offset;
@@ -249,7 +252,7 @@ static void service_handle_watchdog(Service *s) {
         offset = now(CLOCK_MONOTONIC) - s->watchdog_timestamp.monotonic;
         if (offset >= s->watchdog_usec) {
                 log_error_unit(UNIT(s)->id, "%s watchdog timeout!", UNIT(s)->id);
-                service_enter_dead(s, SERVICE_FAILURE_WATCHDOG, true);
+                service_enter_signal(s, SERVICE_STOP_SIGKILL, SERVICE_FAILURE_WATCHDOG);
                 return;
         }
 
@@ -283,6 +286,7 @@ static void service_done(Unit *u) {
         free(s->status_text);
         s->status_text = NULL;
 
+        cgroup_context_done(&s->cgroup_context);
         exec_context_done(&s->exec_context, manager_is_reloading_or_reexecuting(u->manager));
         exec_command_free_array(s->exec_command, _SERVICE_EXEC_COMMAND_MAX);
         s->control_command = NULL;
@@ -981,7 +985,7 @@ static int service_load_sysv_name(Service *s, const char *name) {
         assert(s);
         assert(name);
 
-       /* For SysV services we strip the *.sh suffixes. */
+        /* For SysV services we strip the *.sh suffixes. */
         if (endswith(name, ".sh.service"))
                 return -ENOENT;
 
@@ -1109,6 +1113,12 @@ static int service_verify(Service *s) {
                 return -EINVAL;
         }
 
+        if (s->type == SERVICE_ONESHOT && s->restart != SERVICE_RESTART_NO) {
+                log_error_unit(UNIT(s)->id,
+                                "%s has Restart setting other than no, which isn't allowed for Type=oneshot services. Refusing.", UNIT(s)->id);
+                return -EINVAL;
+        }
+
         if (s->type == SERVICE_DBUS && !s->bus_name) {
                 log_error_unit(UNIT(s)->id,
                                "%s is of type D-Bus but no D-Bus service name has been specified. Refusing.", UNIT(s)->id);
@@ -1191,27 +1201,32 @@ static int service_load(Unit *u) {
         assert(s);
 
         /* Load a .service file */
-        if ((r = unit_load_fragment(u)) < 0)
+        r = unit_load_fragment(u);
+        if (r < 0)
                 return r;
 
 #ifdef HAVE_SYSV_COMPAT
         /* Load a classic init script as a fallback, if we couldn't find anything */
-        if (u->load_state == UNIT_STUB)
-                if ((r = service_load_sysv(s)) < 0)
+        if (u->load_state == UNIT_STUB) {
+                r = service_load_sysv(s);
+                if (r < 0)
                         return r;
+        }
 #endif
 
         /* Still nothing found? Then let's give up */
         if (u->load_state == UNIT_STUB)
                 return -ENOENT;
 
-        /* We were able to load something, then let's add in the
-         * dropin directories. */
-        if ((r = unit_load_dropin(unit_follow_merge(u))) < 0)
-                return r;
-
         /* This is a new unit? Then let's add in some extras */
         if (u->load_state == UNIT_LOADED) {
+
+                /* We were able to load something, then let's add in
+                 * the dropin directories. */
+                r = unit_load_dropin(u);
+                if (r < 0)
+                        return r;
+
                 if (s->type == _SERVICE_TYPE_INVALID)
                         s->type = s->bus_name ? SERVICE_DBUS : SERVICE_SIMPLE;
 
@@ -1225,7 +1240,7 @@ static int service_load(Unit *u) {
                 if (r < 0)
                         return r;
 
-                r = unit_add_default_cgroups(u);
+                r = unit_add_default_slice(u);
                 if (r < 0)
                         return r;
 
@@ -1453,7 +1468,7 @@ static int service_search_main_pid(Service *s) {
 
         assert(s->main_pid <= 0);
 
-        pid = cgroup_bonding_search_main_pid_list(UNIT(s)->cgroup_bondings);
+        pid = unit_search_main_pid(UNIT(s));
         if (pid <= 0)
                 return -ENOENT;
 
@@ -1578,7 +1593,7 @@ static void service_set_state(Service *s, ServiceState state) {
         /* For the inactive states unit_notify() will trim the cgroup,
          * but for exit we have to do that ourselves... */
         if (state == SERVICE_EXITED && UNIT(s)->manager->n_reloading <= 0)
-                cgroup_bonding_trim_list(UNIT(s)->cgroup_bondings, true);
+                unit_destroy_cgroup(UNIT(s));
 
         if (old_state != state)
                 log_debug_unit(UNIT(s)->id,
@@ -1610,6 +1625,7 @@ static int service_coldplug(Unit *u) {
                     s->deserialized_state == SERVICE_FINAL_SIGTERM ||
                     s->deserialized_state == SERVICE_FINAL_SIGKILL ||
                     s->deserialized_state == SERVICE_AUTO_RESTART) {
+
                         if (s->deserialized_state == SERVICE_AUTO_RESTART || s->timeout_start_usec > 0) {
                                 usec_t k;
 
@@ -1747,11 +1763,14 @@ static int service_spawn(
         unsigned n_fds = 0, n_env = 0;
         _cleanup_strv_free_ char
                 **argv = NULL, **final_env = NULL, **our_env = NULL;
+        const char *path;
 
         assert(s);
         assert(c);
         assert(_pid);
 
+        unit_realize_cgroup(UNIT(s));
+
         if (pass_fds ||
             s->exec_context.std_input == EXEC_INPUT_SOCKET ||
             s->exec_context.std_output == EXEC_OUTPUT_SOCKET ||
@@ -1807,7 +1826,7 @@ static int service_spawn(
                         goto fail;
                 }
 
-        if (s->meta.manager->running_as != SYSTEMD_SYSTEM)
+        if (UNIT(s)->manager->running_as != SYSTEMD_SYSTEM)
                 if (asprintf(our_env + n_env++, "MANAGERPID=%lu", (unsigned long) getpid()) < 0) {
                         r = -ENOMEM;
                         goto fail;
@@ -1819,6 +1838,12 @@ static int service_spawn(
                 goto fail;
         }
 
+        if (is_control && UNIT(s)->cgroup_path) {
+                path = strappenda(UNIT(s)->cgroup_path, "/control");
+                cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
+        } else
+                path = UNIT(s)->cgroup_path;
+
         r = exec_spawn(c,
                        argv,
                        &s->exec_context,
@@ -1828,9 +1853,8 @@ static int service_spawn(
                        apply_chroot,
                        apply_tty_stdin,
                        UNIT(s)->manager->confirm_spawn,
-                       UNIT(s)->cgroup_bondings,
-                       UNIT(s)->cgroup_attributes,
-                       is_control ? "control" : NULL,
+                       UNIT(s)->cgroup_mask,
+                       path,
                        UNIT(s)->id,
                        s->type == SERVICE_IDLE ? UNIT(s)->manager->idle_pipe : NULL,
                        &pid);
@@ -1865,7 +1889,7 @@ static int main_pid_good(Service *s) {
 
                 /* If it's an alien child let's check if it is still
                  * alive ... */
-                if (s->main_pid_alien)
+                if (s->main_pid_alien && s->main_pid > 0)
                         return kill(s->main_pid, 0) >= 0 || errno != ESRCH;
 
                 /* .. otherwise assume we'll get a SIGCHLD for it,
@@ -1878,7 +1902,7 @@ static int main_pid_good(Service *s) {
         return -EAGAIN;
 }
 
-static int control_pid_good(Service *s) {
+_pure_ static int control_pid_good(Service *s) {
         assert(s);
 
         return s->control_pid > 0;
@@ -1889,7 +1913,10 @@ static int cgroup_good(Service *s) {
 
         assert(s);
 
-        r = cgroup_bonding_is_empty_list(UNIT(s)->cgroup_bondings);
+        if (!UNIT(s)->cgroup_path)
+                return 0;
+
+        r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true);
         if (r < 0)
                 return r;
 
@@ -1910,6 +1937,7 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
             (s->restart == SERVICE_RESTART_ALWAYS ||
              (s->restart == SERVICE_RESTART_ON_SUCCESS && s->result == SERVICE_SUCCESS) ||
              (s->restart == SERVICE_RESTART_ON_FAILURE && s->result != SERVICE_SUCCESS) ||
+             (s->restart == SERVICE_RESTART_ON_WATCHDOG && s->result == SERVICE_FAILURE_WATCHDOG) ||
              (s->restart == SERVICE_RESTART_ON_ABORT && (s->result == SERVICE_FAILURE_SIGNAL ||
                                                          s->result == SERVICE_FAILURE_CORE_DUMP))) &&
             (s->result != SERVICE_FAILURE_EXIT_CODE ||
@@ -1930,6 +1958,12 @@ static void service_enter_dead(Service *s, ServiceResult f, bool allow_restart)
         /* we want fresh tmpdirs in case service is started again immediately */
         exec_context_tmp_dirs_done(&s->exec_context);
 
+        /* Try to delete the pid file. At this point it will be
+         * out-of-date, and some software might be confused by it, so
+         * let's remove it. */
+        if (s->pid_file)
+                unlink_noerrno(s->pid_file);
+
         return;
 
 fail:
@@ -1939,8 +1973,6 @@ fail:
         service_enter_dead(s, SERVICE_FAILURE_RESOURCES, false);
 }
 
-static void service_enter_signal(Service *s, ServiceState state, ServiceResult f);
-
 static void service_enter_stop_post(Service *s, ServiceResult f) {
         int r;
         assert(s);
@@ -1970,7 +2002,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
 
                 service_set_state(s, SERVICE_STOP_POST);
         } else
-                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_SUCCESS);
+                service_enter_dead(s, SERVICE_SUCCESS, true);
 
         return;
 
@@ -2121,25 +2153,33 @@ fail:
         service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
 }
 
+static void service_kill_control_processes(Service *s) {
+        char *p;
+
+        if (!UNIT(s)->cgroup_path)
+                return;
+
+        p = strappenda(UNIT(s)->cgroup_path, "/control");
+        cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, p, SIGKILL, true, true, true, NULL);
+}
+
 static void service_enter_start(Service *s) {
+        ExecCommand *c;
         pid_t pid;
         int r;
-        ExecCommand *c;
 
         assert(s);
 
         assert(s->exec_command[SERVICE_EXEC_START]);
         assert(!s->exec_command[SERVICE_EXEC_START]->command_next || s->type == SERVICE_ONESHOT);
 
-        if (s->type == SERVICE_FORKING)
-                service_unwatch_control_pid(s);
-        else
-                service_unwatch_main_pid(s);
+        service_unwatch_control_pid(s);
+        service_unwatch_main_pid(s);
 
         /* We want to ensure that nobody leaks processes from
          * START_PRE here, so let's go on a killing spree, People
          * should not spawn long running processes from START_PRE. */
-        cgroup_bonding_kill_list(UNIT(s)->cgroup_bondings, SIGKILL, true, true, NULL, "control");
+        service_kill_control_processes(s);
 
         if (s->type == SERVICE_FORKING) {
                 s->control_command_id = SERVICE_EXEC_START;
@@ -2215,11 +2255,9 @@ static void service_enter_start_pre(Service *s) {
 
         s->control_command = s->exec_command[SERVICE_EXEC_START_PRE];
         if (s->control_command) {
-
                 /* Before we start anything, let's clear up what might
                  * be left from previous runs. */
-                cgroup_bonding_kill_list(UNIT(s)->cgroup_bondings, SIGKILL,
-                                         true,true, NULL, "control");
+                service_kill_control_processes(s);
 
                 s->control_command_id = SERVICE_EXEC_START_PRE;
 
@@ -2570,7 +2608,7 @@ static int service_reload(Unit *u) {
         return 0;
 }
 
-static bool service_can_reload(Unit *u) {
+_pure_ static bool service_can_reload(Unit *u) {
         Service *s = SERVICE(u);
 
         assert(s);
@@ -2782,7 +2820,7 @@ static int service_deserialize_item(Unit *u, const char *key, const char *value,
         return 0;
 }
 
-static UnitActiveState service_active_state(Unit *u) {
+_pure_ static UnitActiveState service_active_state(Unit *u) {
         const UnitActiveState *table;
 
         assert(u);
@@ -2818,7 +2856,7 @@ static bool service_check_gc(Unit *u) {
         return false;
 }
 
-static bool service_check_snapshot(Unit *u) {
+_pure_ static bool service_check_snapshot(Unit *u) {
         Service *s = SERVICE(u);
 
         assert(s);
@@ -3043,7 +3081,6 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                 }
 
         } else if (s->control_pid == pid) {
-
                 s->control_pid = 0;
 
                 if (s->control_command) {
@@ -3064,8 +3101,7 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
                 /* Immediately get rid of the cgroup, so that the
                  * kernel doesn't delay the cgroup empty messages for
                  * the service cgroup any longer than necessary */
-                cgroup_bonding_kill_list(UNIT(s)->cgroup_bondings, SIGKILL,
-                                         true, true, NULL, "control");
+                service_kill_control_processes(s);
 
                 if (s->control_command &&
                     s->control_command->command_next &&
@@ -3294,13 +3330,12 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
         }
 }
 
-static void service_cgroup_notify_event(Unit *u) {
+static void service_notify_cgroup_empty_event(Unit *u) {
         Service *s = SERVICE(u);
 
         assert(u);
 
-        log_debug_unit(u->id,
-                       "%s: cgroup is empty", u->id);
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
 
         switch (s->state) {
 
@@ -3729,6 +3764,7 @@ static void service_reset_failed(Unit *u) {
 
 static int service_kill(Unit *u, KillWho who, int signo, DBusError *error) {
         Service *s = SERVICE(u);
+
         return unit_kill_common(u, who, signo, s->main_pid, s->control_pid, error);
 }
 
@@ -3756,6 +3792,7 @@ static const char* const service_restart_table[_SERVICE_RESTART_MAX] = {
         [SERVICE_RESTART_NO] = "no",
         [SERVICE_RESTART_ON_SUCCESS] = "on-success",
         [SERVICE_RESTART_ON_FAILURE] = "on-failure",
+        [SERVICE_RESTART_ON_WATCHDOG] = "on-watchdog",
         [SERVICE_RESTART_ON_ABORT] = "on-abort",
         [SERVICE_RESTART_ALWAYS] = "always"
 };
@@ -3821,8 +3858,9 @@ const UnitVTable service_vtable = {
                 "Service\0"
                 "Install\0",
 
+        .private_section = "Service",
         .exec_context_offset = offsetof(Service, exec_context),
-        .exec_section = "Service",
+        .cgroup_context_offset = offsetof(Service, cgroup_context),
 
         .init = service_init,
         .done = service_done,
@@ -3855,7 +3893,7 @@ const UnitVTable service_vtable = {
 
         .reset_failed = service_reset_failed,
 
-        .cgroup_notify_empty = service_cgroup_notify_event,
+        .notify_cgroup_empty = service_notify_cgroup_empty_event,
         .notify_message = service_notify_message,
 
         .bus_name_owner_change = service_bus_name_owner_change,
@@ -3864,6 +3902,10 @@ const UnitVTable service_vtable = {
         .bus_interface = "org.freedesktop.systemd1.Service",
         .bus_message_handler = bus_service_message_handler,
         .bus_invalidating_properties =  bus_service_invalidating_properties,
+        .bus_set_property = bus_service_set_property,
+        .bus_commit_properties = bus_service_commit_properties,
+
+        .can_transient = true,
 
 #ifdef HAVE_SYSV_COMPAT
         .enumerate = service_enumerate,