chiark / gitweb /
service: properly handle if the main process is down and we wait for the cgroup to...
[elogind.git] / src / service.c
index d52856af897985afe57b2e4ca51a4910172ed872..94f2f37afc1c15ce74da44ccac4b4ed71bfed7f7 100644 (file)
@@ -95,7 +95,7 @@ static const UnitActiveState state_translation_table[_SERVICE_STATE_MAX] = {
         [SERVICE_STOP_POST] = UNIT_DEACTIVATING,
         [SERVICE_FINAL_SIGTERM] = UNIT_DEACTIVATING,
         [SERVICE_FINAL_SIGKILL] = UNIT_DEACTIVATING,
-        [SERVICE_MAINTENANCE] = UNIT_MAINTENANCE,
+        [SERVICE_FAILED] = UNIT_FAILED,
         [SERVICE_AUTO_RESTART] = UNIT_ACTIVATING
 };
 
@@ -251,9 +251,17 @@ static int sysv_translate_facility(const char *name, char **_r) {
                 "$time",      SPECIAL_RTC_SET_TARGET,
 
                 /* Debian extensions */
+#ifdef TARGET_DEBIAN
                 "$mail-transport-agent", SPECIAL_MAIL_TRANSFER_AGENT_TARGET,
+#endif
                 "$mail-transfer-agent",  SPECIAL_MAIL_TRANSFER_AGENT_TARGET,
-                "$x-display-manager",    SPECIAL_DISPLAY_MANAGER_SERVICE
+                "$x-display-manager",    SPECIAL_DISPLAY_MANAGER_SERVICE,
+
+#ifdef TARGET_FEDORA
+                /* Fedora extensions, lacking the $ prefix */
+                "MTA",        SPECIAL_MAIL_TRANSFER_AGENT_TARGET,
+                "smtpdaemon", SPECIAL_MAIL_TRANSFER_AGENT_TARGET
+#endif
         };
 
         unsigned i;
@@ -1189,7 +1197,7 @@ static void service_set_state(Service *s, ServiceState state) {
             state == SERVICE_STOP_POST ||
             state == SERVICE_FINAL_SIGTERM ||
             state == SERVICE_FINAL_SIGKILL ||
-            state == SERVICE_MAINTENANCE ||
+            state == SERVICE_FAILED ||
             state == SERVICE_AUTO_RESTART)
                 service_notify_sockets_dead(s);
 
@@ -1513,7 +1521,7 @@ static void service_enter_dead(Service *s, bool success, bool allow_restart) {
 
                 service_set_state(s, SERVICE_AUTO_RESTART);
         } else
-                service_set_state(s, s->failure ? SERVICE_MAINTENANCE : SERVICE_DEAD);
+                service_set_state(s, s->failure ? SERVICE_FAILED : SERVICE_DEAD);
 
         s->forbid_restart = false;
 
@@ -1562,7 +1570,8 @@ fail:
 
 static void service_enter_signal(Service *s, ServiceState state, bool success) {
         int r;
-        bool sent = false;
+        Set *pid_set = NULL;
+        bool wait_for_exit = false;
 
         assert(s);
 
@@ -1572,38 +1581,55 @@ static void service_enter_signal(Service *s, ServiceState state, bool success) {
         if (s->exec_context.kill_mode != KILL_NONE) {
                 int sig = (state == SERVICE_STOP_SIGTERM || state == SERVICE_FINAL_SIGTERM) ? s->exec_context.kill_signal : SIGKILL;
 
-                if (s->exec_context.kill_mode == KILL_CONTROL_GROUP) {
+                if (s->main_pid > 0) {
+                        if (kill(s->exec_context.kill_mode == KILL_PROCESS_GROUP ?
+                                 -s->main_pid :
+                                 s->main_pid, sig) < 0 && errno != ESRCH)
 
-                        if ((r = cgroup_bonding_kill_list(s->meta.cgroup_bondings, sig)) < 0) {
-                                if (r != -EAGAIN && r != -ESRCH)
-                                        goto fail;
-                        } else
-                                sent = true;
+                                log_warning("Failed to kill main process %li: %m", (long) s->main_pid);
+                        else
+                                wait_for_exit = true;
                 }
 
-                if (!sent) {
-                        r = 0;
+                if (s->control_pid > 0) {
+                        if (kill(s->exec_context.kill_mode == KILL_PROCESS_GROUP ?
+                                 -s->control_pid :
+                                 s->control_pid, sig) < 0 && errno != ESRCH)
 
-                        if (s->main_pid > 0) {
-                                if (kill(s->exec_context.kill_mode == KILL_PROCESS ? s->main_pid : -s->main_pid, sig) < 0 && errno != ESRCH)
-                                        r = -errno;
-                                else
-                                        sent = true;
-                        }
+                                log_warning("Failed to kill control process %li: %m", (long) s->control_pid);
+                        else
+                                wait_for_exit = true;
+                }
 
-                        if (s->control_pid > 0) {
-                                if (kill(s->exec_context.kill_mode == KILL_PROCESS ? s->control_pid : -s->control_pid, sig) < 0 && errno != ESRCH)
-                                        r = -errno;
-                                else
-                                        sent = true;
-                        }
+                if (s->exec_context.kill_mode == KILL_CONTROL_GROUP) {
 
-                        if (r < 0)
+                        if (!(pid_set = set_new(trivial_hash_func, trivial_compare_func))) {
+                                r = -ENOMEM;
                                 goto fail;
+                        }
+
+                        /* Exclude the main/control pids from being killed via the cgroup */
+                        if (s->main_pid > 0)
+                                if ((r = set_put(pid_set, LONG_TO_PTR(s->main_pid))) < 0)
+                                        goto fail;
+
+                        if (s->control_pid > 0)
+                                if ((r = set_put(pid_set, LONG_TO_PTR(s->control_pid))) < 0)
+                                        goto fail;
+
+                        if ((r = cgroup_bonding_kill_list(s->meta.cgroup_bondings, sig, pid_set)) < 0) {
+                                if (r != -EAGAIN && r != -ESRCH && r != -ENOENT)
+                                        log_warning("Failed to kill control group: %s", strerror(-r));
+                        } else if (r > 0)
+                                wait_for_exit = true;
+
+                        set_free(pid_set);
+                        /* Reset the pointer: a later "goto fail" must not free the set a second time */
+                        pid_set = NULL;
                 }
         }
 
-        if (sent && (s->main_pid > 0 || s->control_pid > 0)) {
+        if (wait_for_exit) {
                 if (s->timeout_usec > 0)
                         if ((r = unit_watch_timer(UNIT(s), s->timeout_usec, &s->timer_watch)) < 0)
                                 goto fail;
@@ -1623,6 +1647,9 @@ fail:
                 service_enter_stop_post(s, false);
         else
                 service_enter_dead(s, false, true);
+
+        if (pid_set)
+                set_free(pid_set);
 }
 
 static void service_enter_stop(Service *s, bool success) {
@@ -1814,7 +1841,7 @@ static void service_enter_restart(Service *s) {
 
         service_enter_dead(s, true, false);
 
-        if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(s), JOB_FAIL, false, NULL, NULL)) < 0)
+        if ((r = manager_add_job(s->meta.manager, JOB_START, UNIT(s), JOB_FAIL, false, &error, NULL)) < 0)
                 goto fail;
 
         log_debug("%s scheduled restart job.", s->meta.id);
@@ -1958,7 +1985,7 @@ static int service_start(Unit *u) {
             s->state == SERVICE_START_POST)
                 return 0;
 
-        assert(s->state == SERVICE_DEAD || s->state == SERVICE_MAINTENANCE || s->state == SERVICE_AUTO_RESTART);
+        assert(s->state == SERVICE_DEAD || s->state == SERVICE_FAILED || s->state == SERVICE_AUTO_RESTART);
 
         /* Make sure we don't enter a busy loop of some kind. */
         if (!ratelimit_test(&s->ratelimit)) {
@@ -2512,7 +2539,7 @@ static void service_timer_event(Unit *u, uint64_t elapsed, Watch* w) {
                 break;
 
         case SERVICE_FINAL_SIGKILL:
-                log_warning("%s still around after SIGKILL (2). Entering maintenance mode.", u->meta.id);
+                log_warning("%s still around after SIGKILL (2). Entering failed mode.", u->meta.id);
                 service_enter_dead(s, false, true);
                 break;
 
@@ -2545,6 +2572,20 @@ static void service_cgroup_notify_event(Unit *u) {
                 service_enter_running(s, true);
                 break;
 
+        case SERVICE_STOP_SIGTERM:
+        case SERVICE_STOP_SIGKILL:
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
+                        service_enter_stop_post(s, true);
+
+                break;
+
+        case SERVICE_FINAL_SIGTERM:
+        case SERVICE_FINAL_SIGKILL:
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
+                        service_enter_dead(s, true, true);
+
+                break;
+
         default:
                 ;
         }
@@ -2845,12 +2886,12 @@ int service_set_socket_fd(Service *s, int fd, Socket *sock) {
         return 0;
 }
 
-static void service_reset_maintenance(Unit *u) {
+static void service_reset_failed(Unit *u) {
         Service *s = SERVICE(u);
 
         assert(s);
 
-        if (s->state == SERVICE_MAINTENANCE)
+        if (s->state == SERVICE_FAILED)
                 service_set_state(s, SERVICE_DEAD);
 
         s->failure = false;
@@ -2870,7 +2911,7 @@ static const char* const service_state_table[_SERVICE_STATE_MAX] = {
         [SERVICE_STOP_POST] = "stop-post",
         [SERVICE_FINAL_SIGTERM] = "final-sigterm",
         [SERVICE_FINAL_SIGKILL] = "final-sigkill",
-        [SERVICE_MAINTENANCE] = "maintenance",
+        [SERVICE_FAILED] = "failed",
         [SERVICE_AUTO_RESTART] = "auto-restart",
 };
 
@@ -2943,7 +2984,7 @@ const UnitVTable service_vtable = {
         .sigchld_event = service_sigchld_event,
         .timer_event = service_timer_event,
 
-        .reset_maintenance = service_reset_maintenance,
+        .reset_failed = service_reset_failed,
 
         .cgroup_notify_empty = service_cgroup_notify_event,
         .notify_message = service_notify_message,