chiark / gitweb /
core: fix detection of dead processes
author Yuxuan Shui <yshuiv7@gmail.com>
Fri, 14 Feb 2014 18:38:50 +0000 (02:38 +0800)
committer Zbigniew Jędrzejewski-Szmek <zbyszek@in.waw.pl>
Sat, 15 Feb 2014 01:47:52 +0000 (20:47 -0500)
Commit 5ba6985b moves the UNIT_VTABLE(u)->sigchld_event before systemd
actually reaps the zombie, which leads to service_load_pid_file accepting
a zombie as a valid PID.

This fixes timeouts like:
[ 2746.602243] systemd[1]: chronyd.service stop-sigterm timed out. Killing.
[ 2836.852545] systemd[1]: chronyd.service still around after SIGKILL. Ignoring.
[ 2927.102187] systemd[1]: chronyd.service stop-final-sigterm timed out. Killing.
[ 3017.352560] systemd[1]: chronyd.service still around after final SIGKILL. Entering failed mode.

src/core/service.c
src/shared/util.c
src/shared/util.h

index 51297846f9af8a9e3cf471c0b847936f590fb3ad..3a2ef015708311fd6bb3a0a838428c8504eb5407 100644 (file)
@@ -1376,6 +1376,14 @@ static int service_load_pid_file(Service *s, bool may_warn) {
                 return -ESRCH;
         }
 
+        if (get_process_state(pid) == 'Z') {
+                if (may_warn)
+                        log_info_unit(UNIT(s)->id,
+                                      "PID "PID_FMT" read from file %s is a zombie.",
+                                      pid, s->pid_file);
+                return -ESRCH;
+        }
+
         if (s->main_pid_known) {
                 if (pid == s->main_pid)
                         return 0;
index 3482b9b743b8e1cc7861dcf8c746807cc7dcd6eb..b1a9db1d4660681535439efeffea5716c5255e54 100644 (file)
@@ -513,6 +513,31 @@ char *truncate_nl(char *s) {
         return s;
 }
 
+int get_process_state(pid_t pid) { /* Returns the process state character parsed from the procfs "stat" file of pid, or a negative value on error. */
+        const char *p;
+        char state;
+        int r;
+        _cleanup_free_ char *line = NULL;
+
+        assert(pid >= 0);
+
+        p = procfs_file_alloca(pid, "stat"); /* presumably the path of the per-process "stat" file; confirm against procfs_file_alloca() */
+        r = read_one_line_file(p, &line);
+        if (r < 0)
+                return r; /* hand the read error through unchanged */
+
+        p = strrchr(line, ')'); /* last ')' in the line; presumably closes the command name, which may itself contain ')' */
+        if (!p)
+                return -EIO;
+
+        p++; /* step past the ')' */
+
+        if (sscanf(p, " %c", &state) != 1) /* the leading space in the format skips whitespace before the state character */
+                return -EIO;
+
+        return (unsigned char) state; /* cast keeps a successful result non-negative, distinct from the error returns */
+}
+
 int get_process_comm(pid_t pid, char **name) {
         const char *p;
         int r;
index 9aea3a4e505ffe59b616e13fb2aabd67f2ad86bf..8dede1f7ff1598948401e0ebb09298e1649e5bd0 100644 (file)
@@ -238,6 +238,7 @@ char *file_in_same_dir(const char *path, const char *filename);
 
 int rmdir_parents(const char *path, const char *stop);
 
+int get_process_state(pid_t pid); /* returns the process state character, or a negative errno-style value on failure */
 int get_process_comm(pid_t pid, char **name);
 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line);
 int get_process_exe(pid_t pid, char **name);