core: fix detection of dead processes

[elogind.git] / src / core / unit.c
diff --git a/src/core/unit.c b/src/core/unit.c

index 63576a4b700b6479bfd6bd200fc84fdec8929f8a..0277675f60c5229ecba79c36ea9eca2e887eb3da 100644 (file)
--- a/src/core/unit.c
+++ b/src/core/unit.c
@@ -482,6 +482,8 @@ void unit_free(Unit *u) {
  
          set_free_free(u->names);
  
+        unit_unwatch_all_pids(u);
+
          condition_free_list(u->conditions);
  
          unit_ref_unset(&u->slice);
@@ -1532,7 +1534,9 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
           * sucessfully, since there's no change of state in that case. Which is
           * why it is handled in service_set_state() */
          if (UNIT_IS_INACTIVE_OR_FAILED(os) != UNIT_IS_INACTIVE_OR_FAILED(ns)) {
-                ExecContext *ec = unit_get_exec_context(u);
+                ExecContext *ec;
+
+                ec = unit_get_exec_context(u);
                  if (ec && exec_context_may_touch_console(ec)) {
                          if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
                                  m->n_on_console --;
@@ -1628,12 +1632,11 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
                  }
  
                  /* stop unneeded units regardless if going down was expected or not */
-                if (UNIT_IS_ACTIVE_OR_ACTIVATING(os) && UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
+                if (UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
                          check_unneeded_dependencies(u);
  
                  if (ns != os && ns == UNIT_FAILED) {
-                        log_notice_unit(u->id,
-                                        "Unit %s entered failed state.", u->id);
+                        log_notice_unit(u->id, "Unit %s entered failed state.", u->id);
                          unit_start_on_failure(u);
                  }
          }
@@ -1697,20 +1700,142 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
  }
  
  int unit_watch_pid(Unit *u, pid_t pid) {
+        int q, r;
+
          assert(u);
          assert(pid >= 1);
  
-        /* Watch a specific PID. We only support one unit watching
-         * each PID for now. */
+        /* Watch a specific PID. We only support one or two units
+         * watching each PID for now, not more. */
+
+        r = hashmap_ensure_allocated(&u->manager->watch_pids1, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
+        r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
+        r = hashmap_put(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+        if (r == -EEXIST) {
+                r = hashmap_ensure_allocated(&u->manager->watch_pids2, trivial_hash_func, trivial_compare_func);
+                if (r < 0)
+                        return r;
+
+                r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
+        }
+
+        q = set_put(u->pids, LONG_TO_PTR(pid));
+        if (q < 0)
+                return q;
  
-        return hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        return r;
  }
  
  void unit_unwatch_pid(Unit *u, pid_t pid) {
          assert(u);
          assert(pid >= 1);
  
-        hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+        hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
+        set_remove(u->pids, LONG_TO_PTR(pid));
+}
+
+static int watch_pids_in_path(Unit *u, const char *path) {
+        _cleanup_closedir_ DIR *d = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        int ret = 0, r;
+
+        assert(u);
+        assert(path);
+
+        /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */
+
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
+        if (r >= 0) {
+                pid_t pid;
+
+                while ((r = cg_read_pid(f, &pid)) > 0) {
+                        r = unit_watch_pid(u, pid);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+        if (r >= 0) {
+                char *fn;
+
+                while ((r = cg_read_subgroup(d, &fn)) > 0) {
+                        _cleanup_free_ char *p = NULL;
+
+                        p = strjoin(path, "/", fn, NULL);
+                        free(fn);
+
+                        if (!p)
+                                return -ENOMEM;
+
+                        r = watch_pids_in_path(u, p);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        return ret;
+}
+
+
+int unit_watch_all_pids(Unit *u) {
+        assert(u);
+
+        if (!u->cgroup_path)
+                return -ENOENT;
+
+        /* Adds all PIDs from our cgroup to the set of PIDs we watch */
+
+        return watch_pids_in_path(u, u->cgroup_path);
+}
+
+void unit_unwatch_all_pids(Unit *u) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        SET_FOREACH(e, u->pids, i) {
+                hashmap_remove_value(u->manager->watch_pids1, e, u);
+                hashmap_remove_value(u->manager->watch_pids2, e, u);
+        }
+
+        set_free(u->pids);
+        u->pids = NULL;
+}
+
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        /* Cleans dead PIDs from our list */
+
+        SET_FOREACH(e, u->pids, i) {
+                pid_t pid = PTR_TO_LONG(e);
+
+                if (pid == except1 || pid == except2)
+                        continue;
+
+                if (kill(pid, 0) < 0 && errno == ESRCH)
+                        set_remove(u->pids, e);
+        }
  }
  
  bool unit_job_is_applicable(Unit *u, JobType j) {
@@ -2944,7 +3069,7 @@ int unit_kill_context(
                          if (!main_pid_alien)
                                  wait_for_exit = true;
  
-                        if (c->send_sighup)
+                        if (c->send_sighup && !sigkill)
                                  kill(main_pid, SIGHUP);
                  }
          }
@@ -2960,7 +3085,7 @@ int unit_kill_context(
                  } else {
                          wait_for_exit = true;
  
-                        if (c->send_sighup)
+                        if (c->send_sighup && !sigkill)
                                  kill(control_pid, SIGHUP);
                  }
          }
@@ -2979,26 +3104,24 @@ int unit_kill_context(
                                  log_warning_unit(u->id, "Failed to kill control group: %s", strerror(-r));
                  } else if (r > 0) {
  
-                        /* FIXME: Now, we don't actually wait for any
-                         * of the processes that are neither control
-                         * nor main process. We should wait for them
-                         * of course, but that's hard since the cgroup
-                         * notification logic is so unreliable. It is
-                         * not available at all in containers, and on
-                         * the host it gets confused by
-                         * subgroups. Hence, for now, let's not wait
-                         * for these processes -- but when the kernel
-                         * gets fixed we really should correct
-                         * that. */
-
-                        if (c->send_sighup) {
+                        /* FIXME: For now, we will not wait for the
+                         * cgroup members to die, simply because
+                         * cgroup notification is unreliable. It
+                         * doesn't work at all in containers, and
+                         * outside of containers it can be confused
+                         * easily by leaving directories in the
+                         * cgroup. */
+
+                        /* wait_for_exit = true; */
+
+                        if (c->send_sighup && !sigkill) {
                                  set_free(pid_set);
  
                                  pid_set = unit_pid_set(main_pid, control_pid);
                                  if (!pid_set)
                                          return -ENOMEM;
  
-                                cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, true, true, false, pid_set);
+                                cg_kill_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, SIGHUP, false, true, false, pid_set);
                          }
                  }
          }