chiark / gitweb /
core: fix detection of dead processes
[elogind.git] / src / core / unit.c
index 6e1c112124146f8f39f1dcfc46cf3981bf2ebc16..0277675f60c5229ecba79c36ea9eca2e887eb3da 100644 (file)
@@ -1534,7 +1534,9 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
          * sucessfully, since there's no change of state in that case. Which is
          * why it is handled in service_set_state() */
         if (UNIT_IS_INACTIVE_OR_FAILED(os) != UNIT_IS_INACTIVE_OR_FAILED(ns)) {
-                ExecContext *ec = unit_get_exec_context(u);
+                ExecContext *ec;
+
+                ec = unit_get_exec_context(u);
                 if (ec && exec_context_may_touch_console(ec)) {
                         if (UNIT_IS_INACTIVE_OR_FAILED(ns)) {
                                 m->n_on_console --;
@@ -1630,12 +1632,11 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
                 }
 
                 /* stop unneeded units regardless if going down was expected or not */
-                if (UNIT_IS_ACTIVE_OR_ACTIVATING(os) && UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
+                if (UNIT_IS_INACTIVE_OR_DEACTIVATING(ns))
                         check_unneeded_dependencies(u);
 
                 if (ns != os && ns == UNIT_FAILED) {
-                        log_notice_unit(u->id,
-                                        "Unit %s entered failed state.", u->id);
+                        log_notice_unit(u->id, "Unit %s entered failed state.", u->id);
                         unit_start_on_failure(u);
                 }
         }
@@ -1704,16 +1705,27 @@ int unit_watch_pid(Unit *u, pid_t pid) {
         assert(u);
         assert(pid >= 1);
 
+        /* Watch a specific PID. We only support one or two units
+         * watching each PID for now, not more. */
+
+        r = hashmap_ensure_allocated(&u->manager->watch_pids1, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
         r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func);
         if (r < 0)
                 return r;
 
-        /* Watch a specific PID. We only support one unit watching
-         * each PID for now. */
+        r = hashmap_put(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+        if (r == -EEXIST) {
+                r = hashmap_ensure_allocated(&u->manager->watch_pids2, trivial_hash_func, trivial_compare_func);
+                if (r < 0)
+                        return r;
 
-        r = set_put(u->pids, LONG_TO_PTR(pid));
+                r = hashmap_put(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
+        }
 
-        q = hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        q = set_put(u->pids, LONG_TO_PTR(pid));
         if (q < 0)
                 return q;
 
@@ -1724,7 +1736,8 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {
         assert(u);
         assert(pid >= 1);
 
-        hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        hashmap_remove_value(u->manager->watch_pids1, LONG_TO_PTR(pid), u);
+        hashmap_remove_value(u->manager->watch_pids2, LONG_TO_PTR(pid), u);
         set_remove(u->pids, LONG_TO_PTR(pid));
 }
 
@@ -1797,8 +1810,10 @@ void unit_unwatch_all_pids(Unit *u) {
 
         assert(u);
 
-        SET_FOREACH(e, u->pids, i)
-                hashmap_remove_value(u->manager->watch_pids, e, u);
+        SET_FOREACH(e, u->pids, i) {
+                hashmap_remove_value(u->manager->watch_pids1, e, u);
+                hashmap_remove_value(u->manager->watch_pids2, e, u);
+        }
 
         set_free(u->pids);
         u->pids = NULL;
@@ -3089,7 +3104,15 @@ int unit_kill_context(
                                 log_warning_unit(u->id, "Failed to kill control group: %s", strerror(-r));
                 } else if (r > 0) {
 
-                        wait_for_exit = true;
+                        /* FIXME: For now, we will not wait for the
+                         * cgroup members to die, simply because
+                         * cgroup notification is unreliable. It
+                         * doesn't work at all in containers, and
+                         * outside of containers it can be confused
+                         * easily by leaving directories in the
+                         * cgroup. */
+
+                        /* wait_for_exit = true; */
 
                         if (c->send_sighup && !sigkill) {
                                 set_free(pid_set);