chiark / gitweb /
core: watch SIGCHLD more closely to track processes of units with no reliable cgroup...
authorLennart Poettering <lennart@poettering.net>
Thu, 6 Feb 2014 16:17:51 +0000 (17:17 +0100)
committerLennart Poettering <lennart@poettering.net>
Fri, 7 Feb 2014 14:14:36 +0000 (15:14 +0100)
When a process dies that we can associate with a specific unit, start
watching all other processes of that unit, so that we can associate
those processes with the unit too.

Also, for service units start doing this as soon as we get the first
SIGCHLD for either control or main process, so that we can follow the
processes of the service from one to the other, as long as process that
remain are processes of the ones we watched that died and got reassigned
to us as parent.

Similar, for scope units start doing this as soon as the scope
controller abandons the unit, and thus management entirely reverts to
systemd. To abandon a unit introduce a new Abandon() scope unit method
call.

src/core/dbus-scope.c
src/core/manager.c
src/core/scope.c
src/core/scope.h
src/core/service.c
src/core/unit.c
src/core/unit.h

index d5a20485167319ef13ac78c6bd7a41458b9e703a..c46c972c2690a9db2532d7ad9445452c15722720 100644 (file)
 #include "bus-util.h"
 #include "bus-internal.h"
 
+static int bus_scope_abandon(sd_bus *bus, sd_bus_message *message, void *userdata, sd_bus_error *error) {
+        Scope *s = userdata;
+
+        assert(bus);
+        assert(message);
+        assert(s);
+
+        return scope_abandon(s);
+}
+
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_result, scope_result, ScopeResult);
 
 const sd_bus_vtable bus_scope_vtable[] = {
@@ -36,6 +46,7 @@ const sd_bus_vtable bus_scope_vtable[] = {
         SD_BUS_PROPERTY("TimeoutStopUSec", "t", bus_property_get_usec, offsetof(Scope, timeout_stop_usec), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Result", "s", property_get_result, offsetof(Scope, result), SD_BUS_VTABLE_PROPERTY_EMITS_CHANGE),
         SD_BUS_SIGNAL("RequestStop", NULL, 0),
+        SD_BUS_METHOD("Abandon", NULL, NULL, bus_scope_abandon, 0),
         SD_BUS_VTABLE_END
 };
 
@@ -56,10 +67,6 @@ static int bus_scope_set_transient_property(
                 unsigned n = 0;
                 uint32_t pid;
 
-                r = set_ensure_allocated(&s->pids, trivial_hash_func, trivial_compare_func);
-                if (r < 0)
-                        return r;
-
                 r = sd_bus_message_enter_container(message, 'a', "u");
                 if (r < 0)
                         return r;
@@ -70,7 +77,7 @@ static int bus_scope_set_transient_property(
                                 return -EINVAL;
 
                         if (mode != UNIT_CHECK) {
-                                r = set_put(s->pids, LONG_TO_PTR(pid));
+                                r = unit_watch_pid(UNIT(s), pid);
                                 if (r < 0 && r != -EEXIST)
                                         return r;
                         }
index 45f5f70b2acd8caed560f786ce89473903996a5c..c0bcc79f00a72c6962d4ba1044fcb0da245390a7 100644 (file)
@@ -1459,7 +1459,7 @@ static int manager_dispatch_sigchld(Manager *m) {
                 log_debug_unit(u->id,
                                "Child %lu belongs to %s", (long unsigned) si.si_pid, u->id);
 
-                hashmap_remove(m->watch_pids, LONG_TO_PTR(si.si_pid));
+                unit_unwatch_pid(u, si.si_pid);
                 UNIT_VTABLE(u)->sigchld_event(u, si.si_pid, si.si_code, si.si_status);
         }
 
index 0c1d17e68c569a2fd5bcfaa1261db29a756875c3..940e40dda78076d575bea7abf9496894ee00df02 100644 (file)
@@ -35,6 +35,7 @@
 static const UnitActiveState state_translation_table[_SCOPE_STATE_MAX] = {
         [SCOPE_DEAD] = UNIT_INACTIVE,
         [SCOPE_RUNNING] = UNIT_ACTIVE,
+        [SCOPE_ABANDONED] = UNIT_ACTIVE,
         [SCOPE_STOP_SIGTERM] = UNIT_DEACTIVATING,
         [SCOPE_STOP_SIGKILL] = UNIT_DEACTIVATING,
         [SCOPE_FAILED] = UNIT_FAILED
@@ -66,9 +67,6 @@ static void scope_done(Unit *u) {
 
         free(s->controller);
 
-        set_free(s->pids);
-        s->pids = NULL;
-
         s->timer_event_source = sd_event_source_unref(s->timer_event_source);
 }
 
@@ -100,15 +98,14 @@ static void scope_set_state(Scope *s, ScopeState state) {
         old_state = s->state;
         s->state = state;
 
-        if (state != SCOPE_STOP_SIGTERM &&
-            state != SCOPE_STOP_SIGKILL)
+        if (!IN_SET(state, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
                 s->timer_event_source = sd_event_source_unref(s->timer_event_source);
 
+        if (IN_SET(state, SCOPE_DEAD, SCOPE_FAILED))
+                unit_unwatch_all_pids(UNIT(s));
+
         if (state != old_state)
-                log_debug("%s changed %s -> %s",
-                          UNIT(s)->id,
-                          scope_state_to_string(old_state),
-                          scope_state_to_string(state));
+                log_debug("%s changed %s -> %s", UNIT(s)->id, scope_state_to_string(old_state), scope_state_to_string(state));
 
         unit_notify(UNIT(s), state_translation_table[old_state], state_translation_table[state], true);
 }
@@ -135,7 +132,7 @@ static int scope_verify(Scope *s) {
         if (UNIT(s)->load_state != UNIT_LOADED)
                 return 0;
 
-        if (set_size(s->pids) <= 0 && UNIT(s)->manager->n_reloading <= 0) {
+        if (set_isempty(UNIT(s)->pids) && UNIT(s)->manager->n_reloading <= 0) {
                 log_error_unit(UNIT(s)->id, "Scope %s has no PIDs. Refusing.", UNIT(s)->id);
                 return -EINVAL;
         }
@@ -181,12 +178,15 @@ static int scope_coldplug(Unit *u) {
 
         if (s->deserialized_state != s->state) {
 
-                if (s->deserialized_state == SCOPE_STOP_SIGKILL || s->deserialized_state == SCOPE_STOP_SIGTERM) {
+                if (IN_SET(s->deserialized_state, SCOPE_STOP_SIGKILL, SCOPE_STOP_SIGTERM)) {
                         r = scope_arm_timer(s);
                         if (r < 0)
                                 return r;
                 }
 
+                if (!IN_SET(s->deserialized_state, SCOPE_DEAD, SCOPE_FAILED))
+                        unit_watch_all_pids(UNIT(s));
+
                 scope_set_state(s, s->deserialized_state);
         }
 
@@ -227,6 +227,8 @@ static void scope_enter_signal(Scope *s, ScopeState state, ScopeResult f) {
         if (f != SCOPE_SUCCESS)
                 s->result = f;
 
+        unit_watch_all_pids(UNIT(s));
+
         /* If we have a controller set let's ask the controller nicely
          * to terminate the scope, instead of us going directly into
          * SIGTERM beserk mode */
@@ -288,13 +290,10 @@ static int scope_start(Unit *u) {
                 return r;
         }
 
-        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, s->pids);
+        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, UNIT(s)->pids);
         if (r < 0)
                 return r;
 
-        set_free(s->pids);
-        s->pids = NULL;
-
         s->result = SCOPE_SUCCESS;
 
         scope_set_state(s, SCOPE_RUNNING);
@@ -305,13 +304,13 @@ static int scope_stop(Unit *u) {
         Scope *s = SCOPE(u);
 
         assert(s);
-        assert(s->state == SCOPE_RUNNING);
 
         if (s->state == SCOPE_STOP_SIGTERM ||
             s->state == SCOPE_STOP_SIGKILL)
                 return 0;
 
-        assert(s->state == SCOPE_RUNNING);
+        assert(s->state == SCOPE_RUNNING ||
+               s->state == SCOPE_ABANDONED);
 
         scope_enter_signal(s, SCOPE_STOP_SIGTERM, SCOPE_SUCCESS);
         return 0;
@@ -389,8 +388,8 @@ static bool scope_check_gc(Unit *u) {
         /* Never clean up scopes that still have a process around,
          * even if the scope is formally dead. */
 
-        if (UNIT(s)->cgroup_path) {
-                r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, UNIT(s)->cgroup_path, true);
+        if (u->cgroup_path) {
+                r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
                 if (r <= 0)
                         return true;
         }
@@ -398,6 +397,32 @@ static bool scope_check_gc(Unit *u) {
         return false;
 }
 
+static void scope_notify_cgroup_empty_event(Unit *u) {
+        Scope *s = SCOPE(u);
+        assert(u);
+
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
+
+        if (IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED, SCOPE_STOP_SIGTERM, SCOPE_STOP_SIGKILL))
+                scope_enter_dead(s, SCOPE_SUCCESS);
+}
+
+static void scope_sigchld_event(Unit *u, pid_t pid, int code, int status) {
+
+        /* If we get a SIGCHLD event for one of the processes we were
+           interested in, then we look for others to watch, under the
+           assumption that we'll sooner or later get a SIGCHLD for
+           them, as the original process we watched was probably the
+           parent of them, and they are hence now our children. */
+
+        unit_tidy_watch_pids(u, 0, 0);
+        unit_watch_all_pids(u);
+
+        /* If the PID set is empty now, then let's finish this off */
+        if (set_isempty(u->pids))
+                scope_notify_cgroup_empty_event(u);
+}
+
 static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
         Scope *s = SCOPE(userdata);
 
@@ -429,24 +454,30 @@ static int scope_dispatch_timer(sd_event_source *source, usec_t usec, void *user
         return 0;
 }
 
-static void scope_notify_cgroup_empty_event(Unit *u) {
-        Scope *s = SCOPE(u);
-        assert(u);
+int scope_abandon(Scope *s) {
+        assert(s);
 
-        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
+        if (!IN_SET(s->state, SCOPE_RUNNING, SCOPE_ABANDONED))
+                return -ESTALE;
 
-        switch (s->state) {
+        free(s->controller);
+        s->controller = NULL;
 
-        case SCOPE_RUNNING:
-        case SCOPE_STOP_SIGTERM:
-        case SCOPE_STOP_SIGKILL:
-                scope_enter_dead(s, SCOPE_SUCCESS);
+        /* The client is no longer watching the remaining processes,
+         * so let's step in here, under the assumption that the
+         * remaining processes will be sooner or later reassigned to
+         * us as parent. */
 
-                break;
+        unit_tidy_watch_pids(UNIT(s), 0, 0);
+        unit_watch_all_pids(UNIT(s));
 
-        default:
-                ;
-        }
+        /* If the PID set is empty now, then let's finish this off */
+        if (set_isempty(UNIT(s)->pids))
+                scope_notify_cgroup_empty_event(UNIT(s));
+        else
+                scope_set_state(s, SCOPE_ABANDONED);
+
+        return 0;
 }
 
 _pure_ static UnitActiveState scope_active_state(Unit *u) {
@@ -464,6 +495,7 @@ _pure_ static const char *scope_sub_state_to_string(Unit *u) {
 static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
         [SCOPE_DEAD] = "dead",
         [SCOPE_RUNNING] = "running",
+        [SCOPE_ABANDONED] = "abandoned",
         [SCOPE_STOP_SIGTERM] = "stop-sigterm",
         [SCOPE_STOP_SIGKILL] = "stop-sigkill",
         [SCOPE_FAILED] = "failed",
@@ -516,6 +548,8 @@ const UnitVTable scope_vtable = {
 
         .check_gc = scope_check_gc,
 
+        .sigchld_event = scope_sigchld_event,
+
         .reset_failed = scope_reset_failed,
 
         .notify_cgroup_empty = scope_notify_cgroup_empty_event,
index 014b50c765199f60d74a1dea6000f2fff2c935ad..6c591264226c6bf2448b2eca6bec1617552f2c05 100644 (file)
@@ -29,6 +29,7 @@ typedef struct Scope Scope;
 typedef enum ScopeState {
         SCOPE_DEAD,
         SCOPE_RUNNING,
+        SCOPE_ABANDONED,
         SCOPE_STOP_SIGTERM,
         SCOPE_STOP_SIGKILL,
         SCOPE_FAILED,
@@ -57,13 +58,13 @@ struct Scope {
 
         char *controller;
 
-        Set *pids;
-
         sd_event_source *timer_event_source;
 };
 
 extern const UnitVTable scope_vtable;
 
+int scope_abandon(Scope *s);
+
 const char* scope_state_to_string(ScopeState i) _const_;
 ScopeState scope_state_from_string(const char *s) _pure_;
 
index 0542eae023c18b731c675db3a8d76e3c42c69220..8c47e24bfb9673044fb830caff477c3cf0d6de4b 100644 (file)
@@ -1485,6 +1485,9 @@ static void service_set_state(Service *s, ServiceState state) {
                 s->control_command_id = _SERVICE_EXEC_COMMAND_INVALID;
         }
 
+        if (IN_SET(state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
+                unit_unwatch_all_pids(UNIT(s));
+
         if (!IN_SET(state,
                     SERVICE_START_PRE, SERVICE_START, SERVICE_START_POST,
                     SERVICE_RUNNING, SERVICE_RELOAD,
@@ -1587,6 +1590,9 @@ static int service_coldplug(Unit *u) {
                                 return r;
                 }
 
+                if (!IN_SET(s->deserialized_state, SERVICE_DEAD, SERVICE_FAILED, SERVICE_AUTO_RESTART))
+                        unit_watch_all_pids(UNIT(s));
+
                 if (IN_SET(s->deserialized_state, SERVICE_START_POST, SERVICE_RUNNING, SERVICE_RELOAD))
                         service_start_watchdog(s);
 
@@ -1895,6 +1901,7 @@ static void service_enter_stop_post(Service *s, ServiceResult f) {
                 s->result = f;
 
         service_unwatch_control_pid(s);
+        unit_watch_all_pids(UNIT(s));
 
         s->control_command = s->exec_command[SERVICE_EXEC_STOP_POST];
         if (s->control_command) {
@@ -1934,6 +1941,8 @@ static void service_enter_signal(Service *s, ServiceState state, ServiceResult f
         if (f != SERVICE_SUCCESS)
                 s->result = f;
 
+        unit_watch_all_pids(UNIT(s));
+
         r = unit_kill_context(
                         UNIT(s),
                         &s->kill_context,
@@ -1983,6 +1992,7 @@ static void service_enter_stop(Service *s, ServiceResult f) {
                 s->result = f;
 
         service_unwatch_control_pid(s);
+        unit_watch_all_pids(UNIT(s));
 
         s->control_command = s->exec_command[SERVICE_EXEC_STOP];
         if (s->control_command) {
@@ -2865,6 +2875,62 @@ fail:
         return 0;
 }
 
+static void service_notify_cgroup_empty_event(Unit *u) {
+        Service *s = SERVICE(u);
+
+        assert(u);
+
+        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
+
+        switch (s->state) {
+
+                /* Waiting for SIGCHLD is usually more interesting,
+                 * because it includes return codes/signals. Which is
+                 * why we ignore the cgroup events for most cases,
+                 * except when we don't know pid which to expect the
+                 * SIGCHLD for. */
+
+        case SERVICE_START:
+        case SERVICE_START_POST:
+                /* If we were hoping for the daemon to write its PID file,
+                 * we can give up now. */
+                if (s->pid_file_pathspec) {
+                        log_warning_unit(u->id,
+                                         "%s never wrote its PID file. Failing.", UNIT(s)->id);
+                        service_unwatch_pid_file(s);
+                        if (s->state == SERVICE_START)
+                                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
+                        else
+                                service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
+                }
+                break;
+
+        case SERVICE_RUNNING:
+                /* service_enter_running() will figure out what to do */
+                service_enter_running(s, SERVICE_SUCCESS);
+                break;
+
+        case SERVICE_STOP_SIGTERM:
+        case SERVICE_STOP_SIGKILL:
+
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
+                        service_enter_stop_post(s, SERVICE_SUCCESS);
+
+                break;
+
+        case SERVICE_STOP_POST:
+        case SERVICE_FINAL_SIGTERM:
+        case SERVICE_FINAL_SIGKILL:
+                if (main_pid_good(s) <= 0 && !control_pid_good(s))
+                        service_enter_dead(s, SERVICE_SUCCESS, true);
+
+                break;
+
+        default:
+                ;
+        }
+}
+
 static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
         Service *s = SERVICE(u);
         ServiceResult f;
@@ -3142,6 +3208,18 @@ static void service_sigchld_event(Unit *u, pid_t pid, int code, int status) {
 
         /* Notify clients about changed exit status */
         unit_add_to_dbus_queue(u);
+
+        /* We got one SIGCHLD for the service, let's watch all
+         * processes that are now running of the service, and watch
+         * that. Among the PIDs we then watch will be children
+         * reassigned to us, which hopefully allows us to identify
+         * when all children are gone */
+        unit_tidy_watch_pids(u, s->main_pid, s->control_pid);
+        unit_watch_all_pids(u);
+
+        /* If the PID set is empty now, then let's finish this off */
+        if (set_isempty(u->pids))
+                service_notify_cgroup_empty_event(u);
 }
 
 static int service_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata) {
@@ -3254,62 +3332,6 @@ static int service_dispatch_watchdog(sd_event_source *source, usec_t usec, void
         return 0;
 }
 
-static void service_notify_cgroup_empty_event(Unit *u) {
-        Service *s = SERVICE(u);
-
-        assert(u);
-
-        log_debug_unit(u->id, "%s: cgroup is empty", u->id);
-
-        switch (s->state) {
-
-                /* Waiting for SIGCHLD is usually more interesting,
-                 * because it includes return codes/signals. Which is
-                 * why we ignore the cgroup events for most cases,
-                 * except when we don't know pid which to expect the
-                 * SIGCHLD for. */
-
-        case SERVICE_START:
-        case SERVICE_START_POST:
-                /* If we were hoping for the daemon to write its PID file,
-                 * we can give up now. */
-                if (s->pid_file_pathspec) {
-                        log_warning_unit(u->id,
-                                         "%s never wrote its PID file. Failing.", UNIT(s)->id);
-                        service_unwatch_pid_file(s);
-                        if (s->state == SERVICE_START)
-                                service_enter_signal(s, SERVICE_FINAL_SIGTERM, SERVICE_FAILURE_RESOURCES);
-                        else
-                                service_enter_stop(s, SERVICE_FAILURE_RESOURCES);
-                }
-                break;
-
-        case SERVICE_RUNNING:
-                /* service_enter_running() will figure out what to do */
-                service_enter_running(s, SERVICE_SUCCESS);
-                break;
-
-        case SERVICE_STOP_SIGTERM:
-        case SERVICE_STOP_SIGKILL:
-
-                if (main_pid_good(s) <= 0 && !control_pid_good(s))
-                        service_enter_stop_post(s, SERVICE_SUCCESS);
-
-                break;
-
-        case SERVICE_STOP_POST:
-        case SERVICE_FINAL_SIGTERM:
-        case SERVICE_FINAL_SIGKILL:
-                if (main_pid_good(s) <= 0 && !control_pid_good(s))
-                        service_enter_dead(s, SERVICE_SUCCESS, true);
-
-                break;
-
-        default:
-                ;
-        }
-}
-
 static void service_notify_message(Unit *u, pid_t pid, char **tags) {
         Service *s = SERVICE(u);
         const char *e;
index 345521a711f0cb0c740c637cf3c6ebc764857a85..07eedcd6373f70be521698df5065a413ce2eb339 100644 (file)
@@ -482,6 +482,8 @@ void unit_free(Unit *u) {
 
         set_free_free(u->names);
 
+        unit_unwatch_all_pids(u);
+
         condition_free_list(u->conditions);
 
         unit_ref_unset(&u->slice);
@@ -1697,13 +1699,25 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
 }
 
 int unit_watch_pid(Unit *u, pid_t pid) {
+        int q, r;
+
         assert(u);
         assert(pid >= 1);
 
+        r = set_ensure_allocated(&u->pids, trivial_hash_func, trivial_compare_func);
+        if (r < 0)
+                return r;
+
         /* Watch a specific PID. We only support one unit watching
          * each PID for now. */
 
-        return hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        r = set_put(u->pids, LONG_TO_PTR(pid));
+
+        q = hashmap_put(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        if (q < 0)
+                return q;
+
+        return r;
 }
 
 void unit_unwatch_pid(Unit *u, pid_t pid) {
@@ -1711,6 +1725,102 @@ void unit_unwatch_pid(Unit *u, pid_t pid) {
         assert(pid >= 1);
 
         hashmap_remove_value(u->manager->watch_pids, LONG_TO_PTR(pid), u);
+        set_remove(u->pids, LONG_TO_PTR(pid));
+}
+
+static int watch_pids_in_path(Unit *u, const char *path) {
+        _cleanup_closedir_ DIR *d = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        int ret = 0, r;
+
+        assert(u);
+        assert(path);
+
+        /* Adds all PIDs from a specific cgroup path to the set of PIDs we watch. */
+
+        r = cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, path, &f);
+        if (r >= 0) {
+                pid_t pid;
+
+                while ((r = cg_read_pid(f, &pid)) > 0) {
+                        r = unit_watch_pid(u, pid);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        r = cg_enumerate_subgroups(SYSTEMD_CGROUP_CONTROLLER, path, &d);
+        if (r >= 0) {
+                char *fn;
+
+                while ((r = cg_read_subgroup(d, &fn)) > 0) {
+                        _cleanup_free_ char *p = NULL;
+
+                        p = strjoin(path, "/", fn, NULL);
+                        free(fn);
+
+                        if (!p)
+                                return -ENOMEM;
+
+                        r = watch_pids_in_path(u, p);
+                        if (r < 0 && ret >= 0)
+                                ret = r;
+                }
+                if (r < 0 && ret >= 0)
+                        ret = r;
+
+        } else if (ret >= 0)
+                ret = r;
+
+        return ret;
+}
+
+
+int unit_watch_all_pids(Unit *u) {
+        assert(u);
+
+        if (!u->cgroup_path)
+                return -ENOENT;
+
+        /* Adds all PIDs from our cgroup to the set of PIDs we watch */
+
+        return watch_pids_in_path(u, u->cgroup_path);
+}
+
+void unit_unwatch_all_pids(Unit *u) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        SET_FOREACH(e, u->pids, i)
+                hashmap_remove_value(u->manager->watch_pids, e, u);
+
+        set_free(u->pids);
+        u->pids = NULL;
+}
+
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2) {
+        Iterator i;
+        void *e;
+
+        assert(u);
+
+        /* Cleans dead PIDs from our list */
+
+        SET_FOREACH(e, u->pids, i) {
+                pid_t pid = PTR_TO_LONG(e);
+
+                if (pid == except1 || pid == except2)
+                        continue;
+
+                if (kill(pid, 0) < 0 && errno == ESRCH)
+                        set_remove(u->pids, e);
+        }
 }
 
 bool unit_job_is_applicable(Unit *u, JobType j) {
@@ -2979,17 +3089,7 @@ int unit_kill_context(
                                 log_warning_unit(u->id, "Failed to kill control group: %s", strerror(-r));
                 } else if (r > 0) {
 
-                        /* FIXME: Now, we don't actually wait for any
-                         * of the processes that are neither control
-                         * nor main process. We should wait for them
-                         * of course, but that's hard since the cgroup
-                         * notification logic is so unreliable. It is
-                         * not available at all in containers, and on
-                         * the host it gets confused by
-                         * subgroups. Hence, for now, let's not wait
-                         * for these processes -- but when the kernel
-                         * gets fixed we really should correct
-                         * that. */
+                        wait_for_exit = true;
 
                         if (c->send_sighup && !sigkill) {
                                 set_free(pid_set);
index c686aeccefda52913f5dde6d3d09ec9fd406ea0c..c104a8a9d574af1169b70694a48756ec60dceb42 100644 (file)
@@ -203,6 +203,11 @@ struct Unit {
         /* CGroup realize members queue */
         LIST_FIELDS(Unit, cgroup_queue);
 
+        /* PIDs we keep an eye on. Note that a unit might have many
+         * more, but these are the ones we care enough about to
+         * process SIGCHLD for */
+        Set *pids;
+
         /* Used during GC sweeps */
         unsigned gc_marker;
 
@@ -538,6 +543,10 @@ void unit_notify(Unit *u, UnitActiveState os, UnitActiveState ns, bool reload_su
 
 int unit_watch_pid(Unit *u, pid_t pid);
 void unit_unwatch_pid(Unit *u, pid_t pid);
+int unit_watch_all_pids(Unit *u);
+void unit_unwatch_all_pids(Unit *u);
+
+void unit_tidy_watch_pids(Unit *u, pid_t except1, pid_t except2);
 
 int unit_watch_bus_name(Unit *u, const char *name);
 void unit_unwatch_bus_name(Unit *u, const char *name);