From: Lennart Poettering
Date: Tue, 6 Feb 2018 10:57:35 +0000 (+0100)
Subject: cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and...
X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=801b1cd08dd22c45f14662b8cc7a16e36fbba364;p=elogind.git

cgroup: add a new "can_delegate" flag to the unit vtable, and set it for scope and service units only

So far we allowed delegation for all units with cgroup backing, except for slices. Let's make this a bit stricter for now, and only allow it in service and scope units.

Let's also add a generic accessor unit_cgroup_delegate() for checking whether a unit has delegation turned on; it checks the new bool first.

Also, when creating transient units, let's explicitly refuse turning on delegation for unit types that don't support it. This is mostly cosmetic, as we wouldn't act on the delegation request anyway, but it is certainly helpful for debugging.
---

diff --git a/src/core/cgroup.c b/src/core/cgroup.c
index 7900ac3f2..9361dd286 100644
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -1125,14 +1125,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
          *
          * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
 
-        if (u->type == UNIT_SLICE)
-                return 0;
-
-        c = unit_get_cgroup_context(u);
-        if (!c)
-                return 0;
-
-        if (!c->delegate)
+        if (!unit_cgroup_delegate(u))
                 return 0;
 
         if (cg_all_unified() <= 0) {
@@ -1143,6 +1136,7 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
                         return 0;
         }
 
+        assert_se(c = unit_get_cgroup_context(u));
         return c->delegate_controllers;
 }
 
@@ -1497,7 +1491,7 @@ static int unit_create_cgroup(
         u->cgroup_enabled_mask = enable_mask;
         u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
 
-        if (u->type != UNIT_SLICE && !c->delegate) {
+        if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
 
                 /* Then, possibly move things over, but not if
                  * subgroups may contain processes, which is the case
@@ -1864,6 +1858,31 @@ static int unit_watch_pids_in_path(Unit *u, const char *path) {
         return ret;
 }
 
+int unit_synthesize_cgroup_empty_event(Unit *u) {
+        int r;
+
+        assert(u);
+
+        /* Enqueue a synthetic cgroup empty event if this unit doesn't watch any PIDs anymore. This is compatibility
+         * support for non-unified systems where notifications aren't reliable, and hence need to take whatever we can
+         * get as notification source as soon as we stopped having any useful PIDs to watch for. */
+
+        if (!u->cgroup_path)
+                return -ENOENT;
+
+        r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
+        if (r < 0)
+                return r;
+        if (r > 0) /* On unified we have reliable notifications, and don't need this */
+                return 0;
+
+        if (!set_isempty(u->pids))
+                return 0;
+
+        unit_add_to_cgroup_empty_queue(u);
+        return 0;
+}
+
 int unit_watch_all_pids(Unit *u) {
         int r;
 
@@ -2234,40 +2253,46 @@ Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
 
 Unit *manager_get_unit_by_pid_cgroup(Manager *m, pid_t pid) {
         _cleanup_free_ char *cgroup = NULL;
-        int r;
 
         assert(m);
 
-        if (pid <= 0)
+        if (!pid_is_valid(pid))
                 return NULL;
 
-        r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
-        if (r < 0)
+        if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup) < 0)
                 return NULL;
 
         return manager_get_unit_by_cgroup(m, cgroup);
 }
 
 Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
-        Unit *u;
+        Unit *u, **array;
 
         assert(m);
 
-        if (pid <= 0)
+        /* Note that a process might be owned by multiple units, we return only one here, which is good enough for most
+         * cases, though not strictly correct. We prefer the one reported by cgroup membership, as that's the most
+         * relevant one as children of the process will be assigned to that one, too, before all else. */
+
+        if (!pid_is_valid(pid))
                 return NULL;
 
-        if (pid == 1)
+        if (pid == getpid_cached())
                 return hashmap_get(m->units, SPECIAL_INIT_SCOPE);
 
-        u = hashmap_get(m->watch_pids1, PID_TO_PTR(pid));
+        u = manager_get_unit_by_pid_cgroup(m, pid);
         if (u)
                 return u;
 
-        u = hashmap_get(m->watch_pids2, PID_TO_PTR(pid));
+        u = hashmap_get(m->watch_pids, PID_TO_PTR(pid));
         if (u)
                 return u;
 
-        return manager_get_unit_by_pid_cgroup(m, pid);
+        array = hashmap_get(m->watch_pids, PID_TO_PTR(-pid));
+        if (array)
+                return array[0];
+
+        return NULL;
 }
 #endif // 0
 
@@ -2591,6 +2616,21 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
         }
 }
 
+bool unit_cgroup_delegate(Unit *u) {
+        CGroupContext *c;
+
+        assert(u);
+
+        if (!UNIT_VTABLE(u)->can_delegate)
+                return false;
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return false;
+
+        return c->delegate;
+}
+
 void manager_invalidate_startup_units(Manager *m) {
         Iterator i;
         Unit *u;
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index 3724b8ba1..9fc4a534d 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -197,6 +197,8 @@ Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
 int unit_search_main_pid(Unit *u, pid_t *ret);
 int unit_watch_all_pids(Unit *u);
 
+int unit_synthesize_cgroup_empty_event(Unit *u);
+
 int unit_get_memory_current(Unit *u, uint64_t *ret);
 int unit_get_tasks_current(Unit *u, uint64_t *ret);
 int unit_get_cpu_usage(Unit *u, nsec_t *ret);
@@ -225,3 +227,5 @@ void manager_invalidate_startup_units(Manager *m);
 const char* cgroup_device_policy_to_string(CGroupDevicePolicy i) _const_;
 CGroupDevicePolicy cgroup_device_policy_from_string(const char *s) _pure_;
 #endif // 0
+
+bool unit_cgroup_delegate(Unit *u);
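
For context, the part of this commit that actually sets the new flag per unit type is not carried in the elogind diff above (the service and scope vtables live in systemd proper). The following is a minimal, self-contained C sketch of how the vtable flag and the unit_cgroup_delegate() accessor fit together. The UnitVTable, Unit and CGroupContext structs below are heavily simplified stand-ins for the real declarations in src/core/unit.h and src/core/cgroup.h; everything except the can_delegate field and the accessor logic is illustrative rather than taken from the commit.

    #include <stdbool.h>

    /* Heavily simplified stand-ins for the real types; only the fields that
     * matter for delegation are shown. */
    typedef struct CGroupContext {
            bool delegate;                 /* the existing Delegate= switch */
    } CGroupContext;

    typedef struct UnitVTable {
            const char *name;              /* illustrative only */
            bool can_delegate;             /* the new per-type flag */
    } UnitVTable;

    typedef struct Unit {
            const UnitVTable *vtable;      /* stand-in for UNIT_VTABLE(u) */
            CGroupContext *cgroup_context; /* stand-in for unit_get_cgroup_context(u) */
    } Unit;

    /* Only service and scope units opt in; slices (and everything else)
     * keep delegation off regardless of Delegate=. */
    static const UnitVTable service_vtable = { .name = "service", .can_delegate = true  };
    static const UnitVTable scope_vtable   = { .name = "scope",   .can_delegate = true  };
    static const UnitVTable slice_vtable   = { .name = "slice",   .can_delegate = false };

    /* Mirrors the accessor added in cgroup.c above: the per-type flag is
     * consulted before the per-unit cgroup context. */
    static bool unit_cgroup_delegate(const Unit *u) {
            if (!u->vtable->can_delegate)
                    return false;
            if (!u->cgroup_context)
                    return false;
            return u->cgroup_context->delegate;
    }

    int main(void) {
            CGroupContext ctx = { .delegate = true };
            Unit service = { .vtable = &service_vtable, .cgroup_context = &ctx };
            Unit scope   = { .vtable = &scope_vtable,   .cgroup_context = &ctx };
            Unit slice   = { .vtable = &slice_vtable,   .cgroup_context = &ctx };

            /* Delegate=yes is honoured for the service and the scope, but not for the slice. */
            return (unit_cgroup_delegate(&service) &&
                    unit_cgroup_delegate(&scope) &&
                    !unit_cgroup_delegate(&slice)) ? 0 : 1;
    }

The design point the sketch illustrates is that the per-type capability check comes first: a Delegate=yes setting in a unit's cgroup context is simply ignored (and, for transient units, now explicitly refused) unless the unit type's vtable opts in.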