cgroup-util: rework cg_get_keyed_attribute() a bit

author Lennart Poettering <lennart@poettering.net>

Fri, 9 Feb 2018 17:35:52 +0000 (18:35 +0100)

committer Sven Eden <yamakuzure@gmx.net>

Wed, 30 May 2018 05:59:09 +0000 (07:59 +0200)
author Lennart Poettering <lennart@poettering.net>
Fri, 9 Feb 2018 17:35:52 +0000 (18:35 +0100)
committer Sven Eden <yamakuzure@gmx.net>
Wed, 30 May 2018 05:59:09 +0000 (07:59 +0200)
diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c

index 7b6f66661b9cc110f5fe1e8b280667e011325b83..9e5c540c6fc9fb8dc1a39e9b6d045a5dab92f5c0 100644 (file)
--- a/src/basic/cgroup-util.c
+++ b/src/basic/cgroup-util.c
@@ -1076,11 +1076,13 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
                                  continue;
  
                          *e = 0;
-                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state)
+                        FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
                                  if (k == cs && memcmp(word, controller_str, cs) == 0) {
                                          found = true;
                                          break;
                                  }
+                        }
+
                          if (!found)
                                  continue;
                  }
@@ -2136,46 +2138,95 @@ int cg_get_attribute(const char *controller, const char *path, const char *attri
  }
  
  #if 0 /// UNNEEDED by elogind
-int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values) {
-        _cleanup_free_ char *filename = NULL, *content = NULL;
-        char *line, *p;
-        int i, r;
+int cg_get_keyed_attribute(
+                const char *controller,
+                const char *path,
+                const char *attribute,
+                char **keys,
+                char **ret_values) {
+
+        _cleanup_free_ char *filename = NULL, *contents = NULL;
+        _cleanup_fclose_ FILE *f = NULL;
+        const char *p;
+        size_t n, i;
+        char **v;
+        int r;
  
-        for (i = 0; keys[i]; i++)
-                values[i] = NULL;
+        /* Reads one or more fields of a cgroupsv2 keyed attribute file. The 'keys' parameter should be an strv with
+         * all keys to retrieve. The 'ret_values' parameter should be passed as a string array with the same number of
+         * entries as 'keys'. On success each entry will be set to the value of the matching key.
+         *
+         * If the attribute file doesn't exist at all, returns -ENOENT; if any key is not found, returns -ENXIO. */
  
          r = cg_get_path(controller, path, attribute, &filename);
          if (r < 0)
                  return r;
  
-        r = read_full_file(filename, &content, NULL);
+        r = read_full_file(filename, &contents, NULL);
          if (r < 0)
                  return r;
  
-        p = content;
-        while ((line = strsep(&p, "\n"))) {
-                char *key;
+        n = strv_length(keys);
+        if (n == 0) /* No keys to retrieve? That's easy, we are done then */
+                return 0;
  
-                key = strsep(&line, " ");
+        /* Let's build this up in a temporary array for now in order not to clobber the return parameter on failure */
+        v = newa0(char*, n);
  
-                for (i = 0; keys[i]; i++) {
-                        if (streq(key, keys[i])) {
-                                values[i] = strdup(line);
-                                break;
+        for (p = contents; *p;) {
+                const char *w = NULL;
+                size_t n_done = 0;
+
+                for (i = 0; i < n; i++) {
+                        if (v[i])
+                                n_done ++;
+                        else {
+                                w = first_word(p, keys[i]);
+                                if (w)
+                                        break;
                          }
                  }
-        }
  
-        for (i = 0; keys[i]; i++) {
-                if (!values[i]) {
-                        for (i = 0; keys[i]; i++) {
-                                values[i] = mfree(values[i]);
+                if (w) {
+                        char *c;
+                        size_t l;
+
+                        l = strcspn(w, NEWLINE);
+                        c = strndup(w, l);
+                        if (!c) {
+                                r = -ENOMEM;
+                                goto fail;
                          }
-                        return -ENOENT;
+
+                        v[i] = c;
+                        n_done++;
+
+                        if (n_done >= n)
+                                goto done;
+
+                        p = w + l;
+                } else {
+                        if (n_done >= n)
+                                goto done;
+
+                        p += strcspn(p, NEWLINE);
                  }
+
+                p += strspn(p, NEWLINE);
          }
  
+        r = -ENXIO;
+
+fail:
+        for (i = 0; i < n; i++)
+                free(v[i]);
+
+        return r;
+
+done:
+        memcpy(ret_values, v, sizeof(char*) * n);
          return 0;
+
  }
  
  int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h

index 455921d37d5af8bfa307a8cc98aee435972e2684..148d298de66dc2ce98d55fd58a697fc5a8fba43f 100644 (file)
--- a/src/basic/cgroup-util.h
+++ b/src/basic/cgroup-util.h
@@ -35,8 +35,8 @@
  #if 0 /// elogind has them set through config.h
  #define SYSTEMD_CGROUP_CONTROLLER_LEGACY "name=elogind"
  #define SYSTEMD_CGROUP_CONTROLLER_HYBRID "name=unified"
-#define SYSTEMD_CGROUP_CONTROLLER "_elogind"
  #endif // 0
+#define SYSTEMD_CGROUP_CONTROLLER "_elogind"
  
  /* An enum of well known cgroup controllers */
  typedef enum CGroupController {
@@ -195,7 +195,7 @@ int cg_create_and_attach(const char *controller, const char *path, pid_t pid);
  int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value);
  int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret);
  #if 0 /// UNNEEDED by elogind
-int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, const char **keys, char **values);
+int cg_get_keyed_attribute(const char *controller, const char *path, const char *attribute, char **keys, char **values);
  
  int cg_set_access(const char *controller, const char *path, uid_t uid, gid_t gid);
  
diff --git a/src/core/cgroup.c b/src/core/cgroup.c

index dc055669513536de8752c69c30b9c2a9891a4a9b..51f41e03b5aeace52e6828ab8b14f1caf6d6d425 100644 (file)
--- a/src/core/cgroup.c
+++ b/src/core/cgroup.c
@@ -24,7 +24,6 @@
  #include "alloc-util.h"
  //#include "blockdev-util.h"
  //#include "bpf-firewall.h"
-//#include "bus-error.h"
  #include "cgroup-util.h"
  #include "cgroup.h"
  #include "fd-util.h"
@@ -694,14 +693,20 @@ static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_
  }
  
  static void cgroup_apply_firewall(Unit *u) {
+        int r;
+
          assert(u);
  
-        /* Best-effort: let's apply IP firewalling and/or accounting if that's enabled */
+        if (u->type == UNIT_SLICE) /* Skip this for slice units, they are inner cgroup nodes, and since bpf/cgroup is
+                                    * not recursive we don't ever touch the bpf on them */
+                return;
  
-        if (bpf_firewall_compile(u) < 0)
+        r = bpf_firewall_compile(u);
+        if (r < 0)
                  return;
  
          (void) bpf_firewall_install(u);
+        return;
  }
  
  static void cgroup_context_apply(
@@ -1120,7 +1125,14 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
           *
           * Note that on the unified hierarchy it is safe to delegate controllers to unprivileged services. */
  
-        if (!unit_cgroup_delegate(u))
+        if (u->type == UNIT_SLICE)
+                return 0;
+
+        c = unit_get_cgroup_context(u);
+        if (!c)
+                return 0;
+
+        if (!c->delegate)
                  return 0;
  
          if (cg_all_unified() <= 0) {
@@ -1131,7 +1143,6 @@ CGroupMask unit_get_delegate_mask(Unit *u) {
                          return 0;
          }
  
-        assert_se(c = unit_get_cgroup_context(u));
          return c->delegate_controllers;
  }
  
@@ -1222,6 +1233,11 @@ bool unit_get_needs_bpf(Unit *u) {
          Unit *p;
          assert(u);
  
+        /* We never attach BPF to slice units, as they are inner cgroup nodes and cgroup/BPF is not recursive at the
+         * moment. */
+        if (u->type == UNIT_SLICE)
+                return false;
+
          c = unit_get_cgroup_context(u);
          if (!c)
                  return false;
@@ -1294,12 +1310,13 @@ void unit_update_cgroup_members_masks(Unit *u) {
          }
  }
  
-const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
+static const char *migrate_callback(CGroupMask mask, void *userdata) {
+        Unit *u = userdata;
  
-        /* Returns the realized cgroup path of the specified unit where all specified controllers are available. */
+        assert(mask != 0);
+        assert(u);
  
          while (u) {
-
                  if (u->cgroup_path &&
                      u->cgroup_realized &&
                      (u->cgroup_realized_mask & mask) == mask)
@@ -1311,10 +1328,6 @@ const char *unit_get_realized_cgroup_path(Unit *u, CGroupMask mask) {
          return NULL;
  }
  
-static const char *migrate_callback(CGroupMask mask, void *userdata) {
-        return unit_get_realized_cgroup_path(userdata, mask);
-}
-
  char *unit_default_cgroup_path(Unit *u) {
          _cleanup_free_ char *escaped = NULL, *slice = NULL;
          int r;
@@ -1484,7 +1497,7 @@ static int unit_create_cgroup(
          u->cgroup_enabled_mask = enable_mask;
          u->cgroup_bpf_state = needs_bpf ? UNIT_CGROUP_BPF_ON : UNIT_CGROUP_BPF_OFF;
  
-        if (u->type != UNIT_SLICE && !unit_cgroup_delegate(u)) {
+        if (u->type != UNIT_SLICE && !c->delegate) {
  
                  /* Then, possibly move things over, but not if
                   * subgroups may contain processes, which is the case
@@ -1497,142 +1510,19 @@ static int unit_create_cgroup(
          return 0;
  }
  
-static int unit_attach_pid_to_cgroup_via_bus(Unit *u, pid_t pid, const char *suffix_path) {
-        _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
-        char *pp;
+int unit_attach_pids_to_cgroup(Unit *u) {
          int r;
-
          assert(u);
  
-        if (MANAGER_IS_SYSTEM(u->manager))
-                return -EINVAL;
-
-        if (!u->manager->system_bus)
-                return -EIO;
-
-        if (!u->cgroup_path)
-                return -EINVAL;
-
-        /* Determine this unit's cgroup path relative to our cgroup root */
-        pp = path_startswith(u->cgroup_path, u->manager->cgroup_root);
-        if (!pp)
-                return -EINVAL;
-
-        pp = strjoina("/", pp, suffix_path);
-        path_kill_slashes(pp);
-
-        r = sd_bus_call_method(u->manager->system_bus,
-                               "org.freedesktop.systemd1",
-                               "/org/freedesktop/systemd1",
-                               "org.freedesktop.systemd1.Manager",
-                               "AttachProcessesToUnit",
-                               &error, NULL,
-                               "ssau",
-                               NULL /* empty unit name means client's unit, i.e. us */, pp, 1, (uint32_t) pid);
-        if (r < 0)
-                return log_unit_debug_errno(u, r, "Failed to attach unit process " PID_FMT " via the bus: %s", pid, bus_error_message(&error, r));
-
-        return 0;
-}
-
-int unit_attach_pids_to_cgroup(Unit *u, Set *pids, const char *suffix_path) {
-        CGroupMask delegated_mask;
-        const char *p;
-        Iterator i;
-        void *pidp;
-        int r, q;
-
-        assert(u);
-
-        if (!UNIT_HAS_CGROUP_CONTEXT(u))
-                return -EINVAL;
-
-        if (set_isempty(pids))
-                return 0;
-
          r = unit_realize_cgroup(u);
          if (r < 0)
                  return r;
  
-        if (isempty(suffix_path))
-                p = u->cgroup_path;
-        else
-                p = strjoina(u->cgroup_path, "/", suffix_path);
-
-        delegated_mask = unit_get_delegate_mask(u);
-
-        r = 0;
-        SET_FOREACH(pidp, pids, i) {
-                pid_t pid = PTR_TO_PID(pidp);
-                CGroupController c;
-
-                /* First, attach the PID to the main cgroup hierarchy */
-                q = cg_attach(SYSTEMD_CGROUP_CONTROLLER, p, pid);
-                if (q < 0) {
-                        log_unit_debug_errno(u, q, "Couldn't move process " PID_FMT " to requested cgroup '%s': %m", pid, p);
-
-                        if (MANAGER_IS_USER(u->manager) && IN_SET(q, -EPERM, -EACCES)) {
-                                int z;
-
-                                /* If we are in a user instance, and we can't move the process ourselves due to
-                                 * permission problems, let's ask the system instance about it instead. Since it's more
-                                 * privileged it might be able to move the process across the leaves of a subtree who's
-                                 * top node is not owned by us. */
-
-                                z = unit_attach_pid_to_cgroup_via_bus(u, pid, suffix_path);
-                                if (z < 0)
-                                        log_unit_debug_errno(u, z, "Couldn't move process " PID_FMT " to requested cgroup '%s' via the system bus either: %m", pid, p);
-                                else
-                                        continue; /* When the bus thing worked via the bus we are fully done for this PID. */
-                        }
-
-                        if (r >= 0)
-                                r = q; /* Remember first error */
-
-                        continue;
-                }
-
-                q = cg_all_unified();
-                if (q < 0)
-                        return q;
-                if (q > 0)
-                        continue;
-
-                /* In the legacy hierarchy, attach the process to the request cgroup if possible, and if not to the
-                 * innermost realized one */
-
-                for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
-                        CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
-                        const char *realized;
-
-                        if (!(u->manager->cgroup_supported & bit))
-                                continue;
-
-                        /* If this controller is delegated and realized, honour the caller's request for the cgroup suffix. */
-                        if (delegated_mask & u->cgroup_realized_mask & bit) {
-                                q = cg_attach(cgroup_controller_to_string(c), p, pid);
-                                if (q >= 0)
-                                        continue; /* Success! */
-
-                                log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to requested cgroup %s in controller %s, falling back to unit's cgroup: %m",
-                                                     pid, p, cgroup_controller_to_string(c));
-                        }
-
-                        /* So this controller is either not delegate or realized, or something else weird happened. In
-                         * that case let's attach the PID at least to the closest cgroup up the tree that is
-                         * realized. */
-                        realized = unit_get_realized_cgroup_path(u, bit);
-                        if (!realized)
-                                continue; /* Not even realized in the root slice? Then let's not bother */
-
-                        q = cg_attach(cgroup_controller_to_string(c), realized, pid);
-                        if (q < 0)
-                                log_unit_debug_errno(u, q, "Failed to attach PID " PID_FMT " to realized cgroup %s in controller %s, ignoring: %m",
-                                                     pid, realized, cgroup_controller_to_string(c));
-                }
-        }
+        r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u);
+        if (r < 0)
+                return r;
  
-        return r;
+        return 0;
  }
  
  static void cgroup_xattr_apply(Unit *u) {
@@ -2527,16 +2417,17 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) {
          if (r < 0)
                  return r;
          if (r > 0) {
-                const char *keys[] = { "usage_usec", NULL };
                  _cleanup_free_ char *val = NULL;
                  uint64_t us;
  
                  if ((u->cgroup_realized_mask & CGROUP_MASK_CPU) == 0)
                          return -ENODATA;
  
-                r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", keys, &val);
+                r = cg_get_keyed_attribute("cpu", u->cgroup_path, "cpu.stat", STRV_MAKE("usage_usec"), &val);
                  if (r < 0)
                          return r;
+                if (IN_SET(r, -ENOENT, -ENXIO))
+                        return -ENODATA;
  
                  r = safe_atou64(val, &us);
                  if (r < 0)
@@ -2612,6 +2503,13 @@ int unit_get_ip_accounting(
          assert(metric < _CGROUP_IP_ACCOUNTING_METRIC_MAX);
          assert(ret);
  
+        /* IP accounting is currently not recursive, and hence we refuse to return any data for slice nodes. Slices are
+         * inner cgroup nodes and hence have no processes directly attached, hence their counters would be zero
+         * anyway. And if we block this now we can later open this up, if the kernel learns recursive BPF cgroup
+         * filters. */
+        if (u->type == UNIT_SLICE)
+                return -ENODATA;
+
          if (!UNIT_CGROUP_BOOL(u, ip_accounting))
                  return -ENODATA;
  
@@ -2725,21 +2623,6 @@ void unit_invalidate_cgroup_bpf(Unit *u) {
          }
  }
  
-bool unit_cgroup_delegate(Unit *u) {
-        CGroupContext *c;
-
-        assert(u);
-
-        if (!UNIT_VTABLE(u)->can_delegate)
-                return false;
-
-        c = unit_get_cgroup_context(u);
-        if (!c)
-                return false;
-
-        return c->delegate;
-}
-
  void manager_invalidate_startup_units(Manager *m) {
          Iterator i;
          Unit *u;
author	Lennart Poettering <lennart@poettering.net>
	Fri, 9 Feb 2018 17:35:52 +0000 (18:35 +0100)
committer	Sven Eden <yamakuzure@gmx.net>
	Wed, 30 May 2018 05:59:09 +0000 (07:59 +0200)
src/basic/cgroup-util.c		patch \| blob \| history
src/basic/cgroup-util.h		patch \| blob \| history
src/core/cgroup.c		patch \| blob \| history