From d4ce8390c5aa345602b51099e9d3ed3b74d3ec4d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 15 Aug 2016 18:13:36 -0400 Subject: [PATCH] core: use the unified hierarchy for the elogind cgroup controller hierarchy Currently, elogind uses either the legacy hierarchies or the unified hierarchy. When the legacy hierarchies are used, elogind uses a named legacy hierarchy mounted on /sys/fs/cgroup/elogind without any kernel controllers for process management. Due to the shortcomings in the legacy hierarchy, this involves a lot of workarounds and complexities. Because the unified hierarchy can be mounted and used in parallel to legacy hierarchies, there's no reason for elogind to use a legacy hierarchy for management even if the kernel resource controllers need to be mounted on legacy hierarchies. It can simply mount the unified hierarchy under /sys/fs/cgroup/elogind and use it without affecting other legacy hierarchies. This disables a significant amount of fragile workaround logics and would allow using features which depend on the unified hierarchy membership such bpf cgroup v2 membership test. In time, this would also allow deleting the said complexities. This patch updates elogind so that it prefers the unified hierarchy for the elogind cgroup controller hierarchy when legacy hierarchies are used for kernel resource controllers. * cg_unified(@controller) is introduced which tests whether the specific controller in on unified hierarchy and used to choose the unified hierarchy code path for process and service management when available. Kernel controller specific operations remain gated by cg_all_unified(). * "elogind.legacy_elogind_cgroup_controller" kernel argument can be used to force the use of legacy hierarchy for elogind cgroup controller. * nspawn: By default nspawn uses the same hierarchies as the host. If UNIFIED_CGROUP_HIERARCHY is set to 1, unified hierarchy is used for all. If 0, legacy for all. * nspawn: arg_unified_cgroup_hierarchy is made an enum and now encodes one of three options - legacy, only elogind controller on unified, and unified. The value is passed into mount setup functions and controls cgroup configuration. * nspawn: Interpretation of SYSTEMD_CGROUP_CONTROLLER to the actual mount option is moved to mount_legacy_cgroup_hierarchy() so that it can take an appropriate action depending on the configuration of the host. v2: - CGroupUnified enum replaces open coded integer values to indicate the cgroup operation mode. - Various style updates. v3: Fixed a bug in detect_unified_cgroup_hierarchy() introduced during v2. v4: Restored legacy container on unified host support and fixed another bug in detect_unified_cgroup_hierarchy(). --- src/basic/cgroup-util.c | 132 ++++++++++++++++++++++++++++++---------- src/basic/cgroup-util.h | 11 +++- src/core/cgroup.c | 34 ++++++----- src/core/mount-setup.c | 5 +- 4 files changed, 132 insertions(+), 50 deletions(-) diff --git a/src/basic/cgroup-util.c b/src/basic/cgroup-util.c index 7f417b8ce..604ce2b31 100644 --- a/src/basic/cgroup-util.c +++ b/src/basic/cgroup-util.c @@ -629,7 +629,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (!cg_controller_is_valid(controller)) return -EINVAL; - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; @@ -657,7 +657,7 @@ static int controller_is_accessible(const char *controller) { if (!cg_controller_is_valid(controller)) return -EINVAL; - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (unified > 0) { @@ -876,7 +876,7 @@ int cg_set_task_access( if (r < 0) return r; - unified = cg_unified(); + unified = cg_unified(controller); if (unified < 0) return unified; if (unified) @@ -901,18 +901,17 @@ int cg_pid_get_path(const char *controller, pid_t pid, char **path) { assert(path); assert(pid >= 0); - unified = cg_unified(); + if (controller) { + if (!cg_controller_is_valid(controller)) + return -EINVAL; + } else + controller = SYSTEMD_CGROUP_CONTROLLER; + + unified = cg_unified(controller); if (unified < 0) return unified; - if (unified == 0) { - if (controller) { - if (!cg_controller_is_valid(controller)) - return -EINVAL; - } else - controller = SYSTEMD_CGROUP_CONTROLLER; - + if (unified == 0) cs = strlen(controller); - } fs = procfs_file_alloca(pid, "cgroup"); log_debug_elogind("Searching for PID %u in \"%s\" (controller \"%s\")", @@ -980,7 +979,7 @@ int cg_install_release_agent(const char *controller, const char *agent) { assert(agent); - unified = cg_unified(); + unified = cg_unified(controller); if (unified < 0) return unified; if (unified) /* doesn't apply to unified hierarchy */ @@ -1031,7 +1030,7 @@ int cg_uninstall_release_agent(const char *controller) { _cleanup_free_ char *fs = NULL; int r, unified; - unified = cg_unified(); + unified = cg_unified(controller); if (unified < 0) return unified; if (unified) /* Doesn't apply to unified hierarchy */ @@ -1087,7 +1086,7 @@ int cg_is_empty_recursive(const char *controller, const char *path) { if (controller && (isempty(path) || path_equal(path, "/"))) return false; - unified = cg_unified(); + unified = cg_unified(controller); if (unified < 0) return unified; @@ -2011,7 +2010,7 @@ int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path return r; /* If we are in the unified hierarchy, we are done now */ - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (unified > 0) @@ -2041,7 +2040,7 @@ int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_m if (r < 0) return r; - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (unified > 0) @@ -2093,7 +2092,7 @@ int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to return r; } - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (unified > 0) @@ -2126,7 +2125,7 @@ int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) if (r < 0) return r; - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (unified > 0) @@ -2153,7 +2152,7 @@ int cg_mask_supported(CGroupMask *ret) { * includes controllers we can make sense of and that are * actually accessible. */ - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (unified > 0) { @@ -2276,9 +2275,10 @@ int cg_kernel_controllers(Set *controllers) { } #endif // 0 -static thread_local int unified_cache = -1; +static thread_local CGroupUnified unified_cache = CGROUP_UNIFIED_UNKNOWN; + +static int cg_update_unified(void) { -int cg_unified(void) { struct statfs fs; /* Checks if we support the unified hierarchy. Returns an @@ -2286,34 +2286,57 @@ int cg_unified(void) { * have any other trouble determining if the unified hierarchy * is supported. */ - if (unified_cache >= 0) - return unified_cache; + if (unified_cache >= CGROUP_UNIFIED_NONE) + return 0; if (statfs("/sys/fs/cgroup/", &fs) < 0) return -errno; #if 0 /// UNNEEDED by elogind if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC)) - unified_cache = true; - else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) + unified_cache = CGROUP_UNIFIED_ALL; + else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { #else /* elogind can not support the unified hierarchy as a controller, * so always assume a classical hierarchy. * If, and only *if*, someone really wants to substitute systemd-login * in an environment managed by systemd with elogind, we might have to * add such a support. */ - if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) + if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC)) { #endif // 0 - unified_cache = false; - else + if (statfs("/sys/fs/cgroup/systemd/", &fs) < 0) + return -errno; + + unified_cache = F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC) ? + CGROUP_UNIFIED_SYSTEMD : CGROUP_UNIFIED_NONE; + } else return -ENOMEDIUM; - return unified_cache; + return 0; +} + +int cg_unified(const char *controller) { + + int r; + + r = cg_update_unified(); + if (r < 0) + return r; + + if (streq_ptr(controller, SYSTEMD_CGROUP_CONTROLLER)) + return unified_cache >= CGROUP_UNIFIED_SYSTEMD; + else + return unified_cache >= CGROUP_UNIFIED_ALL; +} + +int cg_all_unified(void) { + + return cg_unified(NULL); } #if 0 /// UNNEEDED by elogind void cg_unified_flush(void) { - unified_cache = -1; + unified_cache = CGROUP_UNIFIED_UNKNOWN; } int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { @@ -2326,7 +2349,7 @@ int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) { if (supported == 0) return 0; - unified = cg_unified(); + unified = cg_all_unified(); if (unified < 0) return unified; if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */ @@ -2365,7 +2388,7 @@ bool cg_is_unified_wanted(void) { /* If the hierarchy is already mounted, then follow whatever * was chosen for it. */ - unified = cg_unified(); + unified = cg_all_unified(); if (unified >= 0) return unified; @@ -2397,6 +2420,49 @@ bool cg_is_legacy_wanted(void) { #else bool cg_is_legacy_wanted(void) { return true; + +bool cg_is_unified_systemd_controller_wanted(void) { + static thread_local int wanted = -1; + int r, unified; + + /* If the unified hierarchy is requested in full, no need to + * bother with this. */ + if (cg_is_unified_wanted()) + return 0; + + /* If the hierarchy is already mounted, then follow whatever + * was chosen for it. */ + unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); + if (unified >= 0) + return unified; + + /* Otherwise, let's see what the kernel command line has to + * say. Since checking that is expensive, let's cache the + * result. */ + if (wanted >= 0) + return wanted; + + r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller", NULL); + if (r > 0) { + wanted = false; + } else { + _cleanup_free_ char *value = NULL; + + r = get_proc_cmdline_key("systemd.legacy_systemd_cgroup_controller=", &value); + if (r < 0) + return true; + + if (r == 0) + wanted = true; + else + wanted = parse_boolean(value) <= 0; + } + + return wanted; +} + +bool cg_is_legacy_systemd_controller_wanted(void) { + return cg_is_legacy_wanted() && !cg_is_unified_systemd_controller_wanted(); } #endif // 0 diff --git a/src/basic/cgroup-util.h b/src/basic/cgroup-util.h index 56610474e..d6e2a8ee1 100644 --- a/src/basic/cgroup-util.h +++ b/src/basic/cgroup-util.h @@ -114,6 +114,13 @@ static inline bool CGROUP_BLKIO_WEIGHT_IS_OK(uint64_t x) { } #endif // 0 +typedef enum CGroupUnified { + CGROUP_UNIFIED_UNKNOWN = -1, + CGROUP_UNIFIED_NONE = 0, /* Both systemd and controllers on legacy */ + CGROUP_UNIFIED_SYSTEMD = 1, /* Only systemd on unified */ + CGROUP_UNIFIED_ALL = 2, /* Both systemd and controllers on unified */ +} CGroupUnified; + /* * General rules: * @@ -237,13 +244,15 @@ int cg_kernel_controllers(Set *controllers); bool cg_ns_supported(void); #endif // 0 -int cg_unified(void); #if 0 /// UNNEEDED by elogind +int cg_all_unified(void); +int cg_unified(const char *controller); void cg_unified_flush(void); bool cg_is_unified_wanted(void); #endif // 0 bool cg_is_legacy_wanted(void); +bool cg_is_unified_systemd_controller_wanted(void); const char* cgroup_controller_to_string(CGroupController c) _const_; CGroupController cgroup_controller_from_string(const char *s) _pure_; diff --git a/src/core/cgroup.c b/src/core/cgroup.c index b53b35dad..56f065fdb 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -666,7 +666,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { bool has_weight = cgroup_context_has_cpu_weight(c); bool has_shares = cgroup_context_has_cpu_shares(c); - if (cg_unified() > 0) { + if (cg_all_unified() > 0) { uint64_t weight; if (has_weight) @@ -847,7 +847,7 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) { } if ((mask & CGROUP_MASK_MEMORY) && !is_root) { - if (cg_unified() > 0) { + if (cg_all_unified() > 0) { uint64_t max = c->memory_max; if (cgroup_context_has_unified_memory_config(c)) @@ -1021,7 +1021,7 @@ CGroupMask unit_get_own_mask(Unit *u) { e = unit_get_exec_context(u); if (!e || exec_context_maintains_privileges(e) || - cg_unified() > 0) + cg_all_unified() > 0) return _CGROUP_MASK_ALL; } @@ -1247,7 +1247,7 @@ int unit_watch_cgroup(Unit *u) { return 0; /* Only applies to the unified hierarchy */ - r = cg_unified(); + r = cg_unified(SYSTEMD_CGROUP_CONTROLLER); if (r < 0) return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m"); if (r == 0) @@ -1647,7 +1647,7 @@ int unit_watch_all_pids(Unit *u) { if (!u->cgroup_path) return -ENOENT; - if (cg_unified() > 0) /* On unified we can use proper notifications */ + if (cg_unified(SYSTEMD_CGROUP_CONTROLLER) > 0) /* On unified we can use proper notifications */ return 0; return unit_watch_pids_in_path(u, u->cgroup_path); @@ -1721,7 +1721,7 @@ static int on_cgroup_inotify_event(sd_event_source *s, int fd, uint32_t revents, int manager_setup_cgroup(Manager *m) { _cleanup_free_ char *path = NULL; CGroupController c; - int r, unified; + int r, all_unified, systemd_unified; char *e; assert(m); @@ -1762,11 +1762,17 @@ int manager_setup_cgroup(Manager *m) { if (r < 0) return log_error_errno(r, "Cannot find cgroup mount point: %m"); - unified = cg_unified(); - if (unified < 0) - return log_error_errno(r, "Couldn't determine if we are running in the unified hierarchy: %m"); - if (unified > 0) + all_unified = cg_all_unified(); + systemd_unified = cg_unified(SYSTEMD_CGROUP_CONTROLLER); + + if (all_unified < 0 || systemd_unified < 0) + return log_error_errno(all_unified < 0 ? all_unified : systemd_unified, + "Couldn't determine if we are running in the unified hierarchy: %m"); + + if (all_unified > 0) log_debug("Unified cgroup hierarchy is located at %s.", path); + else if (systemd_unified > 0) + log_debug("Unified cgroup hierarchy is located at %s. Controllers are on legacy hierarchies.", path); else log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path); @@ -1774,7 +1780,7 @@ int manager_setup_cgroup(Manager *m) { const char *scope_path; /* 3. Install agent */ - if (unified) { + if (systemd_unified) { /* In the unified hierarchy we can get * cgroup empty notifications via inotify. */ @@ -1852,7 +1858,7 @@ int manager_setup_cgroup(Manager *m) { return log_error_errno(errno, "Failed to open pin file: %m"); /* 6. Always enable hierarchical support if it exists... */ - if (!unified) + if (!all_unified) (void) cg_set_attribute("memory", "/", "memory.use_hierarchy", "1"); } @@ -2004,7 +2010,7 @@ int unit_get_memory_current(Unit *u, uint64_t *ret) { if ((u->cgroup_realized_mask & CGROUP_MASK_MEMORY) == 0) return -ENODATA; - if (cg_unified() <= 0) + if (cg_all_unified() <= 0) r = cg_get_attribute("memory", u->cgroup_path, "memory.usage_in_bytes", &v); else r = cg_get_attribute("memory", u->cgroup_path, "memory.current", &v); @@ -2049,7 +2055,7 @@ static int unit_get_cpu_usage_raw(Unit *u, nsec_t *ret) { if (!u->cgroup_path) return -ENODATA; - if (cg_unified() > 0) { + if (cg_all_unified() > 0) { const char *keys[] = { "usage_usec", NULL }; _cleanup_free_ char *val = NULL; uint64_t us; diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index ca8c63b8a..c551fa62b 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -105,10 +105,11 @@ static const MountPoint mount_table[] = { { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, #if 0 /// UNNEEDED by elogind + { "cgroup", "/sys/fs/cgroup/systemd", "cgroup2", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, + cg_is_unified_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_IN_CONTAINER }, + cg_is_legacy_systemd_controller_wanted, MNT_IN_CONTAINER }, { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, - cg_is_legacy_wanted, MNT_FATAL|MNT_IN_CONTAINER }, #else { "cgroup", "/sys/fs/cgroup/elogind", "cgroup", "none,name=elogind,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV, cg_is_legacy_wanted, MNT_IN_CONTAINER }, -- 2.30.2