From: Lennart Poettering Date: Wed, 17 Jan 2018 17:50:27 +0000 (+0100) Subject: core: propagate TasksMax= on the root slice to sysctls X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=6c81962c245a341609319421ba8cdf0ac5962744;p=elogind.git core: propagate TasksMax= on the root slice to sysctls The cgroup "pids" controller is not supported on the root cgroup. However we expose TasksMax= on it, but currently don't actually apply it to anything. Let's correct this: if set, let's propagate things to the right sysctls. This way we can expose TasksMax= on all units in a somewhat sensible way. --- diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 153a0dda8..7900ac3f2 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -1029,19 +1029,46 @@ static void cgroup_context_apply( } } - if ((apply_mask & CGROUP_MASK_PIDS) && !is_root) { + if (apply_mask & CGROUP_MASK_PIDS) { + + if (is_root) { + /* So, the "pids" controller does not expose anything on the root cgroup, in order not to + * replicate knobs exposed elsewhere needlessly. We abstract this away here however, and when + * the knobs of the root cgroup are modified propagate this to the relevant sysctls. There's a + * non-obvious asymmetry however: unlike the cgroup properties we don't really want to take + * exclusive ownership of the sysctls, but we still want to honour things if the user sets + * limits. Hence we employ sort of a one-way strategy: when the user sets a bounded limit + * through us it counts. When the user afterwards unsets it again (i.e. sets it to unbounded) + * it also counts. But if the user never set a limit through us (i.e. we are the default of + * "unbounded") we leave things unmodified. For this we manage a global boolean that we turn on + * the first time we set a limit. Note that this boolean is flushed out on manager reload, + * which is desirable so that there's an offical way to release control of the sysctl from + * systemd: set the limit to unbounded and reload. */ + + if (c->tasks_max != CGROUP_LIMIT_MAX) { + u->manager->sysctl_pid_max_changed = true; + r = procfs_tasks_set_limit(c->tasks_max); + } else if (u->manager->sysctl_pid_max_changed) + r = procfs_tasks_set_limit(TASKS_MAX); + else + r = 0; - if (c->tasks_max != CGROUP_LIMIT_MAX) { - char buf[DECIMAL_STR_MAX(uint64_t) + 2]; + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to write to tasks limit sysctls: %m"); - sprintf(buf, "%" PRIu64 "\n", c->tasks_max); - r = cg_set_attribute("pids", path, "pids.max", buf); - } else - r = cg_set_attribute("pids", path, "pids.max", "max"); + } else { + if (c->tasks_max != CGROUP_LIMIT_MAX) { + char buf[DECIMAL_STR_MAX(uint64_t) + 2]; - if (r < 0) - log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, - "Failed to set pids.max: %m"); + sprintf(buf, "%" PRIu64 "\n", c->tasks_max); + r = cg_set_attribute("pids", path, "pids.max", buf); + } else + r = cg_set_attribute("pids", path, "pids.max", "max"); + if (r < 0) + log_unit_full(u, IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r, + "Failed to set pids.max: %m"); + } } if (apply_bpf)