From: Lennart Poettering Date: Mon, 22 Aug 2011 22:37:35 +0000 (+0200) Subject: cgroup: optionally mount a specific cgroup controllers together, and add cpu+cpuacct... X-Git-Tag: v34~19 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=commitdiff_plain;h=0c85a4f3efa2883c414ed8ff59aea263b85b7687 cgroup: optionally mount a specific cgroup controllers together, and add cpu+cpuacct to the default --- diff --git a/TODO b/TODO index be0878dd9..307445485 100644 --- a/TODO +++ b/TODO @@ -24,8 +24,6 @@ Features: * hide PAM/TCPWrap options in fragment parser when compile time disabled -* make arbitrary cgroups attributes settable - * when we automatically restart a service, ensure we retsart its rdeps, too. * allow Type=simple with PIDFile= @@ -46,8 +44,6 @@ Features: * logind: use sysfs path in device hash table instead of sysname, as soon as fb driver is fixed -* timedated: implement NTP calls - * implement Register= switch in .socket units to enable registration in Avahi, RPC and other socket registration services. @@ -84,10 +80,6 @@ Features: * GC unreferenced jobs (such as .device jobs) -* add JoinControllers= to system.conf to mount certain cgroup - controllers together in order to guarantee atomic creation/addition - of cgroups - * avoid DefaultStandardOutput=syslog to have any effect on StandardInput=socket services * cgroup_notify_empty(): recursively check groups up the tree, too @@ -102,6 +94,7 @@ Features: - cgroup best pratices to avoid stepping on each others toes - how to pass throw-away units to systemd, or dynamically change properties of existing units - how to integrate cgconfig and suchlike with systemd + - security properties * allow port=0 in .socket units diff --git a/man/systemd.conf.xml b/man/systemd.conf.xml index 8faedda6c..56490ef44 100644 --- a/man/systemd.conf.xml +++ b/man/systemd.conf.xml @@ -129,6 +129,26 @@ touch any hierarchies but its own. + + + JoinControllers=cpu,cpuacct + + Configures controllers + that shall be mounted in a single + hierarchy. By default systemd will + mount all controllers which are + enabled in the kernel in individual + hierachies, with the exception of + those listed in this setting. Takes a + space separated list of comma + separated controller names, in order + to allow multiple joined + hierarchies. Defaults to + 'cpu,cpuacct'. Pass an empty string to + ensure that systemd mounts all + controllers in separate + hierarchies. + diff --git a/src/automount.c b/src/automount.c index 16babd1fa..29b807de5 100644 --- a/src/automount.c +++ b/src/automount.c @@ -611,7 +611,7 @@ static int automount_start(Unit *u) { assert(a->state == AUTOMOUNT_DEAD || a->state == AUTOMOUNT_FAILED); - if (path_is_mount_point(a->where)) { + if (path_is_mount_point(a->where, false)) { log_error("Path %s is already a mount point, refusing start for %s", a->where, u->meta.id); return -EEXIST; } diff --git a/src/cgroup-util.c b/src/cgroup-util.c index 5fb248470..f74280f49 100644 --- a/src/cgroup-util.c +++ b/src/cgroup-util.c @@ -513,7 +513,7 @@ int cg_get_path(const char *controller, const char *path, const char *suffix, ch if (_unlikely_(!good)) { int r; - r = path_is_mount_point("/sys/fs/cgroup"); + r = path_is_mount_point("/sys/fs/cgroup", false); if (r <= 0) return r < 0 ? r : -ENOENT; diff --git a/src/cgroup.c b/src/cgroup.c index 0005a4fb6..dcf2c2feb 100644 --- a/src/cgroup.c +++ b/src/cgroup.c @@ -270,7 +270,7 @@ int manager_setup_cgroup(Manager *m) { assert(m); /* 0. Be nice to Ingo Molnar #628004 */ - if (path_is_mount_point("/sys/fs/cgroup/systemd") <= 0) { + if (path_is_mount_point("/sys/fs/cgroup/systemd", false) <= 0) { log_warning("No control group support available, not creating root group."); return 0; } diff --git a/src/main.c b/src/main.c index 1ff61849c..94401a5c0 100644 --- a/src/main.c +++ b/src/main.c @@ -75,6 +75,7 @@ static bool arg_sysv_console = true; static bool arg_mount_auto = true; static bool arg_swap_auto = true; static char **arg_default_controllers = NULL; +static char ***arg_join_controllers = NULL; static ExecOutput arg_default_std_output = EXEC_OUTPUT_INHERIT; static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT; @@ -494,6 +495,124 @@ static int config_parse_cpu_affinity2( return 0; } +static void strv_free_free(char ***l) { + char ***i; + + if (!l) + return; + + for (i = l; *i; i++) + strv_free(*i); + + free(l); +} + +static void free_join_controllers(void) { + if (!arg_join_controllers) + return; + + strv_free_free(arg_join_controllers); + arg_join_controllers = NULL; +} + +static int config_parse_join_controllers( + const char *filename, + unsigned line, + const char *section, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + unsigned n = 0; + char *state, *w; + size_t length; + + assert(filename); + assert(lvalue); + assert(rvalue); + + free_join_controllers(); + + FOREACH_WORD_QUOTED(w, length, rvalue, state) { + char *s, **l; + + s = strndup(w, length); + if (!s) + return -ENOMEM; + + l = strv_split(s, ","); + free(s); + + strv_uniq(l); + + if (strv_length(l) <= 1) { + strv_free(l); + continue; + } + + if (!arg_join_controllers) { + arg_join_controllers = new(char**, 2); + if (!arg_join_controllers) { + strv_free(l); + return -ENOMEM; + } + + arg_join_controllers[0] = l; + arg_join_controllers[1] = NULL; + + n = 1; + } else { + char ***a; + char ***t; + + t = new0(char**, n+2); + if (!t) { + strv_free(l); + return -ENOMEM; + } + + n = 0; + + for (a = arg_join_controllers; *a; a++) { + + if (strv_overlap(*a, l)) { + char **c; + + c = strv_merge(*a, l); + if (!c) { + strv_free(l); + strv_free_free(t); + return -ENOMEM; + } + + strv_free(l); + l = c; + } else { + char **c; + + c = strv_copy(*a); + if (!c) { + strv_free(l); + strv_free_free(t); + return -ENOMEM; + } + + t[n++] = c; + } + } + + t[n++] = strv_uniq(l); + + strv_free_free(arg_join_controllers); + arg_join_controllers = t; + } + } + + return 0; +} + static int parse_config_file(void) { const ConfigTableItem items[] = { @@ -514,6 +633,7 @@ static int parse_config_file(void) { { "Manager", "DefaultControllers", config_parse_strv, 0, &arg_default_controllers }, { "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output }, { "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error }, + { "Manager", "JoinControllers", config_parse_join_controllers, 0, &arg_join_controllers }, { NULL, NULL, NULL, 0, NULL } }; @@ -1084,14 +1204,28 @@ int main(int argc, char *argv[]) { log_open(); } + /* Initialize default unit */ if (set_default_unit(SPECIAL_DEFAULT_TARGET) < 0) goto finish; + /* By default, mount "cpu" and "cpuacct" together */ + arg_join_controllers = new(char**, 2); + if (!arg_join_controllers) + goto finish; + + arg_join_controllers[0] = strv_new("cpu", "cpuacct", NULL); + arg_join_controllers[1] = NULL; + + if (!arg_join_controllers[0]) + goto finish; + /* Mount /proc, /sys and friends, so that /proc/cmdline and * /proc/$PID/fd is available. */ - if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) - if (mount_setup(loaded_policy) < 0) + if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) { + r = mount_setup(loaded_policy); + if (r < 0) goto finish; + } /* Reset all signal handlers. */ assert_se(reset_all_signal_handlers() == 0); @@ -1199,6 +1333,12 @@ int main(int argc, char *argv[]) { if (getpid() == 1) install_crash_handler(); + if (geteuid() == 0 && !getenv("SYSTEMD_SKIP_API_MOUNTS")) { + r = mount_cgroup_controllers(arg_join_controllers); + if (r < 0) + goto finish; + } + log_full(arg_running_as == MANAGER_SYSTEM ? LOG_INFO : LOG_DEBUG, PACKAGE_STRING " running in %s mode. (" SYSTEMD_FEATURES "; " DISTRIBUTION ")", manager_running_as_to_string(arg_running_as)); @@ -1368,6 +1508,7 @@ finish: free(arg_default_unit); strv_free(arg_default_controllers); + free_join_controllers(); dbus_shutdown(); diff --git a/src/mount-setup.c b/src/mount-setup.c index ec4184de5..abb0c19d2 100644 --- a/src/mount-setup.c +++ b/src/mount-setup.c @@ -34,6 +34,8 @@ #include "macro.h" #include "util.h" #include "label.h" +#include "set.h" +#include "strv.h" #ifndef TTY_GID #define TTY_GID 5 @@ -104,7 +106,7 @@ static int mount_one(const MountPoint *p, bool relabel) { if (relabel) label_fix(p->where, true); - if ((r = path_is_mount_point(p->where)) < 0) + if ((r = path_is_mount_point(p->where, true)) < 0) return r; if (r > 0) @@ -133,7 +135,7 @@ static int mount_one(const MountPoint *p, bool relabel) { if (relabel) label_fix(p->where, false); - return 0; + return 1; } int mount_setup_early(void) { @@ -155,25 +157,33 @@ int mount_setup_early(void) { return r; } -static int mount_cgroup_controllers(void) { +int mount_cgroup_controllers(char ***join_controllers) { int r; FILE *f; char buf[LINE_MAX]; + Set *controllers; /* Mount all available cgroup controllers that are built into the kernel. */ - if (!(f = fopen("/proc/cgroups", "re"))) { + f = fopen("/proc/cgroups", "re"); + if (!f) { log_error("Failed to enumerate cgroup controllers: %m"); return 0; } + controllers = set_new(string_hash_func, string_compare_func); + if (!controllers) { + r = -ENOMEM; + log_error("Failed to allocate controller set."); + goto finish; + } + /* Ignore the header line */ (void) fgets(buf, sizeof(buf), f); for (;;) { - MountPoint p; - char *controller, *where; - int enabled = false; + char *controller; + int enabled = 0; if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) { @@ -190,8 +200,66 @@ static int mount_cgroup_controllers(void) { continue; } - if (asprintf(&where, "/sys/fs/cgroup/%s", controller) < 0) { + r = set_put(controllers, controller); + if (r < 0) { + log_error("Failed to add controller to set."); free(controller); + goto finish; + } + } + + for (;;) { + MountPoint p; + char *controller, *where, *options; + char ***k = NULL; + + controller = set_steal_first(controllers); + if (!controller) + break; + + if (join_controllers) + for (k = join_controllers; *k; k++) + if (strv_find(*k, controller)) + break; + + if (k && *k) { + char **i, **j; + + for (i = *k, j = *k; *i; i++) { + + if (!streq(*i, controller)) { + char *t; + + t = set_remove(controllers, *i); + if (!t) { + free(*i); + continue; + } + free(t); + } + + *(j++) = *i; + } + + *j = NULL; + + options = strv_join(*k, ","); + if (!options) { + log_error("Failed to join options"); + free(controller); + r = -ENOMEM; + goto finish; + } + + } else { + options = controller; + controller = NULL; + } + + where = strappend("/sys/fs/cgroup/", options); + if (!where) { + log_error("Failed to build path"); + free(options); r = -ENOMEM; goto finish; } @@ -200,7 +268,7 @@ static int mount_cgroup_controllers(void) { p.what = "cgroup"; p.where = where; p.type = "cgroup"; - p.options = controller; + p.options = options; p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV; p.fatal = false; @@ -208,13 +276,45 @@ static int mount_cgroup_controllers(void) { free(controller); free(where); - if (r < 0) + if (r < 0) { + free(options); goto finish; + } + + if (r > 0 && k && *k) { + char **i; + + for (i = *k; *i; i++) { + char *t; + + t = strappend("/sys/fs/cgroup/", *i); + if (!t) { + log_error("Failed to build path"); + r = -ENOMEM; + free(options); + goto finish; + } + + r = symlink(options, t); + free(t); + + if (r < 0 && errno != EEXIST) { + log_error("Failed to create symlink: %m"); + r = -errno; + free(options); + goto finish; + } + } + } + + free(options); } r = 0; finish: + set_free_free(controllers); + fclose(f); return r; @@ -301,5 +401,5 @@ int mount_setup(bool loaded_policy) { mkdir("/run/systemd", 0755); mkdir("/run/systemd/system", 0755); - return mount_cgroup_controllers(); + return 0; } diff --git a/src/mount-setup.h b/src/mount-setup.h index 9311cacc4..c1a27ba6b 100644 --- a/src/mount-setup.h +++ b/src/mount-setup.h @@ -28,6 +28,8 @@ int mount_setup_early(void); int mount_setup(bool loaded_policy); +int mount_cgroup_controllers(char ***join_controllers); + bool mount_point_is_api(const char *path); bool mount_point_ignore(const char *path); diff --git a/src/nspawn.c b/src/nspawn.c index 19d95b214..8c3cf6bfa 100644 --- a/src/nspawn.c +++ b/src/nspawn.c @@ -167,7 +167,7 @@ static int mount_all(const char *dest) { break; } - if ((t = path_is_mount_point(where)) < 0) { + if ((t = path_is_mount_point(where, false)) < 0) { log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t)); free(where); diff --git a/src/strv.c b/src/strv.c index 066dd0927..a52440d83 100644 --- a/src/strv.c +++ b/src/strv.c @@ -660,3 +660,16 @@ char **strv_parse_nulstr(const char *s, size_t l) { return v; } + +bool strv_overlap(char **a, char **b) { + char **i, **j; + + STRV_FOREACH(i, a) { + STRV_FOREACH(j, b) { + if (streq(*i, *j)) + return true; + } + } + + return false; +} diff --git a/src/strv.h b/src/strv.h index 46436a52d..d038c9f3b 100644 --- a/src/strv.h +++ b/src/strv.h @@ -68,6 +68,8 @@ char **strv_env_clean(char **l); char **strv_parse_nulstr(const char *s, size_t l); +bool strv_overlap(char **a, char **b); + #define STRV_FOREACH(s, l) \ for ((s) = (l); (s) && *(s); (s)++) diff --git a/src/system.conf b/src/system.conf index 4e06319b3..ad2cd7f3b 100644 --- a/src/system.conf +++ b/src/system.conf @@ -23,3 +23,4 @@ #DefaultControllers=cpu #DefaultStandardOutput=inherit #DefaultStandardError=inherit +#JoinControllers=cpu,cpuacct diff --git a/src/util.c b/src/util.c index e31e0f575..65d4b143d 100644 --- a/src/util.c +++ b/src/util.c @@ -2895,19 +2895,25 @@ ssize_t loop_write(int fd, const void *buf, size_t nbytes, bool do_poll) { return n; } -int path_is_mount_point(const char *t) { +int path_is_mount_point(const char *t, bool allow_symlink) { struct stat a, b; char *parent; int r; - if (lstat(t, &a) < 0) { + if (allow_symlink) + r = stat(t, &a); + else + r = lstat(t, &a); + + if (r < 0) { if (errno == ENOENT) return 0; return -errno; } - if ((r = parent_of_path(t, &parent)) < 0) + r = parent_of_path(t, &parent); + if (r < 0) return r; r = lstat(parent, &b); diff --git a/src/util.h b/src/util.h index b81edc8b2..a6b700c82 100644 --- a/src/util.h +++ b/src/util.h @@ -341,7 +341,7 @@ int fopen_temporary(const char *path, FILE **_f, char **_temp_path); ssize_t loop_read(int fd, void *buf, size_t nbytes, bool do_poll); ssize_t loop_write(int fd, const void *buf, size_t nbytes, bool do_poll); -int path_is_mount_point(const char *path); +int path_is_mount_point(const char *path, bool allow_symlink); bool is_device_path(const char *path);