1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
36 #include "path-util.h"
/* Looks up the cgroup.procs file of the cgroup identified by controller
 * and path via cg_get_path(). NOTE(review): presumably that file is then
 * opened and handed back through _f -- confirm against the full body. */
41 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
42 _cleanup_free_ char *fs = NULL;
48 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Parses the next PID from f as an unsigned long using fscanf(). A failed
 * parse can end in an error return of -errno, or -EIO when errno is unset.
 * The loops calling this function continue while the result is positive.
 * NOTE(review): presumably the parsed value is stored in *_pid and end of
 * file is told apart from a read error -- confirm. */
60 int cg_read_pid(FILE *f, pid_t *_pid) {
63 /* Note that the cgroup.procs might contain duplicates! See
64 * cgroups.txt for details. */
70 if (fscanf(f, "%lu", &ul) != 1) {
75 return errno ? -errno : -EIO;
/* Resolves the directory of the cgroup controller/path via cg_get_path()
 * with no suffix. Only the direct children are meant to be enumerated.
 * NOTE(review): presumably the directory is opened and returned through
 * _d -- confirm. */
85 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
86 _cleanup_free_ char *fs = NULL;
92 /* This is not recursive! */
94 r = cg_get_path(controller, path, NULL, &fs);
/* Walks the entries of d with FOREACH_DIRENT, using return -errno as the
 * error action. Each entry is tested for d_type DT_DIR and for the names
 * . and .., and an entry name is duplicated with strdup().
 * NOTE(review): presumably non-directories and the dot entries are
 * skipped and the heap copy is returned through fn for the caller to
 * free -- confirm. */
106 int cg_read_subgroup(DIR *d, char **fn) {
112 FOREACH_DIRENT(de, d, return -errno) {
115 if (de->d_type != DT_DIR)
118 if (streq(de->d_name, ".") ||
119 streq(de->d_name, ".."))
122 b = strdup(de->d_name);
/* Resolves the directory of the cgroup via cg_get_path() with no suffix.
 * The visible error check only fires for a negative r when errno is not
 * ENOENT. NOTE(review): presumably r comes from rmdir() on the resolved
 * path, so that an already missing directory is not an error -- confirm. */
133 int cg_rmdir(const char *controller, const char *path) {
134 _cleanup_free_ char *p = NULL;
137 r = cg_get_path(controller, path, NULL, &p);
142 if (r < 0 && errno != ENOENT)
/* Sends sig to the processes enumerated for the cgroup controller/path.
 * Each PID is looked up in the set s before kill() and added with
 * set_put() afterwards, so a PID already recorded is recognised on later
 * passes. A set created here is also held in allocated_set, which is
 * declared with _cleanup_set_free_. ignore_self compares each PID against
 * my_pid. An enumeration result of -ENOENT and a kill() failure with
 * ESRCH are excluded from the error bookkeeping in ret.
 * NOTE(review): presumably the set is only allocated when the caller
 * passes s as NULL, and SIGCONT is sent after sig when sigcont is set and
 * sig is not SIGKILL -- confirm. */
148 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
149 _cleanup_set_free_ Set *allocated_set = NULL;
156 /* This goes through the tasks list and kills them all. This
157 * is repeated until no further processes are added to the
158 * tasks list, to properly handle forking processes */
161 s = allocated_set = set_new(NULL);
169 _cleanup_fclose_ FILE *f = NULL;
173 r = cg_enumerate_processes(controller, path, &f);
175 if (ret >= 0 && r != -ENOENT)
181 while ((r = cg_read_pid(f, &pid)) > 0) {
183 if (ignore_self && pid == my_pid)
186 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
189 /* If we haven't killed this process yet, kill
191 if (kill(pid, sig) < 0) {
192 if (ret >= 0 && errno != ESRCH)
195 if (sigcont && sig != SIGKILL)
204 r = set_put(s, LONG_TO_PTR(pid));
220 /* To avoid racing against processes which fork
221 * quicker than we can kill them we repeat this until
222 * no new pids need to be killed. */
/* Applies cg_kill() to path and then recurses into every subgroup
 * returned by cg_read_subgroup(), building the child path from path, a
 * slash and the subgroup name. The same set s is passed to every call.
 * ret is only considered for an update while it is still non-negative.
 * For the cg_rmdir() call on path, -ENOENT and -EBUSY are excluded from
 * error reporting. NOTE(review): presumably a set is allocated only when
 * s is NULL and cg_rmdir() only runs when rem is true -- confirm. */
229 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
230 _cleanup_set_free_ Set *allocated_set = NULL;
231 _cleanup_closedir_ DIR *d = NULL;
239 s = allocated_set = set_new(NULL);
244 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
246 r = cg_enumerate_subgroups(controller, path, &d);
248 if (ret >= 0 && r != -ENOENT)
254 while ((r = cg_read_subgroup(d, &fn)) > 0) {
255 _cleanup_free_ char *p = NULL;
257 p = strjoin(path, "/", fn, NULL);
262 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
263 if (ret >= 0 && r != 0)
267 if (ret >= 0 && r < 0)
271 r = cg_rmdir(controller, path);
272 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Attaches the processes enumerated for cgroup cfrom/pfrom to cgroup
 * cto/pto by calling cg_attach() per PID. PIDs are checked against the
 * set s before the attach and added with set_put() afterwards.
 * ignore_self compares each PID against my_pid. An enumeration result of
 * -ENOENT and an attach result of -ESRCH are excluded from the error
 * bookkeeping in ret. NOTE(review): presumably PIDs already in s and the
 * own PID are skipped, and the pass is repeated until no new PID shows
 * up -- confirm. */
279 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
281 _cleanup_set_free_ Set *s = NULL;
297 _cleanup_fclose_ FILE *f = NULL;
301 r = cg_enumerate_processes(cfrom, pfrom, &f);
303 if (ret >= 0 && r != -ENOENT)
309 while ((r = cg_read_pid(f, &pid)) > 0) {
311 /* This might do weird stuff if we aren't a
312 * single-threaded program. However, we
313 * luckily know we are not */
314 if (ignore_self && pid == my_pid)
317 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
320 r = cg_attach(cto, pto, pid);
322 if (ret >= 0 && r != -ESRCH)
329 r = set_put(s, LONG_TO_PTR(pid));
/* Runs cg_migrate() from cfrom/pfrom into cto/pto and then recurses into
 * each subgroup of pfrom, always with the same destination pto, so the
 * processes of the whole subtree are aimed at one target cgroup. ret is
 * only considered for an update while it is still non-negative. For the
 * cg_rmdir() call on cfrom/pfrom, -ENOENT and -EBUSY are excluded from
 * error reporting. NOTE(review): the parameter list is taken from the
 * recursive call (cfrom, pfrom, cto, pto, ignore_self, rem); presumably
 * cg_rmdir() only runs when rem is true -- confirm. */
349 int cg_migrate_recursive(
357 _cleanup_closedir_ DIR *d = NULL;
366 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
368 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
370 if (ret >= 0 && r != -ENOENT)
376 while ((r = cg_read_subgroup(d, &fn)) > 0) {
377 _cleanup_free_ char *p = NULL;
379 p = strjoin(pfrom, "/", fn, NULL);
388 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
389 if (r != 0 && ret >= 0)
393 if (r < 0 && ret >= 0)
397 r = cg_rmdir(cfrom, pfrom);
398 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Attempts cg_migrate_recursive() into pto and, as the inline comment
 * says, retries with the prefixes of pto produced by PATH_FOREACH_PREFIX
 * when that did not work. prefix is a variable-length stack buffer of
 * strlen(pto) + 1 bytes. NOTE(review): presumably the retry only happens
 * on a negative result and stops at the first prefix that succeeds --
 * confirm. */
405 int cg_migrate_recursive_fallback(
420 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
422 char prefix[strlen(pto) + 1];
424 /* This didn't work? Then let's try all prefixes of the destination */
426 PATH_FOREACH_PREFIX(prefix, pto) {
427 r = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
/* Maps a controller specification to the name that is later joined below
 * /sys/fs/cgroup. A leading name= is dropped by returning a pointer five
 * characters into the argument, so that result aliases the input and is
 * not a separate allocation. NOTE(review): the values returned for
 * SYSTEMD_CGROUP_CONTROLLER and for every other name are presumably a
 * fixed name and the input itself -- confirm. */
436 static const char *normalize_controller(const char *controller) {
440 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
442 else if (startswith(controller, "name="))
443 return controller + 5;
/* Assembles a newly allocated path from its parts and stores it in *fs
 * after path_kill_slashes(). With a non-empty controller the result lives
 * below /sys/fs/cgroup/ followed by the controller, then optionally path
 * and suffix, each separated by a slash. Without a controller the result
 * is built from path and suffix only. Empty and NULL parts are treated
 * alike through isempty(). NOTE(review): presumably an allocation failure
 * yields -ENOMEM and success yields 0 -- confirm. */
448 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
451 if (!isempty(controller)) {
452 if (!isempty(path) && !isempty(suffix))
453 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
454 else if (!isempty(path))
455 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
456 else if (!isempty(suffix))
457 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
459 t = strappend("/sys/fs/cgroup/", controller);
461 if (!isempty(path) && !isempty(suffix))
462 t = strjoin(path, "/", suffix, NULL);
463 else if (!isempty(path))
472 *fs = path_kill_slashes(t);
/* Builds the file system path for controller, path and suffix into *fs.
 * A non-NULL controller must pass cg_controller_is_valid() with named
 * hierarchies allowed. While the thread-local flag good is unset,
 * /sys/fs/cgroup is checked with path_is_mount_point(); that check can
 * end in its own negative error or in -ENOENT. The controller is
 * normalized (NULL stays NULL) before join_path() assembles the result.
 * NOTE(review): presumably good is set to true once the mount point check
 * succeeds -- confirm. */
476 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
478 static thread_local bool good = false;
482 if (controller && !cg_controller_is_valid(controller, true))
485 if (_unlikely_(!good)) {
488 r = path_is_mount_point("/sys/fs/cgroup", false);
490 return r < 0 ? r : -ENOENT;
492 /* Cache this to save a few stat()s */
496 p = controller ? normalize_controller(controller) : NULL;
498 return join_path(p, path, suffix, fs);
/* Tests whether a hierarchy named p exists: p has to pass
 * filename_is_valid(), and /sys/fs/cgroup/ followed by p is probed with
 * laccess() and F_OK. The path is built with strjoina(). Callers in this
 * file treat a result >= 0 as success. NOTE(review): the exact error codes
 * returned on the two failure paths are not shown here -- confirm. */
501 static int check_hierarchy(const char *p) {
506 if (!filename_is_valid(p))
509 /* Check if this controller actually really exists */
510 cc = strjoina("/sys/fs/cgroup/", p);
511 if (laccess(cc, F_OK) < 0)
/* Builds the file system path for controller, path and suffix into *fs,
 * additionally verifying the hierarchy. The controller is validated with
 * cg_controller_is_valid() without a NULL guard, normalized, and passed
 * to check_hierarchy() before join_path() assembles the result.
 * NOTE(review): presumably a failed validation or hierarchy check returns
 * a negative error before join_path() is reached -- confirm. */
517 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
523 if (!cg_controller_is_valid(controller, true))
526 /* Normalize the controller syntax */
527 p = normalize_controller(controller);
529 /* Check if this controller actually really exists */
530 r = check_hierarchy(p);
534 return join_path(p, path, suffix, fs);
/* nftw() callback used by cg_trim(). It inspects typeflag for FTW_DP and
 * the nesting level in ftwbuf for being below 1. NOTE(review): presumably
 * everything except post-order directories, and the top directory itself,
 * is left alone while the remaining directories are removed -- confirm. */
537 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
542 if (typeflag != FTW_DP)
545 if (ftwbuf->level < 1)
/* Resolves the cgroup directory via cg_get_path() and walks it with
 * nftw() using trim_cb, a limit of 64 open descriptors and the flags
 * FTW_DEPTH, FTW_MOUNT and FTW_PHYS. A nonzero nftw() result is turned
 * into -errno, or -EIO if errno is unset. The rmdir() on the top
 * directory tolerates ENOENT. NOTE(review): presumably that rmdir() is
 * only attempted when delete_root is true -- confirm. */
552 int cg_trim(const char *controller, const char *path, bool delete_root) {
553 _cleanup_free_ char *fs = NULL;
558 r = cg_get_path(controller, path, NULL, &fs);
563 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
564 r = errno ? -errno : -EIO;
567 if (rmdir(fs) < 0 && errno != ENOENT)
/* Empties the cgroup path into its parent: the parent path is computed
 * with path_get_parent() and cg_migrate_recursive() is called within the
 * same controller with ignore_self false and rem true. A result of
 * -ENOENT is reported as 0; any other result is returned unchanged. */
574 int cg_delete(const char *controller, const char *path) {
575 _cleanup_free_ char *parent = NULL;
580 r = path_get_parent(path, &parent);
584 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
585 return r == -ENOENT ? 0 : r;
/* Creates the directory of the cgroup controller/path. The path is
 * resolved with cg_get_path_and_check(), missing parents are created with
 * mkdir_parents() and the directory itself with mkdir(), both using mode
 * 0755. NOTE(review): the handling of a failing mkdir(), for example for
 * an already existing directory, is not shown here -- confirm. */
588 int cg_create(const char *controller, const char *path) {
589 _cleanup_free_ char *fs = NULL;
592 r = cg_get_path_and_check(controller, path, NULL, &fs);
596 r = mkdir_parents(fs, 0755);
600 if (mkdir(fs, 0755) < 0) {
/* Calls cg_create() for controller/path and then cg_attach() to put pid
 * into that cgroup. As noted inline, a cgroup that was created is not
 * removed again when a later step fails. NOTE(review): how the two
 * results r and q are combined into the return value is not shown here --
 * confirm. */
611 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
616 r = cg_create(controller, path);
620 q = cg_attach(controller, path, pid);
624 /* This does not remove the cgroup on failure */
/* Writes pid, formatted with PID_FMT and followed by a newline, to the
 * cgroup.procs file of the cgroup controller/path. The file path comes
 * from cg_get_path_and_check() and the write is done with
 * write_string_file_no_create(), whose result is returned. The buffer c
 * is sized DECIMAL_STR_MAX(pid_t) + 2. NOTE(review): presumably a pid of
 * 0 is replaced by the PID of the caller before formatting -- confirm. */
628 int cg_attach(const char *controller, const char *path, pid_t pid) {
629 _cleanup_free_ char *fs = NULL;
630 char c[DECIMAL_STR_MAX(pid_t) + 2];
636 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
643 snprintf(c, sizeof(c), PID_FMT"\n", pid);
645 return write_string_file_no_create(fs, c);
/* Attempts cg_attach() on path and, as the inline comment says, retries
 * with the prefixes of path produced by PATH_FOREACH_PREFIX when that did
 * not work. prefix is a variable-length stack buffer of strlen(path) + 1
 * bytes. NOTE(review): presumably the retry only happens on a negative
 * result and stops at the first prefix that succeeds -- confirm. */
648 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
655 r = cg_attach(controller, path, pid);
657 char prefix[strlen(path) + 1];
659 /* This didn't work? Then let's try all prefixes of
662 PATH_FOREACH_PREFIX(prefix, path) {
663 r = cg_attach(controller, prefix, pid);
/* Applies mode, uid and gid to the directory of the cgroup
 * controller/path: the directory is resolved with cg_get_path() and
 * handed to chmod_and_chown(), whose result is returned.
 * NOTE(review): the adjustment made to mode when it is not MODE_INVALID
 * is not shown here -- confirm. */
672 int cg_set_group_access(
673 const char *controller,
679 _cleanup_free_ char *fs = NULL;
684 if (mode != MODE_INVALID)
687 r = cg_get_path(controller, path, NULL, &fs);
691 return chmod_and_chown(fs, mode, uid, gid);
/* Applies mode, uid and gid to the cgroup.procs file of the cgroup
 * controller/path and afterwards to its tasks file, each resolved with
 * cg_get_path() and changed with chmod_and_chown(). The result of the
 * tasks call is returned. NOTE(review): presumably the function returns
 * early when mode, uid and gid are all invalid, and a failure on
 * cgroup.procs stops before tasks is touched -- confirm. */
694 int cg_set_task_access(
695 const char *controller,
701 _cleanup_free_ char *fs = NULL, *procs = NULL;
706 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
709 if (mode != MODE_INVALID)
712 r = cg_get_path(controller, path, "cgroup.procs", &fs);
716 r = chmod_and_chown(fs, mode, uid, gid);
720 /* Compatibility, Always keep values for "tasks" in sync with
722 r = cg_get_path(controller, path, "tasks", &procs);
726 return chmod_and_chown(procs, mode, uid, gid);
/* Looks up the cgroup path of pid for one controller by scanning the
 * cgroup file of the process in procfs. The controller is validated and
 * normalized, with SYSTEMD_CGROUP_CONTROLLER as the alternative value.
 * A failure with ENOENT is reported as -ESRCH, other failures as -errno.
 * Each line is searched for its first colon, and the comma-separated
 * words that follow are compared against the controller either directly
 * or behind a name= prefix. NOTE(review): presumably the default
 * controller applies when controller is NULL, and the text after the
 * second colon of the matching line is copied into *path -- confirm. */
729 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
730 _cleanup_fclose_ FILE *f = NULL;
739 if (!cg_controller_is_valid(controller, true))
742 controller = normalize_controller(controller);
744 controller = SYSTEMD_CGROUP_CONTROLLER;
746 fs = procfs_file_alloca(pid, "cgroup");
750 return errno == ENOENT ? -ESRCH : -errno;
752 cs = strlen(controller);
754 FOREACH_LINE(line, f, return -errno) {
757 const char *word, *state;
762 l = strchr(line, ':');
773 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
775 if (k == cs && memcmp(word, controller, cs) == 0) {
781 memcmp(word, "name=", 5) == 0 &&
782 memcmp(word+5, controller, cs) == 0) {
/* Configures the release agent of a hierarchy. The release_agent file at
 * the root of controller is read and stripped; agent is written to it on
 * one branch and compared against the current content on the other. Then
 * notify_on_release is read, and when it contains 0 the value 1 is
 * written. NOTE(review): presumably agent is only written when the file
 * is currently empty, a different agent already in place is reported as
 * an error, and fs and contents are released before being reused for the
 * second file -- confirm. */
802 int cg_install_release_agent(const char *controller, const char *agent) {
803 _cleanup_free_ char *fs = NULL, *contents = NULL;
809 r = cg_get_path(controller, NULL, "release_agent", &fs);
813 r = read_one_line_file(fs, &contents);
817 sc = strstrip(contents);
819 r = write_string_file_no_create(fs, agent);
822 } else if (!streq(sc, agent))
827 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
833 r = read_one_line_file(fs, &contents);
837 sc = strstrip(contents);
838 if (streq(sc, "0")) {
839 r = write_string_file_no_create(fs, "1");
/* Reverts the release agent setup of a hierarchy: 0 is written to
 * notify_on_release and an empty string to release_agent, both at the
 * root of controller and both through write_string_file_no_create().
 * NOTE(review): presumably fs is released before it is reused for the
 * second path and a failing step returns early -- confirm. */
852 int cg_uninstall_release_agent(const char *controller) {
853 _cleanup_free_ char *fs = NULL;
856 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
860 r = write_string_file_no_create(fs, "0");
867 r = cg_get_path(controller, NULL, "release_agent", &fs);
871 r = write_string_file_no_create(fs, "");
/* Checks whether the cgroup controller/path holds any process. A cgroup
 * that cannot be enumerated because of -ENOENT counts as empty (result
 * 1); other enumeration errors are returned. The PIDs are then read one
 * by one, and ignore_self compares each against self_pid.
 * NOTE(review): presumably the own PID is skipped when ignore_self is
 * set, and the result is 0 as soon as any other PID is seen -- confirm. */
878 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
879 _cleanup_fclose_ FILE *f = NULL;
880 pid_t pid = 0, self_pid;
886 r = cg_enumerate_processes(controller, path, &f);
888 return r == -ENOENT ? 1 : r;
892 while ((r = cg_read_pid(f, &pid)) > 0) {
894 if (ignore_self && pid == self_pid)
/* Checks the cgroup controller/path with cg_is_empty() and then each
 * subgroup by recursing with the child path built from path, a slash and
 * the subgroup name. A subgroup enumeration failing with -ENOENT yields
 * 1; other enumeration errors are returned. NOTE(review): presumably any
 * result other than empty from a level ends the walk and is returned --
 * confirm. */
907 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
908 _cleanup_closedir_ DIR *d = NULL;
914 r = cg_is_empty(controller, path, ignore_self);
918 r = cg_enumerate_subgroups(controller, path, &d);
920 return r == -ENOENT ? 1 : r;
922 while ((r = cg_read_subgroup(d, &fn)) > 0) {
923 _cleanup_free_ char *p = NULL;
925 p = strjoin(path, "/", fn, NULL);
930 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup specification into a controller and a path. The
 * visible code covers three shapes. A pure path is checked with
 * path_is_safe() and stored in *path after path_kill_slashes(). A spec
 * without a colon is validated as a controller and a normalized copy is
 * made. For a spec with a colon, the part before it is copied,
 * normalized and validated, while the part after it may be empty and
 * otherwise has to be both safe and absolute before path_kill_slashes()
 * is applied. NOTE(review): presumably the pure path shape is selected
 * by a leading slash, either output pointer may be NULL, and invalid
 * input yields -EINVAL -- confirm. */
941 int cg_split_spec(const char *spec, char **controller, char **path) {
943 char *t = NULL, *u = NULL;
944 _cleanup_free_ char *v = NULL;
949 if (!path_is_safe(spec))
957 *path = path_kill_slashes(t);
966 e = strchr(spec, ':');
968 if (!cg_controller_is_valid(spec, true))
972 t = strdup(normalize_controller(spec));
985 v = strndup(spec, e-spec);
988 t = strdup(normalize_controller(v));
991 if (!cg_controller_is_valid(t, true)) {
996 if (streq(e+1, "")) {
1009 if (!path_is_safe(u) ||
1010 !path_is_absolute(u)) {
1016 path_kill_slashes(u);
/* Turns path into a file system path stored in *result. Input that
 * already starts with /sys/fs/cgroup is returned after
 * path_kill_slashes(). Anything else is parsed with cg_split_spec() and
 * resolved with cg_get_path(), using SYSTEMD_CGROUP_CONTROLLER when no
 * controller was given and / when no path was given. */
1032 int cg_mangle_path(const char *path, char **result) {
1033 _cleanup_free_ char *c = NULL, *p = NULL;
1040 /* First, check if it already is a filesystem path */
1041 if (path_startswith(path, "/sys/fs/cgroup")) {
1047 *result = path_kill_slashes(t);
1051 /* Otherwise, treat it as cg spec */
1052 r = cg_split_spec(path, &c, &p);
1056 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Stores in *path the cgroup path of PID 1 within
 * SYSTEMD_CGROUP_CONTROLLER and returns the result of cg_pid_get_path(). */
1059 int cg_get_root_path(char **path) {
1062 return cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, path);
/* Relates cgroup to a root cgroup by testing it with path_startswith().
 * As the inline comment says, a root given by the caller is used and
 * otherwise the root is determined from PID 1 through
 * cg_get_root_path(). NOTE(review): presumably *shifted ends up pointing
 * into cgroup itself, just past the root prefix when it matches, so it
 * stays valid only as long as cgroup does -- confirm. */
1065 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1066 _cleanup_free_ char *rt = NULL;
1074 /* If the root was specified let's use that, otherwise
1075 * let's determine it from PID 1 */
1077 r = cg_get_root_path(&rt);
1084 p = path_startswith(cgroup, root);
/* Fetches the cgroup path of pid within SYSTEMD_CGROUP_CONTROLLER and
 * passes it through cg_shift_path() together with root.
 * NOTE(review): presumably the shifted path is returned through *cgroup
 * as a string owned by the caller -- confirm. */
1093 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1094 _cleanup_free_ char *raw = NULL;
1101 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1105 r = cg_shift_path(raw, root, &c);
/* Extracts the session name from a cgroup path, which must start with a
 * slash (asserted). One path component, running from e up to the next
 * slash or the end of the string, is copied onto the stack with
 * strndupa(). NOTE(review): presumably e points just past the leading
 * slash and the component is unescaped and duplicated into *session --
 * confirm. */
1125 int cg_path_get_session(const char *path, char **session) {
1126 const char *e, *n, *s;
1128 /* Elogind uses a flat hierarchy, just "/SESSION". The only
1129 wrinkle is that SESSION might be escaped. */
1132 assert(path[0] == '/');
1135 n = strchrnul(e, '/');
1139 s = strndupa(e, n - e);
/* Determines the session of pid: the shifted cgroup path of the process
 * is obtained with cg_pid_get_path_shifted() using the default root, and
 * cg_path_get_session() extracts the session from it. The temporary path
 * is declared with _cleanup_free_. */
1158 int cg_pid_get_session(pid_t pid, char **session) {
1159 _cleanup_free_ char *cgroup = NULL;
1162 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1166 return cg_path_get_session(cgroup, session);
/* Returns a copy of p that is safe to use as a cgroup directory name.
 * The visible checks look for the names notify_on_release and
 * release_agent, and for names whose part before the last dot is either
 * cgroup or an existing hierarchy according to check_hierarchy(). The
 * escaped form is p with one underscore put in front, built with
 * strappend(). NOTE(review): presumably need_prefix selects between that
 * form and a plain copy of p, and the caller frees the result --
 * confirm. */
1169 char *cg_escape(const char *p) {
1170 bool need_prefix = false;
1172 /* This implements very minimal escaping for names to be used
1173 * as file names in the cgroup tree: any name which might
1174 * conflict with a kernel name or is prefixed with '_' is
1175 * prefixed with a '_'. That way, when reading cgroup names it
1176 * is sufficient to remove a single prefixing underscore if
1179 /* The return value of this function (unlike cg_unescape())
1185 streq(p, "notify_on_release") ||
1186 streq(p, "release_agent") ||
1192 dot = strrchr(p, '.');
1195 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1200 n = strndupa(p, dot - p);
1202 if (check_hierarchy(n) >= 0)
1209 return strappend("_", p);
/* Counterpart of cg_escape(). NOTE(review): presumably the result points
 * into p, past a single leading underscore if there is one, which would
 * explain why it must not be freed -- confirm. */
1214 char *cg_unescape(const char *p) {
1217 /* The return value of this function (unlike cg_escape())
1218 * doesn't need free()! */
1226 #define CONTROLLER_VALID \
/* Validates a controller name. A name= prefix is detected with
 * startswith(). The name is checked for being empty or starting with an
 * underscore, every character is looked up in CONTROLLER_VALID, and the
 * length is compared against FILENAME_MAX. NOTE(review): presumably each
 * of these checks rejects the name, and the name= prefix is only accepted
 * and skipped when allow_named is true -- confirm. */
1230 bool cg_controller_is_valid(const char *p, bool allow_named) {
1237 s = startswith(p, "name=");
1242 if (*p == 0 || *p == '_')
1245 for (t = p; *t; t++)
1246 if (!strchr(CONTROLLER_VALID, *t))
1249 if (t - p > FILENAME_MAX)
/* Writes value to the attribute file of the cgroup controller/path. The
 * file is resolved with cg_get_path() using attribute as the suffix and
 * written with write_string_file_no_create(), whose result is returned. */
1255 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1256 _cleanup_free_ char *p = NULL;
1259 r = cg_get_path(controller, path, attribute, &p);
1263 return write_string_file_no_create(p, value);
/* Reads one line from the attribute file of the cgroup controller/path
 * into *ret. The file is resolved with cg_get_path() using attribute as
 * the suffix and read with read_one_line_file(), whose result is
 * returned. */
1266 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1267 _cleanup_free_ char *p = NULL;
1270 r = cg_get_path(controller, path, attribute, &p);
1274 return read_one_line_file(p, ret);
/* Controller names iterated with NULSTR_FOREACH by the functions below,
 * each of which starts a CGroupControllerMask bit at 1 next to the loop.
 * NOTE(review): presumably the order of the names matches the bit order
 * of CGroupControllerMask -- confirm before reordering. */
1277 static const char mask_names[] =
/* Creates the cgroup path in SYSTEMD_CGROUP_CONTROLLER first and then
 * walks mask_names. For a supported controller that falls into the else
 * branch, the cgroup is trimmed with cg_trim() including its root
 * directory, and the result of that call is not checked.
 * NOTE(review): presumably the first branch creates the cgroup for the
 * controllers selected in mask, and bit is shifted once per name --
 * confirm. */
1284 int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path) {
1285 CGroupControllerMask bit = 1;
1289 /* This one will create a cgroup in our private tree, but also
1290 * duplicate it in the trees specified in mask, and remove it
1293 /* First create the cgroup in our own hierarchy. */
1294 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1298 /* Then, do the same in the other hierarchies */
1299 NULSTR_FOREACH(n, mask_names) {
1302 else if (supported & bit)
1303 cg_trim(n, path, true);
/* Attaches pid to path in SYSTEMD_CGROUP_CONTROLLER and then, for every
 * controller of mask_names that is set in supported, attaches it with
 * cg_attach_fallback(). path_callback can supply a path for a given bit
 * and userdata. The result of the per-controller attach is not checked.
 * NOTE(review): presumably the callback is optional, its non-NULL result
 * takes the place of path for that controller, and a failure of the
 * first attach is returned -- confirm. */
1311 int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1312 CGroupControllerMask bit = 1;
1316 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1320 NULSTR_FOREACH(n, mask_names) {
1322 if (supported & bit) {
1323 const char *p = NULL;
1326 p = path_callback(bit, userdata);
1331 cg_attach_fallback(n, path, pid);
/* Calls cg_attach_everywhere() for every member of the set pids, where
 * each member is converted to a pid_t with PTR_TO_LONG.
 * NOTE(review): how the per-PID results q are folded into the return
 * value is not shown here -- confirm. */
1340 int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1345 SET_FOREACH(pidp, pids, i) {
1346 pid_t pid = PTR_TO_LONG(pidp);
1349 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
/* Migrates processes between two cgroups in all hierarchies. Unless from
 * and to are equal paths, the processes are first moved recursively
 * within SYSTEMD_CGROUP_CONTROLLER, with removal of the source requested.
 * Then, for every controller of mask_names set in supported, the
 * processes found in to of SYSTEMD_CGROUP_CONTROLLER are migrated into
 * path p of that controller with cg_migrate_recursive_fallback(), without
 * removal, and the result of that call is not checked.
 * NOTE(review): presumably p is the result of to_callback when one is
 * given and to otherwise -- confirm. */
1357 int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1358 CGroupControllerMask bit = 1;
1362 if (!path_equal(from, to)) {
1363 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1368 NULSTR_FOREACH(n, mask_names) {
1369 if (supported & bit) {
1370 const char *p = NULL;
1373 p = to_callback(bit, userdata);
1378 cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, n, p, false, false);
/* Trims path in SYSTEMD_CGROUP_CONTROLLER and then in every controller of
 * mask_names that is set in supported, passing delete_root through to
 * cg_trim(). The results of the per-controller calls are not checked.
 * NOTE(review): presumably a failure of the first cg_trim() is returned
 * before the loop -- confirm. */
1387 int cg_trim_everywhere(CGroupControllerMask supported, const char *path, bool delete_root) {
1388 CGroupControllerMask bit = 1;
1392 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1396 NULSTR_FOREACH(n, mask_names) {
1397 if (supported & bit)
1398 cg_trim(n, path, delete_root);
/* Probes every controller of mask_names with check_hierarchy() while
 * tracking a bit that starts at 1 and a mask that starts at 0.
 * NOTE(review): presumably the bit of each controller whose hierarchy
 * exists is added to mask, which is then returned -- confirm. */
1406 CGroupControllerMask cg_mask_supported(void) {
1407 CGroupControllerMask bit = 1, mask = 0;
1410 NULSTR_FOREACH(n, mask_names) {
1411 if (check_hierarchy(n) >= 0)
/* Collects controller names from /proc/cgroups into the set controllers,
 * which must not be NULL (asserted). The header line is read and
 * discarded. Each following record is parsed with fscanf() into an
 * allocated controller name (%ms) and an enabled flag, with the two
 * numeric fields in between skipped. Names are checked with
 * filename_is_valid() and handed to set_consume().
 * NOTE(review): presumably a missing /proc/cgroups is not an error,
 * disabled or invalid controllers are freed and skipped, and a short
 * parse at end of file ends the loop -- confirm. */
1420 int cg_kernel_controllers(Set *controllers) {
1421 _cleanup_fclose_ FILE *f = NULL;
1425 assert(controllers);
1427 f = fopen("/proc/cgroups", "re");
1429 if (errno == ENOENT)
1434 /* Ignore the header line */
1435 (void) fgets(buf, sizeof(buf), f);
1442 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
1447 if (ferror(f) && errno)
1458 if (!filename_is_valid(controller)) {
1463 r = set_consume(controllers, controller);