1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
36 #include "formats-util.h"
37 #include "path-util.h"
/* Starts enumeration of the processes of the cgroup (controller, path).
 * The visible code resolves the location of the group's "cgroup.procs"
 * file through cg_get_path(). The stream is presumably opened from that
 * path and handed back through _f; callers in this file pass the result
 * to cg_read_pid(). TODO confirm against the full body. */
42 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
43 _cleanup_free_ char *fs = NULL;
49 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Reads the next PID from a process-list stream f.
 * One unsigned long is parsed with fscanf(). Callers in this file loop
 * while the return value is > 0, so a positive return means "a PID was
 * read" and a negative return is an errno-style error. */
61 int cg_read_pid(FILE *f, pid_t *_pid) {
64 /* Note that the cgroup.procs might contain duplicates! See
65 * cgroups.txt for details. */
71 if (fscanf(f, "%lu", &ul) != 1) {
/* Report -errno when errno is set; otherwise fall back to -EIO. */
76 return errno ? -errno : -EIO;
/* Starts enumeration of the direct child groups of (controller, path).
 * The group's directory path is resolved with cg_get_path() (no suffix).
 * The directory stream is presumably returned through _d; callers in
 * this file feed it to cg_read_subgroup(). TODO confirm. */
86 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
87 _cleanup_free_ char *fs = NULL;
93 /* This is not recursive! */
95 r = cg_get_path(controller, path, NULL, &fs);
/* Fetches the name of the next child group from directory stream d.
 * Entries are walked with FOREACH_DIRENT, which returns -errno if reading
 * the directory fails. Entries that are not directories, and the "." and
 * ".." entries, are tested for (presumably to skip them). The entry name
 * is duplicated with strdup(), presumably for return through fn.
 * Callers in this file loop while the return value is > 0. */
107 int cg_read_subgroup(DIR *d, char **fn) {
113 FOREACH_DIRENT(de, d, return -errno) {
116 if (de->d_type != DT_DIR)
119 if (streq(de->d_name, ".") ||
120 streq(de->d_name, ".."))
123 b = strdup(de->d_name);
/* Removes the directory that backs the cgroup (controller, path).
 * The directory path comes from cg_get_path(). The visible error check
 * only fires when errno is not ENOENT, i.e. an already-missing group is
 * not treated as a failure. */
134 int cg_rmdir(const char *controller, const char *path) {
135 _cleanup_free_ char *p = NULL;
138 r = cg_get_path(controller, path, NULL, &p);
143 if (r < 0 && errno != ENOENT)
/* Sends signal sig to the processes listed in cgroup (controller, path).
 *
 * sig          signal passed to kill() for every PID read from the group
 * sigcont      when set and sig is not SIGKILL, an additional step is taken
 *              after a successful kill() (presumably sending SIGCONT --
 *              TODO confirm, the statement itself is not shown)
 * ignore_self  when set, the PID equal to my_pid is tested for and
 *              presumably skipped
 * s            set of PIDs that were already signalled; a set created with
 *              set_new() is stored in allocated_set, which is released
 *              automatically on return (presumably only when the caller
 *              passed no set)
 *
 * A failing kill() with errno == ESRCH and a failing enumeration with
 * -ENOENT are not recorded in ret. */
149 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
150 _cleanup_set_free_ Set *allocated_set = NULL;
157 /* This goes through the tasks list and kills them all. This
158 * is repeated until no further processes are added to the
159 * tasks list, to properly handle forking processes */
162 s = allocated_set = set_new(NULL);
170 _cleanup_fclose_ FILE *f = NULL;
174 r = cg_enumerate_processes(controller, path, &f);
176 if (ret >= 0 && r != -ENOENT)
182 while ((r = cg_read_pid(f, &pid)) > 0) {
184 if (ignore_self && pid == my_pid)
/* PIDs are stored in s as pointer-encoded longs; a hit here means
 * this PID was recorded by an earlier set_put() below. */
187 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
190 /* If we haven't killed this process yet, kill
192 if (kill(pid, sig) < 0) {
193 if (ret >= 0 && errno != ESRCH)
196 if (sigcont && sig != SIGKILL)
205 r = set_put(s, LONG_TO_PTR(pid));
221 /* To avoid racing against processes which fork
222 * quicker than we can kill them we repeat this until
223 * no new pids need to be killed. */
/* Applies cg_kill() to cgroup (controller, path) and then to every
 * subgroup below it.
 *
 * sig, sigcont, ignore_self and s are forwarded unchanged to cg_kill() and
 * to the recursive calls, so one PID set is shared across the whole tree.
 * rem is forwarded to the recursive calls; cg_rmdir() is invoked on the
 * group itself (presumably only when rem is set -- TODO confirm).
 *
 * -ENOENT from subgroup enumeration, and -ENOENT / -EBUSY from cg_rmdir(),
 * are not recorded in ret. */
230 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
231 _cleanup_set_free_ Set *allocated_set = NULL;
232 _cleanup_closedir_ DIR *d = NULL;
240 s = allocated_set = set_new(NULL);
245 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
247 r = cg_enumerate_subgroups(controller, path, &d);
249 if (ret >= 0 && r != -ENOENT)
255 while ((r = cg_read_subgroup(d, &fn)) > 0) {
256 _cleanup_free_ char *p = NULL;
/* Child group path is "<path>/<fn>". */
258 p = strjoin(path, "/", fn, NULL);
263 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
264 if (ret >= 0 && r != 0)
268 if (ret >= 0 && r < 0)
272 r = cg_rmdir(controller, path);
273 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Moves the processes listed in cgroup (cfrom, pfrom) into (cto, pto).
 * Each PID read from the source group is attached to the destination with
 * cg_attach() and then recorded in the local set s, so that a PID seen
 * again is recognised by the set_get() check.
 * ignore_self: the PID equal to my_pid is tested for (presumably skipped).
 * -ENOENT from enumeration and -ESRCH from cg_attach() are not recorded
 * in ret. */
280 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
282 _cleanup_set_free_ Set *s = NULL;
298 _cleanup_fclose_ FILE *f = NULL;
302 r = cg_enumerate_processes(cfrom, pfrom, &f);
304 if (ret >= 0 && r != -ENOENT)
310 while ((r = cg_read_pid(f, &pid)) > 0) {
312 /* This might do weird stuff if we aren't a
313 * single-threaded program. However, we
314 * luckily know we are not */
315 if (ignore_self && pid == my_pid)
318 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
321 r = cg_attach(cto, pto, pid);
323 if (ret >= 0 && r != -ESRCH)
330 r = set_put(s, LONG_TO_PTR(pid));
/* Migrates the processes of (cfrom, pfrom) and of all its subgroups into
 * the single destination group (cto, pto).
 * The parameter list is not visible here; judging by the recursive call
 * it is (cfrom, pfrom, cto, pto, ignore_self, rem).
 * Every subgroup "<pfrom>/<fn>" is migrated into the same pto, so the
 * source tree is flattened into one destination group.
 * cg_rmdir() is called on the source group (presumably only when rem is
 * set -- TODO confirm); -ENOENT and -EBUSY from it are not recorded. */
350 int cg_migrate_recursive(
358 _cleanup_closedir_ DIR *d = NULL;
367 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
369 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
371 if (ret >= 0 && r != -ENOENT)
377 while ((r = cg_read_subgroup(d, &fn)) > 0) {
378 _cleanup_free_ char *p = NULL;
380 p = strjoin(pfrom, "/", fn, NULL);
389 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
390 if (r != 0 && ret >= 0)
394 if (r < 0 && ret >= 0)
398 r = cg_rmdir(cfrom, pfrom);
399 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Like cg_migrate_recursive(), but with a fallback for the destination:
 * the exact destination pto is tried first, and afterwards the prefixes
 * of pto produced by PATH_FOREACH_PREFIX are tried in turn (the
 * conditions that guard the retry and end the loop are not shown). */
406 int cg_migrate_recursive_fallback(
421 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
/* Scratch buffer large enough for any prefix of pto plus the NUL. */
423 char prefix[strlen(pto) + 1];
425 /* This didn't work? Then let's try all prefixes of the destination */
427 PATH_FOREACH_PREFIX(prefix, pto) {
428 r = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
/* Normalizes a controller specification.
 * SYSTEMD_CGROUP_CONTROLLER is special-cased (its replacement value is not
 * shown). For a "name=..." controller the five-character "name=" prefix is
 * stripped by returning a pointer into the argument. */
437 static const char *normalize_controller(const char *controller) {
441 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
443 else if (startswith(controller, "name="))
444 return controller + 5;
/* Builds the filesystem path for a cgroup object and stores it in *fs.
 *
 * With a non-empty controller the result is rooted at
 * "/sys/fs/cgroup/<controller>", followed by path and/or suffix when they
 * are non-empty. Without a controller only path and suffix are joined.
 * The string is passed through path_kill_slashes() before being stored. */
449 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
452 if (!isempty(controller)) {
453 if (!isempty(path) && !isempty(suffix))
454 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
455 else if (!isempty(path))
456 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
457 else if (!isempty(suffix))
458 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
460 t = strappend("/sys/fs/cgroup/", controller);
462 if (!isempty(path) && !isempty(suffix))
463 t = strjoin(path, "/", suffix, NULL);
464 else if (!isempty(path))
473 *fs = path_kill_slashes(t);
/* Computes the filesystem path of (controller, path, suffix) into *fs.
 * A non-NULL controller must pass cg_controller_is_valid() with named
 * hierarchies allowed. Until the thread-local flag "good" is set, the
 * function checks whether "/sys/fs/cgroup" is a mount point. The
 * normalized controller (or NULL) is then handed to join_path(). */
477 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
479 static thread_local bool good = false;
483 if (controller && !cg_controller_is_valid(controller, true))
486 if (_unlikely_(!good)) {
489 r = path_is_mount_point("/sys/fs/cgroup", false);
495 /* Cache this to save a few stat()s */
499 p = controller ? normalize_controller(controller) : NULL;
501 return join_path(p, path, suffix, fs);
/* Checks that hierarchy p exists: p must be a valid file name and
 * "/sys/fs/cgroup/<p>" must be accessible (laccess() with F_OK).
 * Callers in this file treat a return value >= 0 as "exists". */
504 static int check_hierarchy(const char *p) {
509 if (!filename_is_valid(p))
512 /* Check if this controller actually really exists */
513 cc = strjoina("/sys/fs/cgroup/", p);
514 if (laccess(cc, F_OK) < 0)
/* Variant of cg_get_path() that additionally verifies the hierarchy.
 * The controller is validated, normalized and passed to
 * check_hierarchy() before the path is assembled by join_path(). */
520 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
526 if (!cg_controller_is_valid(controller, true))
529 /* Normalize the controller syntax */
530 p = normalize_controller(controller);
532 /* Check if this controller actually really exists */
533 r = check_hierarchy(p);
537 return join_path(p, path, suffix, fs);
/* nftw() callback used by cg_trim().
 * Two filters are visible: the entry type is compared against FTW_DP and
 * the walk level against 1. Presumably entries that are not directories
 * visited in post-order, and the starting directory itself (level 0),
 * are left alone -- TODO confirm; the action taken is not shown. */
540 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
545 if (typeflag != FTW_DP)
548 if (ftwbuf->level < 1)
/* Trims the cgroup tree below (controller, path).
 * The directory is walked with nftw() using trim_cb, with the flags
 * FTW_DEPTH, FTW_MOUNT and FTW_PHYS and a limit of 64 open descriptors.
 * A failing walk yields -errno, or -EIO when errno is unset.
 * The group directory itself is removed with rmdir() (presumably only
 * when delete_root is set -- TODO confirm); ENOENT is tolerated. */
555 int cg_trim(const char *controller, const char *path, bool delete_root) {
556 _cleanup_free_ char *fs = NULL;
561 r = cg_get_path(controller, path, NULL, &fs);
566 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
567 r = errno ? -errno : -EIO;
570 if (rmdir(fs) < 0 && errno != ENOENT)
/* Deletes cgroup (controller, path) by moving its contents to the parent.
 * The parent path is obtained with path_get_parent(); everything is then
 * migrated recursively into it with rem = true and ignore_self = false.
 * A missing source group (-ENOENT) is reported as success (0). */
577 int cg_delete(const char *controller, const char *path) {
578 _cleanup_free_ char *parent = NULL;
583 r = path_get_parent(path, &parent);
587 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
588 return r == -ENOENT ? 0 : r;
/* Creates the directory for cgroup (controller, path).
 * The path is resolved with cg_get_path_and_check(), so the hierarchy must
 * exist. Parent directories are created with mkdir_parents() and the
 * group directory with mkdir(), both using mode 0755. How a failing
 * mkdir() is mapped to the return value is not shown. */
591 int cg_create(const char *controller, const char *path) {
592 _cleanup_free_ char *fs = NULL;
595 r = cg_get_path_and_check(controller, path, NULL, &fs);
599 r = mkdir_parents(fs, 0755);
603 if (mkdir(fs, 0755) < 0) {
/* Creates cgroup (controller, path) via cg_create() and then attaches
 * pid to it via cg_attach(). As the note below states, a group created
 * here is left in place when a later step fails. */
614 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
619 r = cg_create(controller, path);
623 q = cg_attach(controller, path, pid);
627 /* This does not remove the cgroup on failure */
/* Attaches pid to cgroup (controller, path) by writing the PID followed
 * by a newline to the group's "cgroup.procs" file. The file must already
 * exist (write_string_file_no_create). */
631 int cg_attach(const char *controller, const char *path, pid_t pid) {
632 _cleanup_free_ char *fs = NULL;
/* Room for the decimal PID plus the trailing newline. */
633 char c[DECIMAL_STR_MAX(pid_t) + 2];
639 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
646 snprintf(c, sizeof(c), PID_FMT"\n", pid);
648 return write_string_file_no_create(fs, c);
/* Attaches pid to (controller, path) with a fallback: the exact path is
 * tried first through cg_attach(), and afterwards the prefixes of path
 * produced by PATH_FOREACH_PREFIX are tried in turn (the conditions that
 * guard the retry and end the loop are not shown). The prefix buffer is
 * sized strlen(path) + 1. */
651 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
658 r = cg_attach(controller, path, pid);
660 char prefix[strlen(path) + 1];
662 /* This didn't work? Then let's try all prefixes of
665 PATH_FOREACH_PREFIX(prefix, path) {
666 r = cg_attach(controller, prefix, pid);
/* Applies mode, uid and gid to the directory of a cgroup through
 * chmod_and_chown(). Only the controller parameter is visible in the
 * signature; path, mode, uid and gid are used in the body. An adjustment
 * made when mode is not MODE_INVALID is not shown. */
675 int cg_set_group_access(
676 const char *controller,
682 _cleanup_free_ char *fs = NULL;
687 if (mode != MODE_INVALID)
690 r = cg_get_path(controller, path, NULL, &fs);
694 return chmod_and_chown(fs, mode, uid, gid);
/* Applies mode, uid and gid to the "cgroup.procs" file of a cgroup and
 * then, for compatibility, to its "tasks" file as well, both through
 * chmod_and_chown(). The case where mode, uid and gid are all invalid is
 * tested first (presumably a no-op -- TODO confirm). */
697 int cg_set_task_access(
698 const char *controller,
704 _cleanup_free_ char *fs = NULL, *procs = NULL;
709 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
712 if (mode != MODE_INVALID)
715 r = cg_get_path(controller, path, "cgroup.procs", &fs);
719 r = chmod_and_chown(fs, mode, uid, gid);
723 /* Compatibility, Always keep values for "tasks" in sync with
725 r = cg_get_path(controller, path, "tasks", &procs);
729 return chmod_and_chown(procs, mode, uid, gid);
/* Looks up the cgroup path of process pid within one controller.
 * The per-process "cgroup" file under procfs is read line by line. In
 * each line the text after the first ':' is split on ',' and each word is
 * compared with the normalized controller name, either directly or with
 * a leading "name=". SYSTEMD_CGROUP_CONTROLLER is used as the controller
 * in one branch (presumably when none was given -- TODO confirm).
 * A failure with errno == ENOENT is reported as -ESRCH. */
732 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
733 _cleanup_fclose_ FILE *f = NULL;
742 if (!cg_controller_is_valid(controller, true))
745 controller = normalize_controller(controller);
747 controller = SYSTEMD_CGROUP_CONTROLLER;
749 fs = procfs_file_alloca(pid, "cgroup");
753 return errno == ENOENT ? -ESRCH : -errno;
/* Length is computed once and reused for every word comparison. */
755 cs = strlen(controller);
757 FOREACH_LINE(line, f, return -errno) {
760 const char *word, *state;
765 l = strchr(line, ':');
776 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
778 if (k == cs && memcmp(word, controller, cs) == 0) {
784 memcmp(word, "name=", 5) == 0 &&
785 memcmp(word+5, controller, cs) == 0) {
/* Installs agent as the release agent of a hierarchy and enables
 * release notification.
 * Step 1: the hierarchy's "release_agent" file is read; agent is written
 * in one branch (presumably when the file is empty), and a differing
 * existing value is handled by the else branch (result not shown).
 * Step 2: "notify_on_release" is read and, when it holds "0", "1" is
 * written to it. */
805 int cg_install_release_agent(const char *controller, const char *agent) {
806 _cleanup_free_ char *fs = NULL, *contents = NULL;
812 r = cg_get_path(controller, NULL, "release_agent", &fs);
816 r = read_one_line_file(fs, &contents);
820 sc = strstrip(contents);
822 r = write_string_file_no_create(fs, agent);
825 } else if (!streq(sc, agent))
830 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
836 r = read_one_line_file(fs, &contents);
840 sc = strstrip(contents);
841 if (streq(sc, "0")) {
842 r = write_string_file_no_create(fs, "1");
/* Undoes cg_install_release_agent() for a hierarchy: writes "0" to
 * "notify_on_release" and then an empty string to "release_agent". */
855 int cg_uninstall_release_agent(const char *controller) {
856 _cleanup_free_ char *fs = NULL;
859 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
863 r = write_string_file_no_create(fs, "0");
870 r = cg_get_path(controller, NULL, "release_agent", &fs);
874 r = write_string_file_no_create(fs, "");
/* Tests whether cgroup (controller, path) contains no processes.
 * A group that does not exist (-ENOENT from enumeration) counts as
 * empty and yields 1; other enumeration errors are returned as is.
 * With ignore_self set, the PID equal to self_pid is tested for
 * (presumably not counted -- TODO confirm). */
881 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
882 _cleanup_fclose_ FILE *f = NULL;
883 pid_t pid = 0, self_pid;
889 r = cg_enumerate_processes(controller, path, &f);
891 return r == -ENOENT ? 1 : r;
895 while ((r = cg_read_pid(f, &pid)) > 0) {
897 if (ignore_self && pid == self_pid)
/* Recursive form of cg_is_empty(): the group itself is checked first,
 * then each subgroup "<path>/<fn>" is checked by a recursive call.
 * A missing group (-ENOENT from subgroup enumeration) yields 1. */
910 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
911 _cleanup_closedir_ DIR *d = NULL;
917 r = cg_is_empty(controller, path, ignore_self);
921 r = cg_enumerate_subgroups(controller, path, &d);
923 return r == -ENOENT ? 1 : r;
925 while ((r = cg_read_subgroup(d, &fn)) > 0) {
926 _cleanup_free_ char *p = NULL;
928 p = strjoin(path, "/", fn, NULL);
933 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup specification into a controller and a path.
 * Three shapes are visible in the code:
 *   - a spec validated with path_is_safe() whose result goes to *path
 *     (presumably the "starts with '/'" case -- TODO confirm);
 *   - a spec without ':' that is validated as a controller name and
 *     duplicated in normalized form;
 *   - "controller:path", where the part before ':' is normalized and
 *     validated, and a non-empty part after ':' must be a safe, absolute
 *     path, which is cleaned with path_kill_slashes(). */
944 int cg_split_spec(const char *spec, char **controller, char **path) {
946 char *t = NULL, *u = NULL;
947 _cleanup_free_ char *v = NULL;
952 if (!path_is_safe(spec))
960 *path = path_kill_slashes(t);
969 e = strchr(spec, ':');
971 if (!cg_controller_is_valid(spec, true))
975 t = strdup(normalize_controller(spec));
988 v = strndup(spec, e-spec);
991 t = strdup(normalize_controller(v));
994 if (!cg_controller_is_valid(t, true)) {
999 if (streq(e+1, "")) {
1012 if (!path_is_safe(u) ||
1013 !path_is_absolute(u)) {
1019 path_kill_slashes(u);
/* Converts path into a cgroup filesystem path stored in *result.
 * Input that already lies below "/sys/fs/cgroup" is cleaned up with
 * path_kill_slashes(). Anything else is parsed by cg_split_spec();
 * a missing controller defaults to SYSTEMD_CGROUP_CONTROLLER and a
 * missing path to "/". */
1035 int cg_mangle_path(const char *path, char **result) {
1036 _cleanup_free_ char *c = NULL, *p = NULL;
1043 /* First, check if it already is a filesystem path */
1044 if (path_startswith(path, "/sys/fs/cgroup")) {
1050 *result = path_kill_slashes(t);
1054 /* Otherwise, treat it as cg spec */
1055 r = cg_split_spec(path, &c, &p);
1059 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Stores in *path the cgroup path of PID 1 in the
 * SYSTEMD_CGROUP_CONTROLLER hierarchy, as reported by cg_pid_get_path(). */
1062 int cg_get_root_path(char **path) {
1065 return cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, path);
/* Relates cgroup to a root path. When no root is supplied it is taken
 * from cg_get_root_path() (see the note below). cgroup is then matched
 * against root with path_startswith(); how the match is turned into
 * *shifted is not shown. */
1068 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1069 _cleanup_free_ char *rt = NULL;
1077 /* If the root was specified let's use that, otherwise
1078 * let's determine it from PID 1 */
1080 r = cg_get_root_path(&rt);
1087 p = path_startswith(cgroup, root);
/* Determines the cgroup path of pid in the SYSTEMD_CGROUP_CONTROLLER
 * hierarchy and passes it through cg_shift_path() with the given root.
 * How the shifted result is stored in *cgroup is not shown. */
1096 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1097 _cleanup_free_ char *raw = NULL;
1104 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1108 r = cg_shift_path(raw, root, &c);
/* Extracts the session name from a cgroup path.
 * path must start with '/'. Starting at e (its assignment is not shown),
 * the text up to the next '/' or the end of the string is copied to the
 * stack with strndupa(). Per the note below that component may still be
 * escaped. */
1128 int cg_path_get_session(const char *path, char **session) {
1129 const char *e, *n, *s;
1131 /* Elogind uses a flat hierarchy, just "/SESSION". The only
1132 wrinkle is that SESSION might be escaped. */
1135 assert(path[0] == '/');
1138 n = strchrnul(e, '/');
1142 s = strndupa(e, n - e);
/* Determines the session of process pid: its shifted cgroup path is
 * obtained with cg_pid_get_path_shifted() (default root) and parsed by
 * cg_path_get_session(). */
1161 int cg_pid_get_session(pid_t pid, char **session) {
1162 _cleanup_free_ char *cgroup = NULL;
1165 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1169 return cg_path_get_session(cgroup, session);
/* Escapes a name for use as a file name inside the cgroup tree.
 * Names that need escaping get a single '_' prepended via strappend().
 * Visible triggers include the names "notify_on_release" and
 * "release_agent". For names containing a '.', the part before the last
 * dot is compared with "cgroup" and passed to check_hierarchy(). */
1172 char *cg_escape(const char *p) {
1173 bool need_prefix = false;
1175 /* This implements very minimal escaping for names to be used
1176 * as file names in the cgroup tree: any name which might
1177 * conflict with a kernel name or is prefixed with '_' is
1178 * prefixed with a '_'. That way, when reading cgroup names it
1179 * is sufficient to remove a single prefixing underscore if
1182 /* The return value of this function (unlike cg_unescape())
1188 streq(p, "notify_on_release") ||
1189 streq(p, "release_agent") ||
1195 dot = strrchr(p, '.');
1198 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1203 n = strndupa(p, dot - p);
1205 if (check_hierarchy(n) >= 0)
1212 return strappend("_", p);
/* Counterpart of cg_escape(). As the note below says, the caller must
 * not free the result; presumably it points into p -- TODO confirm, the
 * body is not shown. */
1217 char *cg_unescape(const char *p) {
1220 /* The return value of this function (unlike cg_escape())
1221 * doesn't need free()! */
/* Set of characters accepted in a controller name; every character of a
 * candidate name is looked up in it with strchr() by
 * cg_controller_is_valid(). The macro body is not shown here. */
1229 #define CONTROLLER_VALID \
1233 bool cg_controller_is_valid(const char *p, bool allow_named) {
/* Validates a controller name. A "name=" prefix is recognised (how
 * allow_named affects it is not shown). An empty name or one starting
 * with '_' is tested for, every character must occur in
 * CONTROLLER_VALID, and the length is compared with FILENAME_MAX. */
1240 s = startswith(p, "name=");
1245 if (*p == 0 || *p == '_')
1248 for (t = p; *t; t++)
1249 if (!strchr(CONTROLLER_VALID, *t))
1252 if (t - p > FILENAME_MAX)
/* Writes value to the attribute file named attribute inside cgroup
 * (controller, path). The file must already exist
 * (write_string_file_no_create). */
1258 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1259 _cleanup_free_ char *p = NULL;
1262 r = cg_get_path(controller, path, attribute, &p);
1266 return write_string_file_no_create(p, value);
/* Reads the first line of the attribute file named attribute inside
 * cgroup (controller, path) into *ret via read_one_line_file(). */
1269 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1270 _cleanup_free_ char *p = NULL;
1273 r = cg_get_path(controller, path, attribute, &p);
1277 return read_one_line_file(p, ret);
/* Controller names iterated with NULSTR_FOREACH by the *_everywhere()
 * helpers and cg_mask_supported(). Each entry is paired with one bit of
 * a CGroupControllerMask by position (presumably a NUL-separated string
 * list -- TODO confirm, the initializer is not shown). */
1280 static const char mask_names[] =
/* Creates cgroup path in the SYSTEMD_CGROUP_CONTROLLER hierarchy and
 * then walks mask_names to handle the other hierarchies. For supported
 * controllers that reach the else branch, the group is trimmed with
 * cg_trim() including its root (the first branch is not shown). */
1287 int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path) {
1288 CGroupControllerMask bit = 1;
1292 /* This one will create a cgroup in our private tree, but also
1293 * duplicate it in the trees specified in mask, and remove it
1296 /* First create the cgroup in our own hierarchy. */
1297 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1301 /* Then, do the same in the other hierarchies */
1302 NULSTR_FOREACH(n, mask_names) {
1305 else if (supported & bit)
/* Best effort: the result of cg_trim() is not checked. */
1306 cg_trim(n, path, true);
/* Attaches pid to cgroup path in the SYSTEMD_CGROUP_CONTROLLER hierarchy
 * and then, for every supported controller in mask_names, attaches it
 * there through cg_attach_fallback(), whose result is not checked.
 * path_callback, when used, is called with the controller bit and
 * userdata and returns a per-controller path. */
1314 int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1315 CGroupControllerMask bit = 1;
1319 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1323 NULSTR_FOREACH(n, mask_names) {
1325 if (supported & bit) {
1326 const char *p = NULL;
1329 p = path_callback(bit, userdata);
/* NOTE(review): p is obtained from path_callback above, but `path` is
 * what gets passed here, so the callback result looks unused.
 * cg_migrate_everywhere() passes its p to the fallback call. Confirm
 * whether `p` was intended. */
1334 cg_attach_fallback(n, path, pid);
/* Calls cg_attach_everywhere() for every PID stored in the set pids.
 * The set elements are pointer-encoded longs (PTR_TO_LONG). How the
 * individual results q are combined is not shown. */
1343 int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1348 SET_FOREACH(pidp, pids, i) {
1349 pid_t pid = PTR_TO_LONG(pidp);
1352 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
/* Migrates group `from` to group `to` across hierarchies.
 * If the two paths differ, the SYSTEMD_CGROUP_CONTROLLER tree is migrated
 * recursively with removal of the source enabled. Then, for every
 * supported controller in mask_names, the processes of `to` in the
 * SYSTEMD_CGROUP_CONTROLLER hierarchy are migrated into that controller's
 * group p, with prefix fallback and without removal. to_callback, when
 * used, supplies p from the controller bit and userdata. The result of
 * the per-controller migration is not checked. */
1360 int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1361 CGroupControllerMask bit = 1;
1365 if (!path_equal(from, to)) {
1366 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1371 NULSTR_FOREACH(n, mask_names) {
1372 if (supported & bit) {
1373 const char *p = NULL;
1376 p = to_callback(bit, userdata);
1381 cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, n, p, false, false);
/* Trims cgroup path in the SYSTEMD_CGROUP_CONTROLLER hierarchy and then
 * in every supported controller listed in mask_names. The results of the
 * per-controller cg_trim() calls are not checked. */
1390 int cg_trim_everywhere(CGroupControllerMask supported, const char *path, bool delete_root) {
1391 CGroupControllerMask bit = 1;
1395 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1399 NULSTR_FOREACH(n, mask_names) {
1400 if (supported & bit)
1401 cg_trim(n, path, delete_root);
/* Computes the mask of controllers available on this system: every name
 * in mask_names is probed with check_hierarchy(), and a result >= 0
 * presumably sets the matching bit in mask -- TODO confirm, the
 * assignment is not shown. */
1409 CGroupControllerMask cg_mask_supported(void) {
1410 CGroupControllerMask bit = 1, mask = 0;
1413 NULSTR_FOREACH(n, mask_names) {
1414 if (check_hierarchy(n) >= 0)
1423 int cg_kernel_controllers(Set *controllers) {
1424 _cleanup_fclose_ FILE *f = NULL;
1428 assert(controllers);
1430 f = fopen("/proc/cgroups", "re");
1432 if (errno == ENOENT)
1437 /* Ignore the header line */
1438 (void) fgets(buf, sizeof(buf), f);
1445 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
1450 if (ferror(f) && errno)
1461 if (!filename_is_valid(controller)) {
1466 r = set_consume(controllers, controller);