1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
36 #include "formats-util.h"
37 #include "process-util.h"
38 #include "path-util.h"
43 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
44 _cleanup_free_ char *fs = NULL;
50 r = cg_get_path(controller, path, "cgroup.procs", &fs);
62 int cg_read_pid(FILE *f, pid_t *_pid) {
65 /* Note that the cgroup.procs might contain duplicates! See
66 * cgroups.txt for details. */
72 if (fscanf(f, "%lu", &ul) != 1) {
77 return errno ? -errno : -EIO;
87 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
88 _cleanup_free_ char *fs = NULL;
94 /* This is not recursive! */
96 r = cg_get_path(controller, path, NULL, &fs);
108 int cg_read_subgroup(DIR *d, char **fn) {
114 FOREACH_DIRENT(de, d, return -errno) {
117 if (de->d_type != DT_DIR)
120 if (streq(de->d_name, ".") ||
121 streq(de->d_name, ".."))
124 b = strdup(de->d_name);
135 int cg_rmdir(const char *controller, const char *path) {
136 _cleanup_free_ char *p = NULL;
139 r = cg_get_path(controller, path, NULL, &p);
144 if (r < 0 && errno != ENOENT)
150 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
151 _cleanup_set_free_ Set *allocated_set = NULL;
158 /* This goes through the process list (cgroup.procs) and kills
159 * them all. This is repeated until no further processes are added
160 * to the list, to properly handle forking processes. */
163 s = allocated_set = set_new(NULL);
171 _cleanup_fclose_ FILE *f = NULL;
175 r = cg_enumerate_processes(controller, path, &f);
177 if (ret >= 0 && r != -ENOENT)
183 while ((r = cg_read_pid(f, &pid)) > 0) {
185 if (ignore_self && pid == my_pid)
188 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
191 /* If we haven't killed this process yet, kill
193 if (kill(pid, sig) < 0) {
194 if (ret >= 0 && errno != ESRCH)
197 if (sigcont && sig != SIGKILL)
206 r = set_put(s, LONG_TO_PTR(pid));
222 /* To avoid racing against processes which fork
223 * quicker than we can kill them we repeat this until
224 * no new pids need to be killed. */
231 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
232 _cleanup_set_free_ Set *allocated_set = NULL;
233 _cleanup_closedir_ DIR *d = NULL;
241 s = allocated_set = set_new(NULL);
246 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
248 r = cg_enumerate_subgroups(controller, path, &d);
250 if (ret >= 0 && r != -ENOENT)
256 while ((r = cg_read_subgroup(d, &fn)) > 0) {
257 _cleanup_free_ char *p = NULL;
259 p = strjoin(path, "/", fn, NULL);
264 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
265 if (ret >= 0 && r != 0)
269 if (ret >= 0 && r < 0)
273 r = cg_rmdir(controller, path);
274 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
281 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
283 _cleanup_set_free_ Set *s = NULL;
299 _cleanup_fclose_ FILE *f = NULL;
303 r = cg_enumerate_processes(cfrom, pfrom, &f);
305 if (ret >= 0 && r != -ENOENT)
311 while ((r = cg_read_pid(f, &pid)) > 0) {
313 /* This might do weird stuff if we aren't a
314 * single-threaded program. However, we
315 * luckily know we are single-threaded. */
316 if (ignore_self && pid == my_pid)
319 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
322 r = cg_attach(cto, pto, pid);
324 if (ret >= 0 && r != -ESRCH)
331 r = set_put(s, LONG_TO_PTR(pid));
351 int cg_migrate_recursive(
359 _cleanup_closedir_ DIR *d = NULL;
368 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
370 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
372 if (ret >= 0 && r != -ENOENT)
378 while ((r = cg_read_subgroup(d, &fn)) > 0) {
379 _cleanup_free_ char *p = NULL;
381 p = strjoin(pfrom, "/", fn, NULL);
390 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
391 if (r != 0 && ret >= 0)
395 if (r < 0 && ret >= 0)
399 r = cg_rmdir(cfrom, pfrom);
400 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
407 int cg_migrate_recursive_fallback(
422 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
424 char prefix[strlen(pto) + 1];
426 /* This didn't work? Then let's try all prefixes of the destination */
428 PATH_FOREACH_PREFIX(prefix, pto) {
429 r = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
438 static const char *normalize_controller(const char *controller) {
442 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
444 else if (startswith(controller, "name="))
445 return controller + 5;
450 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
453 if (!isempty(controller)) {
454 if (!isempty(path) && !isempty(suffix))
455 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
456 else if (!isempty(path))
457 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
458 else if (!isempty(suffix))
459 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
461 t = strappend("/sys/fs/cgroup/", controller);
463 if (!isempty(path) && !isempty(suffix))
464 t = strjoin(path, "/", suffix, NULL);
465 else if (!isempty(path))
474 *fs = path_kill_slashes(t);
478 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
480 static thread_local bool good = false;
484 if (controller && !cg_controller_is_valid(controller, true))
487 if (_unlikely_(!good)) {
490 r = path_is_mount_point("/sys/fs/cgroup", false);
496 /* Cache this to save a few stat()s */
500 p = controller ? normalize_controller(controller) : NULL;
502 return join_path(p, path, suffix, fs);
505 static int check_hierarchy(const char *p) {
510 if (!filename_is_valid(p))
513 /* Check if this controller actually really exists */
514 cc = strjoina("/sys/fs/cgroup/", p);
515 if (laccess(cc, F_OK) < 0)
521 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
527 if (!cg_controller_is_valid(controller, true))
530 /* Normalize the controller syntax */
531 p = normalize_controller(controller);
533 /* Check if this controller actually really exists */
534 r = check_hierarchy(p);
538 return join_path(p, path, suffix, fs);
541 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
546 if (typeflag != FTW_DP)
549 if (ftwbuf->level < 1)
556 int cg_trim(const char *controller, const char *path, bool delete_root) {
557 _cleanup_free_ char *fs = NULL;
562 r = cg_get_path(controller, path, NULL, &fs);
567 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
568 r = errno ? -errno : -EIO;
571 if (rmdir(fs) < 0 && errno != ENOENT)
578 int cg_delete(const char *controller, const char *path) {
579 _cleanup_free_ char *parent = NULL;
584 r = path_get_parent(path, &parent);
588 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
589 return r == -ENOENT ? 0 : r;
592 int cg_create(const char *controller, const char *path) {
593 _cleanup_free_ char *fs = NULL;
596 r = cg_get_path_and_check(controller, path, NULL, &fs);
600 r = mkdir_parents(fs, 0755);
604 if (mkdir(fs, 0755) < 0) {
615 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
620 r = cg_create(controller, path);
624 q = cg_attach(controller, path, pid);
628 /* This does not remove the cgroup on failure */
632 int cg_attach(const char *controller, const char *path, pid_t pid) {
633 _cleanup_free_ char *fs = NULL;
634 char c[DECIMAL_STR_MAX(pid_t) + 2];
640 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
647 snprintf(c, sizeof(c), PID_FMT"\n", pid);
649 return write_string_file_no_create(fs, c);
652 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
659 r = cg_attach(controller, path, pid);
661 char prefix[strlen(path) + 1];
663 /* This didn't work? Then let's try all prefixes of
666 PATH_FOREACH_PREFIX(prefix, path) {
667 r = cg_attach(controller, prefix, pid);
676 int cg_set_group_access(
677 const char *controller,
683 _cleanup_free_ char *fs = NULL;
688 if (mode != MODE_INVALID)
691 r = cg_get_path(controller, path, NULL, &fs);
695 return chmod_and_chown(fs, mode, uid, gid);
698 int cg_set_task_access(
699 const char *controller,
705 _cleanup_free_ char *fs = NULL, *procs = NULL;
710 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
713 if (mode != MODE_INVALID)
716 r = cg_get_path(controller, path, "cgroup.procs", &fs);
720 r = chmod_and_chown(fs, mode, uid, gid);
724 /* Compatibility, Always keep values for "tasks" in sync with
726 r = cg_get_path(controller, path, "tasks", &procs);
730 return chmod_and_chown(procs, mode, uid, gid);
733 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
734 _cleanup_fclose_ FILE *f = NULL;
743 if (!cg_controller_is_valid(controller, true))
746 controller = normalize_controller(controller);
748 controller = SYSTEMD_CGROUP_CONTROLLER;
750 fs = procfs_file_alloca(pid, "cgroup");
754 return errno == ENOENT ? -ESRCH : -errno;
756 cs = strlen(controller);
758 FOREACH_LINE(line, f, return -errno) {
761 const char *word, *state;
766 l = strchr(line, ':');
777 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
779 if (k == cs && memcmp(word, controller, cs) == 0) {
785 memcmp(word, "name=", 5) == 0 &&
786 memcmp(word+5, controller, cs) == 0) {
806 int cg_install_release_agent(const char *controller, const char *agent) {
807 _cleanup_free_ char *fs = NULL, *contents = NULL;
813 r = cg_get_path(controller, NULL, "release_agent", &fs);
817 r = read_one_line_file(fs, &contents);
821 sc = strstrip(contents);
823 r = write_string_file_no_create(fs, agent);
826 } else if (!streq(sc, agent))
831 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
837 r = read_one_line_file(fs, &contents);
841 sc = strstrip(contents);
842 if (streq(sc, "0")) {
843 r = write_string_file_no_create(fs, "1");
856 int cg_uninstall_release_agent(const char *controller) {
857 _cleanup_free_ char *fs = NULL;
860 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
864 r = write_string_file_no_create(fs, "0");
871 r = cg_get_path(controller, NULL, "release_agent", &fs);
875 r = write_string_file_no_create(fs, "");
882 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
883 _cleanup_fclose_ FILE *f = NULL;
884 pid_t pid = 0, self_pid;
890 r = cg_enumerate_processes(controller, path, &f);
892 return r == -ENOENT ? 1 : r;
896 while ((r = cg_read_pid(f, &pid)) > 0) {
898 if (ignore_self && pid == self_pid)
911 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
912 _cleanup_closedir_ DIR *d = NULL;
918 r = cg_is_empty(controller, path, ignore_self);
922 r = cg_enumerate_subgroups(controller, path, &d);
924 return r == -ENOENT ? 1 : r;
926 while ((r = cg_read_subgroup(d, &fn)) > 0) {
927 _cleanup_free_ char *p = NULL;
929 p = strjoin(path, "/", fn, NULL);
934 r = cg_is_empty_recursive(controller, p, ignore_self);
945 int cg_split_spec(const char *spec, char **controller, char **path) {
947 char *t = NULL, *u = NULL;
948 _cleanup_free_ char *v = NULL;
953 if (!path_is_safe(spec))
961 *path = path_kill_slashes(t);
970 e = strchr(spec, ':');
972 if (!cg_controller_is_valid(spec, true))
976 t = strdup(normalize_controller(spec));
989 v = strndup(spec, e-spec);
992 t = strdup(normalize_controller(v));
995 if (!cg_controller_is_valid(t, true)) {
1000 if (streq(e+1, "")) {
1013 if (!path_is_safe(u) ||
1014 !path_is_absolute(u)) {
1020 path_kill_slashes(u);
1036 int cg_mangle_path(const char *path, char **result) {
1037 _cleanup_free_ char *c = NULL, *p = NULL;
1044 /* First, check if it already is a filesystem path */
1045 if (path_startswith(path, "/sys/fs/cgroup")) {
1051 *result = path_kill_slashes(t);
1055 /* Otherwise, treat it as cg spec */
1056 r = cg_split_spec(path, &c, &p);
1060 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1063 int cg_get_root_path(char **path) {
1066 return cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, path);
1069 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1070 _cleanup_free_ char *rt = NULL;
1078 /* If the root was specified let's use that, otherwise
1079 * let's determine it from PID 1 */
1081 r = cg_get_root_path(&rt);
1088 p = path_startswith(cgroup, root);
1097 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1098 _cleanup_free_ char *raw = NULL;
1105 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1109 r = cg_shift_path(raw, root, &c);
1129 int cg_path_get_session(const char *path, char **session) {
1130 const char *e, *n, *s;
1132 /* Elogind uses a flat hierarchy, just "/SESSION". The only
1133 wrinkle is that SESSION might be escaped. */
1136 assert(path[0] == '/');
1139 n = strchrnul(e, '/');
1143 s = strndupa(e, n - e);
1162 int cg_pid_get_session(pid_t pid, char **session) {
1163 _cleanup_free_ char *cgroup = NULL;
1166 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1170 return cg_path_get_session(cgroup, session);
1173 char *cg_escape(const char *p) {
1174 bool need_prefix = false;
1176 /* This implements very minimal escaping for names to be used
1177 * as file names in the cgroup tree: any name which might
1178 * conflict with a kernel name or is prefixed with '_' is
1179 * prefixed with a '_'. That way, when reading cgroup names it
1180 * is sufficient to remove a single prefixing underscore if
1183 /* The return value of this function (unlike cg_unescape())
1189 streq(p, "notify_on_release") ||
1190 streq(p, "release_agent") ||
1196 dot = strrchr(p, '.');
1199 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1204 n = strndupa(p, dot - p);
1206 if (check_hierarchy(n) >= 0)
1213 return strappend("_", p);
1218 char *cg_unescape(const char *p) {
1221 /* The return value of this function (unlike cg_escape())
1222 * doesn't need free()! */
1230 #define CONTROLLER_VALID \
1234 bool cg_controller_is_valid(const char *p, bool allow_named) {
1241 s = startswith(p, "name=");
1246 if (*p == 0 || *p == '_')
1249 for (t = p; *t; t++)
1250 if (!strchr(CONTROLLER_VALID, *t))
1253 if (t - p > FILENAME_MAX)
1259 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1260 _cleanup_free_ char *p = NULL;
1263 r = cg_get_path(controller, path, attribute, &p);
1267 return write_string_file_no_create(p, value);
1270 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1271 _cleanup_free_ char *p = NULL;
1274 r = cg_get_path(controller, path, attribute, &p);
1278 return read_one_line_file(p, ret);
1281 static const char mask_names[] =
1288 int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path) {
1289 CGroupControllerMask bit = 1;
1293 /* This one will create a cgroup in our private tree, but also
1294 * duplicate it in the trees specified in mask, and remove it
1297 /* First create the cgroup in our own hierarchy. */
1298 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1302 /* Then, do the same in the other hierarchies */
1303 NULSTR_FOREACH(n, mask_names) {
1306 else if (supported & bit)
1307 cg_trim(n, path, true);
1315 int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1316 CGroupControllerMask bit = 1;
1320 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1324 NULSTR_FOREACH(n, mask_names) {
1326 if (supported & bit) {
1327 const char *p = NULL;
1330 p = path_callback(bit, userdata);
1335 cg_attach_fallback(n, path, pid);
1344 int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1349 SET_FOREACH(pidp, pids, i) {
1350 pid_t pid = PTR_TO_LONG(pidp);
1353 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1361 int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1362 CGroupControllerMask bit = 1;
1366 if (!path_equal(from, to)) {
1367 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1372 NULSTR_FOREACH(n, mask_names) {
1373 if (supported & bit) {
1374 const char *p = NULL;
1377 p = to_callback(bit, userdata);
1382 cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, n, p, false, false);
1391 int cg_trim_everywhere(CGroupControllerMask supported, const char *path, bool delete_root) {
1392 CGroupControllerMask bit = 1;
1396 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1400 NULSTR_FOREACH(n, mask_names) {
1401 if (supported & bit)
1402 cg_trim(n, path, delete_root);
1410 CGroupControllerMask cg_mask_supported(void) {
1411 CGroupControllerMask bit = 1, mask = 0;
1414 NULSTR_FOREACH(n, mask_names) {
1415 if (check_hierarchy(n) >= 0)
1424 int cg_kernel_controllers(Set *controllers) {
1425 _cleanup_fclose_ FILE *f = NULL;
1429 assert(controllers);
1431 f = fopen("/proc/cgroups", "re");
1433 if (errno == ENOENT)
1438 /* Ignore the header line */
1439 (void) fgets(buf, sizeof(buf), f);
1446 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
1451 if (ferror(f) && errno)
1462 if (!filename_is_valid(controller)) {
1467 r = set_consume(controllers, controller);