1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
36 #include "path-util.h"
/* Locates the cgroup.procs attribute file of the cgroup identified by
 * controller and path by asking cg_get_path() for its file system path.
 * NOTE(review): the FILE ** output parameter _f suggests the file is then
 * opened and handed back to the caller; the statements that do so are not
 * visible here, so confirm before relying on it. */
41 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
42 _cleanup_free_ char *fs = NULL;
48 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Reads one PID from the stream f by parsing an unsigned long with fscanf().
 * The error return visible below yields -errno when errno is set and -EIO
 * otherwise. Callers in this file keep looping while the return value is
 * positive.
 * NOTE(review): presumably the parsed value is stored through _pid on
 * success; that assignment is not visible here. */
60 int cg_read_pid(FILE *f, pid_t *_pid) {
63 /* Note that the cgroup.procs might contain duplicates! See
64 * cgroups.txt for details. */
70 if (fscanf(f, "%lu", &ul) != 1) {
75 return errno ? -errno : -EIO;
/* Resolves the directory of the cgroup identified by controller and path
 * through cg_get_path() (no suffix), as the starting point for listing its
 * child groups.
 * NOTE(review): presumably the directory is then opened and returned through
 * _d; the statements that do so are not visible here. */
85 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
86 _cleanup_free_ char *fs = NULL;
92 /* This is not recursive! */
94 r = cg_get_path(controller, path, NULL, &fs);
/* Walks the entries of the open directory d with FOREACH_DIRENT, returning
 * -errno if reading the directory fails. The visible checks look at entries
 * that are not directories (d_type != DT_DIR) and at the "." and ".."
 * entries, and a copy of an entry name is made with strdup().
 * NOTE(review): presumably those entries are skipped and the copy is handed
 * out through fn for the caller to free; callers in this file loop while the
 * return value is positive. */
106 int cg_read_subgroup(DIR *d, char **fn) {
112 FOREACH_DIRENT(de, d, return -errno) {
115 if (de->d_type != DT_DIR)
118 if (streq(de->d_name, ".") ||
119 streq(de->d_name, ".."))
122 b = strdup(de->d_name);
/* Operates on the directory of the cgroup identified by controller and path,
 * whose location is resolved with cg_get_path(). The visible check singles
 * out failures whose errno is not ENOENT.
 * NOTE(review): presumably the directory is removed with rmdir() and a
 * missing directory is not reported as an error; neither the removal call
 * nor the return statements are visible here. */
133 int cg_rmdir(const char *controller, const char *path) {
134 _cleanup_free_ char *p = NULL;
137 r = cg_get_path(controller, path, NULL, &p);
142 if (r < 0 && errno != ENOENT)
/* Sends the signal sig to the processes of the cgroup identified by
 * controller and path.
 *
 * Visible steps: a Set is allocated with set_new() into allocated_set (which
 * is released automatically through _cleanup_set_free_), the processes are
 * enumerated with cg_enumerate_processes() and read one by one with
 * cg_read_pid(). For each PID the code checks ignore_self against my_pid and
 * whether the PID is already recorded in s; the signal is delivered with
 * kill(), where ESRCH is excluded from error recording; sigcont is only
 * considered when sig is not SIGKILL; and the PID is recorded with set_put().
 * NOTE(review): presumably the Set is only allocated when the caller passed
 * s == NULL, -ENOENT from the enumeration is not treated as an error, and a
 * positive return value reports that something was killed; confirm against
 * the full body. */
148 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
149 _cleanup_set_free_ Set *allocated_set = NULL;
156 /* This goes through the tasks list and kills them all. This
157 * is repeated until no further processes are added to the
158 * tasks list, to properly handle forking processes */
161 s = allocated_set = set_new(NULL);
169 _cleanup_fclose_ FILE *f = NULL;
173 r = cg_enumerate_processes(controller, path, &f);
175 if (ret >= 0 && r != -ENOENT)
181 while ((r = cg_read_pid(f, &pid)) > 0) {
183 if (ignore_self && pid == my_pid)
186 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
189 /* If we haven't killed this process yet, kill
191 if (kill(pid, sig) < 0) {
192 if (ret >= 0 && errno != ESRCH)
195 if (sigcont && sig != SIGKILL)
204 r = set_put(s, LONG_TO_PTR(pid));
220 /* To avoid racing against processes which fork
221 * quicker than we can kill them we repeat this until
222 * no new pids need to be killed. */
/* Applies cg_kill() to the cgroup identified by controller and path and then
 * to every child group, recursing with the child path built as
 * path + "/" + name. The Set s of already handled PIDs is shared across the
 * whole recursion. Finally cg_rmdir() is called on path, where -ENOENT and
 * -EBUSY are excluded from error recording.
 * NOTE(review): presumably a Set is only allocated here when s == NULL and
 * the cg_rmdir() call only happens when rem is true; the guarding conditions
 * are not visible here. */
229 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
230 _cleanup_set_free_ Set *allocated_set = NULL;
231 _cleanup_closedir_ DIR *d = NULL;
239 s = allocated_set = set_new(NULL);
244 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
246 r = cg_enumerate_subgroups(controller, path, &d);
248 if (ret >= 0 && r != -ENOENT)
254 while ((r = cg_read_subgroup(d, &fn)) > 0) {
255 _cleanup_free_ char *p = NULL;
257 p = strjoin(path, "/", fn, NULL);
262 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
263 if (ret >= 0 && r != 0)
267 if (ret >= 0 && r < 0)
271 r = cg_rmdir(controller, path);
272 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Moves the processes of the cgroup pfrom in controller cfrom into the cgroup
 * pto in controller cto. The source processes are enumerated with
 * cg_enumerate_processes() and read with cg_read_pid(); each PID is checked
 * against my_pid (when ignore_self is set) and against the Set s of PIDs
 * already handled, attached with cg_attach(), and recorded with set_put().
 * A result of -ESRCH from cg_attach() is excluded from error recording.
 * NOTE(review): presumably the enumeration is repeated until no new PID
 * shows up, and PIDs matching the two checks are skipped; confirm against
 * the full body. */
279 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
281 _cleanup_set_free_ Set *s = NULL;
297 _cleanup_fclose_ FILE *f = NULL;
301 r = cg_enumerate_processes(cfrom, pfrom, &f);
303 if (ret >= 0 && r != -ENOENT)
309 while ((r = cg_read_pid(f, &pid)) > 0) {
311 /* This might do weird stuff if we aren't a
312 * single-threaded program. However, we
313 * luckily know we are not */
314 if (ignore_self && pid == my_pid)
317 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
320 r = cg_attach(cto, pto, pid);
322 if (ret >= 0 && r != -ESRCH)
329 r = set_put(s, LONG_TO_PTR(pid));
/* Runs cg_migrate() for the source cgroup pfrom and then for each of its
 * child groups, recursing with the child path built as pfrom + "/" + name.
 * All levels are migrated into the same destination (cto, pto). Finally
 * cg_rmdir() is called on the source group, where -ENOENT and -EBUSY are
 * excluded from error recording.
 * NOTE(review): the parameter list is not visible here; judging by the
 * calls it is (cfrom, pfrom, cto, pto, ignore_self, rem), and presumably
 * cg_rmdir() only runs when rem is true. */
349 int cg_migrate_recursive(
357 _cleanup_closedir_ DIR *d = NULL;
366 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
368 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
370 if (ret >= 0 && r != -ENOENT)
376 while ((r = cg_read_subgroup(d, &fn)) > 0) {
377 _cleanup_free_ char *p = NULL;
379 p = strjoin(pfrom, "/", fn, NULL);
388 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
389 if (r != 0 && ret >= 0)
393 if (r < 0 && ret >= 0)
397 r = cg_rmdir(cfrom, pfrom);
398 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Tries cg_migrate_recursive() with the destination path pto and then again
 * with each prefix of pto produced by PATH_FOREACH_PREFIX. The prefix buffer
 * is a stack array sized strlen(pto) + 1.
 * NOTE(review): presumably the prefixes are only tried after the first
 * attempt failed and the loop stops at the first success; the conditions are
 * not visible here. */
405 int cg_migrate_recursive_fallback(
420 r = cg_migrate_recursive(cfrom, pfrom, cto, pto, ignore_self, rem);
422 char prefix[strlen(pto) + 1];
424 /* This didn't work? Then let's try all prefixes of the destination */
426 PATH_FOREACH_PREFIX(prefix, pto) {
427 r = cg_migrate_recursive(cfrom, pfrom, cto, prefix, ignore_self, rem);
/* Maps a controller specification to the name used for its hierarchy. Two
 * cases are distinguished: a controller equal to SYSTEMD_CGROUP_CONTROLLER,
 * and a controller carrying the "name=" prefix, for which a pointer just
 * past that prefix (controller + 5) is returned. In the prefix case the
 * result points into the argument, so nothing is allocated.
 * NOTE(review): the values returned for the first case and for plain
 * controller names are not visible here. */
436 static const char *normalize_controller(const char *controller) {
440 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
442 else if (startswith(controller, "name="))
443 return controller + 5;
/* Assembles a file system path from controller, path and suffix and stores
 * it through fs after passing it through path_kill_slashes().
 *
 * With a non-empty controller the result is rooted at /sys/fs/cgroup/ plus
 * the controller, followed by path and/or suffix when those are non-empty.
 * Without a controller, path (optionally followed by suffix) is used on its
 * own.
 * NOTE(review): handling of allocation failure and of the case where both
 * controller and path are empty is not visible here. */
448 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
451 if (!isempty(controller)) {
452 if (!isempty(path) && !isempty(suffix))
453 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
454 else if (!isempty(path))
455 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
456 else if (!isempty(suffix))
457 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
459 t = strappend("/sys/fs/cgroup/", controller);
461 if (!isempty(path) && !isempty(suffix))
462 t = strjoin(path, "/", suffix, NULL);
463 else if (!isempty(path))
472 *fs = path_kill_slashes(t);
/* Computes the file system path of a cgroup, or of one attribute file
 * (suffix) inside it, and returns it through fs by way of join_path().
 *
 * A non-NULL controller is checked with cg_controller_is_valid() and run
 * through normalize_controller(); a NULL controller is forwarded as NULL.
 * While the thread-local flag good is still false, /sys/fs/cgroup is probed
 * with path_is_mount_point(); the flag caches the outcome so the probe is
 * not repeated on every call.
 * NOTE(review): the error codes returned for an invalid controller or a
 * failed mount point probe are not visible here. */
476 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
478 static thread_local bool good = false;
482 if (controller && !cg_controller_is_valid(controller, true))
485 if (_unlikely_(!good)) {
488 r = path_is_mount_point("/sys/fs/cgroup", false);
494 /* Cache this to save a few stat()s */
498 p = controller ? normalize_controller(controller) : NULL;
500 return join_path(p, path, suffix, fs);
/* Checks whether a hierarchy named p is present below /sys/fs/cgroup: p is
 * first checked with filename_is_valid(), then the joined path (built on the
 * stack with strjoina()) is probed with laccess(F_OK). Callers in this file
 * treat a non-negative return value as meaning that the hierarchy exists.
 * NOTE(review): the exact error codes are not visible here. */
503 static int check_hierarchy(const char *p) {
508 if (!filename_is_valid(p))
511 /* Check if this controller actually really exists */
512 cc = strjoina("/sys/fs/cgroup/", p);
513 if (laccess(cc, F_OK) < 0)
/* Builds the file system path of a cgroup (or of the attribute file suffix
 * inside it) like cg_get_path() does, but additionally verifies with
 * check_hierarchy() that the normalized controller is present before
 * join_path() produces the result. Here the controller is passed to
 * cg_controller_is_valid() unconditionally.
 * NOTE(review): the error codes for an invalid or missing controller are not
 * visible here. */
519 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
525 if (!cg_controller_is_valid(controller, true))
528 /* Normalize the controller syntax */
529 p = normalize_controller(controller);
531 /* Check if this controller actually really exists */
532 r = check_hierarchy(p);
536 return join_path(p, path, suffix, fs);
/* Callback passed to nftw() by cg_trim(). It distinguishes entries whose
 * typeflag is not FTW_DP (a directory reported after its contents) and
 * entries at a level below 1, i.e. the directory the walk started from.
 * NOTE(review): presumably both kinds are left alone and every other
 * directory is removed; the corresponding statements are not visible. */
539 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
544 if (typeflag != FTW_DP)
547 if (ftwbuf->level < 1)
/* Walks the directory tree of the cgroup identified by controller and path
 * with nftw(), children before parents (FTW_DEPTH), without crossing mount
 * points (FTW_MOUNT) or following symlinks (FTW_PHYS), invoking trim_cb() on
 * every entry and using at most 64 file descriptors. A failing walk is
 * recorded as -errno, or -EIO when errno is unset. The top directory is
 * passed to rmdir(), with ENOENT excluded from the failure check.
 * NOTE(review): presumably that final rmdir() only happens when delete_root
 * is true; the guarding condition is not visible here. */
554 int cg_trim(const char *controller, const char *path, bool delete_root) {
555 _cleanup_free_ char *fs = NULL;
560 r = cg_get_path(controller, path, NULL, &fs);
565 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
566 r = errno ? -errno : -EIO;
569 if (rmdir(fs) < 0 && errno != ENOENT)
/* Moves everything out of the cgroup path into its parent group (obtained
 * with path_get_parent()) by calling cg_migrate_recursive() with the same
 * controller on both sides, ignore_self = false and rem = true. A result of
 * -ENOENT is reported as success (0); any other result is passed through. */
576 int cg_delete(const char *controller, const char *path) {
577 _cleanup_free_ char *parent = NULL;
582 r = path_get_parent(path, &parent);
586 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
587 return r == -ENOENT ? 0 : r;
/* Creates the directory for the cgroup identified by controller and path.
 * The location comes from cg_get_path_and_check(); missing parent
 * directories are created with mkdir_parents() and the directory itself with
 * mkdir(), both with mode 0755.
 * NOTE(review): how a failing mkdir() (for instance on an already existing
 * directory) is reported is not visible here. */
590 int cg_create(const char *controller, const char *path) {
591 _cleanup_free_ char *fs = NULL;
594 r = cg_get_path_and_check(controller, path, NULL, &fs);
598 r = mkdir_parents(fs, 0755);
602 if (mkdir(fs, 0755) < 0) {
/* Convenience wrapper: creates the cgroup with cg_create() and then moves
 * pid into it with cg_attach(). As noted at the end of the body, a cgroup
 * that was created is left in place when attaching fails. */
613 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
618 r = cg_create(controller, path);
622 q = cg_attach(controller, path, pid);
626 /* This does not remove the cgroup on failure */
/* Moves the process pid into the cgroup identified by controller and path by
 * writing the PID, formatted with PID_FMT and followed by a newline, to the
 * cgroup.procs file of that group. The file location is obtained from
 * cg_get_path_and_check() and the string is written with
 * write_string_file_no_create(), whose result is returned.
 * NOTE(review): lines between the visible statements may special-case
 * certain pid values; confirm against the full body. */
630 int cg_attach(const char *controller, const char *path, pid_t pid) {
631 _cleanup_free_ char *fs = NULL;
632 char c[DECIMAL_STR_MAX(pid_t) + 2];
638 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
645 snprintf(c, sizeof(c), PID_FMT"\n", pid);
647 return write_string_file_no_create(fs, c);
/* Tries cg_attach() on path and then again on each prefix of path produced
 * by PATH_FOREACH_PREFIX. The prefix buffer is a stack array sized
 * strlen(path) + 1.
 * NOTE(review): presumably the prefixes are only tried after the first
 * attempt failed and the loop stops at the first success; the conditions are
 * not visible here. */
650 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
657 r = cg_attach(controller, path, pid);
659 char prefix[strlen(path) + 1];
661 /* This didn't work? Then let's try all prefixes of
664 PATH_FOREACH_PREFIX(prefix, path) {
665 r = cg_attach(controller, prefix, pid);
/* Applies mode, uid and gid to the directory of the cgroup identified by
 * controller and path, using chmod_and_chown() on the location returned by
 * cg_get_path(); the result of chmod_and_chown() is returned.
 * NOTE(review): most of the parameter list is not visible here, and mode is
 * treated specially when it is not MODE_INVALID; what happens to it in that
 * case is not visible either. */
674 int cg_set_group_access(
675 const char *controller,
681 _cleanup_free_ char *fs = NULL;
686 if (mode != MODE_INVALID)
689 r = cg_get_path(controller, path, NULL, &fs);
693 return chmod_and_chown(fs, mode, uid, gid);
/* Applies mode, uid and gid to the cgroup.procs file of the cgroup identified
 * by controller and path and afterwards to its tasks file, each time through
 * chmod_and_chown() on a location obtained from cg_get_path(). The case where
 * mode, uid and gid are all invalid is checked up front.
 * NOTE(review): presumably that case returns early without touching
 * anything, and a failure on cgroup.procs prevents the tasks update; most of
 * the parameter list is not visible here. */
696 int cg_set_task_access(
697 const char *controller,
703 _cleanup_free_ char *fs = NULL, *procs = NULL;
708 if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
711 if (mode != MODE_INVALID)
714 r = cg_get_path(controller, path, "cgroup.procs", &fs);
718 r = chmod_and_chown(fs, mode, uid, gid);
722 /* Compatibility, Always keep values for "tasks" in sync with
724 r = cg_get_path(controller, path, "tasks", &procs);
728 return chmod_and_chown(procs, mode, uid, gid);
/* Looks up the cgroup of process pid for one controller by scanning the
 * per-process cgroup file whose name is built with procfs_file_alloca().
 *
 * The controller is checked with cg_controller_is_valid() and normalized;
 * SYSTEMD_CGROUP_CONTROLLER is the alternative value. The visible error
 * return maps ENOENT to -ESRCH and passes other errno values on negated.
 * Each line is searched for a colon, and the comma-separated controller list
 * found there is compared word by word against the requested controller,
 * either verbatim or with a leading "name=" on the word.
 * NOTE(review): presumably SYSTEMD_CGROUP_CONTROLLER is used when controller
 * is NULL and the matching path is returned through path as a newly
 * allocated string; confirm against the full body. */
731 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
732 _cleanup_fclose_ FILE *f = NULL;
741 if (!cg_controller_is_valid(controller, true))
744 controller = normalize_controller(controller);
746 controller = SYSTEMD_CGROUP_CONTROLLER;
748 fs = procfs_file_alloca(pid, "cgroup");
752 return errno == ENOENT ? -ESRCH : -errno;
754 cs = strlen(controller);
756 FOREACH_LINE(line, f, return -errno) {
759 const char *word, *state;
764 l = strchr(line, ':');
775 FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
777 if (k == cs && memcmp(word, controller, cs) == 0) {
783 memcmp(word, "name=", 5) == 0 &&
784 memcmp(word+5, controller, cs) == 0) {
/* Sets up agent as the release agent of the hierarchy of controller and
 * switches on notify_on_release in the root group of that hierarchy.
 *
 * The current release_agent content is read with read_one_line_file() and
 * stripped; agent is written with write_string_file_no_create(), and an
 * existing value that differs from agent is handled in a separate branch.
 * Afterwards notify_on_release is read, and a value of "0" is replaced by
 * "1".
 * NOTE(review): presumably agent is only written when no agent is set yet;
 * that condition, the return values, and the way fs and contents are reset
 * before being reused are not visible here. */
804 int cg_install_release_agent(const char *controller, const char *agent) {
805 _cleanup_free_ char *fs = NULL, *contents = NULL;
811 r = cg_get_path(controller, NULL, "release_agent", &fs);
815 r = read_one_line_file(fs, &contents);
819 sc = strstrip(contents);
821 r = write_string_file_no_create(fs, agent);
824 } else if (!streq(sc, agent))
829 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
835 r = read_one_line_file(fs, &contents);
839 sc = strstrip(contents);
840 if (streq(sc, "0")) {
841 r = write_string_file_no_create(fs, "1");
/* Switches release notification off for the hierarchy of controller: writes
 * "0" to notify_on_release and then an empty string to release_agent in the
 * root group, both through write_string_file_no_create().
 * NOTE(review): error handling and the way fs is reset between the two
 * cg_get_path() calls are not visible here. */
854 int cg_uninstall_release_agent(const char *controller) {
855 _cleanup_free_ char *fs = NULL;
858 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
862 r = write_string_file_no_create(fs, "0");
869 r = cg_get_path(controller, NULL, "release_agent", &fs);
873 r = write_string_file_no_create(fs, "");
/* Tells whether the cgroup identified by controller and path has no
 * processes. When the enumeration reports -ENOENT the group counts as empty
 * and 1 is returned; other enumeration results on that path are returned
 * unchanged. PIDs are read with cg_read_pid(), and with ignore_self set the
 * PID equal to self_pid is checked for separately.
 * NOTE(review): presumably the own PID is skipped and the first remaining
 * PID makes the function return 0; those statements are not visible here. */
880 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
881 _cleanup_fclose_ FILE *f = NULL;
882 pid_t pid = 0, self_pid;
888 r = cg_enumerate_processes(controller, path, &f);
890 return r == -ENOENT ? 1 : r;
894 while ((r = cg_read_pid(f, &pid)) > 0) {
896 if (ignore_self && pid == self_pid)
/* Recursive variant of cg_is_empty(): checks the cgroup identified by
 * controller and path with cg_is_empty() and then every child group,
 * recursing with the child path built as path + "/" + name. When the child
 * enumeration reports -ENOENT, 1 is returned; other enumeration results on
 * that path are returned unchanged.
 * NOTE(review): presumably the walk stops as soon as one level reports a
 * non-empty group or an error; those checks are not visible here. */
909 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
910 _cleanup_closedir_ DIR *d = NULL;
916 r = cg_is_empty(controller, path, ignore_self);
920 r = cg_enumerate_subgroups(controller, path, &d);
922 return r == -ENOENT ? 1 : r;
924 while ((r = cg_read_subgroup(d, &fn)) > 0) {
925 _cleanup_free_ char *p = NULL;
927 p = strjoin(path, "/", fn, NULL);
932 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup specification string into a controller and a path.
 *
 * Visible handling: spec is checked with path_is_safe() and a path is
 * cleaned with path_kill_slashes() before being stored through path; a
 * colon is searched with strchr(); controller names are checked with
 * cg_controller_is_valid() and normalized with normalize_controller() before
 * being duplicated; an empty part after the colon is detected; and a path
 * part is checked with path_is_safe() and path_is_absolute().
 * NOTE(review): presumably the accepted forms are an absolute path, a bare
 * controller, and controller:path, with the results returned as newly
 * allocated strings; confirm against the full body. */
943 int cg_split_spec(const char *spec, char **controller, char **path) {
945 char *t = NULL, *u = NULL;
946 _cleanup_free_ char *v = NULL;
951 if (!path_is_safe(spec))
959 *path = path_kill_slashes(t);
968 e = strchr(spec, ':');
970 if (!cg_controller_is_valid(spec, true))
974 t = strdup(normalize_controller(spec));
987 v = strndup(spec, e-spec);
990 t = strdup(normalize_controller(v));
993 if (!cg_controller_is_valid(t, true)) {
998 if (streq(e+1, "")) {
1011 if (!path_is_safe(u) ||
1012 !path_is_absolute(u)) {
1018 path_kill_slashes(u);
/* Turns path into a cgroup file system path stored through result. Input
 * that already starts with /sys/fs/cgroup is treated as a file system path
 * and only cleaned up with path_kill_slashes(). Anything else is parsed with
 * cg_split_spec() and resolved through cg_get_path(), substituting
 * SYSTEMD_CGROUP_CONTROLLER when no controller was parsed and the root path
 * when no path was parsed. */
1034 int cg_mangle_path(const char *path, char **result) {
1035 _cleanup_free_ char *c = NULL, *p = NULL;
1042 /* First, check if it already is a filesystem path */
1043 if (path_startswith(path, "/sys/fs/cgroup")) {
1049 *result = path_kill_slashes(t);
1053 /* Otherwise, treat it as cg spec */
1054 r = cg_split_spec(path, &c, &p);
1058 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Returns through path the cgroup of PID 1 within SYSTEMD_CGROUP_CONTROLLER,
 * as reported by cg_pid_get_path(); the result of that call is returned. */
1061 int cg_get_root_path(char **path) {
1064 return cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, path);
/* Relates cgroup to a root path: path_startswith() is used to find the part
 * of cgroup that lies below root. When no root is supplied it is determined
 * with cg_get_root_path() instead, into the automatically freed rt.
 * NOTE(review): presumably the result handed out through shifted points
 * into cgroup rather than at a copy, and cgroup is passed through unchanged
 * when it does not lie below root; confirm against the full body. */
1067 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1068 _cleanup_free_ char *rt = NULL;
1076 /* If the root was specified let's use that, otherwise
1077 * let's determine it from PID 1 */
1079 r = cg_get_root_path(&rt);
1086 p = path_startswith(cgroup, root);
/* Fetches the cgroup of pid in SYSTEMD_CGROUP_CONTROLLER with
 * cg_pid_get_path() and relates it to root with cg_shift_path().
 * NOTE(review): presumably the shifted path is returned through cgroup as a
 * newly allocated string; those statements are not visible here. */
1095 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1096 _cleanup_free_ char *raw = NULL;
1103 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1107 r = cg_shift_path(raw, root, &c);
/* Extracts the session name from a cgroup path. The path must start with a
 * slash (asserted). The component that starts at e and ends before the next
 * slash, or at the end of the string (strchrnul()), is copied onto the stack
 * with strndupa().
 * NOTE(review): presumably e points just past the leading slash and the copy
 * is unescaped and duplicated before being handed out through session; those
 * statements are not visible here. */
1127 int cg_path_get_session(const char *path, char **session) {
1128 const char *e, *n, *s;
1130 /* Elogind uses a flat hierarchy, just "/SESSION". The only
1131 wrinkle is that SESSION might be escaped. */
1134 assert(path[0] == '/');
1137 n = strchrnul(e, '/');
1141 s = strndupa(e, n - e);
/* Determines the session of process pid: obtains its shifted cgroup path
 * with cg_pid_get_path_shifted() (passing NULL as root) and hands it to
 * cg_path_get_session(), whose result is returned. */
1160 int cg_pid_get_session(pid_t pid, char **session) {
1161 _cleanup_free_ char *cgroup = NULL;
1164 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1168 return cg_path_get_session(cgroup, session);
/* Produces an escaped form of the cgroup name p; the scheme is described in
 * the body.
 *
 * Visible triggers for escaping: p equal to notify_on_release or
 * release_agent, and names containing a dot whose part before the last dot
 * is either cgroup or a hierarchy name accepted by check_hierarchy().
 * Escaping prepends an underscore by way of strappend().
 * NOTE(review): further triggers and the return path for names that need no
 * escaping are not visible here. */
1171 char *cg_escape(const char *p) {
1172 bool need_prefix = false;
1174 /* This implements very minimal escaping for names to be used
1175 * as file names in the cgroup tree: any name which might
1176 * conflict with a kernel name or is prefixed with '_' is
1177 * prefixed with a '_'. That way, when reading cgroup names it
1178 * is sufficient to remove a single prefixing underscore if
1181 /* The return value of this function (unlike cg_unescape())
1187 streq(p, "notify_on_release") ||
1188 streq(p, "release_agent") ||
1194 dot = strrchr(p, '.');
1197 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1202 n = strndupa(p, dot - p);
1204 if (check_hierarchy(n) >= 0)
1211 return strappend("_", p);
/* Counterpart of cg_escape().
 * NOTE(review): presumably skips one leading underscore and returns a
 * pointer into p, which would explain why the result must not be freed; the
 * statements themselves are not visible here. */
1216 char *cg_unescape(const char *p) {
1219 /* The return value of this function (unlike cg_escape())
1220 * doesn't need free()! */
/* Set of characters allowed in a controller name; cg_controller_is_valid()
 * looks every character up in it with strchr(). The value follows on the
 * continuation line(s). */
1228 #define CONTROLLER_VALID \
/* Validates a controller name. A "name=" prefix is detected with
 * startswith(); an empty name or one starting with an underscore is singled
 * out; every character is looked up in CONTROLLER_VALID; and the length is
 * compared against FILENAME_MAX.
 * NOTE(review): presumably each failed check returns false, and the "name="
 * prefix is only accepted when allow_named is true; the corresponding
 * statements are not visible here. */
1232 bool cg_controller_is_valid(const char *p, bool allow_named) {
1239 s = startswith(p, "name=");
1244 if (*p == 0 || *p == '_')
1247 for (t = p; *t; t++)
1248 if (!strchr(CONTROLLER_VALID, *t))
1251 if (t - p > FILENAME_MAX)
/* Writes value to the attribute file named attribute inside the cgroup
 * identified by controller and path. The file location comes from
 * cg_get_path(); the write goes through write_string_file_no_create(), whose
 * result is returned. */
1257 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1258 _cleanup_free_ char *p = NULL;
1261 r = cg_get_path(controller, path, attribute, &p);
1265 return write_string_file_no_create(p, value);
/* Reads the first line of the attribute file named attribute inside the
 * cgroup identified by controller and path into ret. The file location comes
 * from cg_get_path(); the read goes through read_one_line_file(), whose
 * result is returned. */
1268 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1269 _cleanup_free_ char *p = NULL;
1272 r = cg_get_path(controller, path, attribute, &p);
1276 return read_one_line_file(p, ret);
/* Controller names that the functions below walk with NULSTR_FOREACH while
 * testing a CGroupControllerMask bit.
 * NOTE(review): presumably a NUL-separated list whose order matches the bit
 * order of CGroupControllerMask; the entries are not visible here. */
1279 static const char mask_names[] =
/* Creates the cgroup path in SYSTEMD_CGROUP_CONTROLLER with cg_create() and
 * then walks mask_names. Controllers that reach the else branch and are set
 * in supported have the group trimmed away with cg_trim(n, path, true).
 * NOTE(review): presumably the first branch creates the group for the
 * controllers selected in mask, and bit advances with every name; those
 * statements are not visible here. */
1286 int cg_create_everywhere(CGroupControllerMask supported, CGroupControllerMask mask, const char *path) {
1287 CGroupControllerMask bit = 1;
1291 /* This one will create a cgroup in our private tree, but also
1292 * duplicate it in the trees specified in mask, and remove it
1295 /* First create the cgroup in our own hierarchy. */
1296 r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1300 /* Then, do the same in the other hierarchies */
1301 NULSTR_FOREACH(n, mask_names) {
1304 else if (supported & bit)
1305 cg_trim(n, path, true);
/* Attaches pid to path in SYSTEMD_CGROUP_CONTROLLER with cg_attach() and
 * then, for every name in mask_names whose bit is set in supported, attaches
 * it in that controller too with cg_attach_fallback(). path_callback, called
 * with the bit and userdata, yields a per-controller path p. The results of
 * the per-controller attach calls are not checked.
 * NOTE(review): the visible cg_attach_fallback() call passes path, not p;
 * check against the full body whether the callback result is meant to be
 * used there. */
1313 int cg_attach_everywhere(CGroupControllerMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1314 CGroupControllerMask bit = 1;
1318 r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1322 NULSTR_FOREACH(n, mask_names) {
1324 if (supported & bit) {
1325 const char *p = NULL;
1328 p = path_callback(bit, userdata);
1333 cg_attach_fallback(n, path, pid);
/* Runs cg_attach_everywhere() once for every PID stored in the set pids;
 * each stored pointer is converted back to a PID with PTR_TO_LONG.
 * NOTE(review): how the individual results q are combined into the return
 * value is not visible here. */
1342 int cg_attach_many_everywhere(CGroupControllerMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1347 SET_FOREACH(pidp, pids, i) {
1348 pid_t pid = PTR_TO_LONG(pidp);
1351 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
/* Moves processes from the cgroup from to the cgroup to. Unless both paths
 * are equal (path_equal()), the move happens in SYSTEMD_CGROUP_CONTROLLER
 * through cg_migrate_recursive() with ignore_self = false and rem = true.
 * Then, for each name n in mask_names whose bit is set in supported, the
 * processes found in to within SYSTEMD_CGROUP_CONTROLLER are migrated to
 * path p of controller n with cg_migrate_recursive_fallback(), whose result
 * is not checked. to_callback, called with the bit and userdata, yields p.
 * NOTE(review): presumably p falls back to the to path when the callback is
 * absent or returns NULL; those statements are not visible here. */
1359 int cg_migrate_everywhere(CGroupControllerMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1360 CGroupControllerMask bit = 1;
1364 if (!path_equal(from, to)) {
1365 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, false, true);
1370 NULSTR_FOREACH(n, mask_names) {
1371 if (supported & bit) {
1372 const char *p = NULL;
1375 p = to_callback(bit, userdata);
1380 cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, n, p, false, false);
/* Trims the cgroup path in SYSTEMD_CGROUP_CONTROLLER with cg_trim() and then
 * in every controller of mask_names whose bit is set in supported, passing
 * delete_root through each time. The results of the per-controller calls are
 * not checked.
 * NOTE(review): what happens when the first cg_trim() fails is not visible
 * here. */
1389 int cg_trim_everywhere(CGroupControllerMask supported, const char *path, bool delete_root) {
1390 CGroupControllerMask bit = 1;
1394 r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
1398 NULSTR_FOREACH(n, mask_names) {
1399 if (supported & bit)
1400 cg_trim(n, path, delete_root);
/* Probes every name in mask_names with check_hierarchy(), starting from
 * bit = 1 and an empty mask.
 * NOTE(review): presumably the bit of each controller whose hierarchy exists
 * is added to mask, bit advances with every name, and mask is returned;
 * those statements are not visible here. */
1408 CGroupControllerMask cg_mask_supported(void) {
1409 CGroupControllerMask bit = 1, mask = 0;
1412 NULSTR_FOREACH(n, mask_names) {
1413 if (check_hierarchy(n) >= 0)
/* Collects controller names from the kernel controller table in procfs into
 * the set controllers, which must not be NULL (asserted). The first line of
 * the table is a header and is discarded with fgets(). Each following record
 * is parsed with fscanf(), which allocates the name and reads the last
 * numeric column into enabled; a stream error with errno set is checked for
 * when a record fails to parse. Names are checked with filename_is_valid()
 * and handed over to the set with set_consume().
 * NOTE(review): presumably a missing table (ENOENT) is not an error and
 * controllers that are not enabled are left out; confirm against the full
 * body. */
1422 int cg_kernel_controllers(Set *controllers) {
1423 _cleanup_fclose_ FILE *f = NULL;
1427 assert(controllers);
1429 f = fopen("/proc/cgroups", "re");
1431 if (errno == ENOENT)
1436 /* Ignore the header line */
1437 (void) fgets(buf, sizeof(buf), f);
1444 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
1449 if (ferror(f) && errno)
1460 if (!filename_is_valid(controller)) {
1465 r = set_consume(controllers, controller);