1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
39 #include "unit-name.h"
/* Enumerate the processes of the cgroup "path" in hierarchy "controller".
 * The location of the group's "cgroup.procs" attribute is resolved with
 * cg_get_path() into fs.
 * NOTE(review): _f presumably receives the opened stream - confirm. */
42 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
43 _cleanup_free_ char *fs = NULL;
49 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Read the next PID from the stream f by parsing one unsigned long with
 * fscanf(). Callers in this file loop while the return value is > 0.
 * NOTE(review): _pid presumably receives the parsed value - confirm. */
61 int cg_read_pid(FILE *f, pid_t *_pid) {
64 /* Note that the cgroup.procs might contain duplicates! See
65 * cgroups.txt for details. */
71 if (fscanf(f, "%lu", &ul) != 1) {
/* Parse failure path: report errno when it is set, -EIO otherwise */
76 return errno ? -errno : -EIO;
/* Prepare enumeration of the direct (non-recursive) child groups of a
 * cgroup. The group's directory is resolved with cg_get_path() without a
 * suffix.
 * NOTE(review): _d presumably receives the opened directory - confirm. */
86 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
87 _cleanup_free_ char *fs = NULL;
93 /* This is not recursive! */
95 r = cg_get_path(controller, path, NULL, &fs);
/* Read the next subgroup name from the directory stream d. Entries are
 * walked with FOREACH_DIRENT (which bails out with -errno on failure);
 * entries that are not of type DT_DIR and the "." / ".." entries are
 * special-cased (presumably skipped - confirm). The entry name is
 * duplicated with strdup(). Callers in this file loop while the return
 * value is > 0 and treat fn as a heap string. */
107 int cg_read_subgroup(DIR *d, char **fn) {
113 FOREACH_DIRENT(de, d, return -errno) {
116 if (de->d_type != DT_DIR)
119 if (streq(de->d_name, ".") ||
120 streq(de->d_name, ".."))
123 b = strdup(de->d_name);
/* Remove the directory of a cgroup. The directory is resolved with
 * cg_get_path(). The sticky bit is checked with file_is_priv_sticky() on
 * the group's "cgroup.procs" file and, for compatibility, on its "tasks"
 * file; per the comment below a sticky group is not removed.
 * NOTE(review): honour_sticky presumably enables that check - confirm.
 * ENOENT is excluded from the final error test. */
134 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
135 _cleanup_free_ char *p = NULL;
138 r = cg_get_path(controller, path, NULL, &p);
145 /* If the sticky bit is set on cgroup.procs, don't
146 * remove the directory */
148 fn = strappend(p, "/cgroup.procs");
152 r = file_is_priv_sticky(fn);
158 /* Compatibility ... */
159 fn = strappend(p, "/tasks");
163 r = file_is_priv_sticky(fn);
171 if (r < 0 && errno != ENOENT)
/* Send signal sig to the processes listed for a cgroup.
 *
 * PIDs are read with cg_enumerate_processes() / cg_read_pid(). Every PID
 * that was handled is stored in the set s (keyed by LONG_TO_PTR(pid)) and
 * PIDs already present in the set are detected with set_get(), so that
 * duplicates and repeated passes can be recognised. A set is created here
 * with set_new() and owned by allocated_set (NOTE(review): presumably only
 * when the caller passes no set - confirm).
 *
 * With ignore_self the PID is compared against my_pid. A kill() failure
 * with ESRCH and an enumeration failure with -ENOENT are excluded from the
 * error recorded in ret.
 * NOTE(review): the use of sigcont is not visible here. */
177 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
178 _cleanup_set_free_ Set *allocated_set = NULL;
185 /* This goes through the tasks list and kills them all. This
186 * is repeated until no further processes are added to the
187 * tasks list, to properly handle forking processes */
190 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
198 _cleanup_fclose_ FILE *f = NULL;
202 r = cg_enumerate_processes(controller, path, &f);
204 if (ret >= 0 && r != -ENOENT)
210 while ((r = cg_read_pid(f, &pid)) > 0) {
212 if (ignore_self && pid == my_pid)
215 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
218 /* If we haven't killed this process yet, kill
220 if (kill(pid, sig) < 0) {
221 if (ret >= 0 && errno != ESRCH)
223 } else if (ret == 0) {
233 r = set_put(s, LONG_TO_PTR(pid));
249 /* To avoid racing against processes which fork
250 * quicker than we can kill them we repeat this until
251 * no new pids need to be killed. */
/* Signal a cgroup and all of its subgroups.
 *
 * First cg_kill() is applied to the group itself, then every subgroup
 * reported by cg_enumerate_subgroups() / cg_read_subgroup() is processed
 * by a recursive call on "<path>/<name>". The same PID set s is passed to
 * all of these calls. The first error is kept in ret; -ENOENT from the
 * enumeration is excluded.
 *
 * Finally cg_rmdir(controller, path, true) is called, where -ENOENT and
 * -EBUSY are not recorded as errors.
 * NOTE(review): the removal is presumably conditional on rem - confirm. */
258 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
259 _cleanup_set_free_ Set *allocated_set = NULL;
260 _cleanup_closedir_ DIR *d = NULL;
268 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
273 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
275 r = cg_enumerate_subgroups(controller, path, &d);
277 if (ret >= 0 && r != -ENOENT)
283 while ((r = cg_read_subgroup(d, &fn)) > 0) {
284 _cleanup_free_ char *p = NULL;
/* Path of the child group relative to the hierarchy root */
286 p = strjoin(path, "/", fn, NULL);
291 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
292 if (ret >= 0 && r != 0)
296 if (ret >= 0 && r < 0)
300 r = cg_rmdir(controller, path, true);
301 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Kill everything in a cgroup tree and wait for it to become empty.
 * Runs at most 15 rounds; each round calls cg_kill_recursive() with
 * sigcont and ignore_self set to true and no caller-supplied PID set, then
 * sleeps for 200 * USEC_PER_MSEC microseconds. The signal used in each
 * round follows the schedule described in the comment below. */
308 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
313 /* This safely kills all processes; first it sends a SIGTERM,
314 * then checks 8 times after 200ms whether the group is now
315 * empty, then kills everything that is left with SIGKILL and
316 * finally checks 5 times after 200ms each whether the group
317 * is finally empty. */
319 for (i = 0; i < 15; i++) {
329 r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL);
333 usleep(200 * USEC_PER_MSEC);
/* Move the processes of group pfrom (hierarchy cfrom) into group pto
 * (hierarchy cto).
 *
 * PIDs are read with cg_enumerate_processes() / cg_read_pid() and attached
 * to the destination with cg_attach(). Handled PIDs are remembered in the
 * local set s so that PIDs seen before can be detected with set_get().
 * With ignore_self the PID is compared against my_pid. -ENOENT from the
 * enumeration and -ESRCH from cg_attach() are excluded from the error
 * recorded in ret. */
339 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
341 _cleanup_set_free_ Set *s = NULL;
350 s = set_new(trivial_hash_func, trivial_compare_func);
357 _cleanup_fclose_ FILE *f = NULL;
361 r = cg_enumerate_processes(cfrom, pfrom, &f);
363 if (ret >= 0 && r != -ENOENT)
369 while ((r = cg_read_pid(f, &pid)) > 0) {
371 /* This might do weird stuff if we aren't a
372 * single-threaded program. However, we
373 * luckily know we are not */
374 if (ignore_self && pid == my_pid)
377 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
380 r = cg_attach(cto, pto, pid);
382 if (ret >= 0 && r != -ESRCH)
389 r = set_put(s, LONG_TO_PTR(pid));
/* Move the processes of group pfrom and of all its subgroups into the
 * single destination group pto.
 *
 * cg_migrate() handles the group itself; each subgroup found through
 * cg_enumerate_subgroups() / cg_read_subgroup() is handled by a recursive
 * call on "<pfrom>/<name>" with the same destination. The first error is
 * kept in ret; -ENOENT from the enumeration is excluded.
 *
 * Finally cg_rmdir(cfrom, pfrom, true) is called, where -ENOENT and
 * -EBUSY are not recorded as errors.
 * NOTE(review): the removal is presumably conditional on rem - confirm. */
409 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
410 _cleanup_closedir_ DIR *d = NULL;
419 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
421 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
423 if (ret >= 0 && r != -ENOENT)
429 while ((r = cg_read_subgroup(d, &fn)) > 0) {
430 _cleanup_free_ char *p = NULL;
432 p = strjoin(pfrom, "/", fn, NULL);
441 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
442 if (r != 0 && ret >= 0)
446 if (r < 0 && ret >= 0)
450 r = cg_rmdir(cfrom, pfrom, true);
451 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Normalize a controller name. SYSTEMD_CGROUP_CONTROLLER is special-cased,
 * and for names starting with "name=" the part after that prefix is
 * returned. The result points into the argument (or is a constant); no
 * memory is allocated in the visible code. */
458 static const char *normalize_controller(const char *controller) {
462 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
464 else if (startswith(controller, "name="))
/* 5 == strlen("name=") */
465 return controller + 5;
/* Build a cgroup filesystem path from its parts.
 *
 * Four variants are rooted at "/sys/fs/cgroup/<controller>" and append
 * path and/or suffix, or neither; a fifth variant joins just path and
 * suffix without the prefix.
 * NOTE(review): the variant is presumably chosen by which arguments are
 * non-NULL - confirm. The resulting string t is cleaned up with
 * path_kill_slashes(); fs presumably receives it. */
470 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
475 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
477 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
479 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
481 t = strappend("/sys/fs/cgroup/", controller);
484 t = strjoin(path, "/", suffix, NULL);
494 path_kill_slashes(t);
/* Compute the filesystem path of a cgroup or of one of its attributes.
 *
 * A non-NULL controller must pass cg_controller_is_valid(). Until the
 * per-thread flag "good" is set, "/sys/fs/cgroup" is tested with
 * path_is_mount_point(); a failure of that test is returned as is, and
 * -ENOENT is returned otherwise on that path. The controller is then
 * normalized (NULL stays NULL) and the path is assembled by join_path(). */
500 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
502 static __thread bool good = false;
506 if (controller && !cg_controller_is_valid(controller, true))
509 if (_unlikely_(!good)) {
512 r = path_is_mount_point("/sys/fs/cgroup", false);
514 return r < 0 ? r : -ENOENT;
516 /* Cache this to save a few stat()s */
520 p = controller ? normalize_controller(controller) : NULL;
522 return join_path(p, path, suffix, fs);
/* Check whether the hierarchy directory "/sys/fs/cgroup/<p>" exists, using
 * access(F_OK). The path is assembled in a stack buffer obtained from
 * alloca(); sizeof() of the prefix literal already accounts for the
 * terminating NUL. Callers in this file treat a result >= 0 as "exists". */
525 static int check_hierarchy(const char *p) {
530 /* Check if this controller actually really exists */
531 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
532 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
533 if (access(cc, F_OK) < 0)
/* Variant of path lookup that also verifies the hierarchy exists.
 * The controller is validated with cg_controller_is_valid(), normalized,
 * and checked with check_hierarchy() before the path is assembled by
 * join_path(). Unlike cg_get_path(), the visible code validates the
 * controller unconditionally (there is no NULL guard). */
539 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
545 if (!cg_controller_is_valid(controller, true))
548 /* Normalize the controller syntax */
549 p = normalize_controller(controller);
551 /* Check if this controller actually really exists */
552 r = check_hierarchy(p);
556 return join_path(p, path, suffix, fs);
/* nftw() callback used by cg_trim(). Entries whose typeflag is not FTW_DP
 * and entries at a walk level below 1 are special-cased by the early
 * checks (presumably ignored - confirm). For the remaining directories the
 * sticky bit of "<path>/cgroup.procs" and of "<path>/tasks" is evaluated
 * with file_is_priv_sticky(). */
559 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
563 if (typeflag != FTW_DP)
566 if (ftwbuf->level < 1)
569 p = strappend(path, "/cgroup.procs");
575 is_sticky = file_is_priv_sticky(p) > 0;
582 p = strappend(path, "/tasks");
588 is_sticky = file_is_priv_sticky(p) > 0;
/* Trim a cgroup tree.
 *
 * The group directory is resolved with cg_get_path() and walked with
 * nftw() using trim_cb, with the flags FTW_DEPTH|FTW_MOUNT|FTW_PHYS and a
 * limit of 64 for nftw()'s descriptor argument. A failing walk yields
 * -errno, or -EIO when errno is unset.
 *
 * Afterwards the sticky bit of the top group's "cgroup.procs" and "tasks"
 * files is evaluated and rmdir() is called on the top directory; ENOENT
 * from rmdir() is not treated as an error, and an earlier error in r is
 * not overwritten.
 * NOTE(review): this last step is presumably conditional on delete_root
 * and on the sticky result - confirm. */
598 int cg_trim(const char *controller, const char *path, bool delete_root) {
599 _cleanup_free_ char *fs = NULL;
604 r = cg_get_path(controller, path, NULL, &fs);
609 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
610 r = errno ? -errno : -EIO;
616 p = strappend(fs, "/cgroup.procs");
620 is_sticky = file_is_priv_sticky(p) > 0;
624 p = strappend(fs, "/tasks");
628 is_sticky = file_is_priv_sticky(p) > 0;
633 if (rmdir(fs) < 0 && errno != ENOENT && r == 0)
/* Delete a cgroup. The parent of path is determined with
 * path_get_parent(), then cg_migrate_recursive() is run within the same
 * controller from path to that parent, with ignore_self=false and
 * rem=true. -ENOENT is reported to the caller as success (0). */
640 int cg_delete(const char *controller, const char *path) {
641 _cleanup_free_ char *parent = NULL;
646 r = path_get_parent(path, &parent);
650 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
651 return r == -ENOENT ? 0 : r;
/* Attach process pid to a cgroup. The group's "cgroup.procs" file is
 * resolved with cg_get_path_and_check() and the PID is written to it with
 * write_string_file(), formatted as a decimal number followed by a
 * newline. The buffer is sized from DECIMAL_STR_MAX(pid_t) plus two
 * extra bytes.
 * NOTE(review): any special handling of particular pid values is not
 * visible here. */
654 int cg_attach(const char *controller, const char *path, pid_t pid) {
655 _cleanup_free_ char *fs = NULL;
656 char c[DECIMAL_STR_MAX(pid_t) + 2];
662 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
669 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
671 return write_string_file(fs, c);
/* Set access mode and ownership of a cgroup directory. The directory is
 * resolved with cg_get_path() (no suffix) and mode/uid/gid are applied
 * with chmod_and_chown(). A mode different from (mode_t) -1 receives
 * extra treatment before that (not visible here). */
674 int cg_set_group_access(
675 const char *controller,
681 _cleanup_free_ char *fs = NULL;
686 if (mode != (mode_t) -1)
689 r = cg_get_path(controller, path, NULL, &fs);
693 return chmod_and_chown(fs, mode, uid, gid);
/* Set access mode, ownership and sticky bit of the "cgroup.procs" file of
 * a cgroup, and apply the same values to the legacy "tasks" file.
 *
 * The case where mode, uid and gid are all -1 and sticky is negative is
 * special-cased up front. How the final mode is computed:
 *  - mode and sticky both given: S_ISVTX is OR-ed into mode when sticky
 *    is non-zero;
 *  - only one of them given: the current st_mode is consulted, and either
 *    only the sticky bit or only the 0777 permission bits are replaced.
 * The result is applied with chmod_and_chown() to both files. */
696 int cg_set_task_access(
697 const char *controller,
704 _cleanup_free_ char *fs = NULL, *procs = NULL;
709 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
712 if (mode != (mode_t) -1)
715 r = cg_get_path(controller, path, "cgroup.procs", &fs);
719 if (sticky >= 0 && mode != (mode_t) -1)
720 /* Both mode and sticky param are passed */
721 mode |= (sticky ? S_ISVTX : 0);
722 else if ((sticky >= 0 && mode == (mode_t) -1) ||
723 (mode != (mode_t) -1 && sticky < 0)) {
726 /* Only one param is passed, hence read the current
727 * mode from the file itself */
733 if (mode == (mode_t) -1)
734 /* No mode set, we just shall set the sticky bit */
735 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
737 /* Only mode set, leave sticky bit untouched */
738 mode = (st.st_mode & ~0777) | mode;
741 r = chmod_and_chown(fs, mode, uid, gid);
745 /* Compatibility, Always keep values for "tasks" in sync with
747 r = cg_get_path(controller, path, "tasks", &procs);
751 return chmod_and_chown(procs, mode, uid, gid);
/* Determine the cgroup path of a process within one hierarchy.
 *
 * The controller is either validated and normalized, or set to
 * SYSTEMD_CGROUP_CONTROLLER (NOTE(review): presumably when NULL is
 * passed - confirm). The file read is either "/proc/self/cgroup" or the
 * process' "cgroup" file as named by procfs_file_alloca(). On the error
 * path after opening, ENOENT is reported as -ESRCH and anything else
 * as -errno.
 *
 * Each line is searched for its first ':'; the comma-separated words
 * that follow are compared with the controller name, either verbatim or
 * with a leading "name=". */
754 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
755 _cleanup_fclose_ FILE *f = NULL;
764 if (!cg_controller_is_valid(controller, true))
767 controller = normalize_controller(controller);
769 controller = SYSTEMD_CGROUP_CONTROLLER;
772 fs = "/proc/self/cgroup";
774 fs = procfs_file_alloca(pid, "cgroup");
778 return errno == ENOENT ? -ESRCH : -errno;
780 cs = strlen(controller);
782 FOREACH_LINE(line, f, return -errno) {
790 l = strchr(line, ':');
801 FOREACH_WORD_SEPARATOR(w, k, l, ",", state) {
803 if (k == cs && memcmp(w, controller, cs) == 0) {
809 memcmp(w, "name=", 5) == 0 &&
810 memcmp(w+5, controller, cs) == 0) {
/* Install a release agent for a hierarchy.
 *
 * Step 1: the hierarchy's "release_agent" attribute is read and stripped;
 * the agent path may be written to it, and a value that differs from
 * agent is handled separately (NOTE(review): the exact conditions and
 * results are not visible here - confirm).
 * Step 2: the "notify_on_release" attribute is read and stripped, and "1"
 * is written when it currently contains "0". */
830 int cg_install_release_agent(const char *controller, const char *agent) {
831 _cleanup_free_ char *fs = NULL, *contents = NULL;
837 r = cg_get_path(controller, NULL, "release_agent", &fs);
841 r = read_one_line_file(fs, &contents);
845 sc = strstrip(contents);
847 r = write_string_file(fs, agent);
850 } else if (!streq(sc, agent))
855 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
861 r = read_one_line_file(fs, &contents);
865 sc = strstrip(contents);
866 if (streq(sc, "0")) {
867 r = write_string_file(fs, "1");
/* Test whether a cgroup has no processes. The PIDs of the group are
 * enumerated with cg_enumerate_processes() and read with cg_read_pid();
 * with ignore_self each PID is compared against self_pid.
 * NOTE(review): presumably > 0 means empty, 0 means not empty and < 0 is
 * an error - confirm against the full body. */
880 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
881 _cleanup_fclose_ FILE *f = NULL;
882 pid_t pid = 0, self_pid;
888 r = cg_enumerate_processes(controller, path, &f);
/* On this return path -ENOENT is turned into 1 */
890 return r == -ENOENT ? 1 : r;
894 while ((r = cg_read_pid(f, &pid)) > 0) {
896 if (ignore_self && pid == self_pid)
/* Same test as cg_is_empty(), but the group is given as a single spec
 * string that is first split into controller and path by cg_split_spec().
 * Both parts are declared _cleanup_free_. */
909 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
910 _cleanup_free_ char *controller = NULL, *path = NULL;
915 r = cg_split_spec(spec, &controller, &path);
919 return cg_is_empty(controller, path, ignore_self);
/* Recursive form of cg_is_empty(): the group itself is tested first, then
 * every subgroup found via cg_enumerate_subgroups() / cg_read_subgroup()
 * is tested by a recursive call on "<path>/<name>". */
922 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
923 _cleanup_closedir_ DIR *d = NULL;
929 r = cg_is_empty(controller, path, ignore_self);
933 r = cg_enumerate_subgroups(controller, path, &d);
/* On this return path -ENOENT is turned into 1 */
935 return r == -ENOENT ? 1 : r;
937 while ((r = cg_read_subgroup(d, &fn)) > 0) {
938 _cleanup_free_ char *p = NULL;
940 p = strjoin(path, "/", fn, NULL);
945 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Split a cgroup spec string into a controller name and a path.
 *
 * Forms handled by the visible code:
 *  - a plain path: checked with path_is_safe() and cleaned with
 *    path_kill_slashes();
 *  - a bare controller (the text is validated with
 *    cg_controller_is_valid() and its normalized form is duplicated);
 *  - "<controller>:<path>": the text before the first ':' is copied,
 *    normalized and validated, and the path part u must be both safe and
 *    absolute before it is cleaned with path_kill_slashes().
 * NOTE(review): which outputs are filled in each case, and whether NULL
 * outputs are accepted, is not visible here. */
956 int cg_split_spec(const char *spec, char **controller, char **path) {
958 char *t = NULL, *u = NULL;
959 _cleanup_free_ char *v = NULL;
964 if (!path_is_safe(spec))
972 path_kill_slashes(t);
982 e = strchr(spec, ':');
984 if (!cg_controller_is_valid(spec, true))
988 t = strdup(normalize_controller(spec));
1001 v = strndup(spec, e-spec);
1004 t = strdup(normalize_controller(v));
1007 if (!cg_controller_is_valid(t, true)) {
1017 if (!path_is_safe(u) ||
1018 !path_is_absolute(u)) {
1024 path_kill_slashes(u);
/* Combine a controller and a path into one "<controller>:<path>" string.
 * The controller is either set to "systemd" (NOTE(review): presumably
 * when none is given - confirm) or validated and normalized. The path is
 * tested with path_is_absolute(). After joining, path_kill_slashes() is
 * applied only to the part following the ':' separator. */
1039 int cg_join_spec(const char *controller, const char *path, char **spec) {
1045 controller = "systemd";
1047 if (!cg_controller_is_valid(controller, true))
1050 controller = normalize_controller(controller);
1053 if (!path_is_absolute(path))
1056 s = strjoin(controller, ":", path, NULL);
/* Skip "<controller>:" so only the path portion is cleaned up */
1060 path_kill_slashes(s + strlen(controller) + 1);
/* Turn a user-supplied string into a cgroup filesystem path. A string
 * that already starts with "/sys/fs/cgroup" is treated as a filesystem
 * path and cleaned with path_kill_slashes(). Anything else is parsed as a
 * cgroup spec with cg_split_spec() and resolved with cg_get_path(), using
 * SYSTEMD_CGROUP_CONTROLLER and "/" for the parts the spec left unset. */
1066 int cg_mangle_path(const char *path, char **result) {
1067 _cleanup_free_ char *c = NULL, *p = NULL;
1074 /* First check if it already is a filesystem path */
1075 if (path_startswith(path, "/sys/fs/cgroup")) {
1081 path_kill_slashes(t);
1086 /* Otherwise treat it as cg spec */
1087 r = cg_split_spec(path, &c, &p);
1091 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Determine the cgroup path below which system units live. It is derived
 * from the cgroup of PID 1 in the SYSTEMD_CGROUP_CONTROLLER hierarchy;
 * "/system" is used as a fallback value. A path that already ends in
 * "/system" is special-cased, otherwise "/system" is appended.
 * NOTE(review): the fallback presumably applies when the lookup
 * fails - confirm. */
1094 int cg_get_system_path(char **path) {
1100 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1102 p = strdup("/system");
1107 if (endswith(p, "/system"))
1112 q = strappend(p, "/system");
/* Determine the root cgroup path of this systemd instance. It starts from
 * the cgroup of PID 1 in the SYSTEMD_CGROUP_CONTROLLER hierarchy and
 * locates a trailing "/system" with endswith().
 * NOTE(review): that suffix is presumably cut off - confirm. */
1123 int cg_get_root_path(char **path) {
1129 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &root);
1133 e = endswith(root, "/system");
/* Determine the cgroup path below which user cgroups are placed: "/user"
 * when the root path cannot be determined or is "/", otherwise the root
 * path with "/user" appended. */
1143 int cg_get_user_path(char **path) {
1144 _cleanup_free_ char *root = NULL;
1149 /* Figure out the place to put user cgroups below. We use the
1150 * same as PID 1 has but with the "/system" suffix replaced by
1153 if (cg_get_root_path(&root) < 0 || streq(root, "/"))
1154 p = strdup("/user");
1156 p = strappend(root, "/user");
/* Determine the cgroup path for machines, optionally for one named
 * machine. The result is the root path (left out when it is unavailable
 * or "/") followed by "/machine" and, when machine is non-NULL, a '/' and
 * the escaped name. The escaped name is cg_escape() applied to machine
 * with ".nspawn" appended. */
1165 int cg_get_machine_path(const char *machine, char **path) {
1166 _cleanup_free_ char *root = NULL, *escaped = NULL;
1172 const char *name = strappenda(machine, ".nspawn");
1174 escaped = cg_escape(name);
1179 p = strjoin(cg_get_root_path(&root) >= 0 && !streq(root, "/") ? root : "",
1180 "/machine", machine ? "/" : "", machine ? escaped : "", NULL);
/* Filter a NULL-terminated list of controller names in place. A read
 * cursor f and a write cursor t walk the same array. Each name is
 * normalized; "systemd" is special-cased; names failing
 * cg_controller_is_valid() are logged at warning level, and names that
 * check_hierarchy() rejects are logged at debug level, as being removed.
 * The surviving list is passed through strv_uniq() and returned. */
1188 char **cg_shorten_controllers(char **controllers) {
1194 for (f = controllers, t = controllers; *f; f++) {
1198 p = normalize_controller(*f);
1200 if (streq(p, "systemd")) {
1205 if (!cg_controller_is_valid(p, true)) {
1206 log_warning("Controller %s is not valid, removing from controllers list.", p);
1211 r = check_hierarchy(p);
1213 log_debug("Controller %s is not available, removing from controllers list.", p);
1222 return strv_uniq(controllers);
/* Determine the cgroup path of a process relative to the root cgroup of
 * this systemd instance. The root comes from cg_get_root_path() and the
 * process' path from cg_pid_get_path() in the SYSTEMD_CGROUP_CONTROLLER
 * hierarchy; path_startswith() yields the position p at which the part
 * following the root begins.
 * NOTE(review): how root and cgroup are filled in is only partly visible;
 * callers in this file pass NULL for root. */
1225 int cg_pid_get_path_shifted(pid_t pid, char **root, char **cgroup) {
1226 _cleanup_free_ char *cg_root = NULL;
1227 char *cg_process, *p;
1230 r = cg_get_root_path(&cg_root);
1234 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1238 p = path_startswith(cg_process, cg_root);
/* Terminate cg_process at p, keeping only the leading part */
1257 cg_process[p-cg_process] = 0;
/* Extract a unit name from the beginning of a cgroup path.
 *
 * The first path component (up to the first '/') is copied to the stack
 * and tested with unit_name_is_valid() and unit_name_is_template(). The
 * code after the template test skips the separating slashes and applies
 * the same copy-and-validate step to the following component.
 * NOTE(review): presumably a non-template first component is returned
 * directly and the second component is the instance below a template -
 * confirm. */
1265 int cg_path_decode_unit(const char *cgroup, char **unit){
1266 char *p, *e, *c, *s, *k;
1271 e = strchrnul(cgroup, '/');
1272 c = strndupa(cgroup, e - cgroup);
1275 /* Could this be a valid unit name? */
1276 if (!unit_name_is_valid(c, true))
1279 if (!unit_name_is_template(c))
1285 e += strspn(e, "/");
1287 p = strchrnul(e, '/');
1288 k = strndupa(e, p - e);
1291 if (!unit_name_is_valid(k, false))
/* Extract the system unit name from a cgroup path: the part following the
 * "/system/" prefix is handed to cg_path_decode_unit(). */
1304 int cg_path_get_unit(const char *path, char **unit) {
1310 e = path_startswith(path, "/system/");
1314 return cg_path_decode_unit(e, unit);
/* Determine the system unit a process belongs to: the process' shifted
 * cgroup path from cg_pid_get_path_shifted() is parsed by
 * cg_path_get_unit(). */
1317 int cg_pid_get_unit(pid_t pid, char **unit) {
1318 _cleanup_free_ char *cgroup = NULL;
1323 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1327 return cg_path_get_unit(cgroup, unit);
/* Helper for walking a cgroup path component by component. The visible
 * step advances e past a run of '/' characters.
 * NOTE(review): presumably one path component is skipped before that -
 * confirm against the full body. */
1330 _pure_ static const char *skip_label(const char *e) {
1337 e += strspn(e, "/");
/* Extract the user unit name from a cgroup path. The path must start with
 * "/user/"; per the comments below, the user name, the session ID and the
 * systemd cgroup components are skipped in that order, and the remainder
 * is handed to cg_path_decode_unit(). */
1341 int cg_path_get_user_unit(const char *path, char **unit) {
1347 /* We always have to parse the path from the beginning as unit
1348 * cgroups might have arbitrary child cgroups and we shouldn't get
1349 * confused by those */
1351 e = path_startswith(path, "/user/");
1355 /* Skip the user name */
1360 /* Skip the session ID */
1365 /* Skip the systemd cgroup */
1370 return cg_path_decode_unit(e, unit);
/* Determine the user unit a process belongs to: the process' shifted
 * cgroup path from cg_pid_get_path_shifted() is parsed by
 * cg_path_get_user_unit(). */
1373 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1374 _cleanup_free_ char *cgroup = NULL;
1379 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1383 return cg_path_get_user_unit(cgroup, unit);
/* Extract the machine name from a cgroup path. The component following
 * the "/machine/" prefix is copied to the stack, unescaped with
 * cg_unescape() and duplicated on the heap with strdup().
 * NOTE(review): machine presumably receives that copy - confirm. */
1386 int cg_path_get_machine_name(const char *path, char **machine) {
1393 e = path_startswith(path, "/machine/");
1397 n = strchrnul(e, '/');
1401 s = strndupa(e, n - e);
1403 r = strdup(cg_unescape(s));
/* Determine the machine a process belongs to: the process' shifted cgroup
 * path from cg_pid_get_path_shifted() is parsed by
 * cg_path_get_machine_name(). */
1411 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1412 _cleanup_free_ char *cgroup = NULL;
1417 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1421 return cg_path_get_machine_name(cgroup, machine);
/* Extract the session ID from a cgroup path. The path must start with
 * "/user/"; after the user name is skipped, the next component is
 * compared against the 8-character suffix ".session", and the text in
 * front of that suffix is duplicated with strndup().
 * NOTE(review): the memcmp() reads 8 bytes before n, so the component is
 * presumably length-checked in lines not shown here - confirm. */
1424 int cg_path_get_session(const char *path, char **session) {
1431 e = path_startswith(path, "/user/");
1435 /* Skip the user name */
1440 n = strchrnul(e, '/');
1443 if (memcmp(n - 8, ".session", 8) != 0)
1446 s = strndup(e, n - e - 8);
/* Determine the session a process belongs to: the process' shifted cgroup
 * path from cg_pid_get_path_shifted() is parsed by
 * cg_path_get_session(). */
1454 int cg_pid_get_session(pid_t pid, char **session) {
1455 _cleanup_free_ char *cgroup = NULL;
1460 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1464 return cg_path_get_session(cgroup, session);
/* Extract the owning UID from a cgroup path. The component following the
 * "/user/" prefix is compared against the 5-character suffix ".user"; the
 * text in front of that suffix is copied to the stack and converted with
 * parse_uid(), whose result is returned.
 * NOTE(review): the memcmp() reads 5 bytes before n, so the component is
 * presumably length-checked in lines not shown here - confirm. */
1467 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1474 e = path_startswith(path, "/user/");
1478 n = strchrnul(e, '/');
1481 if (memcmp(n - 5, ".user", 5) != 0)
1484 s = strndupa(e, n - e - 5);
1488 return parse_uid(s, uid);
/* Determine the UID owning the cgroup of a process: the process' shifted
 * cgroup path from cg_pid_get_path_shifted() is parsed by
 * cg_path_get_owner_uid(). */
1491 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1492 _cleanup_free_ char *cgroup = NULL;
1497 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1501 return cg_path_get_owner_uid(cgroup, uid);
/* Derive the controller name from a cgroup attribute file name. attr is
 * tested with filename_is_safe(); the text before its first '.' is
 * duplicated with strndup() and validated with cg_controller_is_valid(),
 * with named hierarchies not allowed (allow_named=false).
 * NOTE(review): handling of attributes without a '.' is not visible
 * here. */
1504 int cg_controller_from_attr(const char *attr, char **controller) {
1511 if (!filename_is_safe(attr))
1514 dot = strchr(attr, '.');
1520 c = strndup(attr, dot - attr);
1524 if (!cg_controller_is_valid(c, false)) {
/* Escape a name for use as a directory name in the cgroup tree. Names
 * compared against here include "notify_on_release" and "release_agent";
 * in addition the text before the last '.' is examined - both the literal
 * "cgroup" and any prefix for which check_hierarchy() succeeds are
 * recognised. An escaped name is the original with a single '_'
 * prepended, built by strappend().
 * NOTE(review): need_prefix presumably records whether one of these
 * checks matched - confirm. */
1533 char *cg_escape(const char *p) {
1534 bool need_prefix = false;
1536 /* This implements very minimal escaping for names to be used
1537 * as file names in the cgroup tree: any name which might
1538 * conflict with a kernel name or is prefixed with '_' is
1539 * prefixed with a '_'. That way, when reading cgroup names it
1540 * is sufficient to remove a single prefixing underscore if
1543 /* The return value of this function (unlike cg_unescape())
1549 streq(p, "notify_on_release") ||
1550 streq(p, "release_agent") ||
1556 dot = strrchr(p, '.');
1559 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1564 n = strndupa(p, dot - p);
1566 if (check_hierarchy(n) >= 0)
1573 return strappend("_", p);
/* Counterpart of cg_escape(). As the note below states, the returned
 * pointer must not be passed to free().
 * NOTE(review): presumably it points into p, past a leading '_' if there
 * is one - confirm against the full body. */
1578 char *cg_unescape(const char *p) {
1581 /* The return value of this function (unlike cg_escape())
1582 * doesn't need free()! */
/* Set of characters permitted in a controller name; it is searched with
 * strchr() in cg_controller_is_valid(). The visible part lists the ASCII
 * lower- and upper-case letters. */
1590 #define CONTROLLER_VALID \
1592 "abcdefghijklmnopqrstuvwxyz" \
1593 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
/* Validate a controller name. A "name=" prefix is detected with
 * startswith() (NOTE(review): presumably accepted only when allow_named
 * is true - confirm). The name is then tested for being empty or starting
 * with '_', every character is looked up in CONTROLLER_VALID, and the
 * length is compared against FILENAME_MAX.
 * NOTE(review): presumably each failing test makes the function return
 * false - confirm. */
1596 bool cg_controller_is_valid(const char *p, bool allow_named) {
1603 s = startswith(p, "name=");
1608 if (*p == 0 || *p == '_')
1611 for (t = p; *t; t++)
1612 if (!strchr(CONTROLLER_VALID, *t))
1615 if (t - p > FILENAME_MAX)
1621 int cg_slice_to_path(const char *unit, char **ret) {
1622 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1628 if (!unit_name_is_valid(unit, false))
1631 if (!endswith(unit, ".slice"))
1634 p = unit_name_to_prefix(unit);
1638 dash = strchr(p, '-');
1640 _cleanup_free_ char *escaped = NULL;
1641 char n[dash - p + sizeof(".slice")];
1643 strcpy(stpncpy(n, p, dash - p), ".slice");
1645 if (!unit_name_is_valid(n, false))
1648 escaped = cg_escape(n);
1652 if (!strextend(&s, escaped, "/", NULL))
1655 dash = strchr(dash+1, '-');
1658 e = cg_escape(unit);
1662 if (!strextend(&s, e, NULL))