1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
39 #include "unit-name.h"
/* Prepares enumeration of the processes of one cgroup.
 *
 * The cgroup.procs attribute file of the cgroup addressed by controller
 * and path is located with cg_get_path(); the temporary path string fs is
 * released automatically (_cleanup_free_).
 * NOTE(review): _f is presumably the output parameter that receives the
 * opened stream -- confirm. */
43 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
44 _cleanup_free_ char *fs = NULL;
50 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Reads the next PID from the stream f.
 *
 * The value is parsed as an unsigned long with fscanf(). When fscanf()
 * does not deliver exactly one item, a visible error path returns -errno
 * if errno is set and -EIO otherwise.
 * NOTE(review): _pid is presumably the output parameter for the parsed
 * PID -- confirm. */
62 int cg_read_pid(FILE *f, pid_t *_pid) {
65 /* Note that the cgroup.procs might contain duplicates! See
66 * cgroups.txt for details. */
72 if (fscanf(f, "%lu", &ul) != 1) {
77 return errno ? -errno : -EIO;
/* Prepares enumeration of the direct child cgroups of one cgroup.
 *
 * The directory of the cgroup is located with cg_get_path() using no
 * attribute suffix (NULL); the temporary path string fs is released
 * automatically (_cleanup_free_).
 * NOTE(review): _d is presumably the output parameter that receives the
 * opened directory handle -- confirm. */
87 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
88 _cleanup_free_ char *fs = NULL;
94 /* This is not recursive! */
96 r = cg_get_path(controller, path, NULL, &fs);
/* Reads the next child cgroup name from the directory handle d.
 *
 * Entries are walked with FOREACH_DIRENT, which returns -errno on a read
 * failure. Entries are tested for d_type != DT_DIR and for the names
 * . and .. ; the name of an entry is duplicated with strdup().
 * NOTE(review): the tested entries are presumably skipped and the copy
 * presumably handed to the caller through fn -- confirm. */
108 int cg_read_subgroup(DIR *d, char **fn) {
114 FOREACH_DIRENT(de, d, return -errno) {
117 if (de->d_type != DT_DIR)
120 if (streq(de->d_name, ".") ||
121 streq(de->d_name, ".."))
124 b = strdup(de->d_name);
/* Removes the directory of the cgroup addressed by controller and path.
 *
 * The directory is located with cg_get_path(). The cgroup.procs file and,
 * for compatibility, the tasks file inside it are probed with
 * file_is_priv_sticky(). The last visible check excludes errno == ENOENT
 * from the failure case.
 * NOTE(review): the sticky probes are presumably only performed when
 * honour_sticky is true -- confirm. */
135 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
136 _cleanup_free_ char *p = NULL;
139 r = cg_get_path(controller, path, NULL, &p);
146 /* If the sticky bit is set on cgroup.procs, don't
147 * remove the directory */
149 fn = strappend(p, "/cgroup.procs");
153 r = file_is_priv_sticky(fn);
159 /* Compatibility ... */
160 fn = strappend(p, "/tasks");
164 r = file_is_priv_sticky(fn);
172 if (r < 0 && errno != ENOENT)
/* Sends the signal sig to the processes of one cgroup.
 *
 * PIDs are obtained with cg_enumerate_processes() and cg_read_pid(). The
 * set s is consulted with set_get() before a PID is signalled and updated
 * with set_put() afterwards. A set created here with set_new() is held in
 * allocated_set and released automatically (_cleanup_set_free_).
 * With ignore_self the PID is compared against my_pid.
 * -ENOENT from the enumeration and ESRCH from kill() are excluded from the
 * error recorded in ret.
 * NOTE(review): the local set is presumably only created when the caller
 * passes s == NULL, and sigcont presumably requests an additional SIGCONT
 * -- confirm both. */
178 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
179 _cleanup_set_free_ Set *allocated_set = NULL;
186 /* This goes through the tasks list and kills them all. This
187 * is repeated until no further processes are added to the
188 * tasks list, to properly handle forking processes */
191 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
199 _cleanup_fclose_ FILE *f = NULL;
203 r = cg_enumerate_processes(controller, path, &f);
205 if (ret >= 0 && r != -ENOENT)
211 while ((r = cg_read_pid(f, &pid)) > 0) {
213 if (ignore_self && pid == my_pid)
216 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
219 /* If we haven't killed this process yet, kill
221 if (kill(pid, sig) < 0) {
222 if (ret >= 0 && errno != ESRCH)
224 } else if (ret == 0) {
234 r = set_put(s, LONG_TO_PTR(pid));
250 /* To avoid racing against processes which fork
251 * quicker than we can kill them we repeat this until
252 * no new pids need to be killed. */
/* Sends sig to the processes of a cgroup and of all its descendants.
 *
 * cg_kill() is applied to the cgroup itself first. The child cgroups are
 * then listed with cg_enumerate_subgroups() and cg_read_subgroup(), and the
 * function recurses into each child path built as path + / + fn, passing
 * the same set s down. cg_rmdir() is called with honour_sticky set to
 * true; -ENOENT and -EBUSY from it are excluded from the error recorded
 * in ret, as is -ENOENT from the subgroup enumeration.
 * NOTE(review): the set is presumably only allocated here when the caller
 * passes s == NULL, and the cg_rmdir() call is presumably conditional on
 * rem -- confirm both. */
259 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
260 _cleanup_set_free_ Set *allocated_set = NULL;
261 _cleanup_closedir_ DIR *d = NULL;
269 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
274 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
276 r = cg_enumerate_subgroups(controller, path, &d);
278 if (ret >= 0 && r != -ENOENT)
284 while ((r = cg_read_subgroup(d, &fn)) > 0) {
285 _cleanup_free_ char *p = NULL;
287 p = strjoin(path, "/", fn, NULL);
292 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
293 if (ret >= 0 && r != 0)
297 if (ret >= 0 && r < 0)
301 r = cg_rmdir(controller, path, true);
302 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Kills all processes below a cgroup and waits for it to become empty.
 *
 * The loop is bounded at 15 rounds. Each round calls cg_kill_recursive()
 * with sigcont and ignore_self enabled and without a caller-provided set,
 * and the loop sleeps 200 ms with usleep(). The signal used per round is
 * chosen in the loop, following the schedule described in the comment
 * below. */
309 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
314 /* This safely kills all processes; first it sends a SIGTERM,
315 * then checks 8 times after 200ms whether the group is now
316 * empty, then kills everything that is left with SIGKILL and
317 * finally checks 5 times after 200ms each whether the group
318 * is finally empty. */
320 for (i = 0; i < 15; i++) {
330 r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL);
334 usleep(200 * USEC_PER_MSEC);
/* Moves the processes of cgroup (cfrom, pfrom) into cgroup (cto, pto).
 *
 * PIDs are obtained with cg_enumerate_processes() and cg_read_pid() and
 * attached to the destination with cg_attach(). A locally allocated set,
 * released automatically (_cleanup_set_free_), is consulted with set_get()
 * before a PID is handled and updated with set_put() afterwards.
 * With ignore_self the PID is compared against my_pid.
 * -ENOENT from the enumeration and -ESRCH from cg_attach() are excluded
 * from the error recorded in ret. */
340 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
342 _cleanup_set_free_ Set *s = NULL;
351 s = set_new(trivial_hash_func, trivial_compare_func);
358 _cleanup_fclose_ FILE *f = NULL;
362 r = cg_enumerate_processes(cfrom, pfrom, &f);
364 if (ret >= 0 && r != -ENOENT)
370 while ((r = cg_read_pid(f, &pid)) > 0) {
372 /* This might do weird stuff if we aren't a
373 * single-threaded program. However, we
374 * luckily know we are not */
375 if (ignore_self && pid == my_pid)
378 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
381 r = cg_attach(cto, pto, pid);
383 if (ret >= 0 && r != -ESRCH)
390 r = set_put(s, LONG_TO_PTR(pid));
/* Moves the processes of a cgroup and of all its descendants into the
 * single destination cgroup (cto, pto).
 *
 * cg_migrate() is applied to the source cgroup itself first. The child
 * cgroups are then listed with cg_enumerate_subgroups() and
 * cg_read_subgroup(), and the function recurses into each child path built
 * as pfrom + / + fn while keeping the destination unchanged. cg_rmdir() is
 * called on the source with honour_sticky set to true; -ENOENT and -EBUSY
 * from it are excluded from the error recorded in ret, as is -ENOENT from
 * the subgroup enumeration.
 * NOTE(review): the cg_rmdir() call is presumably conditional on rem --
 * confirm. */
410 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
411 _cleanup_closedir_ DIR *d = NULL;
420 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
422 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
424 if (ret >= 0 && r != -ENOENT)
430 while ((r = cg_read_subgroup(d, &fn)) > 0) {
431 _cleanup_free_ char *p = NULL;
433 p = strjoin(pfrom, "/", fn, NULL);
442 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
443 if (r != 0 && ret >= 0)
447 if (r < 0 && ret >= 0)
451 r = cg_rmdir(cfrom, pfrom, true);
452 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Translates a controller specification into a plain controller name.
 *
 * SYSTEMD_CGROUP_CONTROLLER is special-cased. For a specification that
 * starts with name= the returned pointer points 5 bytes into the input,
 * i.e. just past that prefix; nothing is copied in that case, so the
 * result shares storage with the argument. */
459 static const char *normalize_controller(const char *controller) {
463 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
465 else if (startswith(controller, "name="))
466 return controller + 5;
/* Assembles a cgroup file system path from up to three parts.
 *
 * With a non-empty controller the result starts with
 * /sys/fs/cgroup/<controller> and is followed by path and suffix, each only
 * when non-empty. With an empty controller, path and suffix are combined
 * without that prefix. The assembled string is cleaned up with
 * path_kill_slashes().
 * NOTE(review): fs is presumably the output parameter that receives the
 * assembled string -- confirm. */
471 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
474 if (!isempty(controller)) {
475 if (!isempty(path) && !isempty(suffix))
476 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
477 else if (!isempty(path))
478 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
479 else if (!isempty(suffix))
480 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
482 t = strappend("/sys/fs/cgroup/", controller);
484 if (!isempty(path) && !isempty(suffix))
485 t = strjoin(path, "/", suffix, NULL);
486 else if (!isempty(path))
495 path_kill_slashes(t);
/* Computes the file system path of a cgroup or of one of its attributes.
 *
 * A non-NULL controller is validated with cg_controller_is_valid() (named
 * hierarchies allowed). While the thread-local flag good is still false,
 * /sys/fs/cgroup is tested with path_is_mount_point(); a failing test
 * returns the error code, or -ENOENT when the call itself did not fail.
 * The controller is then normalized (NULL stays NULL) and the path is
 * built by join_path(). */
501 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
503 static __thread bool good = false;
507 if (controller && !cg_controller_is_valid(controller, true))
510 if (_unlikely_(!good)) {
513 r = path_is_mount_point("/sys/fs/cgroup", false);
515 return r < 0 ? r : -ENOENT;
517 /* Cache this to save a few stat()s */
521 p = controller ? normalize_controller(controller) : NULL;
523 return join_path(p, path, suffix, fs);
/* Checks whether the hierarchy directory /sys/fs/cgroup/<p> exists.
 *
 * The path is built in a stack buffer obtained from alloca() and probed
 * with access(F_OK). */
526 static int check_hierarchy(const char *p) {
531 /* Check if this controller actually really exists */
/* sizeof of the string literal already counts its terminating NUL, so the
 * buffer holds the prefix, p and the terminator. */
532 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
533 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
534 if (access(cc, F_OK) < 0)
/* Like a path lookup through join_path(), but additionally verifies that
 * the controller hierarchy exists.
 *
 * The controller is validated with cg_controller_is_valid() (named
 * hierarchies allowed), normalized, and probed with check_hierarchy()
 * before the path is built by join_path(). Unlike cg_get_path(), the
 * controller is passed to the validator without a NULL test here. */
540 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
546 if (!cg_controller_is_valid(controller, true))
549 /* Normalize the controller syntax */
550 p = normalize_controller(controller);
552 /* Check if this controller actually really exists */
553 r = check_hierarchy(p);
557 return join_path(p, path, suffix, fs);
/* Callback with the nftw() callback signature, used by cg_trim().
 *
 * The entry type is tested against FTW_DP and the depth against 1. For the
 * visited directory the cgroup.procs file and the tasks file are probed
 * with file_is_priv_sticky(); any result other than a positive one counts
 * as not sticky.
 * NOTE(review): entries failing the type or depth test are presumably
 * ignored, and sticky directories presumably left in place -- confirm. */
560 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
564 if (typeflag != FTW_DP)
567 if (ftwbuf->level < 1)
570 p = strappend(path, "/cgroup.procs");
576 is_sticky = file_is_priv_sticky(p) > 0;
583 p = strappend(path, "/tasks");
589 is_sticky = file_is_priv_sticky(p) > 0;
/* Trims the cgroup tree below the cgroup addressed by controller and path.
 *
 * The directory is located with cg_get_path() and walked with nftw() using
 * trim_cb, at most 64 open descriptors, and the flags FTW_DEPTH, FTW_MOUNT
 * and FTW_PHYS. A failing walk yields -errno, or -EIO when errno is unset.
 * For the top directory the cgroup.procs file and the tasks file are probed
 * with file_is_priv_sticky() and rmdir() is attempted; an rmdir() failure
 * with ENOENT, or one that occurs after an earlier error was already
 * recorded in r, is excluded from the visible failure condition.
 * NOTE(review): the handling of the top directory is presumably
 * conditional on delete_root -- confirm. */
599 int cg_trim(const char *controller, const char *path, bool delete_root) {
600 _cleanup_free_ char *fs = NULL;
605 r = cg_get_path(controller, path, NULL, &fs);
610 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
611 r = errno ? -errno : -EIO;
617 p = strappend(fs, "/cgroup.procs");
621 is_sticky = file_is_priv_sticky(p) > 0;
625 p = strappend(fs, "/tasks");
629 is_sticky = file_is_priv_sticky(p) > 0;
634 if (rmdir(fs) < 0 && errno != ENOENT && r == 0)
/* Deletes a cgroup after moving its processes into the parent cgroup.
 *
 * The parent path is derived with path_get_parent(). The work is done by
 * cg_migrate_recursive() within the same controller, with ignore_self
 * false and rem true. A result of -ENOENT is reported as success (0);
 * every other result is passed through. */
641 int cg_delete(const char *controller, const char *path) {
642 _cleanup_free_ char *parent = NULL;
647 r = path_get_parent(path, &parent);
651 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
652 return r == -ENOENT ? 0 : r;
/* Attaches the process pid to the cgroup addressed by controller and path.
 *
 * The cgroup.procs file of the target is located with
 * cg_get_path_and_check(), so the controller hierarchy is checked on the
 * way. The PID is formatted as an unsigned decimal followed by a newline
 * and written with write_string_file(), whose result is returned. */
655 int cg_attach(const char *controller, const char *path, pid_t pid) {
656 _cleanup_free_ char *fs = NULL;
657 char c[DECIMAL_STR_MAX(pid_t) + 2];
663 r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
670 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
672 return write_string_file(fs, c);
/* Changes access mode and ownership of a cgroup directory.
 *
 * The directory is located with cg_get_path() (no attribute suffix) and
 * updated with chmod_and_chown(), whose result is returned. A mode other
 * than (mode_t) -1 receives extra treatment before that call. */
675 int cg_set_group_access(
676 const char *controller,
682 _cleanup_free_ char *fs = NULL;
687 if (mode != (mode_t) -1)
690 r = cg_get_path(controller, path, NULL, &fs);
694 return chmod_and_chown(fs, mode, uid, gid);
/* Changes access mode, ownership and sticky bit of the process list files
 * of a cgroup.
 *
 * The case where mode, uid and gid are all -1 and sticky is negative is
 * tested up front. The cgroup.procs file is located with cg_get_path().
 * The effective mode is then derived as follows:
 *  - mode and sticky both given: S_ISVTX is or-ed into mode when sticky is
 *    non-zero;
 *  - only sticky given: the current st_mode is kept except for S_ISVTX,
 *    which is set according to sticky;
 *  - only mode given: the lower 0777 bits of the current st_mode are
 *    replaced by mode, the remaining bits are kept.
 * The result is applied with chmod_and_chown() to cgroup.procs and then to
 * the tasks file of the same cgroup.
 * NOTE(review): the up-front test presumably returns early because there
 * is nothing to change -- confirm. */
697 int cg_set_task_access(
698 const char *controller,
705 _cleanup_free_ char *fs = NULL, *procs = NULL;
710 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
713 if (mode != (mode_t) -1)
716 r = cg_get_path(controller, path, "cgroup.procs", &fs);
720 if (sticky >= 0 && mode != (mode_t) -1)
721 /* Both mode and sticky param are passed */
722 mode |= (sticky ? S_ISVTX : 0);
723 else if ((sticky >= 0 && mode == (mode_t) -1) ||
724 (mode != (mode_t) -1 && sticky < 0)) {
727 /* Only one param is passed, hence read the current
728 * mode from the file itself */
734 if (mode == (mode_t) -1)
735 /* No mode set, we just shall set the sticky bit */
736 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
738 /* Only mode set, leave sticky bit untouched */
739 mode = (st.st_mode & ~0777) | mode;
742 r = chmod_and_chown(fs, mode, uid, gid);
746 /* Compatibility, Always keep values for "tasks" in sync with
748 r = cg_get_path(controller, path, "tasks", &procs);
752 return chmod_and_chown(procs, mode, uid, gid);
/* Looks up the cgroup path of a process within one controller hierarchy.
 *
 * The controller is validated and normalized, with
 * SYSTEMD_CGROUP_CONTROLLER as a fallback value. The data is read from
 * /proc/self/cgroup or from the per-process cgroup file named by
 * procfs_file_alloca(). A visible error path maps ENOENT to -ESRCH and
 * returns -errno otherwise. Each line is searched for a colon, and the
 * comma-separated words of the field are compared with the controller
 * name, both directly and with a leading name= prefix.
 * NOTE(review): /proc/self/cgroup is presumably chosen when pid is 0, and
 * path presumably receives a copy of the matching cgroup path -- confirm
 * both. */
755 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
756 _cleanup_fclose_ FILE *f = NULL;
765 if (!cg_controller_is_valid(controller, true))
768 controller = normalize_controller(controller);
770 controller = SYSTEMD_CGROUP_CONTROLLER;
773 fs = "/proc/self/cgroup";
775 fs = procfs_file_alloca(pid, "cgroup");
779 return errno == ENOENT ? -ESRCH : -errno;
781 cs = strlen(controller);
783 FOREACH_LINE(line, f, return -errno) {
791 l = strchr(line, ':');
802 FOREACH_WORD_SEPARATOR(w, k, l, ",", state) {
804 if (k == cs && memcmp(w, controller, cs) == 0) {
810 memcmp(w, "name=", 5) == 0 &&
811 memcmp(w+5, controller, cs) == 0) {
/* Installs agent as release agent of a controller hierarchy and enables
 * release notification.
 *
 * The release_agent attribute at the top of the hierarchy is read with
 * read_one_line_file() and stripped. It is either written with
 * write_string_file() or compared against agent. The notify_on_release
 * attribute is then read the same way and, when it contains 0, rewritten
 * to 1.
 * NOTE(review): the write presumably happens only when no agent is
 * configured yet, and a differing agent is presumably reported as an
 * error -- confirm. */
831 int cg_install_release_agent(const char *controller, const char *agent) {
832 _cleanup_free_ char *fs = NULL, *contents = NULL;
838 r = cg_get_path(controller, NULL, "release_agent", &fs);
842 r = read_one_line_file(fs, &contents);
846 sc = strstrip(contents);
848 r = write_string_file(fs, agent);
851 } else if (!streq(sc, agent))
856 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
862 r = read_one_line_file(fs, &contents);
866 sc = strstrip(contents);
867 if (streq(sc, "0")) {
868 r = write_string_file(fs, "1");
/* Tests whether a cgroup has no processes (children are not looked at).
 *
 * The PIDs are read with cg_enumerate_processes() and cg_read_pid(). A
 * visible return statement maps the enumeration result -ENOENT to 1, so a
 * missing cgroup counts as empty, and passes any other value through.
 * With ignore_self, a PID equal to self_pid is tested for explicitly. */
881 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
882 _cleanup_fclose_ FILE *f = NULL;
883 pid_t pid = 0, self_pid;
889 r = cg_enumerate_processes(controller, path, &f);
891 return r == -ENOENT ? 1 : r;
895 while ((r = cg_read_pid(f, &pid)) > 0) {
897 if (ignore_self && pid == self_pid)
/* Variant of cg_is_empty() that takes a combined cgroup specification.
 *
 * spec is split into controller and path with cg_split_spec(); both
 * strings are released automatically (_cleanup_free_). The result of
 * cg_is_empty() for that pair is returned. */
910 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
911 _cleanup_free_ char *controller = NULL, *path = NULL;
916 r = cg_split_spec(spec, &controller, &path);
920 return cg_is_empty(controller, path, ignore_self);
/* Tests whether a cgroup and all of its descendants have no processes.
 *
 * cg_is_empty() is applied to the cgroup itself first. The child cgroups
 * are then listed with cg_enumerate_subgroups() and cg_read_subgroup(), and
 * the function recurses into each child path built as path + / + fn.
 * A visible return statement maps the enumeration result -ENOENT to 1 and
 * passes any other value through. */
923 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
924 _cleanup_closedir_ DIR *d = NULL;
930 r = cg_is_empty(controller, path, ignore_self);
934 r = cg_enumerate_subgroups(controller, path, &d);
936 return r == -ENOENT ? 1 : r;
938 while ((r = cg_read_subgroup(d, &fn)) > 0) {
939 _cleanup_free_ char *p = NULL;
941 p = strjoin(path, "/", fn, NULL);
946 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup specification into a controller name and a path.
 *
 * Visible handling:
 *  - one branch checks spec with path_is_safe() and cleans a string with
 *    path_kill_slashes();
 *  - spec is searched for a colon; one branch validates the whole spec as
 *    a controller name and duplicates its normalized form;
 *  - another branch duplicates the text before the colon, normalizes it,
 *    duplicates the result and validates that;
 *  - a path string is checked with path_is_safe() and path_is_absolute()
 *    and cleaned with path_kill_slashes().
 * NOTE(review): presumably the first branch covers a spec that is a plain
 * path, the second a spec without colon and the others the
 * controller:path form -- confirm the branch conditions. */
957 int cg_split_spec(const char *spec, char **controller, char **path) {
959 char *t = NULL, *u = NULL;
960 _cleanup_free_ char *v = NULL;
965 if (!path_is_safe(spec))
973 path_kill_slashes(t);
983 e = strchr(spec, ':');
985 if (!cg_controller_is_valid(spec, true))
989 t = strdup(normalize_controller(spec));
1002 v = strndup(spec, e-spec);
1005 t = strdup(normalize_controller(v));
1008 if (!cg_controller_is_valid(t, true)) {
1018 if (!path_is_safe(u) ||
1019 !path_is_absolute(u)) {
1025 path_kill_slashes(u);
/* Combines a controller name and a path into one controller:path string.
 *
 * The controller falls back to systemd in one branch; otherwise it is
 * validated and normalized. The path is tested with path_is_absolute().
 * The parts are joined with a colon, and path_kill_slashes() is applied to
 * the portion of the result that follows the colon.
 * NOTE(review): the fallback presumably applies when controller is NULL,
 * and spec presumably receives the joined string -- confirm. */
1040 int cg_join_spec(const char *controller, const char *path, char **spec) {
1046 controller = "systemd";
1048 if (!cg_controller_is_valid(controller, true))
1051 controller = normalize_controller(controller);
1054 if (!path_is_absolute(path))
1057 s = strjoin(controller, ":", path, NULL);
1061 path_kill_slashes(s + strlen(controller) + 1);
/* Converts either a cgroup file system path or a cgroup specification
 * into a file system path.
 *
 * An input below /sys/fs/cgroup is treated as a file system path already
 * and cleaned with path_kill_slashes(). Any other input is split with
 * cg_split_spec() and resolved with cg_get_path(), substituting
 * SYSTEMD_CGROUP_CONTROLLER for a missing controller and / for a missing
 * path. */
1067 int cg_mangle_path(const char *path, char **result) {
1068 _cleanup_free_ char *c = NULL, *p = NULL;
1075 /* First check if it already is a filesystem path */
1076 if (path_startswith(path, "/sys/fs/cgroup")) {
1082 path_kill_slashes(t);
1087 /* Otherwise treat it as cg spec */
1088 r = cg_split_spec(path, &c, &p);
1092 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Determines the root cgroup path of this system instance.
 *
 * The cgroup path of PID 1 in the SYSTEMD_CGROUP_CONTROLLER hierarchy is
 * queried with cg_pid_get_path(), and the result is examined for a
 * trailing / followed by SPECIAL_SYSTEM_SLICE.
 * NOTE(review): that trailing component is presumably cut off before the
 * string is handed out through path -- confirm. */
1095 int cg_get_root_path(char **path) {
1101 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1105 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE);
/* Filters a NULL-terminated list of controller names.
 *
 * The list is traversed with a read cursor f and a second cursor t that
 * both start at the first element. Each name is normalized; the name
 * systemd is special-cased; a name rejected by cg_controller_is_valid() is
 * reported with log_warning(), and a name that is tested with
 * check_hierarchy() can be reported with log_debug() as unavailable.
 * The list is finally passed through strv_uniq() and returned.
 * NOTE(review): t is presumably the write position for entries that are
 * kept, and dropped entries are presumably freed -- confirm. */
1113 char **cg_shorten_controllers(char **controllers) {
1119 for (f = controllers, t = controllers; *f; f++) {
1123 p = normalize_controller(*f);
1125 if (streq(p, "systemd")) {
1130 if (!cg_controller_is_valid(p, true)) {
1131 log_warning("Controller %s is not valid, removing from controllers list.", p);
1136 r = check_hierarchy(p);
1138 log_debug("Controller %s is not available, removing from controllers list.", p);
1147 return strv_uniq(controllers);
/* Looks up the cgroup path of a process and relates it to the root
 * cgroup path.
 *
 * The root is obtained with cg_get_root_path() and the path of pid in the
 * SYSTEMD_CGROUP_CONTROLLER hierarchy with cg_pid_get_path().
 * path_startswith() locates the part of the process path that follows the
 * root; one visible statement terminates cg_process at that position.
 * NOTE(review): root and cgroup are presumably optional output parameters
 * for the two parts -- confirm which strings are handed out in which
 * case. */
1150 int cg_pid_get_path_shifted(pid_t pid, char **root, char **cgroup) {
1151 _cleanup_free_ char *cg_root = NULL;
1152 char *cg_process, *p;
1155 r = cg_get_root_path(&cg_root);
1159 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1163 p = path_startswith(cg_process, cg_root);
1182 cg_process[p-cg_process] = 0;
/* Extracts a unit name from the leading components of a cgroup path.
 *
 * The first path component is copied onto the stack with strndupa() and
 * checked with unit_name_is_valid(c, true) and unit_name_is_template().
 * In a further step the slashes after it are skipped and the following
 * component is copied and checked with unit_name_is_valid(k, false).
 * NOTE(review): presumably a non-template first component is the result
 * and a template is followed by the instance in the second component --
 * confirm; unit presumably receives a heap copy of the chosen name. */
1190 int cg_path_decode_unit(const char *cgroup, char **unit){
1191 char *p, *e, *c, *s, *k;
1196 e = strchrnul(cgroup, '/');
1197 c = strndupa(cgroup, e - cgroup);
1200 /* Could this be a valid unit name? */
1201 if (!unit_name_is_valid(c, true))
1204 if (!unit_name_is_template(c))
1210 e += strspn(e, "/");
1212 p = strchrnul(e, '/');
1213 k = strndupa(e, p - e);
1216 if (!unit_name_is_valid(k, false))
/* Advances p past leading slice components of a cgroup path.
 *
 * Leading slashes are skipped and the length n of the next component is
 * measured. The visible test is true for a component that is 6 characters
 * or shorter or that does not end in .slice.
 * NOTE(review): that test presumably ends the scan and returns the current
 * position -- confirm. */
1229 static const char *skip_slices(const char *p) {
1232 /* Skips over all slice assignments */
1235 p += strspn(p, "/");
1237 n = strcspn(p, "/");
1238 if (n <= 6 || memcmp(p + n - 6, ".slice", 6) != 0)
/* Extracts the unit name from a cgroup path.
 *
 * Leading slice components are skipped with skip_slices(); the remainder
 * is decoded by cg_path_decode_unit(), whose result is returned. */
1245 int cg_path_get_unit(const char *path, char **unit) {
1251 e = skip_slices(path);
1253 return cg_path_decode_unit(e, unit);
/* Determines the unit name a process belongs to.
 *
 * The cgroup path of pid is obtained with cg_pid_get_path_shifted()
 * (no root requested) and decoded with cg_path_get_unit(). The
 * intermediate path string is released automatically (_cleanup_free_). */
1256 int cg_pid_get_unit(pid_t pid, char **unit) {
1257 _cleanup_free_ char *cgroup = NULL;
1262 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1266 return cg_path_get_unit(cgroup, unit);
/* Advances p past one leading user component of a cgroup path.
 *
 * Leading slashes are skipped and the length n of the next component is
 * measured. The visible test is true for a component that is 5 characters
 * or shorter or that does not end in .user. Slashes are skipped once more
 * further down.
 * NOTE(review): a component failing the suffix test is presumably left in
 * place -- confirm the return values. */
1269 static const char *skip_user(const char *p) {
1274 p += strspn(p, "/");
1276 n = strcspn(p, "/");
1277 if (n <= 5 || memcmp(p + n - 5, ".user", 5) != 0)
1281 p += strspn(p, "/");
/* Advances p past one leading session component of a cgroup path.
 *
 * Leading slashes are skipped and the length n of the next component is
 * measured. The visible test is true for a component that is 8 characters
 * or shorter or that does not end in .session. Slashes are skipped once
 * more further down.
 * NOTE(review): the return value for a component failing the test is not
 * visible here -- confirm how callers detect a missing session. */
1286 static const char *skip_session(const char *p) {
1291 p += strspn(p, "/");
1293 n = strcspn(p, "/");
1294 if (n <= 8 || memcmp(p + n - 8, ".session", 8) != 0)
1298 p += strspn(p, "/");
/* Advances p past one leading component that starts with systemd-.
 *
 * Leading slashes are skipped and the length n of the next component is
 * measured. The visible test is true for a component shorter than 8
 * characters or one that does not begin with systemd-. Unlike the suffix
 * tests of the sibling helpers this one compares a prefix and accepts a
 * component of exactly 8 characters. Slashes are skipped once more
 * further down. */
1303 static const char *skip_systemd_label(const char *p) {
1308 p += strspn(p, "/");
1310 n = strcspn(p, "/");
1311 if (n < 8 || memcmp(p, "systemd-", 8) != 0)
1315 p += strspn(p, "/");
/* Extracts the user unit name from a cgroup path.
 *
 * The path is consumed from the front: slice components via
 * skip_slices(), then the user component, then the session component via
 * skip_session(), then the systemd- label via skip_systemd_label(). The
 * remainder is decoded by cg_path_decode_unit(), whose result is
 * returned. */
1320 int cg_path_get_user_unit(const char *path, char **unit) {
1326 /* We always have to parse the path from the beginning as unit
1327 * cgroups might have arbitrary child cgroups and we shouldn't get
1328 * confused by those */
1330 /* Skip slices, if there are any */
1331 e = skip_slices(path);
1333 /* Skip the user name, if there is one */
1336 /* Skip the session ID, require that there is one */
1337 e = skip_session(e);
1341 /* Skip the systemd cgroup, if there is one */
1342 e = skip_systemd_label(e);
1344 return cg_path_decode_unit(e, unit);
/* Determines the user unit name a process belongs to.
 *
 * The cgroup path of pid is obtained with cg_pid_get_path_shifted()
 * (no root requested) and decoded with cg_path_get_user_unit(). The
 * intermediate path string is released automatically (_cleanup_free_). */
1347 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1348 _cleanup_free_ char *cgroup = NULL;
1353 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1357 return cg_path_get_user_unit(cgroup, unit);
/* Extracts the machine name from a cgroup path.
 *
 * After slice components are skipped, the next path component is copied
 * onto the stack with strndupa() and searched for the suffix .machine
 * with endswith(). The text in front of that suffix is duplicated on the
 * heap with strndup().
 * NOTE(review): machine presumably receives that heap copy -- confirm. */
1360 int cg_path_get_machine_name(const char *path, char **machine) {
1361 const char *e, *n, *x;
1367 /* Skip slices, if there are any */
1368 e = skip_slices(path);
1370 n = strchrnul(e, '/');
1374 s = strndupa(e, n - e);
1377 x = endswith(s, ".machine");
1381 r = strndup(s, x - s);
/* Determines the machine name a process belongs to.
 *
 * The cgroup path of pid is obtained with cg_pid_get_path_shifted()
 * (no root requested) and decoded with cg_path_get_machine_name(). The
 * intermediate path string is released automatically (_cleanup_free_). */
1389 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1390 _cleanup_free_ char *cgroup = NULL;
1395 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1399 return cg_path_get_machine_name(cgroup, machine);
/* Extracts the session identifier from a cgroup path.
 *
 * After the leading components are skipped, n marks the end of the next
 * path component. The 8 bytes in front of n are compared with .session,
 * and the component without that suffix is duplicated with strndup().
 * NOTE(review): the memcmp() reads 8 bytes before n; confirm that a
 * length check on n - e precedes it so the read stays inside the
 * component. session presumably receives the heap copy -- confirm. */
1402 int cg_path_get_session(const char *path, char **session) {
1409 /* Skip slices, if there are any */
1410 e = skip_slices(path);
1412 /* Skip the user name, if there is one */
1415 n = strchrnul(e, '/');
1418 if (memcmp(n - 8, ".session", 8) != 0)
1421 s = strndup(e, n - e - 8);
/* Determines the session identifier a process belongs to.
 *
 * The cgroup path of pid is obtained with cg_pid_get_path_shifted()
 * (no root requested) and decoded with cg_path_get_session(). The
 * intermediate path string is released automatically (_cleanup_free_). */
1429 int cg_pid_get_session(pid_t pid, char **session) {
1430 _cleanup_free_ char *cgroup = NULL;
1435 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1439 return cg_path_get_session(cgroup, session);
/* Extracts the owning user ID from a cgroup path.
 *
 * After slice components are skipped, n marks the end of the next path
 * component. The 5 bytes in front of n are compared with .user, the
 * component without that suffix is copied onto the stack with strndupa()
 * and converted by parse_uid(), whose result is returned.
 * NOTE(review): the memcmp() reads 5 bytes before n; confirm that a
 * length check on n - e precedes it so the read stays inside the
 * component. */
1442 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1449 /* Skip slices, if there are any */
1450 e = skip_slices(path);
1452 n = strchrnul(e, '/');
1455 if (memcmp(n - 5, ".user", 5) != 0)
1458 s = strndupa(e, n - e - 5);
1462 return parse_uid(s, uid);
/* Determines the owning user ID of the cgroup a process belongs to.
 *
 * The cgroup path of pid is obtained with cg_pid_get_path_shifted()
 * (no root requested) and decoded with cg_path_get_owner_uid(). The
 * intermediate path string is released automatically (_cleanup_free_). */
1465 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1466 _cleanup_free_ char *cgroup = NULL;
1471 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1475 return cg_path_get_owner_uid(cgroup, uid);
/* Derives the controller name from a cgroup attribute name.
 *
 * attr is checked with filename_is_safe(). The text in front of the first
 * dot is duplicated with strndup() and validated with
 * cg_controller_is_valid(), with named hierarchies not allowed.
 * NOTE(review): the handling of an attribute without a dot is not visible
 * here; controller presumably receives the duplicated prefix -- confirm. */
1478 int cg_controller_from_attr(const char *attr, char **controller) {
1485 if (!filename_is_safe(attr))
1488 dot = strchr(attr, '.');
1494 c = strndup(attr, dot - attr);
1498 if (!cg_controller_is_valid(c, false)) {
/* Escapes a name for use as a file name inside the cgroup tree.
 *
 * The flag need_prefix starts out false. The visible checks compare p with
 * notify_on_release and release_agent, look at the text in front of the
 * last dot (the literal cgroup, or a name for which check_hierarchy()
 * succeeds), and one return path yields p with an underscore prepended.
 * NOTE(review): presumably each of those checks sets need_prefix, and a
 * name that needs no prefix is returned as a plain copy -- confirm. */
1507 char *cg_escape(const char *p) {
1508 bool need_prefix = false;
1510 /* This implements very minimal escaping for names to be used
1511 * as file names in the cgroup tree: any name which might
1512 * conflict with a kernel name or is prefixed with '_' is
1513 * prefixed with a '_'. That way, when reading cgroup names it
1514 * is sufficient to remove a single prefixing underscore if
1517 /* The return value of this function (unlike cg_unescape())
1523 streq(p, "notify_on_release") ||
1524 streq(p, "release_agent") ||
1530 dot = strrchr(p, '.');
1533 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1538 n = strndupa(p, dot - p);
1540 if (check_hierarchy(n) >= 0)
1547 return strappend("_", p);
/* Counterpart of cg_escape(): maps an escaped cgroup file name back to
 * the original name. As the note below states, the caller must not free
 * the result.
 * NOTE(review): the result is presumably a pointer into p itself --
 * confirm. */
1552 char *cg_unescape(const char *p) {
1555 /* The return value of this function (unlike cg_escape())
1556 * doesn't need free()! */
/* Characters that may appear in a controller name, written as one string
 * literal assembled from adjacent pieces. cg_controller_is_valid() looks
 * each character of a candidate name up in this set with strchr(). */
1564 #define CONTROLLER_VALID \
1566 "abcdefghijklmnopqrstuvwxyz" \
1567 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
/* Checks whether p is an acceptable controller name.
 *
 * A leading name= prefix is detected with startswith(). The visible
 * tests look for an empty name or one starting with an underscore, for
 * any character that is not contained in CONTROLLER_VALID, and for a
 * length above FILENAME_MAX.
 * NOTE(review): each of those tests presumably makes the function return
 * false, and the name= prefix is presumably only accepted when
 * allow_named is true -- confirm. */
1570 bool cg_controller_is_valid(const char *p, bool allow_named) {
1577 s = startswith(p, "name=");
1582 if (*p == 0 || *p == '_')
1585 for (t = p; *t; t++)
1586 if (!strchr(CONTROLLER_VALID, *t))
1589 if (t - p > FILENAME_MAX)
/* Converts a slice unit name into the matching cgroup path.
 *
 * unit must pass unit_name_is_valid() (second argument false) and end in
 * .slice. Its prefix is obtained with unit_name_to_prefix() and scanned
 * for dashes. For each dash, the text before it plus .slice forms an
 * intermediate slice name that is validated, escaped with cg_escape() and
 * appended to s together with a slash. The escaped full unit name is
 * appended last.
 * NOTE(review): ret presumably receives the assembled string s --
 * confirm. */
1595 int cg_slice_to_path(const char *unit, char **ret) {
1596 _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1602 if (!unit_name_is_valid(unit, false))
1605 if (!endswith(unit, ".slice"))
1608 p = unit_name_to_prefix(unit);
1612 dash = strchr(p, '-');
1614 _cleanup_free_ char *escaped = NULL;
/* sizeof of the string literal counts its terminating NUL, so the array
 * holds the text before the dash, the suffix and the terminator. */
1615 char n[dash - p + sizeof(".slice")];
1617 strcpy(stpncpy(n, p, dash - p), ".slice");
1619 if (!unit_name_is_valid(n, false))
1622 escaped = cg_escape(n);
1626 if (!strextend(&s, escaped, "/", NULL))
1629 dash = strchr(dash+1, '-');
1632 e = cg_escape(unit);
1636 if (!strextend(&s, e, NULL))