1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
39 #include "unit-name.h"
42 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
43 _cleanup_free_ char *fs = NULL;
49 r = cg_get_path(controller, path, "cgroup.procs", &fs);
61 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
62 _cleanup_free_ char *fs = NULL;
68 r = cg_get_path(controller, path, "tasks", &fs);
80 int cg_read_pid(FILE *f, pid_t *_pid) {
83 /* Note that the cgroup.procs might contain duplicates! See
84 * cgroups.txt for details. */
90 if (fscanf(f, "%lu", &ul) != 1) {
95 return errno ? -errno : -EIO;
105 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
106 _cleanup_free_ char *fs = NULL;
112 /* This is not recursive! */
114 r = cg_get_path(controller, path, NULL, &fs);
126 int cg_read_subgroup(DIR *d, char **fn) {
132 FOREACH_DIRENT(de, d, return -errno) {
135 if (de->d_type != DT_DIR)
138 if (streq(de->d_name, ".") ||
139 streq(de->d_name, ".."))
142 b = strdup(de->d_name);
153 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
154 _cleanup_free_ char *p = NULL;
157 r = cg_get_path(controller, path, NULL, &p);
164 /* If the sticky bit is set don't remove the directory */
166 tasks = strappend(p, "/tasks");
170 r = file_is_priv_sticky(tasks);
178 if (r < 0 && errno != ENOENT)
184 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
185 _cleanup_set_free_ Set *allocated_set = NULL;
192 /* This goes through the tasks list and kills them all. This
193 * is repeated until no further processes are added to the
194 * tasks list, to properly handle forking processes */
197 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
205 _cleanup_fclose_ FILE *f = NULL;
209 r = cg_enumerate_processes(controller, path, &f);
211 if (ret >= 0 && r != -ENOENT)
217 while ((r = cg_read_pid(f, &pid)) > 0) {
219 if (ignore_self && pid == my_pid)
222 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
225 /* If we haven't killed this process yet, kill
227 if (kill(pid, sig) < 0) {
228 if (ret >= 0 && errno != ESRCH)
230 } else if (ret == 0) {
240 r = set_put(s, LONG_TO_PTR(pid));
256 /* To avoid racing against processes which fork
257 * quicker than we can kill them we repeat this until
258 * no new pids need to be killed. */
265 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
266 _cleanup_set_free_ Set *allocated_set = NULL;
267 _cleanup_closedir_ DIR *d = NULL;
275 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
280 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
282 r = cg_enumerate_subgroups(controller, path, &d);
284 if (ret >= 0 && r != -ENOENT)
290 while ((r = cg_read_subgroup(d, &fn)) > 0) {
291 _cleanup_free_ char *p = NULL;
293 p = strjoin(path, "/", fn, NULL);
298 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
299 if (ret >= 0 && r != 0)
303 if (ret >= 0 && r < 0)
307 r = cg_rmdir(controller, path, true);
308 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
315 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
320 /* This safely kills all processes; first it sends a SIGTERM,
321 * then checks 8 times after 200ms whether the group is now
322 * empty, then kills everything that is left with SIGKILL and
323 * finally checks 5 times after 200ms each whether the group
324 * is finally empty. */
326 for (i = 0; i < 15; i++) {
336 r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL);
340 usleep(200 * USEC_PER_MSEC);
346 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
348 _cleanup_set_free_ Set *s = NULL;
357 s = set_new(trivial_hash_func, trivial_compare_func);
364 _cleanup_fclose_ FILE *f = NULL;
368 r = cg_enumerate_tasks(cfrom, pfrom, &f);
370 if (ret >= 0 && r != -ENOENT)
376 while ((r = cg_read_pid(f, &pid)) > 0) {
378 /* This might do weird stuff if we aren't a
379 * single-threaded program. However, we
380 * luckily know that we are single-threaded */
381 if (ignore_self && pid == my_pid)
384 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
387 r = cg_attach(cto, pto, pid);
389 if (ret >= 0 && r != -ESRCH)
396 r = set_put(s, LONG_TO_PTR(pid));
416 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
417 _cleanup_closedir_ DIR *d = NULL;
426 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
428 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
430 if (ret >= 0 && r != -ENOENT)
436 while ((r = cg_read_subgroup(d, &fn)) > 0) {
437 _cleanup_free_ char *p = NULL;
439 p = strjoin(pfrom, "/", fn, NULL);
448 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
449 if (r != 0 && ret >= 0)
453 if (r < 0 && ret >= 0)
457 r = cg_rmdir(cfrom, pfrom, true);
458 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
465 static const char *normalize_controller(const char *controller) {
469 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
471 else if (startswith(controller, "name="))
472 return controller + 5;
477 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
482 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
484 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
486 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
488 t = strappend("/sys/fs/cgroup/", controller);
491 t = strjoin(path, "/", suffix, NULL);
501 path_kill_slashes(t);
507 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
509 static __thread bool good = false;
513 if (controller && !cg_controller_is_valid(controller, true))
516 if (_unlikely_(!good)) {
519 r = path_is_mount_point("/sys/fs/cgroup", false);
521 return r < 0 ? r : -ENOENT;
523 /* Cache this to save a few stat()s */
527 p = controller ? normalize_controller(controller) : NULL;
529 return join_path(p, path, suffix, fs);
532 static int check_hierarchy(const char *p) {
537 /* Check if this controller actually really exists */
538 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
539 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
540 if (access(cc, F_OK) < 0)
546 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
552 if (!cg_controller_is_valid(controller, true))
555 /* Normalize the controller syntax */
556 p = normalize_controller(controller);
558 /* Check if this controller actually really exists */
559 r = check_hierarchy(p);
563 return join_path(p, path, suffix, fs);
566 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
570 if (typeflag != FTW_DP)
573 if (ftwbuf->level < 1)
576 p = strappend(path, "/tasks");
582 is_sticky = file_is_priv_sticky(p) > 0;
592 int cg_trim(const char *controller, const char *path, bool delete_root) {
593 _cleanup_free_ char *fs = NULL;
598 r = cg_get_path(controller, path, NULL, &fs);
603 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
604 r = errno ? -errno : -EIO;
610 p = strappend(fs, "/tasks");
614 is_sticky = file_is_priv_sticky(p) > 0;
618 if (rmdir(fs) < 0 && errno != ENOENT && r == 0)
625 int cg_delete(const char *controller, const char *path) {
626 _cleanup_free_ char *parent = NULL;
631 r = path_get_parent(path, &parent);
635 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
636 return r == -ENOENT ? 0 : r;
639 int cg_attach(const char *controller, const char *path, pid_t pid) {
640 _cleanup_free_ char *fs = NULL;
641 char c[DECIMAL_STR_MAX(pid_t) + 2];
647 r = cg_get_path_and_check(controller, path, "tasks", &fs);
654 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
656 return write_string_file(fs, c);
659 int cg_set_group_access(
660 const char *controller,
666 _cleanup_free_ char *fs = NULL;
671 if (mode != (mode_t) -1)
674 r = cg_get_path(controller, path, NULL, &fs);
678 return chmod_and_chown(fs, mode, uid, gid);
681 int cg_set_task_access(
682 const char *controller,
689 _cleanup_free_ char *fs = NULL, *procs = NULL;
694 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
697 if (mode != (mode_t) -1)
700 r = cg_get_path(controller, path, "tasks", &fs);
704 if (sticky >= 0 && mode != (mode_t) -1)
705 /* Both mode and sticky param are passed */
706 mode |= (sticky ? S_ISVTX : 0);
707 else if ((sticky >= 0 && mode == (mode_t) -1) ||
708 (mode != (mode_t) -1 && sticky < 0)) {
711 /* Only one param is passed, hence read the current
712 * mode from the file itself */
718 if (mode == (mode_t) -1)
719 /* No mode given, so keep the current mode and only set or clear the sticky bit */
720 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
722 /* Only mode set, leave sticky bit untouched */
723 mode = (st.st_mode & ~0777) | mode;
726 r = chmod_and_chown(fs, mode, uid, gid);
730 /* Always keep values for "cgroup.procs" in sync with "tasks" */
731 r = cg_get_path(controller, path, "cgroup.procs", &procs);
735 return chmod_and_chown(procs, mode, uid, gid);
738 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
739 _cleanup_fclose_ FILE *f = NULL;
748 if (!cg_controller_is_valid(controller, true))
751 controller = normalize_controller(controller);
753 controller = SYSTEMD_CGROUP_CONTROLLER;
756 fs = "/proc/self/cgroup";
758 fs = procfs_file_alloca(pid, "cgroup");
762 return errno == ENOENT ? -ESRCH : -errno;
764 cs = strlen(controller);
766 FOREACH_LINE(line, f, return -errno) {
774 l = strchr(line, ':');
785 FOREACH_WORD_SEPARATOR(w, k, l, ",", state) {
787 if (k == cs && memcmp(w, controller, cs) == 0) {
793 memcmp(w, "name=", 5) == 0 &&
794 memcmp(w+5, controller, cs) == 0) {
814 int cg_install_release_agent(const char *controller, const char *agent) {
815 _cleanup_free_ char *fs = NULL, *contents = NULL;
821 r = cg_get_path(controller, NULL, "release_agent", &fs);
825 r = read_one_line_file(fs, &contents);
829 sc = strstrip(contents);
831 r = write_string_file(fs, agent);
834 } else if (!streq(sc, agent))
839 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
845 r = read_one_line_file(fs, &contents);
849 sc = strstrip(contents);
850 if (streq(sc, "0")) {
851 r = write_string_file(fs, "1");
864 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
865 _cleanup_fclose_ FILE *f = NULL;
866 pid_t pid = 0, self_pid;
872 r = cg_enumerate_tasks(controller, path, &f);
874 return r == -ENOENT ? 1 : r;
878 while ((r = cg_read_pid(f, &pid)) > 0) {
880 if (ignore_self && pid == self_pid)
893 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
894 _cleanup_free_ char *controller = NULL, *path = NULL;
899 r = cg_split_spec(spec, &controller, &path);
903 return cg_is_empty(controller, path, ignore_self);
906 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
907 _cleanup_closedir_ DIR *d = NULL;
913 r = cg_is_empty(controller, path, ignore_self);
917 r = cg_enumerate_subgroups(controller, path, &d);
919 return r == -ENOENT ? 1 : r;
921 while ((r = cg_read_subgroup(d, &fn)) > 0) {
922 _cleanup_free_ char *p = NULL;
924 p = strjoin(path, "/", fn, NULL);
929 r = cg_is_empty_recursive(controller, p, ignore_self);
940 int cg_split_spec(const char *spec, char **controller, char **path) {
942 char *t = NULL, *u = NULL;
943 _cleanup_free_ char *v = NULL;
948 if (!path_is_safe(spec))
956 path_kill_slashes(t);
966 e = strchr(spec, ':');
968 if (!cg_controller_is_valid(spec, true))
972 t = strdup(normalize_controller(spec));
985 v = strndup(spec, e-spec);
988 t = strdup(normalize_controller(v));
991 if (!cg_controller_is_valid(t, true)) {
1001 if (!path_is_safe(u) ||
1002 !path_is_absolute(u)) {
1008 path_kill_slashes(u);
1023 int cg_join_spec(const char *controller, const char *path, char **spec) {
1029 controller = "systemd";
1031 if (!cg_controller_is_valid(controller, true))
1034 controller = normalize_controller(controller);
1037 if (!path_is_absolute(path))
1040 s = strjoin(controller, ":", path, NULL);
1044 path_kill_slashes(s + strlen(controller) + 1);
1050 int cg_mangle_path(const char *path, char **result) {
1051 _cleanup_free_ char *c = NULL, *p = NULL;
1058 /* First check if it already is a filesystem path */
1059 if (path_startswith(path, "/sys/fs/cgroup")) {
1065 path_kill_slashes(t);
1070 /* Otherwise treat it as cg spec */
1071 r = cg_split_spec(path, &c, &p);
1075 return cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1078 int cg_get_system_path(char **path) {
1084 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1086 p = strdup("/system");
1091 if (endswith(p, "/system"))
1096 q = strappend(p, "/system");
1107 int cg_get_root_path(char **path) {
1113 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &root);
1117 e = endswith(root, "/system");
1127 int cg_get_user_path(char **path) {
1128 _cleanup_free_ char *root = NULL;
1133 /* Figure out the place to put user cgroups below. We use the
1134 * same as PID 1 has but with the "/system" suffix replaced by
1137 if (cg_get_root_path(&root) < 0 || streq(root, "/"))
1138 p = strdup("/user");
1140 p = strappend(root, "/user");
1149 int cg_get_machine_path(char **path) {
1150 _cleanup_free_ char *root = NULL;
1155 if (cg_get_root_path(&root) < 0 || streq(root, "/"))
1156 p = strdup("/machine");
1158 p = strappend(root, "/machine");
1167 char **cg_shorten_controllers(char **controllers) {
1173 for (f = controllers, t = controllers; *f; f++) {
1177 p = normalize_controller(*f);
1179 if (streq(p, "systemd")) {
1184 if (!cg_controller_is_valid(p, true)) {
1185 log_warning("Controller %s is not valid, removing from controllers list.", p);
1190 r = check_hierarchy(p);
1192 log_debug("Controller %s is not available, removing from controllers list.", p);
1201 return strv_uniq(controllers);
1204 int cg_pid_get_path_shifted(pid_t pid, char **root, char **cgroup) {
1205 _cleanup_free_ char *cg_root = NULL;
1206 char *cg_process, *p;
1209 r = cg_get_root_path(&cg_root);
1213 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1217 p = path_startswith(cg_process, cg_root);
1236 cg_process[p-cg_process] = 0;
1244 int cg_path_decode_unit(const char *cgroup, char **unit){
1245 char *p, *e, *c, *s, *k;
1250 e = strchrnul(cgroup, '/');
1251 c = strndupa(cgroup, e - cgroup);
1254 /* Could this be a valid unit name? */
1255 if (!unit_name_is_valid(c, true))
1258 if (!unit_name_is_template(c))
1264 e += strspn(e, "/");
1266 p = strchrnul(e, '/');
1267 k = strndupa(e, p - e);
1270 if (!unit_name_is_valid(k, false))
1283 int cg_path_get_unit(const char *path, char **unit) {
1289 e = path_startswith(path, "/system/");
1293 return cg_path_decode_unit(e, unit);
1296 int cg_pid_get_unit(pid_t pid, char **unit) {
1297 _cleanup_free_ char *cgroup = NULL;
1302 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1306 return cg_path_get_unit(cgroup, unit);
1309 static const char *skip_label(const char *e) {
1316 e += strspn(e, "/");
1320 int cg_path_get_user_unit(const char *path, char **unit) {
1326 /* We always have to parse the path from the beginning as unit
1327 * cgroups might have arbitrary child cgroups and we shouldn't get
1328 * confused by those */
1330 e = path_startswith(path, "/user/");
1334 /* Skip the user name */
1339 /* Skip the session ID */
1344 /* Skip the systemd cgroup */
1349 return cg_path_decode_unit(e, unit);
1352 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1353 _cleanup_free_ char *cgroup = NULL;
1358 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1362 return cg_path_get_user_unit(cgroup, unit);
1365 int cg_path_get_machine_name(const char *path, char **machine) {
1372 e = path_startswith(path, "/machine/");
1376 n = strchrnul(e, '/');
1380 s = strndupa(e, n - e);
1382 r = strdup(cg_unescape(s));
1390 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1391 _cleanup_free_ char *cgroup = NULL;
1396 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1400 return cg_path_get_machine_name(cgroup, machine);
1403 int cg_path_get_session(const char *path, char **session) {
1410 e = path_startswith(path, "/user/");
1414 /* Skip the user name */
1419 n = strchrnul(e, '/');
1422 if (memcmp(n - 8, ".session", 8) != 0)
1425 s = strndup(e, n - e - 8);
1433 int cg_pid_get_session(pid_t pid, char **session) {
1434 _cleanup_free_ char *cgroup = NULL;
1439 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1443 return cg_path_get_session(cgroup, session);
1446 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1453 e = path_startswith(path, "/user/");
1457 n = strchrnul(e, '/');
1460 if (memcmp(n - 5, ".user", 5) != 0)
1463 s = strndupa(e, n - e - 5);
1467 return parse_uid(s, uid);
1470 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1471 _cleanup_free_ char *cgroup = NULL;
1476 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1480 return cg_path_get_owner_uid(cgroup, uid);
1483 int cg_controller_from_attr(const char *attr, char **controller) {
1490 if (!filename_is_safe(attr))
1493 dot = strchr(attr, '.');
1499 c = strndup(attr, dot - attr);
1503 if (!cg_controller_is_valid(c, false)) {
1512 char *cg_escape(const char *p) {
1513 bool need_prefix = false;
1515 /* This implements very minimal escaping for names to be used
1516 * as file names in the cgroup tree: any name which might
1517 * conflict with a kernel name or is prefixed with '_' is
1518 * prefixed with a '_'. That way, when reading cgroup names it
1519 * is sufficient to remove a single prefixing underscore if
1522 /* The return value of this function (unlike cg_unescape())
1525 if (p[0] == '_' || streq(p, "notify_on_release") || streq(p, "release_agent") || streq(p, "tasks"))
1530 dot = strrchr(p, '.');
1533 if (dot - p == 6 && memcmp(p, "cgroup", 6) == 0)
1538 n = strndupa(p, dot - p);
1540 if (check_hierarchy(n) >= 0)
1547 return strappend("_", p);
1552 char *cg_unescape(const char *p) {
1555 /* The return value of this function (unlike cg_escape())
1556 * doesn't need free()! */
1564 #define CONTROLLER_VALID \
1566 "abcdefghijklmnopqrstuvwxyz" \
1567 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
1570 bool cg_controller_is_valid(const char *p, bool allow_named) {
1577 s = startswith(p, "name=");
1582 if (*p == 0 || *p == '_')
1585 for (t = p; *t; t++)
1586 if (!strchr(CONTROLLER_VALID, *t))
1589 if (t - p > FILENAME_MAX)