1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
39 #include "unit-name.h"
/* Locates the "cgroup.procs" attribute file of the cgroup named by
 * controller and path. NOTE(review): presumably that file is then
 * opened and handed back through _f, for use with cg_read_pid() --
 * confirm. */
42 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
43 _cleanup_free_ char *fs = NULL;
/* Translate controller + path into the file system location of the
 * group's "cgroup.procs" file */
49 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Same as cg_enumerate_processes(), but works on the group's "tasks"
 * attribute file instead of "cgroup.procs". NOTE(review): presumably
 * the opened stream is returned through _f -- confirm. */
61 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
62 _cleanup_free_ char *fs = NULL;
/* Translate controller + path into the file system location of the
 * group's "tasks" file */
68 r = cg_get_path(controller, path, "tasks", &fs);
/* Reads the next PID from stream f. Callers in this file loop for as
 * long as the return value is positive. When no number can be parsed
 * the visible error path returns -errno if errno is set, and -EIO
 * otherwise. NOTE(review): presumably end-of-file is told apart from a
 * real read error before that return, and the parsed value is stored
 * in *_pid -- confirm. */
80 int cg_read_pid(FILE *f, pid_t *_pid) {
83 /* Note that the cgroup.procs might contain duplicates! See
84 * cgroups.txt for details. */
/* Entries are parsed as unsigned long decimals */
90 if (fscanf(f, "%lu", &ul) != 1) {
/* Never report success (0) from this path, even if errno is unset */
95 return errno ? -errno : -EIO;
/* Locates the directory of the cgroup named by controller and path so
 * that its direct children can be listed. NOTE(review): presumably the
 * directory is opened and returned through _d, for use with
 * cg_read_subgroup() -- confirm. */
105 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
106 _cleanup_free_ char *fs = NULL;
112 /* This is not recursive! */
/* No suffix: we want the group's directory itself, not an attribute
 * file inside it */
114 r = cg_get_path(controller, path, NULL, &fs);
/* Fetches the next child group from the directory stream d. Entries
 * are tested for not being directories and for being "." or ".."
 * (presumably such entries are skipped -- confirm); the name of a
 * suitable entry is duplicated with strdup(). Callers in this file
 * loop for as long as the return value is positive. */
126 int cg_read_subgroup(DIR *d, char **fn) {
/* NOTE(review): the third macro argument looks like the statement
 * that is run when reading the directory fails -- confirm against the
 * definition of FOREACH_DIRENT */
132 FOREACH_DIRENT(de, d, return -errno) {
/* Only directories can be child groups */
135 if (de->d_type != DT_DIR)
138 if (streq(de->d_name, ".") ||
139 streq(de->d_name, ".."))
/* Heap copy of the entry name (presumably handed out via fn, with the
 * caller responsible for freeing it -- confirm) */
142 b = strdup(de->d_name);
/* Removes the directory of the cgroup named by controller and path.
 * The group's "tasks" file is checked with file_is_priv_sticky() and,
 * per the comment below, a group with the sticky bit set is left in
 * place. NOTE(review): presumably this check only happens when
 * honour_sticky is true -- confirm. A failure with errno == ENOENT is
 * not treated like other failures by the final check. */
153 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
154 _cleanup_free_ char *p = NULL;
/* No suffix: resolve the group's directory itself */
157 r = cg_get_path(controller, path, NULL, &p);
164 /* If the sticky bit is set don't remove the directory */
/* The sticky marker is looked up on the "tasks" file inside the
 * group's directory, not on the directory */
166 tasks = strappend(p, "/tasks");
170 r = file_is_priv_sticky(tasks);
/* A group that is already gone (ENOENT) is excluded from the error
 * case here */
178 if (r < 0 && errno != ENOENT)
/* Sends signal sig to the processes listed in the "cgroup.procs" file
 * of the cgroup named by controller and path (non-recursive). The set
 * s holds the PIDs that have been signalled already, so that each
 * process is signalled only once although the list is read repeatedly.
 * With ignore_self the PID compared against my_pid gets special
 * treatment (presumably it is skipped -- confirm). A kill() failure
 * with errno == ESRCH is excluded from the error bookkeeping in ret.
 * NOTE(review): the use of sigcont is not visible in this excerpt;
 * presumably SIGCONT is sent after sig when it is set -- confirm.
 * NOTE(review): presumably a private set is only allocated when the
 * caller passes s == NULL -- confirm. */
184 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
185 _cleanup_set_free_ Set *allocated_set = NULL;
192 /* This goes through the tasks list and kills them all. This
193 * is repeated until no further processes are added to the
194 * tasks list, to properly handle forking processes */
/* Both s and allocated_set point to the new set, so the cleanup
 * attribute on allocated_set covers it while s is used below */
197 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
205 _cleanup_fclose_ FILE *f = NULL;
209 r = cg_enumerate_processes(controller, path, &f);
/* -ENOENT (the group does not exist) is excluded from the errors
 * recorded in ret */
211 if (ret >= 0 && r != -ENOENT)
217 while ((r = cg_read_pid(f, &pid)) > 0) {
219 if (ignore_self && pid == my_pid)
/* PIDs are stored in the set as pointer-sized values; a hit means
 * this process was handled in an earlier pass */
222 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
225 /* If we haven't killed this process yet, kill
227 if (kill(pid, sig) < 0) {
228 if (ret >= 0 && errno != ESRCH)
230 } else if (ret == 0) {
240 r = set_put(s, LONG_TO_PTR(pid));
256 /* To avoid racing against processes which fork
257 * quicker than we can kill them we repeat this until
258 * no new pids need to be killed. */
/* Recursive variant of cg_kill(): signals the processes of the cgroup
 * named by controller and path, then descends into each child group.
 * The PID set s is passed down unchanged so that one set is shared by
 * the whole traversal. Finally cg_rmdir() is called on the group with
 * honour_sticky enabled; -ENOENT and -EBUSY from that call are excluded
 * from the errors recorded in ret. NOTE(review): presumably the removal
 * only happens when rem is true, and a private set is only allocated
 * when s is NULL -- confirm. */
265 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
266 _cleanup_set_free_ Set *allocated_set = NULL;
267 _cleanup_closedir_ DIR *d = NULL;
275 s = allocated_set = set_new(trivial_hash_func, trivial_compare_func);
/* First deal with the processes directly in this group */
280 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
282 r = cg_enumerate_subgroups(controller, path, &d);
/* A group that does not exist is not recorded as an error */
284 if (ret >= 0 && r != -ENOENT)
290 while ((r = cg_read_subgroup(d, &fn)) > 0) {
291 _cleanup_free_ char *p = NULL;
/* Path of the child group, relative to the same controller */
293 p = strjoin(path, "/", fn, NULL);
298 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
/* Only the first non-zero child result is considered, and only as
 * long as no error has been recorded yet */
299 if (ret >= 0 && r != 0)
/* Error from cg_read_subgroup() ending the loop above */
303 if (ret >= 0 && r < 0)
307 r = cg_rmdir(controller, path, true);
/* Not being able to remove the group because it is already gone or
 * still busy is not recorded as an error */
308 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Kills everything in the cgroup named by controller and path,
 * including its child groups, escalating from SIGTERM to SIGKILL as
 * described in the comment below. The loop runs at most 15 rounds with
 * a 200ms pause between them. NOTE(review): how sig is chosen per
 * round and when the loop ends early is not visible in this excerpt --
 * confirm against the full body. */
315 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
320 /* This safely kills all processes; first it sends a SIGTERM,
321 * then checks 8 times after 200ms whether the group is now
322 * empty, then kills everything that is left with SIGKILL and
323 * finally checks 5 times after 200ms each whether the group
324 * is finally empty. */
326 for (i = 0; i < 15; i++) {
/* sigcont and ignore_self are both enabled; no PID set is passed in
 * (s is NULL), so none is shared between rounds */
336 r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL);
/* Give the processes some time to react before the next round */
340 usleep(200 * USEC_PER_MSEC);
/* Moves the tasks listed in the "tasks" file of the source cgroup
 * (cfrom, pfrom) into the destination cgroup (cto, pto) by calling
 * cg_attach() for each PID read (non-recursive). A set of the PIDs
 * handled so far is kept so that each one is attached only once.
 * -ENOENT from opening the source list and -ESRCH from cg_attach() are
 * excluded from the errors recorded in ret. NOTE(review): presumably
 * the list is re-read until a pass finds no new PIDs, like cg_kill()
 * does -- confirm. */
346 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
348 _cleanup_set_free_ Set *s = NULL;
357 s = set_new(trivial_hash_func, trivial_compare_func);
364 _cleanup_fclose_ FILE *f = NULL;
/* Note: this enumerates "tasks", not "cgroup.procs" */
368 r = cg_enumerate_tasks(cfrom, pfrom, &f);
370 if (ret >= 0 && r != -ENOENT)
376 while ((r = cg_read_pid(f, &pid)) > 0) {
378 /* This might do weird stuff if we aren't a
379 * single-threaded program. However, we
380 * luckily know we are not */
381 if (ignore_self && pid == my_pid)
/* A hit means this PID was already handled in an earlier pass */
384 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
387 r = cg_attach(cto, pto, pid);
/* -ESRCH is not recorded as an error (presumably the task exited in
 * the meantime -- confirm) */
389 if (ret >= 0 && r != -ESRCH)
396 r = set_put(s, LONG_TO_PTR(pid));
/* Recursive variant of cg_migrate(): moves the tasks of the source
 * cgroup (cfrom, pfrom) and of all its child groups into the single
 * destination cgroup (cto, pto). The child hierarchy is not recreated
 * below the destination: every recursion step passes the same pto.
 * Finally cg_rmdir() is called on the source group with honour_sticky
 * enabled; -ENOENT and -EBUSY from that call are excluded from the
 * errors recorded in ret. NOTE(review): presumably the removal only
 * happens when rem is true -- confirm. */
416 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
417 _cleanup_closedir_ DIR *d = NULL;
/* First move the tasks directly in the source group */
426 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
428 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
/* A source group that does not exist is not recorded as an error */
430 if (ret >= 0 && r != -ENOENT)
436 while ((r = cg_read_subgroup(d, &fn)) > 0) {
437 _cleanup_free_ char *p = NULL;
/* Path of the child group below the source group */
439 p = strjoin(pfrom, "/", fn, NULL);
/* Source is the child, destination stays the same group */
448 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
449 if (r != 0 && ret >= 0)
/* Error from cg_read_subgroup() ending the loop above */
453 if (r < 0 && ret >= 0)
457 r = cg_rmdir(cfrom, pfrom, true);
458 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Maps a controller name as given by callers to the name used when
 * building paths with join_path(). The systemd controller
 * (SYSTEMD_CGROUP_CONTROLLER) is special-cased; for names starting with
 * "name=" that prefix is dropped. The result for the "name=" case
 * points into the input string, no copy is made. NOTE(review): the
 * value returned for the systemd controller and for all other names is
 * not visible in this excerpt -- confirm. */
465 static const char *normalize_controller(const char *controller) {
469 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
471 else if (startswith(controller, "name="))
/* 5 == strlen("name=") */
472 return controller + 5;
/* Builds a newly allocated path string from controller, path and
 * suffix. The visible variants are:
 *   /sys/fs/cgroup/ + controller + / + path + / + suffix
 *   /sys/fs/cgroup/ + controller + / + path
 *   /sys/fs/cgroup/ + controller + / + suffix
 *   /sys/fs/cgroup/ + controller
 *   path + / + suffix (no /sys/fs/cgroup prefix)
 * NOTE(review): presumably the variant is selected by which of the
 * three arguments are set, with the last one used when no controller
 * is given, and the result is returned through fs -- confirm. */
477 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
482 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
484 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
486 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
488 t = strappend("/sys/fs/cgroup/", controller);
491 t = strjoin(path, "/", suffix, NULL);
/* The joins above can produce doubled slashes, e.g. when path starts
 * with "/"; presumably this call collapses them in place -- confirm */
501 path_kill_slashes(t);
/* Translates controller, path and suffix into a file system path by
 * way of normalize_controller() and join_path(); the result is stored
 * via fs. Before doing so it is checked whether /sys/fs/cgroup is a
 * mount point; if that check does not succeed its error is returned,
 * or -ENOENT if it reported no error. A NULL controller is passed on
 * to join_path() as NULL. */
507 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
/* Per-thread flag remembering the outcome of the mount point check */
509 static __thread bool good = false;
513 if (_unlikely_(!good)) {
516 r = path_is_mount_point("/sys/fs/cgroup", false);
/* r < 0: the check itself failed; otherwise report -ENOENT
 * (presumably reached when /sys/fs/cgroup is not mounted -- confirm) */
518 return r < 0 ? r : -ENOENT;
520 /* Cache this to save a few stat()s */
/* normalize_controller() is only called for a non-NULL controller */
524 p = controller ? normalize_controller(controller) : NULL;
526 return join_path(p, path, suffix, fs);
/* Tests whether /sys/fs/cgroup/<p> exists, where p is an already
 * normalized controller name. The path is assembled in a stack buffer.
 * NOTE(review): the return values are not visible in this excerpt;
 * callers treat the result as an int status -- confirm. */
529 static int check_hierarchy(const char *p) {
534 /* Check if this controller actually really exists */
/* sizeof() of the literal includes its terminating NUL, so the buffer
 * has room for the prefix, p and the final NUL */
535 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
/* stpcpy() returns a pointer to the end of the copied prefix, which
 * is where p gets appended */
536 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
/* Existence check only, no permission bits are tested */
537 if (access(cc, F_OK) < 0)
/* Like cg_get_path(), but additionally verifies with check_hierarchy()
 * that the controller's hierarchy exists before the path is built. An
 * empty or NULL controller is special-cased up front. NOTE(review):
 * presumably an empty controller is rejected with an error and a
 * failing hierarchy check is returned to the caller -- confirm. */
543 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
549 if (isempty(controller))
552 /* Normalize the controller syntax */
553 p = normalize_controller(controller);
555 /* Check if this controller actually really exists */
556 r = check_hierarchy(p);
/* Build the path from the normalized controller name */
560 return join_path(p, path, suffix, fs);
/* nftw() callback used by cg_trim(). It tests whether the entry is a
 * directory visited after its children (FTW_DP) and whether it lies
 * below the walk's starting point (level >= 1), and it looks up the
 * sticky state of the directory's "tasks" file. NOTE(review):
 * presumably entries failing the first two tests are skipped and
 * non-sticky directories are removed -- confirm. */
563 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
/* FTW_DP: a directory all of whose subdirectories were visited */
567 if (typeflag != FTW_DP)
/* Level 0 is the directory the walk was started on */
570 if (ftwbuf->level < 1)
573 p = strappend(path, "/tasks");
/* Only a positive result counts as sticky; errors from the check count
 * as not sticky */
579 is_sticky = file_is_priv_sticky(p) > 0;
/* Walks the directory tree of the cgroup named by controller and path
 * with trim_cb() as the per-entry callback, and afterwards deals with
 * the top-level directory itself: its "tasks" file is checked for the
 * sticky state and rmdir() is called on it. NOTE(review): presumably
 * the top-level directory is only removed when delete_root is true and
 * the group is not sticky -- confirm. */
589 int cg_trim(const char *controller, const char *path, bool delete_root) {
590 _cleanup_free_ char *fs = NULL;
595 r = cg_get_path(controller, path, NULL, &fs);
/* FTW_DEPTH: report directories after their contents; FTW_MOUNT: stay
 * on one file system; FTW_PHYS: do not follow symlinks. At most 64
 * directories are held open at once. */
600 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0)
/* Never turn a failed walk into success, even if errno is unset */
601 r = errno ? -errno : -EIO;
607 p = strappend(fs, "/tasks");
/* Errors from the check count as not sticky */
611 is_sticky = file_is_priv_sticky(p) > 0;
/* A directory that is already gone is fine; an earlier error in r is
 * not overwritten */
615 if (rmdir(fs) < 0 && errno != ENOENT && r == 0)
/* Deletes the cgroup named by controller and path by recursively
 * migrating everything in it to its parent group within the same
 * controller, with removal of the emptied groups requested (rem is
 * true) and without skipping the calling process (ignore_self is
 * false). A group that does not exist (-ENOENT) counts as success. */
622 int cg_delete(const char *controller, const char *path) {
623 _cleanup_free_ char *parent = NULL;
/* The parent is derived from the path string alone */
628 r = path_get_parent(path, &parent);
632 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
/* Nothing to delete is not an error */
633 return r == -ENOENT ? 0 : r;
/* Moves the task pid into the cgroup named by controller and path by
 * writing the PID, followed by a newline, to the group's "tasks" file.
 * The path is obtained with cg_get_path_and_check(), so the
 * controller's hierarchy is verified first. Returns the result of
 * write_string_file(). NOTE(review): callers in this file compare the
 * result against -ESRCH; any special handling of pid == 0 is not
 * visible in this excerpt -- confirm. */
636 int cg_attach(const char *controller, const char *path, pid_t pid) {
637 _cleanup_free_ char *fs = NULL;
/* Sized from DECIMAL_STR_MAX(pid_t) plus 2 extra bytes; the format
 * below appends a newline to the digits */
638 char c[DECIMAL_STR_MAX(pid_t) + 2];
644 r = cg_get_path_and_check(controller, path, "tasks", &fs);
651 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
653 return write_string_file(fs, c);
/* Changes mode and ownership of the directory of the cgroup named by
 * controller and path, using chmod_and_chown(). (mode_t) -1 is the
 * "not set" marker for mode. NOTE(review): the remaining parameters
 * (path, mode, uid, gid) and the adjustment applied to a set mode are
 * not visible in this excerpt -- confirm. */
656 int cg_set_group_access(
657 const char *controller,
663 _cleanup_free_ char *fs = NULL;
668 if (mode != (mode_t) -1)
/* No suffix: operate on the group's directory itself */
671 r = cg_get_path(controller, path, NULL, &fs);
675 return chmod_and_chown(fs, mode, uid, gid);
/* Changes mode, ownership and optionally the sticky bit of the "tasks"
 * file of the cgroup named by controller and path, and then applies
 * the same mode and ownership to the group's "cgroup.procs" file.
 * (mode_t) -1, (uid_t) -1, (gid_t) -1 and a negative sticky are the
 * "not set" markers. If only one of mode and sticky is set, the other
 * part is taken from the file's current mode. NOTE(review): presumably
 * the function returns early with success when nothing is set, and
 * st is filled by a stat call on the "tasks" file -- confirm. */
678 int cg_set_task_access(
679 const char *controller,
686 _cleanup_free_ char *fs = NULL, *procs = NULL;
/* Nothing requested at all */
691 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
694 if (mode != (mode_t) -1)
697 r = cg_get_path(controller, path, "tasks", &fs);
701 if (sticky >= 0 && mode != (mode_t) -1)
702 /* Both mode and sticky param are passed */
703 mode |= (sticky ? S_ISVTX : 0);
704 else if ((sticky >= 0 && mode == (mode_t) -1) ||
705 (mode != (mode_t) -1 && sticky < 0)) {
708 /* Only one param is passed, hence read the current
709 * mode from the file itself */
715 if (mode == (mode_t) -1)
716 /* No mode set, we just shall set the sticky bit */
717 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
719 /* Only mode set, leave sticky bit untouched */
720 mode = (st.st_mode & ~0777) | mode;
723 r = chmod_and_chown(fs, mode, uid, gid);
727 /* Always keep values for "cgroup.procs" in sync with "tasks" */
728 r = cg_get_path(controller, path, "cgroup.procs", &procs);
732 return chmod_and_chown(procs, mode, uid, gid);
/* Determines the cgroup path of process pid within the hierarchy of
 * controller by scanning /proc/<pid>/cgroup line by line for an entry
 * whose text matches the controller name; the matching path is
 * duplicated with strdup(). If the proc file cannot be opened because
 * it does not exist, -ESRCH is returned instead of -ENOENT.
 * NOTE(review): presumably a NULL controller defaults to
 * SYSTEMD_CGROUP_CONTROLLER, pid == 0 is special-cased, and the result
 * is returned through path -- confirm. */
735 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
/* Room for "/proc/", the decimal PID and "/cgroup" including the
 * terminating NUL */
736 char fs[sizeof("/proc/") - 1 + DECIMAL_STR_MAX(pid_t) + sizeof("/cgroup")];
737 _cleanup_fclose_ FILE *f = NULL;
745 controller = SYSTEMD_CGROUP_CONTROLLER;
750 sprintf(fs, "/proc/%lu/cgroup", (unsigned long) pid);
/* A missing proc entry is reported as -ESRCH */
753 return errno == ENOENT ? -ESRCH : -errno;
/* Computed once, used for every line compared below */
755 cs = strlen(controller);
757 FOREACH_LINE(line, f, return -errno) {
/* Fields in this file are separated by ':' */
762 l = strchr(line, ':');
767 if (!strneq(l, controller, cs))
/* Skip the controller name plus one separator character; what follows
 * is copied as the cgroup path */
773 p = strdup(l + cs + 1);
/* Sets up release notification for the hierarchy of controller: the
 * "release_agent" attribute at the hierarchy root is read and, where
 * needed, written with agent; then "notify_on_release" at the root is
 * read and changed from "0" to "1". A "release_agent" value that is
 * set but differs from agent is handled separately. NOTE(review):
 * presumably agent is only written when the attribute is currently
 * empty, a differing value is reported as an error, and fs/contents
 * are released before being reused for the second attribute --
 * confirm. */
784 int cg_install_release_agent(const char *controller, const char *agent) {
785 _cleanup_free_ char *fs = NULL, *contents = NULL;
/* NULL path: the attribute of the hierarchy's root group */
791 r = cg_get_path(controller, NULL, "release_agent", &fs);
795 r = read_one_line_file(fs, &contents);
/* Compare without surrounding whitespace */
799 sc = strstrip(contents);
801 r = write_string_file(fs, agent);
804 } else if (!streq(sc, agent))
809 r = cg_get_path(controller, NULL, "notify_on_release", &fs);
815 r = read_one_line_file(fs, &contents);
819 sc = strstrip(contents);
/* Only write if notification is currently switched off */
820 if (streq(sc, "0")) {
821 r = write_string_file(fs, "1");
/* Checks whether the cgroup named by controller and path has no tasks,
 * based on its "tasks" file (non-recursive). A group that does not
 * exist (-ENOENT) is reported as empty (1); other errors from opening
 * the list are returned as they are. With ignore_self the PID equal to
 * self_pid gets special treatment (presumably it is not counted --
 * confirm). NOTE(review): presumably the result is 0 as soon as any
 * other PID is read, and 1 if none is -- confirm. */
834 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
835 _cleanup_fclose_ FILE *f = NULL;
836 pid_t pid = 0, self_pid;
842 r = cg_enumerate_tasks(controller, path, &f);
/* No such group: treat as empty */
844 return r == -ENOENT ? 1 : r;
848 while ((r = cg_read_pid(f, &pid)) > 0) {
850 if (ignore_self && pid == self_pid)
/* Convenience wrapper around cg_is_empty() that takes the group as a
 * single spec string, which is split into controller and path with
 * cg_split_spec() first. Returns whatever cg_is_empty() returns.
 * NOTE(review): presumably a failure to split the spec is returned to
 * the caller -- confirm. */
863 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
864 _cleanup_free_ char *controller = NULL, *path = NULL;
869 r = cg_split_spec(spec, &controller, &path);
873 return cg_is_empty(controller, path, ignore_self);
/* Recursive variant of cg_is_empty(): checks the cgroup named by
 * controller and path and then each of its child groups. If the
 * group's directory does not exist (-ENOENT) when listing children,
 * the result is 1 (empty). NOTE(review): presumably the traversal
 * stops at the first group that is not empty or yields an error --
 * confirm. */
876 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
877 _cleanup_closedir_ DIR *d = NULL;
/* Tasks directly in this group */
883 r = cg_is_empty(controller, path, ignore_self);
887 r = cg_enumerate_subgroups(controller, path, &d);
/* No such group: treat as empty */
889 return r == -ENOENT ? 1 : r;
891 while ((r = cg_read_subgroup(d, &fn)) > 0) {
892 _cleanup_free_ char *p = NULL;
/* Path of the child group, relative to the same controller */
894 p = strjoin(path, "/", fn, NULL);
899 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup spec string into its controller and path parts.
 * The visible checks are: path_is_safe() on the whole spec,
 * filename_is_safe() on the whole spec, a search for ':' as the
 * separator, filename_is_safe() on the part before the ':' and
 * path_is_safe() on u. NOTE(review): which check applies to which
 * form of spec (path only, controller only, controller:path), and how
 * the parts are returned through controller and path, is not visible
 * in this excerpt -- confirm. */
910 int cg_split_spec(const char *spec, char **controller, char **path) {
/* t and u are working copies (presumably controller and path --
 * confirm) */
912 char *t = NULL, *u = NULL;
917 if (!path_is_safe(spec))
/* Position of the first ':' in the spec, if any */
934 e = strchr(spec, ':');
936 if (!filename_is_safe(spec))
/* Everything before the ':' */
953 t = strndup(spec, e-spec);
956 if (!filename_is_safe(t)) {
966 if (!path_is_safe(u)) {
/* Builds a spec string of the form <controller>:<path>. The controller
 * name is passed through normalize_controller() before joining. A
 * controller that is empty or contains ':' or '/' is tested for, as is
 * a path that is not absolute. NOTE(review): presumably a NULL
 * controller defaults to "systemd", the tested-for cases are rejected
 * with an error, and the new string is returned through spec --
 * confirm. */
985 int cg_join_spec(const char *controller, const char *path, char **spec) {
991 controller = "systemd";
/* ':' would clash with the separator used below, '/' with paths */
992 else if (controller[0] == 0 ||
993 strchr(controller, ':') ||
994 strchr(controller, '/'))
997 if (!path_is_absolute(path))
1000 controller = normalize_controller(controller);
1002 s = strjoin(controller, ":", path, NULL);
/* Turns path, which may either be a file system path below
 * /sys/fs/cgroup or a cgroup spec, into a file system path stored via
 * result. For a spec, a missing controller defaults to
 * SYSTEMD_CGROUP_CONTROLLER and a missing path to "/". NOTE(review):
 * how a path that already is a file system path is returned is not
 * visible in this excerpt -- confirm. */
1010 int cg_mangle_path(const char *path, char **result) {
1017 /* First check if it already is a filesystem path */
1018 if (path_startswith(path, "/sys/fs/cgroup")) {
1028 /* Otherwise treat it as cg spec */
1029 r = cg_split_spec(path, &c, &p);
/* Fill in defaults for parts the spec did not contain */
1033 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Determines the cgroup path ending in "/system", based on the path of
 * PID 1 in the systemd hierarchy. NOTE(review): presumably "/system"
 * is used as a fallback when PID 1's path cannot be read, PID 1's path
 * is used as is when it already ends in "/system", and "/system" is
 * appended otherwise; the result is presumably returned through path
 * -- confirm. */
1040 int cg_get_system_path(char **path) {
1046 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1048 p = strdup("/system");
1053 if (endswith(p, "/system"))
1058 q = strappend(p, "/system");
/* Determines the root cgroup path, based on the path of PID 1 in the
 * systemd hierarchy, with a trailing "/system" looked for in it.
 * NOTE(review): presumably that suffix is cut off and the remainder is
 * returned through path -- confirm. */
1069 int cg_get_root_path(char **path) {
1075 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &root);
/* e points at the suffix within root if it is present */
1079 e = endswith(root, "/system");
/* Determines the cgroup path below which user groups live: "/user" if
 * the root path cannot be determined or is "/", and the root path with
 * "/user" appended otherwise. NOTE(review): presumably the result is
 * returned through path -- confirm. */
1089 int cg_get_user_path(char **path) {
1090 _cleanup_free_ char *root = NULL;
1095 /* Figure out the place to put user cgroups below. We use the
1096 * same as PID 1 has but with the "/system" suffix replaced by
1099 if (cg_get_root_path(&root) < 0 || streq(root, "/"))
1100 p = strdup("/user");
1102 p = strappend(root, "/user");
/* Determines the cgroup path below which machine groups live:
 * "/machine" if the root path cannot be determined or is "/", and the
 * root path with "/machine" appended otherwise. NOTE(review):
 * presumably the result is returned through path -- confirm. */
1111 int cg_get_machine_path(char **path) {
1112 _cleanup_free_ char *root = NULL;
/* Avoid producing a doubled slash when the root is just "/" */
1117 if (cg_get_root_path(&root) < 0 || streq(root, "/"))
1118 p = strdup("/machine");
1120 p = strappend(root, "/machine");
/* Filters the NULL-terminated string array controllers: each entry is
 * normalized and checked with check_hierarchy(), entries named
 * "systemd" are special-cased, and a debug message is logged for
 * controllers that are not available. The array is finally passed
 * through strv_uniq() and returned. NOTE(review): the two cursors f
 * and t suggest the array is compacted in place, with dropped entries
 * freed -- confirm. */
1129 char **cg_shorten_controllers(char **controllers) {
/* f walks all entries; t presumably marks the next slot to keep */
1135 for (f = controllers, t = controllers; *f; f++) {
1139 p = normalize_controller(*f);
/* Compared on the original entry, not the normalized name */
1141 if (streq(*f, "systemd")) {
1146 r = check_hierarchy(p);
1148 log_debug("Controller %s is not available, removing from controllers list.", *f);
1157 return strv_uniq(controllers);
/* Determines the cgroup path of process pid in the systemd hierarchy
 * and relates it to the root cgroup path from cg_get_root_path(): p is
 * the part of the process path that follows the root prefix.
 * NOTE(review): presumably the remainder is returned through cgroup
 * and the prefix through root, either of which may be NULL (callers in
 * this file pass NULL for root), and p falls back to the full path
 * when the prefix does not match -- confirm. */
1160 int cg_pid_get_path_shifted(pid_t pid, char **root, char **cgroup) {
1161 _cleanup_free_ char *cg_root = NULL;
1162 char *cg_process, *p;
1165 r = cg_get_root_path(&cg_root);
1169 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1173 p = path_startswith(cg_process, cg_root);
/* Terminates cg_process at the position p points to, i.e. cuts the
 * string down to the part before p */
1192 cg_process[p-cg_process] = 0;
/* Derives a unit name from the leading components of cgroup. The
 * first path component must be a valid unit name. If it is a template
 * name, the following path component is used as the instance string
 * and merged into the template with unit_name_replace_instance().
 * NOTE(review): presumably a non-template first component is returned
 * as is, an invalid name or an empty instance is reported as an error,
 * and the result is returned through unit -- confirm. */
1200 /* non-static only for testing purposes */
1201 int cg_path_decode_unit(const char *cgroup, char **unit){
1202 char *p, *e, *c, *s, *k;
/* e marks the end of the first path component (or of the string) */
1207 e = strchrnul(cgroup, '/');
/* Stack copy of the first component */
1208 c = strndupa(cgroup, e - cgroup);
1210 /* Could this be a valid unit name? */
1211 if (!unit_name_is_valid(c, true))
1214 if (!unit_name_is_template(c))
/* Move past the separating slash(es) to the second component, whose
 * end is marked by p */
1220 e += strspn(e, "/");
1221 p = strchrnul(e, '/');
1223 /* Don't allow empty instance strings */
/* Stack copy of the second component, used as the instance */
1227 k = strndupa(e, p - e);
1229 s = unit_name_replace_instance(c, k);
/* Extracts the unit name from a cgroup path below "/system/": the
 * part following that prefix is decoded with cg_path_decode_unit().
 * NOTE(review): presumably a path outside "/system/" is reported as an
 * error -- confirm. */
1239 int cg_path_get_unit(const char *path, char **unit) {
/* e points past the prefix if path starts with it */
1245 e = path_startswith(path, "/system/");
1249 return cg_path_decode_unit(e, unit);
/* Determines the unit of process pid: looks up the process' cgroup
 * path with cg_pid_get_path_shifted() and decodes it with
 * cg_path_get_unit(), whose result is returned. */
1252 int cg_pid_get_unit(pid_t pid, char **unit) {
1253 char _cleanup_free_ *cgroup = NULL;
/* The root part of the path is not requested (NULL) */
1258 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1262 return cg_path_get_unit(cgroup, unit);
/* Helper for walking a cgroup path component by component.
 * NOTE(review): only the skipping of slashes is visible in this
 * excerpt; presumably the function first moves e to the end of the
 * current component and returns the start of the next one, or signals
 * failure when there is none -- confirm. */
1265 static const char *skip_label(const char *e) {
/* Advance over one or more consecutive slashes */
1272 e += strspn(e, "/");
/* Extracts the user unit name from a cgroup path below "/user/". Per
 * the comments below, the user name, session ID and systemd cgroup
 * components are skipped, and what follows is decoded with
 * cg_path_decode_unit(). NOTE(review): presumably each skip is done
 * with skip_label() and a path that is too short or outside "/user/"
 * is reported as an error -- confirm. */
1276 int cg_path_get_user_unit(const char *path, char **unit) {
1282 /* We always have to parse the path from the beginning as unit
1283 * cgroups might have arbitrary child cgroups and we shouldn't get
1284 * confused by those */
1286 e = path_startswith(path, "/user/");
1290 /* Skip the user name */
1295 /* Skip the session ID */
1300 /* Skip the systemd cgroup */
1305 return cg_path_decode_unit(e, unit);
/* Determines the user unit of process pid: looks up the process'
 * cgroup path with cg_pid_get_path_shifted() and decodes it with
 * cg_path_get_user_unit(), whose result is returned. */
1308 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1309 char _cleanup_free_ *cgroup = NULL;
/* The root part of the path is not requested (NULL) */
1314 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1318 return cg_path_get_user_unit(cgroup, unit);
/* Extracts the machine name from a cgroup path below "/machine/": the
 * first path component after that prefix is duplicated.
 * NOTE(review): presumably a path outside "/machine/" or an empty
 * component is reported as an error, and the copy is returned through
 * machine -- confirm. */
1321 int cg_path_get_machine_name(const char *path, char **machine) {
1328 e = path_startswith(path, "/machine/");
/* n marks the end of the first component after the prefix */
1332 n = strchrnul(e, '/');
/* Heap copy of just that component */
1336 s = strndup(e, n - e);
/* Determines the machine name of process pid: looks up the process'
 * cgroup path with cg_pid_get_path_shifted() and decodes it with
 * cg_path_get_machine_name(), whose result is returned. */
1344 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1345 char _cleanup_free_ *cgroup = NULL;
/* The root part of the path is not requested (NULL) */
1350 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1354 return cg_path_get_machine_name(cgroup, machine);
/* Extracts the session name from a cgroup path below "/user/": after
 * the user name component is skipped, the next path component is
 * duplicated. A component that is exactly "shared" is special-cased.
 * NOTE(review): presumably "shared" and paths outside "/user/" are
 * reported as errors, and the copy is returned through session --
 * confirm. */
1357 int cg_path_get_session(const char *path, char **session) {
1364 e = path_startswith(path, "/user/");
1368 /* Skip the user name */
/* n marks the end of the component following the user name */
1373 n = strchrnul(e, '/');
/* Length check first, so that only an exact match of "shared" counts
 * and not merely a prefix */
1377 if (n - e == 6 && memcmp(e, "shared", 6) == 0)
/* Heap copy of just that component */
1380 s = strndup(e, n - e);
/* Determines the session of process pid: looks up the process' cgroup
 * path with cg_pid_get_path_shifted() and decodes it with
 * cg_path_get_session(), whose result is returned. */
1388 int cg_pid_get_session(pid_t pid, char **session) {
1389 char _cleanup_free_ *cgroup = NULL;
/* The root part of the path is not requested (NULL) */
1394 r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1398 return cg_path_get_session(cgroup, session);
1401 int cg_controller_from_attr(const char *attr, char **controller) {
1408 if (!filename_is_safe(attr))
1411 dot = strchr(attr, '.');
1417 c = strndup(attr, dot - attr);
1421 if (!filename_is_safe(c)) {