1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
/* Resolves the "cgroup.procs" file of the cgroup identified by controller
 * and path with cg_get_path().
 * NOTE(review): presumably the file is then opened and the stream handed
 * back through _f -- confirm against the rest of the body. */
39 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
47 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Resolves the "tasks" file of the cgroup identified by controller and
 * path with cg_get_path().
 * NOTE(review): presumably the file is then opened and the stream handed
 * back through _f -- confirm against the rest of the body. */
61 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
69 r = cg_get_path(controller, path, "tasks", &fs);
/* Reads the next entry from f by parsing one unsigned long with fscanf().
 * When no value could be parsed, the error return below yields -errno, or
 * -EIO if errno is not set.
 * NOTE(review): presumably the parsed value is stored in *_pid and a
 * positive value is returned on success (callers loop while the result
 * is > 0) -- confirm. */
83 int cg_read_pid(FILE *f, pid_t *_pid) {
86 /* Note that the cgroup.procs might contain duplicates! See
87 * cgroups.txt for details. */
90 if (fscanf(f, "%lu", &ul) != 1) {
95 return errno ? -errno : -EIO;
/* Resolves the directory of the cgroup identified by controller and path
 * (no file suffix) with cg_get_path().
 * NOTE(review): presumably the directory is then opened and returned
 * through _d -- confirm against the rest of the body. */
105 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
113 /* This is not recursive! */
115 r = cg_get_path(controller, path, NULL, &fs);
/* Walks the directory stream d with readdir(). Each entry's d_type is
 * tested against DT_DIR and its name against "." and ".."; the entry name
 * is duplicated with strdup().
 * NOTE(review): presumably non-directories and the two dot entries are
 * skipped and the copy is returned through *fn -- confirm. */
129 int cg_read_subgroup(DIR *d, char **fn) {
135 while ((de = readdir(d))) {
138 if (de->d_type != DT_DIR)
141 if (streq(de->d_name, ".") ||
142 streq(de->d_name, ".."))
145 if (!(b = strdup(de->d_name)))
/* Removes the directory of a cgroup. The directory is resolved with
 * cg_get_path(); its "tasks" file is checked with file_is_priv_sticky().
 * The final return maps a failure to -errno, except that ENOENT is
 * treated as success (0).
 * NOTE(review): presumably the sticky check only runs when honour_sticky
 * is set, and the call producing the final r is rmdir() -- confirm. */
158 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
162 r = cg_get_path(controller, path, NULL, &p);
169 /* If the sticky bit is set don't remove the directory */
171 tasks = strappend(p, "/tasks");
177 r = file_is_priv_sticky(tasks);
189 return (r < 0 && errno != ENOENT) ? -errno : 0;
/* Sends sig with kill() to the PIDs read from the group's process list
 * (obtained with cg_enumerate_processes() and cg_read_pid()). PIDs are
 * looked up in, and added to, the Set s; a set created here is kept in
 * allocated_set and released with set_free() at the end.
 * NOTE(review): several branch bodies are missing from this view.
 * Presumably a set is only allocated when the caller passes s == NULL,
 * the own PID is skipped when ignore_self is set, PIDs already in the set
 * are skipped, and kill() failures with ESRCH as well as enumeration
 * failures with -ENOENT are not reported as errors -- confirm. */
192 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
197 Set *allocated_set = NULL;
203 /* This goes through the tasks list and kills them all. This
204 * is repeated until no further processes are added to the
205 * tasks list, to properly handle forking processes */
208 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
217 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
218 if (ret >= 0 && r != -ENOENT)
224 while ((r = cg_read_pid(f, &pid)) > 0) {
226 if (pid == my_pid && ignore_self)
229 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
232 /* If we haven't killed this process yet, kill
234 if (kill(pid, sig) < 0) {
235 if (ret >= 0 && errno != ESRCH)
237 } else if (ret == 0) {
247 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
265 /* To avoid racing against processes which fork
266 * quicker than we can kill them we repeat this until
267 * no new pids need to be killed. */
273 set_free(allocated_set);
/* Applies cg_kill() to the group at path and then to its subgroups: each
 * name returned by cg_read_subgroup() is joined to path as "path/name"
 * and handled by a recursive call that shares the same Set s. A set
 * created here is kept in allocated_set and freed at the end. The group
 * directory is passed to cg_rmdir() with honour_sticky enabled.
 * NOTE(review): presumably a set is only allocated when s == NULL, the
 * cg_rmdir() call is conditional on rem, and -ENOENT from the subgroup
 * enumeration is not reported -- confirm. */
281 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
285 Set *allocated_set = NULL;
292 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
295 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
297 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
298 if (ret >= 0 && r != -ENOENT)
304 while ((r = cg_read_subgroup(d, &fn)) > 0) {
307 r = asprintf(&p, "%s/%s", path, fn);
317 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
320 if (r != 0 && ret >= 0)
324 if (r < 0 && ret >= 0)
328 if ((r = cg_rmdir(controller, path, true)) < 0) {
340 set_free(allocated_set);
/* Calls cg_kill_recursive() (sigcont and ignore_self enabled, no shared
 * set) in a loop of at most 15 iterations, with a 200 ms usleep() in the
 * loop body. The signal schedule is described in the comment below. */
345 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
351 /* This safely kills all processes; first it sends a SIGTERM,
352 * then checks 8 times after 200ms whether the group is now
353 * empty, then kills everything that is left with SIGKILL and
354 * finally checks 5 times after 200ms each whether the group
355 * is finally empty. */
357 for (i = 0; i < 15; i++) {
/* NOTE(review): presumably the function returns here once the result is
 * <= 0 (an error, or nothing was left to signal) -- confirm. */
367 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
370 usleep(200 * USEC_PER_MSEC);
/* Moves the tasks of group `from` into group `to`: PIDs are read from the
 * task list of `from` (cg_enumerate_tasks() / cg_read_pid()) and attached
 * to `to` with cg_attach(). PIDs are looked up in, and added to, a local
 * Set s.
 * NOTE(review): branch bodies are missing from this view. Presumably the
 * own PID is skipped when ignore_self is set, PIDs already in the set are
 * skipped, and -ENOENT from the enumeration as well as -ESRCH from
 * cg_attach() are not reported as errors -- confirm. */
376 int cg_migrate(const char *controller, const char *from, const char *to, bool ignore_self) {
387 if (!(s = set_new(trivial_hash_func, trivial_compare_func)))
396 if ((r = cg_enumerate_tasks(controller, from, &f)) < 0) {
397 if (ret >= 0 && r != -ENOENT)
403 while ((r = cg_read_pid(f, &pid)) > 0) {
405 /* This might do weird stuff if we aren't a
406 * single-threaded program. However, we
407 * luckily know we are single-threaded */
408 if (pid == my_pid && ignore_self)
411 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
414 if ((r = cg_attach(controller, to, pid)) < 0) {
415 if (ret >= 0 && r != -ESRCH)
422 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
/* Applies cg_migrate() to group `from` and then to its subgroups: each
 * name returned by cg_read_subgroup() is joined to `from` as "from/name"
 * and handled by a recursive call with the same target `to`. The directory
 * of `from` is passed to cg_rmdir() with honour_sticky enabled.
 * NOTE(review): presumably the cg_rmdir() call is conditional on rem, and
 * -ENOENT from the subgroup enumeration is not reported -- confirm. */
451 int cg_migrate_recursive(const char *controller, const char *from, const char *to, bool ignore_self, bool rem) {
460 ret = cg_migrate(controller, from, to, ignore_self);
462 if ((r = cg_enumerate_subgroups(controller, from, &d)) < 0) {
463 if (ret >= 0 && r != -ENOENT)
468 while ((r = cg_read_subgroup(d, &fn)) > 0) {
471 r = asprintf(&p, "%s/%s", from, fn);
481 r = cg_migrate_recursive(controller, p, to, ignore_self, rem);
484 if (r != 0 && ret >= 0)
488 if (r < 0 && ret >= 0)
492 if ((r = cg_rmdir(controller, from, true)) < 0) {
/* Translates a controller name. SYSTEMD_CGROUP_CONTROLLER is special-cased
 * (its replacement is not visible here); a name starting with "name=" is
 * returned without that 5-character prefix. The returned pointer in that
 * case points into the caller's string.
 * NOTE(review): presumably all other names are returned unchanged --
 * confirm. */
506 static const char *normalize_controller(const char *controller) {
508 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
510 else if (startswith(controller, "name="))
511 return controller + 5;
/* Assembles a path string with join() from controller, path and suffix.
 * Four variants are rooted at "/sys/fs/cgroup/<controller>" and append
 * path and/or suffix; a fifth joins only path and suffix.
 * path_kill_slashes() is applied to the joined string.
 * NOTE(review): the conditions selecting between the join() variants are
 * not visible; presumably they depend on which of controller, path and
 * suffix are set, a call with neither controller nor path is rejected,
 * and the result is returned through *fs -- confirm. */
516 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
519 if (!(controller || path))
524 t = join("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
526 t = join("/sys/fs/cgroup/", controller, "/", path, NULL);
528 t = join("/sys/fs/cgroup/", controller, "/", suffix, NULL);
530 t = join("/sys/fs/cgroup/", controller, NULL);
533 t = join(path, "/", suffix, NULL);
541 path_kill_slashes(t);
/* Builds the file system path for controller/path/suffix with join_path().
 * A non-NULL controller is first passed through normalize_controller().
 * While the thread-local flag `good` is false, "/sys/fs/cgroup" is checked
 * with path_is_mount_point(); the failure return below yields that call's
 * negative result, or -ENOENT otherwise.
 * NOTE(review): presumably `good` is set once the check succeeds so later
 * calls in the same thread skip it -- confirm. */
547 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
549 static __thread bool good = false;
553 if (_unlikely_(!good)) {
556 r = path_is_mount_point("/sys/fs/cgroup", false);
558 return r < 0 ? r : -ENOENT;
560 /* Cache this to save a few stat()s */
564 p = controller ? normalize_controller(controller) : NULL;
565 return join_path(p, path, suffix, fs);
/* Like cg_get_path() in that it ends in join_path() on the normalized
 * controller name, but first probes "/sys/fs/cgroup/<controller>" with
 * access(F_OK).
 * NOTE(review): presumably an empty controller and a failing access()
 * both lead to an error return -- the codes are not visible; confirm. */
568 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
575 if (isempty(controller))
578 p = normalize_controller(controller);
580 /* Check if this controller actually really exists */
/* sizeof() of the literal already counts its terminating NUL, so the
 * buffer holds the prefix, the controller name and the terminator. */
581 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
582 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
583 if (access(cc, F_OK) < 0)
586 return join_path(p, path, suffix, fs);
/* nftw() callback used by cg_trim(). Tests typeflag against FTW_DP and
 * the walk depth (ftwbuf->level) against 1, then checks "<path>/tasks"
 * with file_is_priv_sticky().
 * NOTE(review): presumably entries that are not FTW_DP or are at level 0
 * are ignored, and directories whose tasks file is not sticky are
 * removed -- confirm against the rest of the body. */
589 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
593 if (typeflag != FTW_DP)
596 if (ftwbuf->level < 1)
599 p = strappend(path, "/tasks");
605 is_sticky = file_is_priv_sticky(p) > 0;
/* Walks the directory tree of the group with nftw() and trim_cb, using up
 * to 64 descriptors and the flags FTW_DEPTH, FTW_MOUNT and FTW_PHYS. A
 * failing walk sets r to -errno, or -EIO if errno is unset. The sticky
 * state of the group's own "tasks" file is checked with
 * file_is_priv_sticky(), and rmdir() is called on the group directory,
 * with ENOENT not counted as a failure.
 * NOTE(review): presumably the final sticky check and rmdir() only run
 * when delete_root is set, and rmdir() is skipped for a sticky tasks
 * file -- confirm. */
615 int cg_trim(const char *controller, const char *path, bool delete_root) {
622 r = cg_get_path(controller, path, NULL, &fs);
627 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
628 r = errno ? -errno : -EIO;
634 p = strappend(fs, "/tasks");
640 is_sticky = file_is_priv_sticky(p) > 0;
644 if (rmdir(fs) < 0 && errno != ENOENT) {
/* Determines the parent of path with parent_of_path() and calls
 * cg_migrate_recursive() from path to that parent, with ignore_self
 * disabled and rem enabled. A result of -ENOENT is returned as 0; any
 * other result is passed through. */
655 int cg_delete(const char *controller, const char *path) {
662 if ((r = parent_of_path(path, &parent)) < 0)
665 r = cg_migrate_recursive(controller, path, parent, false, true);
668 return r == -ENOENT ? 0 : r;
/* Writes pid, formatted as a decimal number followed by a newline, to the
 * group's "tasks" file with write_one_line_file(). The file path comes
 * from cg_get_path_and_check(). */
671 int cg_attach(const char *controller, const char *path, pid_t pid) {
680 r = cg_get_path_and_check(controller, path, "tasks", &fs);
/* pid_t is cast to unsigned long so that it matches the %lu conversion. */
687 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
690 r = write_one_line_file(fs, c);
/* Applies mode, uid and gid to the group's directory (resolved with
 * cg_get_path() without a suffix) through chmod_and_chown().
 * NOTE(review): (mode_t) -1 is tested as a special value; presumably it
 * means "leave the mode alone" and any other mode is adjusted in the
 * branch that is not visible -- confirm. */
696 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
703 if (mode != (mode_t) -1)
706 r = cg_get_path(controller, path, NULL, &fs);
710 r = chmod_and_chown(fs, mode, uid, gid);
/* Applies mode, ownership and the sticky bit to the group's "tasks" file
 * through chmod_and_chown(). (mode_t) -1, (uid_t) -1, (gid_t) -1 and a
 * negative sticky are the "not specified" values tested below. When only
 * one of mode and sticky is specified, the missing half is taken from the
 * file's current st_mode.
 * NOTE(review): presumably the call returns early when nothing at all is
 * specified, and st is filled by a stat call that is not visible --
 * confirm. */
716 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
723 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
726 if (mode != (mode_t) -1)
729 r = cg_get_path(controller, path, "tasks", &fs);
733 if (sticky >= 0 && mode != (mode_t) -1)
734 /* Both mode and sticky param are passed */
735 mode |= (sticky ? S_ISVTX : 0);
736 else if ((sticky >= 0 && mode == (mode_t) -1) ||
737 (mode != (mode_t) -1 && sticky < 0)) {
740 /* Only one param is passed, hence read the current
741 * mode from the file itself */
749 if (mode == (mode_t) -1)
750 /* No mode set, we just shall set the sticky bit */
751 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
753 /* Only mode set, leave sticky bit untouched */
754 mode = (st.st_mode & ~0777) | mode;
757 r = chmod_and_chown(fs, mode, uid, gid);
/* Looks up the cgroup path of process pid for the given controller in
 * "/proc/<pid>/cgroup". Lines are read with fgets(); each is searched for
 * ':' and the text at l is compared with the controller name over its
 * full length cs. The path is duplicated from l + cs + 1, i.e. from the
 * character after the name and one further character. One error return
 * maps ENOENT to -ESRCH; a failing fgets() sets r to -errno, or -EIO if
 * errno is unset.
 * NOTE(review): presumably the ENOENT mapping belongs to opening the proc
 * file, l is advanced past the ':' before the comparison, non-matching
 * lines are skipped, and the copy is returned through *path -- confirm. */
763 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
777 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
784 return errno == ENOENT ? -ESRCH : -errno;
786 cs = strlen(controller);
793 if (!(fgets(line, sizeof(line), f))) {
797 r = errno ? -errno : -EIO;
803 if (!(l = strchr(line, ':')))
807 if (strncmp(l, controller, cs) != 0)
813 if (!(p = strdup(l + cs + 1))) {
/* Sets up the release agent of a controller hierarchy. The controller's
 * "release_agent" file is read and stripped; one branch writes agent
 * followed by a newline to it, the other tests whether the current
 * contents differ from agent. Afterwards "notify_on_release" is read and
 * stripped: a value of "0" is overwritten with "1", and a value that is
 * neither "0" nor "1" takes a separate branch.
 * NOTE(review): the condition guarding the write (presumably: the file is
 * empty) and the results of the two mismatch branches are not visible --
 * confirm. */
831 int cg_install_release_agent(const char *controller, const char *agent) {
832 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
838 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
841 if ((r = read_one_line_file(fs, &contents)) < 0)
844 sc = strstrip(contents);
847 if (asprintf(&line, "%s\n", agent) < 0) {
852 if ((r = write_one_line_file(fs, line)) < 0)
855 } else if (!streq(sc, agent)) {
862 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
867 if ((r = read_one_line_file(fs, &contents)) < 0)
870 sc = strstrip(contents);
872 if (streq(sc, "0")) {
873 if ((r = write_one_line_file(fs, "1\n")) < 0)
877 } else if (!streq(sc, "1")) {
/* Reads the PIDs of the group's task list (cg_enumerate_tasks() and
 * cg_read_pid()). The error return below maps -ENOENT to 1 and passes
 * every other value through.
 * NOTE(review): presumably that return covers a failed enumeration (a
 * missing group counts as empty), the own PID is skipped when ignore_self
 * is set, and any other PID makes the result 0 -- confirm. */
891 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
892 pid_t pid = 0, self_pid;
899 r = cg_enumerate_tasks(controller, path, &f);
901 return r == -ENOENT ? 1 : r;
905 while ((r = cg_read_pid(f, &pid)) > 0) {
907 if (ignore_self && pid == self_pid)
/* Runs cg_is_empty() on the group at path and then recurses into its
 * subgroups: each name returned by cg_read_subgroup() is joined to path
 * as "path/name". The error return after the subgroup enumeration maps
 * -ENOENT to 1 and passes every other value through.
 * NOTE(review): presumably the walk stops as soon as one group reports a
 * result other than "empty" -- confirm. */
922 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
929 r = cg_is_empty(controller, path, ignore_self);
933 r = cg_enumerate_subgroups(controller, path, &d);
935 return r == -ENOENT ? 1 : r;
937 while ((r = cg_read_subgroup(d, &fn)) > 0) {
940 r = asprintf(&p, "%s/%s", path, fn);
948 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits spec into a controller part and a path part; at least one of the
 * two output pointers must be non-NULL (asserted). Visible handling: one
 * branch duplicates the whole spec; a spec without ':' is tested for
 * containing '/' or being empty before it is duplicated; a spec with ':'
 * is split into the text before the colon (strndup) and the text after it
 * (strdup), with the part before the colon searched for '/'.
 * NOTE(review): which branch fills which output and the error codes are
 * not visible -- confirm against the rest of the body. */
966 int cg_split_spec(const char *spec, char **controller, char **path) {
968 char *t = NULL, *u = NULL;
971 assert(controller || path);
976 if (!(t = strdup(spec)))
988 if (!(e = strchr(spec, ':'))) {
990 if (strchr(spec, '/') || spec[0] == 0)
994 if (!(t = strdup(spec)))
1008 memchr(spec, '/', e-spec))
1012 if (!(t = strndup(spec, e-spec)))
1016 if (!(u = strdup(e+1))) {
/* Formats "<controller>:<path>" into a newly allocated string stored in
 * *spec. The check below triggers when path is not absolute, controller
 * is empty, or controller contains ':' or '/'.
 * NOTE(review): presumably a triggered check and a failed asprintf() both
 * return an error -- the codes are not visible; confirm. */
1030 int cg_join_spec(const char *controller, const char *path, char **spec) {
1034 if (!path_is_absolute(path) ||
1035 controller[0] == 0 ||
1036 strchr(controller, ':') ||
1037 strchr(controller, '/'))
1040 if (asprintf(spec, "%s:%s", controller, path) < 0)
/* Turns path into a cgroup file system path stored in *result. A path
 * that starts with "/sys/fs/cgroup" and exists (access() with F_OK) is
 * treated as a file system path already. Anything else is parsed with
 * cg_split_spec() and resolved with cg_get_path(), falling back to
 * SYSTEMD_CGROUP_CONTROLLER and "/" when the controller or path part is
 * missing. */
1046 int cg_fix_path(const char *path, char **result) {
1053 /* First check if it already is a filesystem path */
1054 if (path_startswith(path, "/sys/fs/cgroup") &&
1055 access(path, F_OK) >= 0) {
1065 /* Otherwise treat it as cg spec */
1066 r = cg_split_spec(path, &c, &p);
1070 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Computes the cgroup path below which user cgroups are placed. If
 * cg_get_by_pid() for PID 1 in SYSTEMD_CGROUP_CONTROLLER fails, "/user"
 * is used. Otherwise a trailing "/system" is cut off PID 1's path, a path
 * equal to "/" is handled by a separate branch, and "/user" is appended.
 * NOTE(review): the body of the "/" branch and the hand-over of the
 * result through *path are not visible -- confirm. */
1077 int cg_get_user_path(char **path) {
1082 /* Figure out the place to put user cgroups below. We use the
1083 * same as PID 1 has but with the "/system" suffix replaced by
1086 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1087 p = strdup("/user");
1089 if (endswith(root, "/system"))
1090 root[strlen(root) - 7] = 0;
1091 else if (streq(root, "/"))
1094 p = strappend(root, "/user");
1105 char **cg_shorten_controllers(char **controllers) {
1108 controllers = strv_uniq(controllers);
1113 for (f = controllers, t = controllers; *f; f++) {
1116 if (streq(*f, "systemd") || streq(*f, SYSTEMD_CGROUP_CONTROLLER)) {
1121 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(*f));
1122 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), *f);
1124 if (access(cc, F_OK) < 0) {
1125 log_debug("Controller %s is not available, removing from controllers list.", *f);