1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
39 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
48 if ((r = cg_get_path(controller, path, "cgroup.procs", &fs)) < 0)
61 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
70 if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0)
83 int cg_read_pid(FILE *f, pid_t *_pid) {
86 /* Note that the cgroup.procs might contain duplicates! See
87 * cgroups.txt for details. */
90 if (fscanf(f, "%lu", &ul) != 1) {
95 return errno ? -errno : -EIO;
105 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
114 /* This is not recursive! */
116 if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
129 int cg_read_subgroup(DIR *d, char **fn) {
135 while ((de = readdir(d))) {
138 if (de->d_type != DT_DIR)
141 if (streq(de->d_name, ".") ||
142 streq(de->d_name, ".."))
145 if (!(b = strdup(de->d_name)))
158 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
162 r = cg_get_path(controller, path, NULL, &p);
169 /* If the sticky bit is set don't remove the directory */
171 tasks = strappend(p, "/tasks");
177 r = file_is_priv_sticky(tasks);
189 return (r < 0 && errno != ENOENT) ? -errno : 0;
192 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
197 Set *allocated_set = NULL;
203 /* This goes through the tasks list and kills them all. This
204 * is repeated until no further processes are added to the
205 * tasks list, to properly handle forking processes */
208 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
217 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
218 if (ret >= 0 && r != -ENOENT)
224 while ((r = cg_read_pid(f, &pid)) > 0) {
226 if (pid == my_pid && ignore_self)
229 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
232 /* If we haven't killed this process yet, kill
234 if (kill(pid, sig) < 0) {
235 if (ret >= 0 && errno != ESRCH)
237 } else if (ret == 0) {
247 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
265 /* To avoid racing against processes which fork
266 * quicker than we can kill them we repeat this until
267 * no new pids need to be killed. */
273 set_free(allocated_set);
281 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
285 Set *allocated_set = NULL;
292 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
295 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
297 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
298 if (ret >= 0 && r != -ENOENT)
304 while ((r = cg_read_subgroup(d, &fn)) > 0) {
307 r = asprintf(&p, "%s/%s", path, fn);
317 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
320 if (r != 0 && ret >= 0)
324 if (r < 0 && ret >= 0)
328 if ((r = cg_rmdir(controller, path, true)) < 0) {
340 set_free(allocated_set);
345 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
351 /* This safely kills all processes; first it sends a SIGTERM,
352 * then checks 8 times after 200ms whether the group is now
353 * empty, then kills everything that is left with SIGKILL and
354 * finally checks 5 times after 200ms each whether the group
355 * is finally empty. */
357 for (i = 0; i < 15; i++) {
367 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
370 usleep(200 * USEC_PER_MSEC);
376 int cg_migrate(const char *controller, const char *from, const char *to, bool ignore_self) {
387 if (!(s = set_new(trivial_hash_func, trivial_compare_func)))
396 if ((r = cg_enumerate_tasks(controller, from, &f)) < 0) {
397 if (ret >= 0 && r != -ENOENT)
403 while ((r = cg_read_pid(f, &pid)) > 0) {
405 /* This might do weird stuff if we aren't a
406 * single-threaded program. However, we
407 * luckily know we are single-threaded */
408 if (pid == my_pid && ignore_self)
411 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
414 if ((r = cg_attach(controller, to, pid)) < 0) {
415 if (ret >= 0 && r != -ESRCH)
422 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
451 int cg_migrate_recursive(const char *controller, const char *from, const char *to, bool ignore_self, bool rem) {
460 ret = cg_migrate(controller, from, to, ignore_self);
462 if ((r = cg_enumerate_subgroups(controller, from, &d)) < 0) {
463 if (ret >= 0 && r != -ENOENT)
468 while ((r = cg_read_subgroup(d, &fn)) > 0) {
471 r = asprintf(&p, "%s/%s", from, fn);
481 r = cg_migrate_recursive(controller, p, to, ignore_self, rem);
484 if (r != 0 && ret >= 0)
488 if (r < 0 && ret >= 0)
492 if ((r = cg_rmdir(controller, from, true)) < 0) {
506 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
509 static __thread bool good = false;
514 if (_unlikely_(!good)) {
517 r = path_is_mount_point("/sys/fs/cgroup", false);
519 return r < 0 ? r : -ENOENT;
521 /* Cache this to save a few stat()s */
525 if (isempty(controller))
528 /* This is a very minimal lookup from controller names to
529 * paths. Since we have mounted most hierarchies ourselves
530 * this should be kinda safe, but eventually we might want to
531 * extend this to have a fallback to actually check
532 * /proc/mounts. Might need caching then. */
534 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
536 else if (startswith(controller, "name="))
542 t = join("/sys/fs/cgroup/", p, "/", path, "/", suffix, NULL);
544 t = join("/sys/fs/cgroup/", p, "/", path, NULL);
546 t = join("/sys/fs/cgroup/", p, "/", suffix, NULL);
548 t = join("/sys/fs/cgroup/", p, NULL);
553 path_kill_slashes(t);
559 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
563 if (typeflag != FTW_DP)
566 if (ftwbuf->level < 1)
569 p = strappend(path, "/tasks");
575 is_sticky = file_is_priv_sticky(p) > 0;
585 int cg_trim(const char *controller, const char *path, bool delete_root) {
592 r = cg_get_path(controller, path, NULL, &fs);
597 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
598 r = errno ? -errno : -EIO;
604 p = strappend(fs, "/tasks");
610 is_sticky = file_is_priv_sticky(p) > 0;
614 if (rmdir(fs) < 0 && errno != ENOENT) {
625 int cg_delete(const char *controller, const char *path) {
632 if ((r = parent_of_path(path, &parent)) < 0)
635 r = cg_migrate_recursive(controller, path, parent, false, true);
638 return r == -ENOENT ? 0 : r;
641 int cg_create(const char *controller, const char *path) {
648 if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
651 r = mkdir_parents(fs, 0755);
654 if (mkdir(fs, 0755) >= 0)
656 else if (errno == EEXIST)
667 int cg_attach(const char *controller, const char *path, pid_t pid) {
676 if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0)
682 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
685 r = write_one_line_file(fs, c);
691 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
698 if ((r = cg_create(controller, path)) < 0)
701 if ((q = cg_attach(controller, path, pid)) < 0)
704 /* This does not remove the cgroup on failure */
709 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
716 if (mode != (mode_t) -1)
719 r = cg_get_path(controller, path, NULL, &fs);
723 r = chmod_and_chown(fs, mode, uid, gid);
729 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
736 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
739 if (mode != (mode_t) -1)
742 r = cg_get_path(controller, path, "tasks", &fs);
746 if (sticky >= 0 && mode != (mode_t) -1)
747 /* Both mode and sticky param are passed */
748 mode |= (sticky ? S_ISVTX : 0);
749 else if ((sticky >= 0 && mode == (mode_t) -1) ||
750 (mode != (mode_t) -1 && sticky < 0)) {
753 /* Only one param is passed, hence read the current
754 * mode from the file itself */
762 if (mode == (mode_t) -1)
763 /* No mode set, we shall just set the sticky bit */
764 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
766 /* Only mode set, leave sticky bit untouched */
767 mode = (st.st_mode & ~0777) | mode;
770 r = chmod_and_chown(fs, mode, uid, gid);
776 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
790 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
797 return errno == ENOENT ? -ESRCH : -errno;
799 cs = strlen(controller);
806 if (!(fgets(line, sizeof(line), f))) {
810 r = errno ? -errno : -EIO;
816 if (!(l = strchr(line, ':')))
820 if (strncmp(l, controller, cs) != 0)
826 if (!(p = strdup(l + cs + 1))) {
844 int cg_install_release_agent(const char *controller, const char *agent) {
845 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
851 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
854 if ((r = read_one_line_file(fs, &contents)) < 0)
857 sc = strstrip(contents);
860 if (asprintf(&line, "%s\n", agent) < 0) {
865 if ((r = write_one_line_file(fs, line)) < 0)
868 } else if (!streq(sc, agent)) {
875 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
880 if ((r = read_one_line_file(fs, &contents)) < 0)
883 sc = strstrip(contents);
885 if (streq(sc, "0")) {
886 if ((r = write_one_line_file(fs, "1\n")) < 0)
890 } else if (!streq(sc, "1")) {
904 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
913 if ((r = cg_enumerate_tasks(controller, path, &f)) < 0)
914 return r == -ENOENT ? 1 : r;
916 while ((r = cg_read_pid(f, &pid)) > 0) {
918 if (ignore_self && pid == getpid())
933 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
941 if ((r = cg_is_empty(controller, path, ignore_self)) <= 0)
944 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0)
945 return r == -ENOENT ? 1 : r;
947 while ((r = cg_read_subgroup(d, &fn)) > 0) {
950 r = asprintf(&p, "%s/%s", path, fn);
958 r = cg_is_empty_recursive(controller, p, ignore_self);
976 int cg_split_spec(const char *spec, char **controller, char **path) {
978 char *t = NULL, *u = NULL;
981 assert(controller || path);
986 if (!(t = strdup(spec)))
998 if (!(e = strchr(spec, ':'))) {
1000 if (strchr(spec, '/') || spec[0] == 0)
1004 if (!(t = strdup(spec)))
1018 memchr(spec, '/', e-spec))
1022 if (!(t = strndup(spec, e-spec)))
1026 if (!(u = strdup(e+1))) {
1040 int cg_join_spec(const char *controller, const char *path, char **spec) {
1044 if (!path_is_absolute(path) ||
1045 controller[0] == 0 ||
1046 strchr(controller, ':') ||
1047 strchr(controller, '/'))
1050 if (asprintf(spec, "%s:%s", controller, path) < 0)
1056 int cg_fix_path(const char *path, char **result) {
1063 /* First check if it already is a filesystem path */
1064 if (path_is_absolute(path) &&
1065 path_startswith(path, "/sys/fs/cgroup") &&
1066 access(path, F_OK) >= 0) {
1068 if (!(t = strdup(path)))
1075 /* Otherwise treat it as cg spec */
1076 if ((r = cg_split_spec(path, &c, &p)) < 0)
1079 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1086 int cg_get_user_path(char **path) {
1091 /* Figure out the place to put user cgroups below. We use the
1092 * same as PID 1 has but with the "/system" suffix replaced by
1095 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1096 p = strdup("/user");
1098 if (endswith(root, "/system"))
1099 root[strlen(root) - 7] = 0;
1100 else if (streq(root, "/"))
1103 p = strappend(root, "/user");