1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
38 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
47 if ((r = cg_get_path(controller, path, "cgroup.procs", &fs)) < 0)
60 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
69 if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0)
82 int cg_read_pid(FILE *f, pid_t *_pid) {
85 /* Note that the cgroup.procs might contain duplicates! See
86 * cgroups.txt for details. */
89 if (fscanf(f, "%lu", &ul) != 1) {
94 return errno ? -errno : -EIO;
104 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
113 /* This is not recursive! */
115 if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
128 int cg_read_subgroup(DIR *d, char **fn) {
134 while ((de = readdir(d))) {
137 if (de->d_type != DT_DIR)
140 if (streq(de->d_name, ".") ||
141 streq(de->d_name, ".."))
144 if (!(b = strdup(de->d_name)))
157 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
161 r = cg_get_path(controller, path, NULL, &p);
168 /* If the sticky bit is set don't remove the directory */
170 tasks = strappend(p, "/tasks");
176 r = file_is_priv_sticky(tasks);
188 return (r < 0 && errno != ENOENT) ? -errno : 0;
191 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
196 Set *allocated_set = NULL;
202 /* This goes through the tasks list and kills them all. This
203 * is repeated until no further processes are added to the
204 * tasks list, to properly handle forking processes */
207 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
216 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
217 if (ret >= 0 && r != -ENOENT)
223 while ((r = cg_read_pid(f, &pid)) > 0) {
225 if (pid == my_pid && ignore_self)
228 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
231 /* If we haven't killed this process yet, kill
233 if (kill(pid, sig) < 0) {
234 if (ret >= 0 && errno != ESRCH)
236 } else if (ret == 0) {
246 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
264 /* To avoid racing against processes which fork
265 * quicker than we can kill them we repeat this until
266 * no new pids need to be killed. */
272 set_free(allocated_set);
280 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
284 Set *allocated_set = NULL;
291 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
294 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
296 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
297 if (ret >= 0 && r != -ENOENT)
303 while ((r = cg_read_subgroup(d, &fn)) > 0) {
306 r = asprintf(&p, "%s/%s", path, fn);
316 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
319 if (r != 0 && ret >= 0)
323 if (r < 0 && ret >= 0)
327 if ((r = cg_rmdir(controller, path, true)) < 0) {
339 set_free(allocated_set);
344 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
350 /* This safely kills all processes; first it sends a SIGTERM,
351 * then checks 8 times after 200ms each whether the group is now
352 * empty, then kills everything that is left with SIGKILL and
353 * finally checks 5 times after 200ms each whether the group
354 * is finally empty. */
356 for (i = 0; i < 15; i++) {
366 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
369 usleep(200 * USEC_PER_MSEC);
375 int cg_migrate(const char *controller, const char *from, const char *to, bool ignore_self) {
386 if (!(s = set_new(trivial_hash_func, trivial_compare_func)))
395 if ((r = cg_enumerate_tasks(controller, from, &f)) < 0) {
396 if (ret >= 0 && r != -ENOENT)
402 while ((r = cg_read_pid(f, &pid)) > 0) {
404 /* This might do weird stuff if we aren't a
405 * single-threaded program. However, we
406 * luckily know that we are single-threaded */
407 if (pid == my_pid && ignore_self)
410 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
413 if ((r = cg_attach(controller, to, pid)) < 0) {
414 if (ret >= 0 && r != -ESRCH)
421 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
450 int cg_migrate_recursive(const char *controller, const char *from, const char *to, bool ignore_self, bool rem) {
459 ret = cg_migrate(controller, from, to, ignore_self);
461 if ((r = cg_enumerate_subgroups(controller, from, &d)) < 0) {
462 if (ret >= 0 && r != -ENOENT)
467 while ((r = cg_read_subgroup(d, &fn)) > 0) {
470 r = asprintf(&p, "%s/%s", from, fn);
480 r = cg_migrate_recursive(controller, p, to, ignore_self, rem);
483 if (r != 0 && ret >= 0)
487 if (r < 0 && ret >= 0)
491 if ((r = cg_rmdir(controller, from, true)) < 0) {
505 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
508 static __thread bool good = false;
513 if (_unlikely_(!good)) {
516 r = path_is_mount_point("/sys/fs/cgroup", false);
518 return r < 0 ? r : -ENOENT;
520 /* Cache this to save a few stat()s */
524 if (isempty(controller))
527 /* This is a very minimal lookup from controller names to
528 * paths. Since we have mounted most hierarchies ourselves
529 * this should be kinda safe, but eventually we might want to
530 * extend this to have a fallback to actually check
531 * /proc/mounts. Might need caching then. */
533 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
535 else if (startswith(controller, "name="))
541 t = join("/sys/fs/cgroup/", p, "/", path, "/", suffix, NULL);
543 t = join("/sys/fs/cgroup/", p, "/", path, NULL);
545 t = join("/sys/fs/cgroup/", p, "/", suffix, NULL);
547 t = join("/sys/fs/cgroup/", p, NULL);
552 path_kill_slashes(t);
558 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
562 if (typeflag != FTW_DP)
565 if (ftwbuf->level < 1)
568 p = strappend(path, "/tasks");
574 is_sticky = file_is_priv_sticky(p) > 0;
584 int cg_trim(const char *controller, const char *path, bool delete_root) {
591 r = cg_get_path(controller, path, NULL, &fs);
596 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
597 r = errno ? -errno : -EIO;
603 p = strappend(fs, "/tasks");
609 is_sticky = file_is_priv_sticky(p) > 0;
613 if (rmdir(fs) < 0 && errno != ENOENT) {
624 int cg_delete(const char *controller, const char *path) {
631 if ((r = parent_of_path(path, &parent)) < 0)
634 r = cg_migrate_recursive(controller, path, parent, false, true);
637 return r == -ENOENT ? 0 : r;
640 int cg_create(const char *controller, const char *path) {
647 if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
650 r = mkdir_parents(fs, 0755);
653 if (mkdir(fs, 0755) >= 0)
655 else if (errno == EEXIST)
666 int cg_attach(const char *controller, const char *path, pid_t pid) {
675 if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0)
681 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
684 r = write_one_line_file(fs, c);
690 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
697 if ((r = cg_create(controller, path)) < 0)
700 if ((q = cg_attach(controller, path, pid)) < 0)
703 /* This does not remove the cgroup on failure */
708 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
715 if (mode != (mode_t) -1)
718 r = cg_get_path(controller, path, NULL, &fs);
722 r = chmod_and_chown(fs, mode, uid, gid);
728 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
735 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
738 if (mode != (mode_t) -1)
741 r = cg_get_path(controller, path, "tasks", &fs);
745 if (sticky >= 0 && mode != (mode_t) -1)
746 /* Both mode and sticky param are passed */
747 mode |= (sticky ? S_ISVTX : 0);
748 else if ((sticky >= 0 && mode == (mode_t) -1) ||
749 (mode != (mode_t) -1 && sticky < 0)) {
752 /* Only one param is passed, hence read the current
753 * mode from the file itself */
761 if (mode == (mode_t) -1)
762 /* No mode set, we just shall set the sticky bit */
763 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
765 /* Only mode set, leave sticky bit untouched */
766 mode = (st.st_mode & ~0777) | mode;
769 r = chmod_and_chown(fs, mode, uid, gid);
775 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
789 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
796 return errno == ENOENT ? -ESRCH : -errno;
798 cs = strlen(controller);
805 if (!(fgets(line, sizeof(line), f))) {
809 r = errno ? -errno : -EIO;
815 if (!(l = strchr(line, ':')))
819 if (strncmp(l, controller, cs) != 0)
825 if (!(p = strdup(l + cs + 1))) {
843 int cg_install_release_agent(const char *controller, const char *agent) {
844 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
850 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
853 if ((r = read_one_line_file(fs, &contents)) < 0)
856 sc = strstrip(contents);
859 if (asprintf(&line, "%s\n", agent) < 0) {
864 if ((r = write_one_line_file(fs, line)) < 0)
867 } else if (!streq(sc, agent)) {
874 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
879 if ((r = read_one_line_file(fs, &contents)) < 0)
882 sc = strstrip(contents);
884 if (streq(sc, "0")) {
885 if ((r = write_one_line_file(fs, "1\n")) < 0)
889 } else if (!streq(sc, "1")) {
903 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
912 if ((r = cg_enumerate_tasks(controller, path, &f)) < 0)
913 return r == -ENOENT ? 1 : r;
915 while ((r = cg_read_pid(f, &pid)) > 0) {
917 if (ignore_self && pid == getpid())
932 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
940 if ((r = cg_is_empty(controller, path, ignore_self)) <= 0)
943 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0)
944 return r == -ENOENT ? 1 : r;
946 while ((r = cg_read_subgroup(d, &fn)) > 0) {
949 r = asprintf(&p, "%s/%s", path, fn);
957 r = cg_is_empty_recursive(controller, p, ignore_self);
975 int cg_split_spec(const char *spec, char **controller, char **path) {
977 char *t = NULL, *u = NULL;
980 assert(controller || path);
985 if (!(t = strdup(spec)))
997 if (!(e = strchr(spec, ':'))) {
999 if (strchr(spec, '/') || spec[0] == 0)
1003 if (!(t = strdup(spec)))
1017 memchr(spec, '/', e-spec))
1021 if (!(t = strndup(spec, e-spec)))
1025 if (!(u = strdup(e+1))) {
1039 int cg_join_spec(const char *controller, const char *path, char **spec) {
1043 if (!path_is_absolute(path) ||
1044 controller[0] == 0 ||
1045 strchr(controller, ':') ||
1046 strchr(controller, '/'))
1049 if (asprintf(spec, "%s:%s", controller, path) < 0)
1055 int cg_fix_path(const char *path, char **result) {
1062 /* First check if it already is a filesystem path */
1063 if (path_is_absolute(path) &&
1064 path_startswith(path, "/sys/fs/cgroup") &&
1065 access(path, F_OK) >= 0) {
1067 if (!(t = strdup(path)))
1074 /* Otherwise treat it as cg spec */
1075 if ((r = cg_split_spec(path, &c, &p)) < 0)
1078 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1085 int cg_get_user_path(char **path) {
1090 /* Figure out the place to put user cgroups below. We use the
1091 * same as PID 1 has but with the "/system" suffix replaced by
1094 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1095 p = strdup("/user");
1097 if (endswith(root, "/system"))
1098 root[strlen(root) - 7] = 0;
1099 else if (streq(root, "/"))
1102 p = strappend(root, "/user");