1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
/* Looks up the "cgroup.procs" attribute file of the cgroup identified by
 * controller and path via cg_get_path(), treating a negative result as
 * failure.
 *
 * NOTE(review): presumably the file is then opened and handed back
 * through _f; those lines are not part of this excerpt -- confirm. */
38 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
47 if ((r = cg_get_path(controller, path, "cgroup.procs", &fs)) < 0)
/* Looks up the "tasks" attribute file of the cgroup identified by
 * controller and path via cg_get_path(), treating a negative result as
 * failure.
 *
 * NOTE(review): presumably the file is then opened and handed back
 * through _f; those lines are not part of this excerpt -- confirm. */
60 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
69 if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0)
/* Reads a single PID from the stream f by parsing one unsigned long
 * ("%lu") with fscanf().
 *
 * A failure is reported as -errno, or as -EIO when errno is not set.
 * NOTE(review): how end-of-file is reported and how the parsed value
 * reaches _pid are on lines not shown here -- confirm against the full
 * source. */
82 int cg_read_pid(FILE *f, pid_t *_pid) {
85 /* Note that the cgroup.procs might contain duplicates! See
86 * cgroups.txt for details. */
89 if (fscanf(f, "%lu", &ul) != 1) {
94 return errno ? -errno : -EIO;
/* Resolves the directory of the cgroup identified by controller and
 * path (cg_get_path() is called without a suffix) so that its direct
 * children can be listed. A negative result from cg_get_path() is
 * treated as failure.
 *
 * NOTE(review): presumably the directory is opened and handed back
 * through _d; those lines are not part of this excerpt -- confirm. */
104 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
113 /* This is not recursive! */
115 if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
/* Scans the directory stream d with readdir() for the next child
 * cgroup. Each entry is tested for d_type DT_DIR and for the names "."
 * and ".." (presumably to skip non-directories and the two special
 * entries), and the name of the selected entry is duplicated with
 * strdup().
 *
 * NOTE(review): presumably the copy is returned through fn and must be
 * freed by the caller; the return values are not visible here. */
128 int cg_read_subgroup(DIR *d, char **fn) {
134 while ((de = readdir(d))) {
137 if (de->d_type != DT_DIR)
140 if (streq(de->d_name, ".") ||
141 streq(de->d_name, ".."))
144 if (!(b = strdup(de->d_name)))
/* Removes the directory of the cgroup identified by controller and
 * path, as resolved by cg_get_path().
 *
 * The "tasks" file inside the group is checked with
 * file_is_priv_sticky(); per the comment below, a group whose sticky
 * bit is set is left in place (presumably only when honour_sticky is
 * true -- the guarding condition is not visible here).
 *
 * Returns 0 on success and also when the failure was ENOENT, otherwise
 * -errno.
 * NOTE(review): the final return reads errno from a call that is not
 * visible in this excerpt; confirm that nothing in between can
 * overwrite errno. */
157 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
161 r = cg_get_path(controller, path, NULL, &p);
168 /* If the sticky bit is set don't remove the directory */
170 tasks = strappend(p, "/tasks");
176 r = file_is_priv_sticky(tasks);
188 return (r < 0 && errno != ENOENT) ? -errno : 0;
/* Sends signal sig to the processes of the cgroup at path, as listed by
 * cg_enumerate_processes().
 *
 * The set s records PIDs that were already handled: each PID read is
 * looked up with set_get() and stored with set_put(). A set created by
 * this function itself is tracked in allocated_set and released with
 * set_free() at the end. ignore_self is tested against my_pid,
 * presumably to exempt the calling process.
 *
 * A kill() failure with ESRCH and an enumeration failure of -ENOENT are
 * not recorded in ret.
 *
 * NOTE(review): the use of sigcont and the exact return value are on
 * lines not visible in this excerpt -- confirm against the full
 * source. */
191 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
196 Set *allocated_set = NULL;
202 /* This goes through the tasks list and kills them all. This
203 * is repeated until no further processes are added to the
204 * tasks list, to properly handle forking processes */
207 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
216 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
217 if (ret >= 0 && r != -ENOENT)
223 while ((r = cg_read_pid(f, &pid)) > 0) {
225 if (pid == my_pid && ignore_self)
228 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
231 /* If we haven't killed this process yet, kill
233 if (kill(pid, sig) < 0) {
234 if (ret >= 0 && errno != ESRCH)
236 } else if (ret == 0) {
246 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
264 /* To avoid racing against processes which fork
265 * quicker than we can kill them we repeat this until
266 * no new pids need to be killed. */
272 set_free(allocated_set);
/* Applies cg_kill() to the cgroup at path and then to every child
 * group: children are listed with cg_enumerate_subgroups() and
 * cg_read_subgroup(), each child path is built as "path/fn" with
 * asprintf(), and the function calls itself on it with the same set s
 * so that handled PIDs are shared across the whole subtree.
 *
 * A set created here is tracked in allocated_set and released with
 * set_free() at the end. An enumeration failure of -ENOENT is not
 * recorded in ret. The group itself is passed to cg_rmdir() with
 * honour_sticky set to true (presumably only when rem is set -- the
 * guarding condition is not visible here).
 *
 * NOTE(review): the exact return value is on lines not shown in this
 * excerpt -- confirm against the full source. */
280 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
284 Set *allocated_set = NULL;
291 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
294 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
296 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
297 if (ret >= 0 && r != -ENOENT)
303 while ((r = cg_read_subgroup(d, &fn)) > 0) {
306 r = asprintf(&p, "%s/%s", path, fn);
316 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
319 if (r != 0 && ret >= 0)
323 if (r < 0 && ret >= 0)
327 if ((r = cg_rmdir(controller, path, true)) < 0) {
339 set_free(allocated_set);
/* Repeatedly runs cg_kill_recursive() on the cgroup at path, up to 15
 * rounds, sleeping 200ms between rounds. Each round is invoked with
 * sigcont and ignore_self set to true and without a caller-supplied
 * PID set; a result of zero or less ends the sequence.
 *
 * NOTE(review): the choice of signal per round is on lines not visible
 * in this excerpt; the comment below describes the intended
 * SIGTERM / SIGKILL schedule. */
344 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
350 /* This safely kills all processes; first it sends a SIGTERM,
351 * then checks 8 times after 200ms whether the group is now
352 * empty, then kills everything that is left with SIGKILL and
353 * finally checks 5 times after 200ms each whether the group
354 * is finally empty. */
356 for (i = 0; i < 15; i++) {
366 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
369 usleep(200 * USEC_PER_MSEC);
/* Moves the tasks of cgroup from into cgroup to within the same
 * controller: task IDs are listed with cg_enumerate_tasks() and each
 * one is attached to the target with cg_attach().
 *
 * A local set s records IDs that were already handled (set_get() /
 * set_put()). ignore_self is tested against my_pid, presumably to leave
 * the calling process where it is. An attach failure of -ESRCH and an
 * enumeration failure of -ENOENT are not recorded in ret.
 *
 * NOTE(review): the exact return value and the cleanup of s are on
 * lines not visible in this excerpt -- confirm against the full
 * source. */
375 int cg_migrate(const char *controller, const char *from, const char *to, bool ignore_self) {
386 if (!(s = set_new(trivial_hash_func, trivial_compare_func)))
395 if ((r = cg_enumerate_tasks(controller, from, &f)) < 0) {
396 if (ret >= 0 && r != -ENOENT)
402 while ((r = cg_read_pid(f, &pid)) > 0) {
404 /* This might do weird stuff if we aren't a
405 * single-threaded program. However, we
406 * luckily know we are not */
407 if (pid == my_pid && ignore_self)
410 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
413 if ((r = cg_attach(controller, to, pid)) < 0) {
414 if (ret >= 0 && r != -ESRCH)
421 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
/* Applies cg_migrate() to the cgroup from and then to every child
 * group below it, always with the same target to: children are listed
 * with cg_enumerate_subgroups() and cg_read_subgroup(), each child path
 * is built as "from/fn" with asprintf(), and the function calls itself
 * on it.
 *
 * An enumeration failure of -ENOENT is not recorded in ret. The source
 * group is passed to cg_rmdir() with honour_sticky set to true
 * (presumably only when rem is set -- the guarding condition is not
 * visible here).
 *
 * NOTE(review): the exact return value is on lines not shown in this
 * excerpt -- confirm against the full source. */
450 int cg_migrate_recursive(const char *controller, const char *from, const char *to, bool ignore_self, bool rem) {
459 ret = cg_migrate(controller, from, to, ignore_self);
461 if ((r = cg_enumerate_subgroups(controller, from, &d)) < 0) {
462 if (ret >= 0 && r != -ENOENT)
467 while ((r = cg_read_subgroup(d, &fn)) > 0) {
470 r = asprintf(&p, "%s/%s", from, fn);
480 r = cg_migrate_recursive(controller, p, to, ignore_self, rem);
483 if (r != 0 && ret >= 0)
487 if (r < 0 && ret >= 0)
491 if ((r = cg_rmdir(controller, from, true)) < 0) {
/* Maps a controller name to the string used for it in file system
 * paths. SYSTEMD_CGROUP_CONTROLLER is special-cased (its replacement is
 * on a line not visible in this excerpt), and a name carrying the
 * "name=" prefix is returned without that prefix. The result points
 * into the argument or to other storage not owned by the caller; no
 * allocation is visible here. */
505 static const char *normalize_controller(const char *controller) {
507 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
509 else if (startswith(controller, "name="))
/* Skip the five characters of the name= prefix */
510 return controller + 5;
/* Builds a file system path below /sys/fs/cgroup/ from a controller
 * directory name and the optional components path and suffix. Four
 * join() variants cover the combinations: path and suffix, path only,
 * suffix only, and neither (presumably selected by which of the two
 * are set -- the conditions are not visible here). The joined string
 * is passed through path_kill_slashes().
 *
 * NOTE(review): presumably the result is returned through fs and owned
 * by the caller; the relevant lines are not part of this excerpt. */
515 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
519 t = join("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
521 t = join("/sys/fs/cgroup/", controller, "/", path, NULL);
523 t = join("/sys/fs/cgroup/", controller, "/", suffix, NULL);
525 t = join("/sys/fs/cgroup/", controller, NULL);
530 path_kill_slashes(t);
/* Translates a controller, an optional cgroup path and an optional
 * attribute suffix into a file system path below /sys/fs/cgroup,
 * delegating the string assembly to join_path() after the controller
 * name went through normalize_controller().
 *
 * Before doing so it verifies with path_is_mount_point() that
 * /sys/fs/cgroup is a mount point. A failing check yields the negative
 * error from that call, or -ENOENT when the call itself did not fail.
 * The per-thread flag good caches the outcome so later calls can skip
 * the check. An empty controller is tested for up front; the value
 * returned in that case is not visible in this excerpt. */
536 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
538 static __thread bool good = false;
543 if (isempty(controller))
546 if (_unlikely_(!good)) {
549 r = path_is_mount_point("/sys/fs/cgroup", false);
551 return r < 0 ? r : -ENOENT;
553 /* Cache this to save a few stat()s */
557 p = normalize_controller(controller);
559 return join_path(p, path, suffix, fs);
/* Like a path lookup through join_path(), but first verifies with
 * access(F_OK) that the directory /sys/fs/cgroup/<controller> exists,
 * using the controller name as returned by normalize_controller().
 * An empty controller is tested for up front.
 *
 * NOTE(review): the values returned for an empty controller and for a
 * missing controller directory are on lines not visible here. */
562 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
569 if (isempty(controller))
572 p = normalize_controller(controller);
574 /* Check if this controller actually really exists */
/* sizeof() of the literal counts its terminating NUL, so the stack
 * buffer holds the prefix, the controller name and the terminator */
575 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
576 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
577 if (access(cc, F_OK) < 0)
580 return join_path(p, path, suffix, fs);
/* Callback for nftw() as used by cg_trim(). It distinguishes entries
 * reported as FTW_DP from all others and entries at level 0 from deeper
 * ones (presumably so that only sub-directories, not the walk root, are
 * acted upon). For the remaining directories it builds the name of
 * their "tasks" file and records in is_sticky whether
 * file_is_priv_sticky() reports a positive result for it.
 *
 * NOTE(review): the action taken afterwards and the return value are
 * on lines not visible in this excerpt. */
583 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
587 if (typeflag != FTW_DP)
590 if (ftwbuf->level < 1)
593 p = strappend(path, "/tasks");
599 is_sticky = file_is_priv_sticky(p) > 0;
/* Walks the directory tree of the cgroup identified by controller and
 * path with nftw() and trim_cb(). The walk visits children before their
 * parents (FTW_DEPTH), stays on one file system (FTW_MOUNT), does not
 * follow symlinks (FTW_PHYS) and uses at most 64 file descriptors. A
 * failing walk is reported as -errno, or -EIO when errno is not set.
 *
 * Afterwards the sticky state of the top-level "tasks" file is sampled
 * and rmdir() is called on the top-level directory, where ENOENT does
 * not count as an error (presumably only when delete_root is set and
 * the group is not sticky -- the conditions are not visible here). */
609 int cg_trim(const char *controller, const char *path, bool delete_root) {
616 r = cg_get_path(controller, path, NULL, &fs);
621 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
622 r = errno ? -errno : -EIO;
628 p = strappend(fs, "/tasks");
634 is_sticky = file_is_priv_sticky(p) > 0;
638 if (rmdir(fs) < 0 && errno != ENOENT) {
/* Deletes the cgroup at path by moving everything in it, recursively,
 * into its parent group: the parent is determined with
 * parent_of_path() and the move is done by cg_migrate_recursive() with
 * ignore_self set to false and rem set to true.
 *
 * Returns 0 on success and also when the migration reported -ENOENT,
 * otherwise the negative error from the migration. */
649 int cg_delete(const char *controller, const char *path) {
656 if ((r = parent_of_path(path, &parent)) < 0)
659 r = cg_migrate_recursive(controller, path, parent, false, true);
/* A group that does not exist counts as deleted */
662 return r == -ENOENT ? 0 : r;
/* Puts the process pid into the cgroup identified by controller and
 * path by writing the PID, formatted as an unsigned decimal number
 * followed by a newline, to the "tasks" file of the group. The file
 * name is resolved with cg_get_path_and_check(), so the controller
 * directory is verified to exist first.
 *
 * NOTE(review): the return value is on lines not visible in this
 * excerpt; presumably it is the result of write_one_line_file(). */
665 int cg_attach(const char *controller, const char *path, pid_t pid) {
674 r = cg_get_path_and_check(controller, path, "tasks", &fs);
681 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
684 r = write_one_line_file(fs, c);
/* Changes access mode and ownership of the directory of the cgroup
 * identified by controller and path: the directory is resolved with
 * cg_get_path() and handed to chmod_and_chown() together with mode,
 * uid and gid.
 *
 * A mode of (mode_t) -1 is distinguished from a real mode; the
 * adjustment applied to a real mode is on a line not visible in this
 * excerpt.
 * NOTE(review): presumably (mode_t) -1, (uid_t) -1 and (gid_t) -1 mean
 * "leave unchanged" -- confirm against chmod_and_chown(). */
690 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
697 if (mode != (mode_t) -1)
700 r = cg_get_path(controller, path, NULL, &fs);
704 r = chmod_and_chown(fs, mode, uid, gid);
/* Changes access mode, ownership and sticky bit of the "tasks" file of
 * the cgroup identified by controller and path, applying the result
 * with chmod_and_chown().
 *
 * mode, uid and gid use -1 (cast to their type) and sticky uses a
 * negative value to mean "not given"; the case where none of the four
 * is given is tested for first. The final mode is derived as follows:
 *   - mode and sticky both given: the sticky bit is OR-ed into mode
 *     when sticky is non-zero;
 *   - only sticky given: the current file mode with its sticky bit
 *     replaced according to sticky;
 *   - only mode given: the current file mode with its low nine
 *     permission bits replaced by mode, sticky bit untouched.
 * In the last two cases the current mode comes from st.st_mode, which
 * is filled in on lines not visible in this excerpt. */
710 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
717 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
720 if (mode != (mode_t) -1)
723 r = cg_get_path(controller, path, "tasks", &fs);
727 if (sticky >= 0 && mode != (mode_t) -1)
728 /* Both mode and sticky param are passed */
729 mode |= (sticky ? S_ISVTX : 0);
730 else if ((sticky >= 0 && mode == (mode_t) -1) ||
731 (mode != (mode_t) -1 && sticky < 0)) {
734 /* Only one param is passed, hence read the current
735 * mode from the file itself */
743 if (mode == (mode_t) -1)
744 /* No mode set, we just shall set the sticky bit */
745 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
747 /* Only mode set, leave sticky bit untouched */
748 mode = (st.st_mode & ~0777) | mode;
751 r = chmod_and_chown(fs, mode, uid, gid);
/* Determines the cgroup of process pid for one controller by scanning
 * /proc/<pid>/cgroup line by line with fgets().
 *
 * On each line the first ':' is located and the text is compared
 * against the controller name over cs (its length) characters. For the
 * matching line, the remainder after the controller name and one
 * further separator character is duplicated with strdup().
 *
 * An error path maps ENOENT to -ESRCH and any other errno to -errno
 * (presumably the failure to open the proc file). A failing fgets()
 * yields -errno, or -EIO when errno is not set.
 *
 * NOTE(review): presumably the copy is returned through path and must
 * be freed by the caller; those lines are not part of this excerpt. */
757 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
771 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
778 return errno == ENOENT ? -ESRCH : -errno;
780 cs = strlen(controller);
787 if (!(fgets(line, sizeof(line), f))) {
791 r = errno ? -errno : -EIO;
797 if (!(l = strchr(line, ':')))
801 if (strncmp(l, controller, cs) != 0)
807 if (!(p = strdup(l + cs + 1))) {
/* Configures the release agent of a controller hierarchy in two steps.
 *
 * First the "release_agent" file at the top of the hierarchy is read
 * and stripped of surrounding whitespace. One branch writes agent plus
 * a newline to the file (presumably when no agent is configured yet);
 * another branch is taken when the configured value differs from
 * agent.
 *
 * Then the "notify_on_release" file is read the same way: a value of
 * "0" is replaced by writing "1" plus a newline, and a value that is
 * neither "0" nor "1" is handled separately.
 *
 * NOTE(review): the results for the "different agent" and "unexpected
 * value" branches, the cleanup of fs, contents and line, and the final
 * return value are on lines not visible in this excerpt. */
825 int cg_install_release_agent(const char *controller, const char *agent) {
826 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
832 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
835 if ((r = read_one_line_file(fs, &contents)) < 0)
838 sc = strstrip(contents);
841 if (asprintf(&line, "%s\n", agent) < 0) {
846 if ((r = write_one_line_file(fs, line)) < 0)
849 } else if (!streq(sc, agent)) {
856 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
861 if ((r = read_one_line_file(fs, &contents)) < 0)
864 sc = strstrip(contents);
866 if (streq(sc, "0")) {
867 if ((r = write_one_line_file(fs, "1\n")) < 0)
871 } else if (!streq(sc, "1")) {
/* Checks whether the cgroup identified by controller and path has any
 * tasks, reading them with cg_enumerate_tasks() and cg_read_pid().
 * When ignore_self is set, the PID of the calling process (getpid())
 * is singled out, presumably so that it does not count.
 *
 * A failed enumeration with -ENOENT yields 1, any other enumeration
 * failure is returned as is.
 * NOTE(review): the values returned after the loop are on lines not
 * visible here; presumably positive means empty and 0 means not
 * empty. */
885 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
/* A group whose tasks file does not exist is reported as 1 */
894 if ((r = cg_enumerate_tasks(controller, path, &f)) < 0)
895 return r == -ENOENT ? 1 : r;
897 while ((r = cg_read_pid(f, &pid)) > 0) {
899 if (ignore_self && pid == getpid())
/* Recursive variant of cg_is_empty(): first checks the cgroup at path
 * itself and stops descending when that check yields zero or a
 * negative value; otherwise lists the children with
 * cg_enumerate_subgroups() and cg_read_subgroup(), builds each child
 * path as "path/fn" with asprintf() and calls itself on it.
 *
 * A failed sub-group enumeration with -ENOENT yields 1, any other
 * enumeration failure is returned as is.
 * NOTE(review): how the per-child results are combined into the return
 * value is on lines not visible in this excerpt. */
914 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
922 if ((r = cg_is_empty(controller, path, ignore_self)) <= 0)
925 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0)
926 return r == -ENOENT ? 1 : r;
928 while ((r = cg_read_subgroup(d, &fn)) > 0) {
931 r = asprintf(&p, "%s/%s", path, fn);
939 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup specification string into a controller part and a
 * path part. At least one of the output pointers controller and path
 * must be non-NULL (asserted).
 *
 * The visible lines show three shapes of spec being handled:
 *   - spec copied as a whole with strdup() (presumably the case of a
 *     bare absolute path -- the condition is not visible here);
 *   - spec without a ':', which is checked for a '/' and for being
 *     empty, and then copied as a whole with strdup() (presumably as
 *     the controller name);
 *   - spec with a ':', where the part before the colon is checked for
 *     a '/' and copied with strndup(), and the part after the colon is
 *     copied with strdup().
 *
 * NOTE(review): the error codes, and how t and u are handed to the
 * output pointers, are on lines not visible in this excerpt. */
957 int cg_split_spec(const char *spec, char **controller, char **path) {
959 char *t = NULL, *u = NULL;
962 assert(controller || path);
967 if (!(t = strdup(spec)))
979 if (!(e = strchr(spec, ':'))) {
981 if (strchr(spec, '/') || spec[0] == 0)
985 if (!(t = strdup(spec)))
999 memchr(spec, '/', e-spec))
1003 if (!(t = strndup(spec, e-spec)))
1007 if (!(u = strdup(e+1))) {
/* Combines a controller name and a cgroup path into a specification
 * string of the form "controller:path", allocated with asprintf() and
 * stored in spec.
 *
 * The inputs are validated first: path has to be absolute, and
 * controller has to be non-empty and free of ':' and '/' characters.
 * NOTE(review): the values returned on validation or allocation
 * failure are on lines not visible in this excerpt. */
1021 int cg_join_spec(const char *controller, const char *path, char **spec) {
1025 if (!path_is_absolute(path) ||
1026 controller[0] == 0 ||
1027 strchr(controller, ':') ||
1028 strchr(controller, '/'))
1031 if (asprintf(spec, "%s:%s", controller, path) < 0)
/* Converts path, which may be either a file system path or a cgroup
 * specification, into a file system path stored in result.
 *
 * An absolute path that lies below /sys/fs/cgroup and exists
 * (access(F_OK)) is duplicated with strdup(). Anything else is parsed
 * with cg_split_spec() and resolved with cg_get_path(), where a missing
 * controller defaults to SYSTEMD_CGROUP_CONTROLLER and a missing path
 * defaults to "/".
 *
 * NOTE(review): the return values and the release of c and p are on
 * lines not visible in this excerpt. */
1037 int cg_fix_path(const char *path, char **result) {
1044 /* First check if it already is a filesystem path */
1045 if (path_is_absolute(path) &&
1046 path_startswith(path, "/sys/fs/cgroup") &&
1047 access(path, F_OK) >= 0) {
1049 if (!(t = strdup(path)))
1056 /* Otherwise treat it as cg spec */
1057 if ((r = cg_split_spec(path, &c, &p)) < 0)
1060 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Computes the cgroup path below which user cgroups are placed.
 *
 * The cgroup of PID 1 in SYSTEMD_CGROUP_CONTROLLER is looked up with
 * cg_get_by_pid(). When that lookup fails the result is a copy of
 * "/user". Otherwise a trailing "/system" (seven characters) is cut
 * off the root path, a root of exactly "/" is special-cased, and
 * "/user" is appended with strappend().
 *
 * NOTE(review): the handling of the "/" case, the release of root and
 * the return value are on lines not visible in this excerpt;
 * presumably the result is returned through path and owned by the
 * caller. */
1067 int cg_get_user_path(char **path) {
1072 /* Figure out the place to put user cgroups below. We use the
1073 * same as PID 1 has but with the "/system" suffix replaced by
1076 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1077 p = strdup("/user");
1079 if (endswith(root, "/system"))
1080 root[strlen(root) - 7] = 0;
1081 else if (streq(root, "/"))
1084 p = strappend(root, "/user");