1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
40 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
48 r = cg_get_path(controller, path, "cgroup.procs", &fs);
62 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
70 r = cg_get_path(controller, path, "tasks", &fs);
84 int cg_read_pid(FILE *f, pid_t *_pid) {
87 /* Note that the cgroup.procs might contain duplicates! See
88 * cgroups.txt for details. */
91 if (fscanf(f, "%lu", &ul) != 1) {
96 return errno ? -errno : -EIO;
106 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
114 /* This is not recursive! */
116 r = cg_get_path(controller, path, NULL, &fs);
130 int cg_read_subgroup(DIR *d, char **fn) {
136 while ((de = readdir(d))) {
139 if (de->d_type != DT_DIR)
142 if (streq(de->d_name, ".") ||
143 streq(de->d_name, ".."))
146 if (!(b = strdup(de->d_name)))
159 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
163 r = cg_get_path(controller, path, NULL, &p);
170 /* If the sticky bit is set don't remove the directory */
172 tasks = strappend(p, "/tasks");
178 r = file_is_priv_sticky(tasks);
190 return (r < 0 && errno != ENOENT) ? -errno : 0;
193 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
198 Set *allocated_set = NULL;
204 /* This goes through the tasks list and kills them all. This
205 * is repeated until no further processes are added to the
206 * tasks list, to properly handle forking processes */
209 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
218 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
219 if (ret >= 0 && r != -ENOENT)
225 while ((r = cg_read_pid(f, &pid)) > 0) {
227 if (pid == my_pid && ignore_self)
230 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
233 /* If we haven't killed this process yet, kill it */
235 if (kill(pid, sig) < 0) {
236 if (ret >= 0 && errno != ESRCH)
238 } else if (ret == 0) {
248 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
266 /* To avoid racing against processes which fork
267 * quicker than we can kill them we repeat this until
268 * no new pids need to be killed. */
274 set_free(allocated_set);
282 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
286 Set *allocated_set = NULL;
293 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
296 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
298 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
299 if (ret >= 0 && r != -ENOENT)
305 while ((r = cg_read_subgroup(d, &fn)) > 0) {
308 r = asprintf(&p, "%s/%s", path, fn);
318 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
321 if (r != 0 && ret >= 0)
325 if (r < 0 && ret >= 0)
329 if ((r = cg_rmdir(controller, path, true)) < 0) {
341 set_free(allocated_set);
346 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
352 /* This safely kills all processes: first it sends SIGTERM,
353 * then checks 8 times, 200ms apart, whether the group is now
354 * empty, then kills everything that is left with SIGKILL and
355 * finally checks 5 more times, 200ms apart, whether the group
356 * is empty. */
358 for (i = 0; i < 15; i++) {
368 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
371 usleep(200 * USEC_PER_MSEC);
377 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
379 _cleanup_set_free_ Set *s = NULL;
382 _cleanup_fclose_ FILE *f = NULL;
389 s = set_new(trivial_hash_func, trivial_compare_func);
399 r = cg_enumerate_tasks(cfrom, pfrom, &f);
401 if (ret >= 0 && r != -ENOENT)
407 while ((r = cg_read_pid(f, &pid)) > 0) {
409 /* This might do weird stuff if we aren't a
410 * single-threaded program. However, we
411 * luckily know we are single-threaded */
412 if (pid == my_pid && ignore_self)
415 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
418 r = cg_attach(cto, pto, pid);
420 if (ret >= 0 && r != -ESRCH)
427 r = set_put(s, LONG_TO_PTR(pid));
450 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
452 _cleanup_closedir_ DIR *d = NULL;
460 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
462 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
464 if (ret >= 0 && r != -ENOENT)
469 while ((r = cg_read_subgroup(d, &fn)) > 0) {
470 _cleanup_free_ char *p = NULL;
472 p = strjoin(pfrom, "/", fn, NULL);
481 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
482 if (r != 0 && ret >= 0)
486 if (r < 0 && ret >= 0)
490 r = cg_rmdir(cfrom, pfrom, true);
491 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
498 static const char *normalize_controller(const char *controller) {
500 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
502 else if (startswith(controller, "name="))
503 return controller + 5;
508 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
511 if (!(controller || path))
516 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
518 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
520 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
522 t = strjoin("/sys/fs/cgroup/", controller, NULL);
525 t = strjoin(path, "/", suffix, NULL);
533 path_kill_slashes(t);
539 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
541 static __thread bool good = false;
545 if (_unlikely_(!good)) {
548 r = path_is_mount_point("/sys/fs/cgroup", false);
550 return r < 0 ? r : -ENOENT;
552 /* Cache this to save a few stat()s */
556 p = controller ? normalize_controller(controller) : NULL;
557 return join_path(p, path, suffix, fs);
560 static int check(const char *p) {
565 /* Check if this controller actually really exists */
566 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
567 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
568 if (access(cc, F_OK) < 0)
574 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
581 if (isempty(controller))
584 /* Normalize the controller syntax */
585 p = normalize_controller(controller);
587 /* Check if this controller actually really exists */
592 return join_path(p, path, suffix, fs);
595 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
599 if (typeflag != FTW_DP)
602 if (ftwbuf->level < 1)
605 p = strappend(path, "/tasks");
611 is_sticky = file_is_priv_sticky(p) > 0;
621 int cg_trim(const char *controller, const char *path, bool delete_root) {
628 r = cg_get_path(controller, path, NULL, &fs);
633 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
634 r = errno ? -errno : -EIO;
640 p = strappend(fs, "/tasks");
646 is_sticky = file_is_priv_sticky(p) > 0;
650 if (rmdir(fs) < 0 && errno != ENOENT) {
661 int cg_delete(const char *controller, const char *path) {
668 if ((r = path_get_parent(path, &parent)) < 0)
671 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
674 return r == -ENOENT ? 0 : r;
677 int cg_attach(const char *controller, const char *path, pid_t pid) {
686 r = cg_get_path_and_check(controller, path, "tasks", &fs);
693 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
696 r = write_one_line_file(fs, c);
702 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
709 if (mode != (mode_t) -1)
712 r = cg_get_path(controller, path, NULL, &fs);
716 r = chmod_and_chown(fs, mode, uid, gid);
722 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
729 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
732 if (mode != (mode_t) -1)
735 r = cg_get_path(controller, path, "tasks", &fs);
739 if (sticky >= 0 && mode != (mode_t) -1)
740 /* Both mode and sticky param are passed */
741 mode |= (sticky ? S_ISVTX : 0);
742 else if ((sticky >= 0 && mode == (mode_t) -1) ||
743 (mode != (mode_t) -1 && sticky < 0)) {
746 /* Only one param is passed, hence read the current
747 * mode from the file itself */
755 if (mode == (mode_t) -1)
756 /* No mode set, we just shall set the sticky bit */
757 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
759 /* Only mode set, leave sticky bit untouched */
760 mode = (st.st_mode & ~0777) | mode;
763 r = chmod_and_chown(fs, mode, uid, gid);
769 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
783 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
790 return errno == ENOENT ? -ESRCH : -errno;
792 cs = strlen(controller);
799 if (!(fgets(line, sizeof(line), f))) {
803 r = errno ? -errno : -EIO;
809 if (!(l = strchr(line, ':')))
813 if (strncmp(l, controller, cs) != 0)
819 if (!(p = strdup(l + cs + 1))) {
837 int cg_install_release_agent(const char *controller, const char *agent) {
838 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
844 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
847 if ((r = read_one_line_file(fs, &contents)) < 0)
850 sc = strstrip(contents);
853 if (asprintf(&line, "%s\n", agent) < 0) {
858 if ((r = write_one_line_file(fs, line)) < 0)
861 } else if (!streq(sc, agent)) {
868 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
873 if ((r = read_one_line_file(fs, &contents)) < 0)
876 sc = strstrip(contents);
878 if (streq(sc, "0")) {
879 if ((r = write_one_line_file(fs, "1\n")) < 0)
883 } else if (!streq(sc, "1")) {
897 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
898 pid_t pid = 0, self_pid;
905 r = cg_enumerate_tasks(controller, path, &f);
907 return r == -ENOENT ? 1 : r;
911 while ((r = cg_read_pid(f, &pid)) > 0) {
913 if (ignore_self && pid == self_pid)
928 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
930 _cleanup_free_ char *controller = NULL, *path = NULL;
934 r = cg_split_spec(spec, &controller, &path);
938 return cg_is_empty(controller, path, ignore_self);
941 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
948 r = cg_is_empty(controller, path, ignore_self);
952 r = cg_enumerate_subgroups(controller, path, &d);
954 return r == -ENOENT ? 1 : r;
956 while ((r = cg_read_subgroup(d, &fn)) > 0) {
959 r = asprintf(&p, "%s/%s", path, fn);
967 r = cg_is_empty_recursive(controller, p, ignore_self);
985 int cg_split_spec(const char *spec, char **controller, char **path) {
987 char *t = NULL, *u = NULL;
1007 e = strchr(spec, ':');
1009 if (strchr(spec, '/') || spec[0] == 0)
1026 if (e[1] != '/' || e == spec || memchr(spec, '/', e-spec))
1030 t = strndup(spec, e-spec);
1053 int cg_join_spec(const char *controller, const char *path, char **spec) {
1057 if (!path_is_absolute(path) ||
1058 controller[0] == 0 ||
1059 strchr(controller, ':') ||
1060 strchr(controller, '/'))
1063 if (asprintf(spec, "%s:%s", controller, path) < 0)
1069 int cg_fix_path(const char *path, char **result) {
1076 /* First check if it already is a filesystem path */
1077 if (path_startswith(path, "/sys/fs/cgroup") &&
1078 access(path, F_OK) >= 0) {
1088 /* Otherwise treat it as cg spec */
1089 r = cg_split_spec(path, &c, &p);
1093 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1100 int cg_get_user_path(char **path) {
1105 /* Figure out the place to put user cgroups below. We use the
1106 * same as PID 1 has but with the "/system" suffix replaced by "/user" */
1109 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1110 p = strdup("/user");
1112 if (endswith(root, "/system"))
1113 root[strlen(root) - 7] = 0;
1114 else if (streq(root, "/"))
1117 p = strappend(root, "/user");
1128 char **cg_shorten_controllers(char **controllers) {
1131 controllers = strv_uniq(controllers);
1136 for (f = controllers, t = controllers; *f; f++) {
1140 if (streq(*f, "systemd") || streq(*f, SYSTEMD_CGROUP_CONTROLLER)) {
1145 p = normalize_controller(*f);
1149 log_debug("Controller %s is not available, removing from controllers list.", *f);
1161 int cg_pid_get_cgroup(pid_t pid, char **root, char **cgroup) {
1162 char *cg_process, *cg_init, *p;
1170 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1174 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &cg_init);
1180 if (endswith(cg_init, "/system"))
1181 cg_init[strlen(cg_init)-7] = 0;
1182 else if (streq(cg_init, "/"))
1185 if (startswith(cg_process, cg_init))
1186 p = cg_process + strlen(cg_init);
1205 cg_process[p-cg_process] = 0;
1213 int cg_pid_get_unit(pid_t pid, char **unit) {
1215 char *cgroup, *p, *at, *b;
1221 r = cg_pid_get_cgroup(pid, NULL, &cgroup);
1225 if (!startswith(cgroup, "/system/")) {
1231 k = strcspn(p, "/");
1233 at = memchr(p, '@', k);
1234 if (at && at[1] == '.') {
1237 /* This is a templated service */
1243 j = strcspn(p+k+1, "/");
1245 b = malloc(k + j + 1);
1248 memcpy(b, p, at - p + 1);
1249 memcpy(b + (at - p) + 1, p + k + 1, j);
1250 memcpy(b + (at - p) + 1 + j, at + 1, k - (at - p) - 1);