1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
39 #include "unit-name.h"
41 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
49 r = cg_get_path(controller, path, "cgroup.procs", &fs);
63 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
71 r = cg_get_path(controller, path, "tasks", &fs);
85 int cg_read_pid(FILE *f, pid_t *_pid) {
88 /* Note that the cgroup.procs might contain duplicates! See
89 * cgroups.txt for details. */
92 if (fscanf(f, "%lu", &ul) != 1) {
97 return errno ? -errno : -EIO;
107 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
115 /* This is not recursive! */
117 r = cg_get_path(controller, path, NULL, &fs);
131 int cg_read_subgroup(DIR *d, char **fn) {
137 while ((de = readdir(d))) {
140 if (de->d_type != DT_DIR)
143 if (streq(de->d_name, ".") ||
144 streq(de->d_name, ".."))
147 if (!(b = strdup(de->d_name)))
160 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
164 r = cg_get_path(controller, path, NULL, &p);
171 /* If the sticky bit is set don't remove the directory */
173 tasks = strappend(p, "/tasks");
179 r = file_is_priv_sticky(tasks);
191 return (r < 0 && errno != ENOENT) ? -errno : 0;
194 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
199 Set *allocated_set = NULL;
205 /* This goes through the cgroup's process list and kills every
206 * process on it. This is repeated until no further processes are
207 * added to the list, to properly handle forking processes */
210 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
219 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
220 if (ret >= 0 && r != -ENOENT)
226 while ((r = cg_read_pid(f, &pid)) > 0) {
228 if (pid == my_pid && ignore_self)
231 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
234 /* If we haven't killed this process yet, kill
236 if (kill(pid, sig) < 0) {
237 if (ret >= 0 && errno != ESRCH)
239 } else if (ret == 0) {
249 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
267 /* To avoid racing against processes which fork
268 * quicker than we can kill them we repeat this until
269 * no new pids need to be killed. */
275 set_free(allocated_set);
283 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
287 Set *allocated_set = NULL;
294 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
297 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
299 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
300 if (ret >= 0 && r != -ENOENT)
306 while ((r = cg_read_subgroup(d, &fn)) > 0) {
309 r = asprintf(&p, "%s/%s", path, fn);
319 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
322 if (r != 0 && ret >= 0)
326 if (r < 0 && ret >= 0)
330 if ((r = cg_rmdir(controller, path, true)) < 0) {
342 set_free(allocated_set);
347 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
353 /* This safely kills all processes; first it sends a SIGTERM,
354 * then checks 8 times after 200ms whether the group is now
355 * empty, then kills everything that is left with SIGKILL and
356 * finally checks 5 times after 200ms each whether the group
357 * is finally empty. */
359 for (i = 0; i < 15; i++) {
369 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
372 usleep(200 * USEC_PER_MSEC);
378 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
380 _cleanup_set_free_ Set *s = NULL;
383 _cleanup_fclose_ FILE *f = NULL;
390 s = set_new(trivial_hash_func, trivial_compare_func);
400 r = cg_enumerate_tasks(cfrom, pfrom, &f);
402 if (ret >= 0 && r != -ENOENT)
408 while ((r = cg_read_pid(f, &pid)) > 0) {
410 /* This might do weird stuff if we aren't a
411 * single-threaded program. However, we
412 * luckily know we are single-threaded. */
413 if (pid == my_pid && ignore_self)
416 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
419 r = cg_attach(cto, pto, pid);
421 if (ret >= 0 && r != -ESRCH)
428 r = set_put(s, LONG_TO_PTR(pid));
451 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
453 _cleanup_closedir_ DIR *d = NULL;
461 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
463 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
465 if (ret >= 0 && r != -ENOENT)
470 while ((r = cg_read_subgroup(d, &fn)) > 0) {
471 _cleanup_free_ char *p = NULL;
473 p = strjoin(pfrom, "/", fn, NULL);
482 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
483 if (r != 0 && ret >= 0)
487 if (r < 0 && ret >= 0)
491 r = cg_rmdir(cfrom, pfrom, true);
492 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
499 static const char *normalize_controller(const char *controller) {
501 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
503 else if (startswith(controller, "name="))
504 return controller + 5;
509 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
512 if (!(controller || path))
517 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
519 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
521 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
523 t = strjoin("/sys/fs/cgroup/", controller, NULL);
526 t = strjoin(path, "/", suffix, NULL);
534 path_kill_slashes(t);
540 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
542 static __thread bool good = false;
546 if (_unlikely_(!good)) {
549 r = path_is_mount_point("/sys/fs/cgroup", false);
551 return r < 0 ? r : -ENOENT;
553 /* Cache this to save a few stat()s */
557 p = controller ? normalize_controller(controller) : NULL;
558 return join_path(p, path, suffix, fs);
561 static int check(const char *p) {
566 /* Check whether this controller actually exists */
567 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
568 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
569 if (access(cc, F_OK) < 0)
575 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
582 if (isempty(controller))
585 /* Normalize the controller syntax */
586 p = normalize_controller(controller);
588 /* Check whether this controller actually exists */
593 return join_path(p, path, suffix, fs);
596 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
600 if (typeflag != FTW_DP)
603 if (ftwbuf->level < 1)
606 p = strappend(path, "/tasks");
612 is_sticky = file_is_priv_sticky(p) > 0;
622 int cg_trim(const char *controller, const char *path, bool delete_root) {
629 r = cg_get_path(controller, path, NULL, &fs);
634 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
635 r = errno ? -errno : -EIO;
641 p = strappend(fs, "/tasks");
647 is_sticky = file_is_priv_sticky(p) > 0;
651 if (rmdir(fs) < 0 && errno != ENOENT) {
662 int cg_delete(const char *controller, const char *path) {
669 if ((r = path_get_parent(path, &parent)) < 0)
672 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
675 return r == -ENOENT ? 0 : r;
678 int cg_attach(const char *controller, const char *path, pid_t pid) {
687 r = cg_get_path_and_check(controller, path, "tasks", &fs);
694 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
697 r = write_one_line_file(fs, c);
703 int cg_set_group_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
710 if (mode != (mode_t) -1)
713 r = cg_get_path(controller, path, NULL, &fs);
717 r = chmod_and_chown(fs, mode, uid, gid);
723 int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
730 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
733 if (mode != (mode_t) -1)
736 r = cg_get_path(controller, path, "tasks", &fs);
740 if (sticky >= 0 && mode != (mode_t) -1)
741 /* Both mode and sticky param are passed */
742 mode |= (sticky ? S_ISVTX : 0);
743 else if ((sticky >= 0 && mode == (mode_t) -1) ||
744 (mode != (mode_t) -1 && sticky < 0)) {
747 /* Only one param is passed, hence read the current
748 * mode from the file itself */
756 if (mode == (mode_t) -1)
757 /* No mode set, so we shall only set the sticky bit */
758 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
760 /* Only mode set, leave sticky bit untouched */
761 mode = (st.st_mode & ~0777) | mode;
764 r = chmod_and_chown(fs, mode, uid, gid);
770 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
784 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
791 return errno == ENOENT ? -ESRCH : -errno;
793 cs = strlen(controller);
800 if (!(fgets(line, sizeof(line), f))) {
804 r = errno ? -errno : -EIO;
810 if (!(l = strchr(line, ':')))
814 if (strncmp(l, controller, cs) != 0)
820 if (!(p = strdup(l + cs + 1))) {
838 int cg_install_release_agent(const char *controller, const char *agent) {
839 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
845 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
848 if ((r = read_one_line_file(fs, &contents)) < 0)
851 sc = strstrip(contents);
854 if (asprintf(&line, "%s\n", agent) < 0) {
859 if ((r = write_one_line_file(fs, line)) < 0)
862 } else if (!streq(sc, agent)) {
869 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
874 if ((r = read_one_line_file(fs, &contents)) < 0)
877 sc = strstrip(contents);
879 if (streq(sc, "0")) {
880 if ((r = write_one_line_file(fs, "1\n")) < 0)
884 } else if (!streq(sc, "1")) {
898 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
899 pid_t pid = 0, self_pid;
906 r = cg_enumerate_tasks(controller, path, &f);
908 return r == -ENOENT ? 1 : r;
912 while ((r = cg_read_pid(f, &pid)) > 0) {
914 if (ignore_self && pid == self_pid)
929 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
931 _cleanup_free_ char *controller = NULL, *path = NULL;
935 r = cg_split_spec(spec, &controller, &path);
939 return cg_is_empty(controller, path, ignore_self);
942 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
949 r = cg_is_empty(controller, path, ignore_self);
953 r = cg_enumerate_subgroups(controller, path, &d);
955 return r == -ENOENT ? 1 : r;
957 while ((r = cg_read_subgroup(d, &fn)) > 0) {
960 r = asprintf(&p, "%s/%s", path, fn);
968 r = cg_is_empty_recursive(controller, p, ignore_self);
986 int cg_split_spec(const char *spec, char **controller, char **path) {
988 char *t = NULL, *u = NULL;
993 if (!path_is_safe(spec))
1010 e = strchr(spec, ':');
1012 if (!filename_is_safe(spec))
1029 t = strndup(spec, e-spec);
1032 if (!filename_is_safe(t)) {
1042 if (!path_is_safe(u)) {
1061 int cg_join_spec(const char *controller, const char *path, char **spec) {
1065 if (!path_is_absolute(path) ||
1066 controller[0] == 0 ||
1067 strchr(controller, ':') ||
1068 strchr(controller, '/'))
1071 if (asprintf(spec, "%s:%s", controller, path) < 0)
1077 int cg_fix_path(const char *path, char **result) {
1084 /* First check if it already is a filesystem path */
1085 if (path_startswith(path, "/sys/fs/cgroup") &&
1086 access(path, F_OK) >= 0) {
1096 /* Otherwise treat it as cg spec */
1097 r = cg_split_spec(path, &c, &p);
1101 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
1108 int cg_get_user_path(char **path) {
1113 /* Figure out the place to put user cgroups below. We use the
1114 * same as PID 1 has but with the "/system" suffix replaced by
1117 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1118 p = strdup("/user");
1120 if (endswith(root, "/system"))
1121 root[strlen(root) - 7] = 0;
1122 else if (streq(root, "/"))
1125 p = strappend(root, "/user");
1136 char **cg_shorten_controllers(char **controllers) {
1139 controllers = strv_uniq(controllers);
1144 for (f = controllers, t = controllers; *f; f++) {
1148 if (streq(*f, "systemd") || streq(*f, SYSTEMD_CGROUP_CONTROLLER)) {
1153 p = normalize_controller(*f);
1157 log_debug("Controller %s is not available, removing from controllers list.", *f);
1169 int cg_pid_get_cgroup(pid_t pid, char **root, char **cgroup) {
1170 char *cg_process, *cg_init, *p;
1178 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1182 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &cg_init);
1188 if (endswith(cg_init, "/system"))
1189 cg_init[strlen(cg_init)-7] = 0;
1190 else if (streq(cg_init, "/"))
1193 if (startswith(cg_process, cg_init))
1194 p = cg_process + strlen(cg_init);
1213 cg_process[p-cg_process] = 0;
1221 static int instance_unit_from_cgroup(char *cgroup){
1226 at = strstr(cgroup, "@.");
1228 /* This is a templated service */
1231 char _cleanup_free_ *i2 = NULL, *s = NULL;
1233 i = strchr(at, '/');
1234 if (!i || !i[1]) /* disallow empty instances */
1237 s = strndup(at + 1, i - at - 1);
1249 /* non-static only for testing purposes */
1250 int cgroup_to_unit(char *cgroup, char **unit){
1257 r = instance_unit_from_cgroup(cgroup);
1261 p = strrchr(cgroup, '/');
1264 r = unit_name_is_valid(p + 1, true);
1268 *unit = strdup(p + 1);
1275 static int cg_pid_get(const char *prefix, pid_t pid, char **unit) {
1277 char _cleanup_free_ *cgroup = NULL;
1282 r = cg_pid_get_cgroup(pid, NULL, &cgroup);
1286 if (!startswith(cgroup, prefix))
1289 r = cgroup_to_unit(cgroup, unit);
1293 int cg_pid_get_unit(pid_t pid, char **unit) {
1294 return cg_pid_get("/system/", pid, unit);
1297 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1298 return cg_pid_get("/user/", pid, unit);
1301 int cg_controller_from_attr(const char *attr, char **controller) {
1308 if (!filename_is_safe(attr))
1311 dot = strchr(attr, '.');
1317 c = strndup(attr, dot - attr);
1321 if (!filename_is_safe(c)) {