1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/types.h>
32 #include "cgroup-util.h"
37 #include "path-util.h"
39 #include "unit-name.h"
/* Starts an enumeration of the processes in the cgroup `path` of
 * `controller`. The location of the group's "cgroup.procs" attribute is
 * resolved with cg_get_path().
 *
 * NOTE(review): presumably the attribute is then opened and handed back
 * through the FILE out parameter for use with cg_read_pid() -- confirm
 * against the full body. */
42 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
50 r = cg_get_path(controller, path, "cgroup.procs", &fs);
/* Starts an enumeration of the tasks in the cgroup `path` of
 * `controller`. Same shape as cg_enumerate_processes(), but the attribute
 * resolved with cg_get_path() is "tasks" rather than "cgroup.procs".
 *
 * NOTE(review): presumably the attribute is opened and handed back through
 * the FILE out parameter -- confirm against the full body. */
64 int cg_enumerate_tasks(const char *controller, const char *path, FILE **_f) {
72 r = cg_get_path(controller, path, "tasks", &fs);
/* Reads the next PID from the stream `f`. Callers in this file pass the
 * address of a pid_t and keep looping while the return value is > 0.
 *
 * The number is parsed as an unsigned long with fscanf("%lu"). On the
 * failure path the function can return -errno, or -EIO when errno is not
 * set. */
86 int cg_read_pid(FILE *f, pid_t *_pid) {
89 /* Note that the cgroup.procs might contain duplicates! See
90 * cgroups.txt for details. */
93 if (fscanf(f, "%lu", &ul) != 1) {
98 return errno ? -errno : -EIO;
/* Starts an enumeration of the direct child groups of the cgroup `path`
 * in `controller`. The group's own directory is resolved with
 * cg_get_path() (no attribute suffix). As the comment below says, only
 * one level is covered.
 *
 * NOTE(review): presumably the directory is opened and handed back through
 * the DIR out parameter for use with cg_read_subgroup() -- confirm. */
108 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
116 /* This is not recursive! */
118 r = cg_get_path(controller, path, NULL, &fs);
/* Fetches the next subgroup name from the directory stream `d`. Callers
 * in this file loop while the return value is > 0 and use the string
 * stored through `fn` as a path component.
 *
 * Entries are read with readdir(). Each one is tested for not being a
 * directory (d_type != DT_DIR) and for being "." or ".."; presumably such
 * entries are skipped -- confirm. The name is duplicated with strdup(). */
132 int cg_read_subgroup(DIR *d, char **fn) {
138 while ((de = readdir(d))) {
141 if (de->d_type != DT_DIR)
144 if (streq(de->d_name, ".") ||
145 streq(de->d_name, ".."))
148 if (!(b = strdup(de->d_name)))
/* Removes the directory of the cgroup `path` in `controller`, after
 * resolving it with cg_get_path().
 *
 * The group's "tasks" file is examined with file_is_priv_sticky(); per
 * the comment below, a group whose sticky bit is set is left in place
 * (presumably only when honour_sticky is true -- confirm).
 *
 * The final return statement reports failures as -errno but treats ENOENT
 * as success. */
161 int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
165 r = cg_get_path(controller, path, NULL, &p);
172 /* If the sticky bit is set don't remove the directory */
174 tasks = strappend(p, "/tasks");
180 r = file_is_priv_sticky(tasks);
192 return (r < 0 && errno != ENOENT) ? -errno : 0;
/* Sends signal `sig` to the processes of the cgroup `path` in
 * `controller`.
 *
 * PIDs come from cg_enumerate_processes()/cg_read_pid(). For each PID the
 * code checks whether it is the caller's own PID while `ignore_self` is
 * set, and whether it is already recorded in the set `s`; PIDs that were
 * signalled are recorded with set_put(). The pass is repeated until no new
 * PIDs show up (see the comments in the body).
 *
 * Error accumulation in `ret` special-cases -ENOENT from the enumeration
 * and ESRCH from kill().
 *
 * A set created here with set_new() is released with set_free() at the
 * end; presumably it is only created when the caller passes s == NULL --
 * confirm.
 *
 * NOTE(review): how `sigcont` is used is not shown by the visible lines. */
195 int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
200 Set *allocated_set = NULL;
206 /* This goes through the tasks list and kills them all. This
207 * is repeated until no further processes are added to the
208 * tasks list, to properly handle forking processes */
211 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
220 if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
221 if (ret >= 0 && r != -ENOENT)
227 while ((r = cg_read_pid(f, &pid)) > 0) {
229 if (pid == my_pid && ignore_self)
232 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
235 /* If we haven't killed this process yet, kill
237 if (kill(pid, sig) < 0) {
238 if (ret >= 0 && errno != ESRCH)
240 } else if (ret == 0) {
250 if ((r = set_put(s, LONG_TO_PTR(pid))) < 0) {
268 /* To avoid racing against processes which fork
269 * quicker than we can kill them we repeat this until
270 * no new pids need to be killed. */
276 set_free(allocated_set);
/* Applies cg_kill() to the cgroup `path` and, by recursion, to every
 * subgroup returned by cg_enumerate_subgroups()/cg_read_subgroup(). Child
 * paths are built as "<path>/<name>" with asprintf(). The same set `s` of
 * already handled PIDs is passed down the whole recursion; a set created
 * here with set_new() is released again with set_free().
 *
 * A missing group (-ENOENT from the subgroup enumeration) is special-cased
 * when results are accumulated in `ret`.
 *
 * cg_rmdir() is called on `path` with honour_sticky = true; presumably
 * only when `rem` is set -- confirm. */
284 int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
288 Set *allocated_set = NULL;
295 if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
298 ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
300 if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
301 if (ret >= 0 && r != -ENOENT)
307 while ((r = cg_read_subgroup(d, &fn)) > 0) {
310 r = asprintf(&p, "%s/%s", path, fn);
320 r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
323 if (r != 0 && ret >= 0)
327 if (r < 0 && ret >= 0)
331 if ((r = cg_rmdir(controller, path, true)) < 0) {
343 set_free(allocated_set);
/* Kills everything in the cgroup `path` (and below) and waits for the
 * group to drain, following the schedule described in the comment below.
 *
 * The loop runs at most 15 times. Each round calls cg_kill_recursive()
 * with sigcont = true, ignore_self = true and no caller-provided PID set,
 * then sleeps 200ms. A result <= 0 from cg_kill_recursive() is handled
 * specially; presumably it ends the loop -- confirm.
 *
 * NOTE(review): the selection of `sig` per round is not shown by the
 * visible lines. */
348 int cg_kill_recursive_and_wait(const char *controller, const char *path, bool rem) {
354 /* This safely kills all processes; first it sends a SIGTERM,
355 * then checks 8 times after 200ms whether the group is now
356 * empty, then kills everything that is left with SIGKILL and
357 * finally checks 5 times after 200ms each whether the group
358 * is finally empty. */
360 for (i = 0; i < 15; i++) {
370 if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
373 usleep(200 * USEC_PER_MSEC);
/* Moves the tasks of the cgroup cfrom:pfrom into the cgroup cto:pto.
 *
 * PIDs are read from the source with cg_enumerate_tasks()/cg_read_pid()
 * and attached to the destination with cg_attach(). Each PID is checked
 * against the caller's own PID (when `ignore_self` is set) and against the
 * set `s` of PIDs already handled; handled PIDs are added with set_put().
 *
 * Error accumulation in `ret` special-cases -ENOENT from the enumeration
 * and -ESRCH from cg_attach().
 *
 * The set and the stream are declared with cleanup attributes
 * (_cleanup_set_free_, _cleanup_fclose_). */
379 int cg_migrate(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self) {
381 _cleanup_set_free_ Set *s = NULL;
384 _cleanup_fclose_ FILE *f = NULL;
391 s = set_new(trivial_hash_func, trivial_compare_func);
401 r = cg_enumerate_tasks(cfrom, pfrom, &f);
403 if (ret >= 0 && r != -ENOENT)
409 while ((r = cg_read_pid(f, &pid)) > 0) {
411 /* This might do weird stuff if we aren't a
412 * single-threaded program. However, we
413 * luckily know we are not */
414 if (pid == my_pid && ignore_self)
417 if (set_get(s, LONG_TO_PTR(pid)) == LONG_TO_PTR(pid))
420 r = cg_attach(cto, pto, pid);
422 if (ret >= 0 && r != -ESRCH)
429 r = set_put(s, LONG_TO_PTR(pid));
/* Migrates the tasks of the cgroup cfrom:pfrom and of all its subgroups
 * into the single destination group cto:pto.
 *
 * The group itself is handled with cg_migrate(); then each subgroup found
 * via cg_enumerate_subgroups()/cg_read_subgroup() is processed by
 * recursion. -ENOENT from the subgroup enumeration is special-cased when
 * results are accumulated in `ret`.
 *
 * cg_rmdir() is called on the source group with honour_sticky = true
 * (presumably only when `rem` is set -- confirm); its -ENOENT and -EBUSY
 * results are not treated as errors. */
452 int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto, const char *pto, bool ignore_self, bool rem) {
454 _cleanup_closedir_ DIR *d = NULL;
462 ret = cg_migrate(cfrom, pfrom, cto, pto, ignore_self);
464 r = cg_enumerate_subgroups(cfrom, pfrom, &d);
466 if (ret >= 0 && r != -ENOENT)
471 while ((r = cg_read_subgroup(d, &fn)) > 0) {
472 _cleanup_free_ char *p = NULL;
/* Source path of the child group: "<pfrom>/<fn>". */
474 p = strjoin(pfrom, "/", fn, NULL);
/* Only the source path descends; the destination stays cto:pto. */
483 r = cg_migrate_recursive(cfrom, p, cto, pto, ignore_self, rem);
484 if (r != 0 && ret >= 0)
488 if (r < 0 && ret >= 0)
492 r = cg_rmdir(cfrom, pfrom, true);
493 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
/* Maps a controller name as given by callers to the form used when
 * building paths.
 *
 * SYSTEMD_CGROUP_CONTROLLER is special-cased (the value returned for it
 * is not shown by the visible lines). A name starting with "name=" is
 * returned with that five character prefix skipped; the result points
 * into the caller's string, nothing is allocated there. */
500 static const char *normalize_controller(const char *controller) {
502 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
504 else if (startswith(controller, "name="))
505 return controller + 5;
/* Assembles a filesystem path from an (already normalized) controller
 * name, a cgroup path and an attribute suffix.
 *
 * The case where both `controller` and `path` are NULL is tested first
 * (presumably rejected -- confirm). Depending on which of `path` and
 * `suffix` are given, one of four strjoin() forms rooted at
 * "/sys/fs/cgroup/<controller>" is used; a fifth form joins just `path`
 * and `suffix`, presumably for the case without a controller -- confirm.
 *
 * The joined string is cleaned up with path_kill_slashes().
 *
 * NOTE(review): presumably the result is handed back through `fs` and is
 * owned by the caller -- confirm. */
510 static int join_path(const char *controller, const char *path, const char *suffix, char **fs) {
513 if (!(controller || path))
518 t = strjoin("/sys/fs/cgroup/", controller, "/", path, "/", suffix, NULL);
520 t = strjoin("/sys/fs/cgroup/", controller, "/", path, NULL);
522 t = strjoin("/sys/fs/cgroup/", controller, "/", suffix, NULL);
524 t = strjoin("/sys/fs/cgroup/", controller, NULL);
527 t = strjoin(path, "/", suffix, NULL);
535 path_kill_slashes(t);
/* Produces the filesystem path for a controller / cgroup path / attribute
 * suffix triple; the result is delivered through `fs` by join_path().
 *
 * Until the flag `good` is set, each call first checks with
 * path_is_mount_point() that "/sys/fs/cgroup" is a mount point; the error
 * return on that path is the negative result of the check, or -ENOENT.
 * The flag is thread-local (static __thread) and, per the comment below,
 * caches the positive outcome.
 *
 * A non-NULL controller is passed through normalize_controller() before
 * the path is joined; a NULL controller is passed on as NULL. */
541 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
543 static __thread bool good = false;
547 if (_unlikely_(!good)) {
550 r = path_is_mount_point("/sys/fs/cgroup", false);
552 return r < 0 ? r : -ENOENT;
554 /* Cache this to save a few stat()s */
558 p = controller ? normalize_controller(controller) : NULL;
559 return join_path(p, path, suffix, fs);
/* Tests with access(F_OK) whether an entry named `p` exists below
 * "/sys/fs/cgroup/". The outcome of a failed access() is not shown by the
 * visible lines. */
562 static int check(const char *p) {
567 /* Check if this controller actually really exists */
/* sizeof() of the literal already includes the terminating NUL, so the
 * stack buffer holds prefix + p exactly. */
568 cc = alloca(sizeof("/sys/fs/cgroup/") + strlen(p));
/* stpcpy() returns the end of the copied prefix; p is appended there. */
569 strcpy(stpcpy(cc, "/sys/fs/cgroup/"), p);
570 if (access(cc, F_OK) < 0)
/* Variant of cg_get_path() that additionally verifies the controller.
 *
 * An empty or NULL controller is tested for with isempty() (presumably
 * rejected -- confirm). The name is normalized with
 * normalize_controller(), its existence is checked (see the comment
 * below; presumably via check() -- confirm), and the path is then
 * assembled with join_path().
 *
 * NOTE(review): the visible lines contain no mount point test like the
 * one in cg_get_path(). */
576 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
583 if (isempty(controller))
586 /* Normalize the controller syntax */
587 p = normalize_controller(controller);
589 /* Check if this controller actually really exists */
594 return join_path(p, path, suffix, fs);
/* Callback handed to nftw() by cg_trim().
 *
 * Entries are filtered on typeflag FTW_DP (the flag nftw() uses for a
 * directory whose contents have already been visited) and on the walk
 * level: levels below 1, i.e. the starting directory, get separate
 * treatment (presumably both filters return early -- confirm).
 *
 * For the remaining entries the sticky state of "<path>/tasks" is
 * determined with file_is_priv_sticky(); a negative (error) result counts
 * as not sticky.
 *
 * NOTE(review): what is done with the directory afterwards is not shown
 * by the visible lines. */
597 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
601 if (typeflag != FTW_DP)
604 if (ftwbuf->level < 1)
607 p = strappend(path, "/tasks");
613 is_sticky = file_is_priv_sticky(p) > 0;
/* Trims the cgroup tree rooted at `path` in `controller`.
 *
 * The directory is resolved with cg_get_path() and walked with nftw()
 * using trim_cb, at most 64 open descriptors, and the flags FTW_DEPTH,
 * FTW_MOUNT and FTW_PHYS. A failing walk yields -errno, or -EIO when
 * errno is not set.
 *
 * Afterwards the sticky state of "<dir>/tasks" is determined and rmdir()
 * is applied to the top directory itself, with ENOENT not counting as a
 * failure. Presumably this last part depends on `delete_root` and on the
 * sticky state -- confirm. */
623 int cg_trim(const char *controller, const char *path, bool delete_root) {
630 r = cg_get_path(controller, path, NULL, &fs);
635 if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
636 r = errno ? -errno : -EIO;
642 p = strappend(fs, "/tasks");
648 is_sticky = file_is_priv_sticky(p) > 0;
652 if (rmdir(fs) < 0 && errno != ENOENT) {
/* Deletes the cgroup `path` in `controller` by moving its contents to the
 * parent group.
 *
 * The parent path is computed with path_get_parent(). Everything below
 * `path` is migrated into it with cg_migrate_recursive(), within the same
 * controller, with ignore_self = false and rem = true.
 *
 * A result of -ENOENT is reported as success (0). */
663 int cg_delete(const char *controller, const char *path) {
670 if ((r = path_get_parent(path, &parent)) < 0)
673 r = cg_migrate_recursive(controller, path, controller, parent, false, true);
676 return r == -ENOENT ? 0 : r;
/* Attaches process `pid` to the cgroup `path` in `controller`.
 *
 * The group's "tasks" file is resolved with cg_get_path_and_check(). The
 * PID is formatted as a decimal number followed by a newline and written
 * with write_string_file(), whose result is returned.
 *
 * The path string is declared _cleanup_free_.
 *
 * NOTE(review): any special handling of particular pid values (e.g. 0)
 * is not shown by the visible lines. */
679 int cg_attach(const char *controller, const char *path, pid_t pid) {
680 _cleanup_free_ char *fs = NULL;
681 char c[DECIMAL_STR_MAX(pid_t) + 2];
688 r = cg_get_path_and_check(controller, path, "tasks", &fs);
695 snprintf(c, sizeof(c), "%lu\n", (unsigned long) pid);
697 return write_string_file(fs, c);
/* Sets access mode and ownership of a cgroup's directory.
 *
 * The directory of `path` in `controller` is resolved with cg_get_path()
 * and handed to chmod_and_chown() together with `mode`, `uid` and `gid`;
 * the result of that call is returned.
 *
 * A `mode` other than (mode_t) -1 is adjusted before use (the adjustment
 * itself is not shown by the visible lines).
 *
 * NOTE(review): only the first parameter of the signature is visible
 * here; `path`, `mode`, `uid` and `gid` are known from their uses. */
700 int cg_set_group_access(
701 const char *controller,
707 _cleanup_free_ char *fs = NULL;
713 if (mode != (mode_t) -1)
716 r = cg_get_path(controller, path, NULL, &fs);
720 return chmod_and_chown(fs, mode, uid, gid);
/* Sets access mode, sticky bit and ownership of a cgroup's "tasks" file
 * and applies the same mode and ownership to its "cgroup.procs" file.
 *
 * The case where `mode`, `uid` and `gid` are all -1 and `sticky` is
 * negative is tested first (presumably nothing is done then -- confirm).
 *
 * Mode computation:
 *   - `mode` and `sticky` both given: the sticky bit is ORed into `mode`
 *     when `sticky` is non-zero;
 *   - only `sticky` given: the file's current mode is used with the sticky
 *     bit set or cleared accordingly;
 *   - only `mode` given: the low nine permission bits of the current mode
 *     are replaced, leaving the other bits (including sticky) as they are.
 *
 * Both files are resolved with cg_get_path() and changed with
 * chmod_and_chown().
 *
 * NOTE(review): only the first parameter of the signature is visible
 * here; the others are known from their uses. */
723 int cg_set_task_access(
724 const char *controller,
731 _cleanup_free_ char *fs = NULL, *procs = NULL;
737 if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
740 if (mode != (mode_t) -1)
743 r = cg_get_path(controller, path, "tasks", &fs);
747 if (sticky >= 0 && mode != (mode_t) -1)
748 /* Both mode and sticky param are passed */
749 mode |= (sticky ? S_ISVTX : 0);
750 else if ((sticky >= 0 && mode == (mode_t) -1) ||
751 (mode != (mode_t) -1 && sticky < 0)) {
754 /* Only one param is passed, hence read the current
755 * mode from the file itself */
761 if (mode == (mode_t) -1)
762 /* No mode set, we just shall set the sticky bit */
763 mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
765 /* Only mode set, leave sticky bit untouched */
766 mode = (st.st_mode & ~0777) | mode;
769 r = chmod_and_chown(fs, mode, uid, gid);
773 /* Always keep values for "cgroup.procs" in sync with "tasks" */
774 r = cg_get_path(controller, path, "cgroup.procs", &procs);
778 return chmod_and_chown(procs, mode, uid, gid);
/* Looks up the cgroup path of process `pid` for `controller` from the
 * file "/proc/<pid>/cgroup".
 *
 * One error path maps ENOENT to -ESRCH and any other errno to -errno
 * (presumably the failure to open the file -- confirm). The file is read
 * line by line with fgets(); a failed read yields -errno, or -EIO when
 * errno is not set.
 *
 * Each line is searched for its first ':' and compared against
 * `controller` over its full length `cs`. On a match the text starting
 * at l + cs + 1 is duplicated with strdup().
 *
 * NOTE(review): presumably that text is the path field and is handed back
 * through `path`, owned by the caller -- confirm. */
781 int cg_get_by_pid(const char *controller, pid_t pid, char **path) {
795 if (asprintf(&fs, "/proc/%lu/cgroup", (unsigned long) pid) < 0)
802 return errno == ENOENT ? -ESRCH : -errno;
804 cs = strlen(controller);
811 if (!(fgets(line, sizeof(line), f))) {
815 r = errno ? -errno : -EIO;
821 if (!(l = strchr(line, ':')))
825 if (!strneq(l, controller, cs))
831 if (!(p = strdup(l + cs + 1))) {
/* Installs `agent` as the release agent of `controller` and turns on
 * release notification.
 *
 * Step 1: the "release_agent" attribute at the controller's root is read
 * with read_one_line_file() and stripped. In one branch "<agent>\n" is
 * written to it (presumably when it is currently empty -- confirm); a
 * current value that differs from `agent` is handled in a separate
 * branch whose outcome is not shown by the visible lines.
 *
 * Step 2: the "notify_on_release" attribute is read the same way. A value
 * of "0" is replaced by writing "1\n"; a value that is neither "0" nor
 * "1" is handled in a separate branch whose outcome is not shown by the
 * visible lines. */
849 int cg_install_release_agent(const char *controller, const char *agent) {
850 char *fs = NULL, *contents = NULL, *line = NULL, *sc;
856 if ((r = cg_get_path(controller, NULL, "release_agent", &fs)) < 0)
859 if ((r = read_one_line_file(fs, &contents)) < 0)
862 sc = strstrip(contents);
865 if (asprintf(&line, "%s\n", agent) < 0) {
870 r = write_string_file(fs, line);
874 } else if (!streq(sc, agent)) {
881 if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
886 if ((r = read_one_line_file(fs, &contents)) < 0)
889 sc = strstrip(contents);
891 if (streq(sc, "0")) {
892 if ((r = write_string_file(fs, "1\n")) < 0)
896 } else if (!streq(sc, "1")) {
/* Checks whether the cgroup `path` in `controller` contains any tasks.
 *
 * Tasks are enumerated with cg_enumerate_tasks()/cg_read_pid(). If the
 * enumeration fails with -ENOENT the function returns 1; other
 * enumeration errors are returned as they are.
 *
 * Inside the loop each PID is compared with the caller's own PID when
 * `ignore_self` is set (presumably that entry is then not counted --
 * confirm). */
910 int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
911 pid_t pid = 0, self_pid;
918 r = cg_enumerate_tasks(controller, path, &f);
920 return r == -ENOENT ? 1 : r;
924 while ((r = cg_read_pid(f, &pid)) > 0) {
926 if (ignore_self && pid == self_pid)
/* Same check as cg_is_empty(), but the group is given as a single spec
 * string. The spec is split into controller and path with
 * cg_split_spec(); both parts are declared _cleanup_free_. The result of
 * cg_is_empty() on those parts is returned. */
941 int cg_is_empty_by_spec(const char *spec, bool ignore_self) {
943 _cleanup_free_ char *controller = NULL, *path = NULL;
947 r = cg_split_spec(spec, &controller, &path);
951 return cg_is_empty(controller, path, ignore_self);
/* Applies the cg_is_empty() check to the cgroup `path` and, by recursion,
 * to its subgroups.
 *
 * The group itself is checked first. Subgroups are then enumerated with
 * cg_enumerate_subgroups()/cg_read_subgroup(); if that enumeration fails
 * with -ENOENT the function returns 1, other errors are returned as they
 * are. Child paths are built as "<path>/<name>" with asprintf(). */
954 int cg_is_empty_recursive(const char *controller, const char *path, bool ignore_self) {
961 r = cg_is_empty(controller, path, ignore_self);
965 r = cg_enumerate_subgroups(controller, path, &d);
967 return r == -ENOENT ? 1 : r;
969 while ((r = cg_read_subgroup(d, &fn)) > 0) {
972 r = asprintf(&p, "%s/%s", path, fn);
980 r = cg_is_empty_recursive(controller, p, ignore_self);
/* Splits a cgroup spec string into a controller part and a path part.
 *
 * The visible lines show three shapes being handled:
 *   - the spec as a whole is validated with path_is_safe() (presumably
 *     when it is a bare path -- confirm);
 *   - when the search for ':' decides it, the spec as a whole is validated
 *     with filename_is_safe() (presumably a bare controller name, taken
 *     when there is no ':' -- confirm);
 *   - otherwise the text before the ':' is copied with strndup() and
 *     validated with filename_is_safe(), and the second string `u` is
 *     validated with path_is_safe().
 *
 * NOTE(review): presumably the results are handed back through
 * `controller` and `path`, owned by the caller -- confirm. */
998 int cg_split_spec(const char *spec, char **controller, char **path) {
1000 char *t = NULL, *u = NULL;
1005 if (!path_is_safe(spec))
1022 e = strchr(spec, ':');
1024 if (!filename_is_safe(spec))
1041 t = strndup(spec, e-spec);
1044 if (!filename_is_safe(t)) {
1054 if (!path_is_safe(u)) {
/* Builds a spec string of the form "<controller>:<path>" and stores it in
 * *spec (allocated by asprintf()).
 *
 * Before formatting, the arguments are validated: `path` must be
 * absolute and `controller` must be non-empty and contain neither ':' nor
 * '/'. The outcome of a failed validation is not shown by the visible
 * lines. */
1073 int cg_join_spec(const char *controller, const char *path, char **spec) {
1077 if (!path_is_absolute(path) ||
1078 controller[0] == 0 ||
1079 strchr(controller, ':') ||
1080 strchr(controller, '/'))
1083 if (asprintf(spec, "%s:%s", controller, path) < 0)
/* Turns `path`, which may be either a filesystem path or a cgroup spec,
 * into a filesystem path.
 *
 * A string that lies below "/sys/fs/cgroup" and exists (access(F_OK)) is
 * recognized as a filesystem path already. Anything else is split with
 * cg_split_spec() and resolved with cg_get_path(), writing to `result`; a
 * missing controller defaults to SYSTEMD_CGROUP_CONTROLLER and a missing
 * path to "/". */
1089 int cg_fix_path(const char *path, char **result) {
1096 /* First check if it already is a filesystem path */
1097 if (path_startswith(path, "/sys/fs/cgroup") &&
1098 access(path, F_OK) >= 0) {
1108 /* Otherwise treat it as cg spec */
1109 r = cg_split_spec(path, &c, &p);
1113 r = cg_get_path(c ? c : SYSTEMD_CGROUP_CONTROLLER, p ? p : "/", NULL, result);
/* Determines the cgroup path below which user cgroups are placed.
 *
 * The path is derived from the cgroup of PID 1 in
 * SYSTEMD_CGROUP_CONTROLLER, obtained with cg_get_by_pid(). If that lookup
 * fails, the plain string "/user" is used. Otherwise a trailing "/system"
 * (seven characters) is cut off the root, a root of exactly "/" is
 * special-cased, and "/user" is appended.
 *
 * NOTE(review): presumably the result is handed back through `path` and
 * is owned by the caller -- confirm. */
1120 int cg_get_user_path(char **path) {
1125 /* Figure out the place to put user cgroups below. We use the
1126 * same as PID 1 has but with the "/system" suffix replaced by
1129 if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root) < 0)
1130 p = strdup("/user");
1132 if (endswith(root, "/system"))
1133 root[strlen(root) - 7] = 0;
1134 else if (streq(root, "/"))
1137 p = strappend(root, "/user");
/* Cleans up a string list of controller names.
 *
 * Duplicates are dropped with strv_uniq(). The list is then walked with
 * `f`, with `t` starting at the same position (presumably `t` is the
 * write position used to compact the list -- confirm).
 *
 * The names "systemd" and SYSTEMD_CGROUP_CONTROLLER are special-cased;
 * other names are normalized with normalize_controller(). A controller
 * found to be unavailable is reported with log_debug() and, going by that
 * message, removed from the list. */
1148 char **cg_shorten_controllers(char **controllers) {
1151 controllers = strv_uniq(controllers);
1156 for (f = controllers, t = controllers; *f; f++) {
1160 if (streq(*f, "systemd") || streq(*f, SYSTEMD_CGROUP_CONTROLLER)) {
1165 p = normalize_controller(*f);
1169 log_debug("Controller %s is not available, removing from controllers list.", *f);
/* Determines the cgroup of process `pid` in SYSTEMD_CGROUP_CONTROLLER,
 * split relative to the cgroup of PID 1.
 *
 * Both cgroup paths are fetched with cg_get_by_pid(). A trailing
 * "/system" (seven characters) is cut off PID 1's path, and a path of
 * exactly "/" is special-cased. If the process' path starts with the
 * resulting prefix, `p` is set to the remainder after it.
 *
 * Near the end cg_process is terminated at position `p`, leaving only the
 * leading part in that buffer.
 *
 * NOTE(review): how the two parts are assigned to `root` and `cgroup` is
 * not shown by the visible lines; cg_pid_get() in this file passes NULL
 * for `root`. */
1181 int cg_pid_get_cgroup(pid_t pid, char **root, char **cgroup) {
1182 char *cg_process, *cg_init, *p;
1190 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &cg_process);
1194 r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &cg_init);
1200 if (endswith(cg_init, "/system"))
1201 cg_init[strlen(cg_init)-7] = 0;
1202 else if (streq(cg_init, "/"))
1205 if (startswith(cg_process, cg_init))
1206 p = cg_process + strlen(cg_init);
1225 cg_process[p-cg_process] = 0;
/* Helper of cgroup_to_unit() for cgroup paths of templated services.
 *
 * The string is searched for "@."; a match marks a templated service
 * (see the comment below). `i` is the first '/' after that position. A
 * missing '/' or nothing following it means an empty instance, which is
 * disallowed. `s` receives a copy of the text from the '.' up to, but
 * not including, that '/'.
 *
 * NOTE(review): the parameter is not const and cgroup_to_unit() goes on
 * to use the same buffer, so presumably the string is rewritten in place
 * -- confirm against the full body. */
1233 static int instance_unit_from_cgroup(char *cgroup){
1238 at = strstr(cgroup, "@.");
1240 /* This is a templated service */
1243 char _cleanup_free_ *i2 = NULL, *s = NULL;
1245 i = strchr(at, '/');
1246 if (!i || !i[1]) /* disallow empty instances */
1249 s = strndup(at + 1, i - at - 1);
/* Derives a unit name from a cgroup path and stores a newly allocated
 * copy in *unit.
 *
 * The path is first passed to instance_unit_from_cgroup(). The text after
 * the last '/' is then validated with unit_name_is_valid() (second
 * argument true) and duplicated with strdup(). */
1261 /* non-static only for testing purposes */
1262 int cgroup_to_unit(char *cgroup, char **unit){
1269 r = instance_unit_from_cgroup(cgroup);
1273 p = strrchr(cgroup, '/');
1276 r = unit_name_is_valid(p + 1, true);
1280 *unit = strdup(p + 1);
/* Shared implementation of cg_pid_get_unit() and cg_pid_get_user_unit().
 *
 * The cgroup of `pid` is fetched with cg_pid_get_cgroup() (root part not
 * requested) and must start with `prefix`; the outcome when it does not
 * is not shown by the visible lines. The path is then converted with
 * cgroup_to_unit(), which writes to `unit`.
 *
 * The cgroup string is declared _cleanup_free_. */
1287 static int cg_pid_get(const char *prefix, pid_t pid, char **unit) {
1289 char _cleanup_free_ *cgroup = NULL;
1294 r = cg_pid_get_cgroup(pid, NULL, &cgroup);
1298 if (!startswith(cgroup, prefix))
1301 r = cgroup_to_unit(cgroup, unit);
/* Looks up the unit of process `pid` among cgroups whose path starts
 * with "/system/"; thin wrapper around cg_pid_get(). */
1305 int cg_pid_get_unit(pid_t pid, char **unit) {
1306 return cg_pid_get("/system/", pid, unit);
/* Looks up the unit of process `pid` among cgroups whose path starts
 * with "/user/"; thin wrapper around cg_pid_get(). */
1309 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1310 return cg_pid_get("/user/", pid, unit);
1313 int cg_controller_from_attr(const char *attr, char **controller) {
1320 if (!filename_is_safe(attr))
1323 dot = strchr(attr, '.');
1329 c = strndup(attr, dot - attr);
1333 if (!filename_is_safe(c)) {