-/*-*- Mode: C; c-basic-offset: 8 -*-*/
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
#include <string.h>
#include <stdlib.h>
#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <ftw.h>
#include "cgroup-util.h"
#include "log.h"
return 0;
}
-int cg_rmdir(const char *controller, const char *path) {
+/* Remove the cgroup directory that cg_get_path() resolves for
+ * controller/path.
+ *
+ * If honour_sticky is true and file_is_priv_sticky() returns a positive
+ * value for the group's "tasks" file, the directory is left in place and
+ * 0 is returned. A zero or negative result from that check does not
+ * prevent the removal attempt.
+ *
+ * Returns 0 on success, when the directory was deliberately kept, or
+ * when rmdir() failed with ENOENT. Otherwise returns a negative
+ * errno-style value: the error from cg_get_path(), -ENOMEM if the
+ * "tasks" path could not be allocated, or the rmdir() error. */
+int cg_rmdir(const char *controller, const char *path, bool honour_sticky) {
         char *p;
         int r;
-        if ((r = cg_get_path(controller, path, NULL, &p)) < 0)
+        r = cg_get_path(controller, path, NULL, &p);
+        if (r < 0)
                 return r;
+        if (honour_sticky) {
+                char *tasks;
+
+                /* If the sticky bit is set don't remove the directory */
+
+                tasks = strappend(p, "/tasks");
+                if (!tasks) {
+                        free(p);
+                        return -ENOMEM;
+                }
+
+                r = file_is_priv_sticky(tasks);
+                free(tasks);
+
+                if (r > 0) {
+                        free(p);
+                        return 0;
+                }
+        }
+
         r = rmdir(p);
+        /* Latch the error now: free() is not guaranteed to leave errno
+         * untouched on every libc, so errno must not be read after it. */
+        if (r < 0)
+                r = -errno;
         free(p);
-        return r < 0 ? -errno : 0;
+        /* A directory that is already gone counts as success */
+        return r == -ENOENT ? 0 : r;
 }
-int cg_kill(const char *controller, const char *path, int sig, bool ignore_self) {
+int cg_kill(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, Set *s) {
bool done = false;
- Set *s;
int r, ret = 0;
pid_t my_pid;
FILE *f = NULL;
+ Set *allocated_set = NULL;
assert(controller);
assert(path);
* is repeated until no further processes are added to the
* tasks list, to properly handle forking processes */
- if (!(s = set_new(trivial_hash_func, trivial_compare_func)))
- return -ENOMEM;
+ if (!s)
+ if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
+ return -ENOMEM;
my_pid = getpid();
do {
- pid_t pid;
+ pid_t pid = 0;
done = true;
if ((r = cg_enumerate_processes(controller, path, &f)) < 0) {
- if (ret >= 0)
+ if (ret >= 0 && r != -ENOENT)
ret = r;
goto finish;
/* If we haven't killed this process yet, kill
* it */
- if (kill(pid, sig) < 0 && errno != ESRCH) {
- if (ret >= 0)
+ if (kill(pid, sig) < 0) {
+ if (ret >= 0 && errno != ESRCH)
ret = -errno;
- } else if (ret == 0)
+ } else if (ret == 0) {
+
+ if (sigcont)
+ kill(pid, SIGCONT);
+
ret = 1;
+ }
done = false;
} while (!done);
finish:
- set_free(s);
+ if (allocated_set)
+ set_free(allocated_set);
if (f)
fclose(f);
return ret;
}
-int cg_kill_recursive(const char *controller, const char *path, int sig, bool ignore_self, bool rem) {
+int cg_kill_recursive(const char *controller, const char *path, int sig, bool sigcont, bool ignore_self, bool rem, Set *s) {
int r, ret = 0;
DIR *d = NULL;
char *fn;
+ Set *allocated_set = NULL;
assert(path);
assert(controller);
assert(sig >= 0);
- ret = cg_kill(controller, path, sig, ignore_self);
+ if (!s)
+ if (!(s = allocated_set = set_new(trivial_hash_func, trivial_compare_func)))
+ return -ENOMEM;
+
+ ret = cg_kill(controller, path, sig, sigcont, ignore_self, s);
if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0) {
- if (ret >= 0)
+ if (ret >= 0 && r != -ENOENT)
ret = r;
goto finish;
goto finish;
}
- r = cg_kill_recursive(controller, p, sig, ignore_self, rem);
+ r = cg_kill_recursive(controller, p, sig, sigcont, ignore_self, rem, s);
free(p);
if (r != 0 && ret >= 0)
ret = r;
if (rem)
- if ((r = cg_rmdir(controller, path)) < 0) {
- if (ret >= 0)
+ if ((r = cg_rmdir(controller, path, true)) < 0) {
+ if (ret >= 0 &&
+ r != -ENOENT &&
+ r != -EBUSY)
ret = r;
}
if (d)
closedir(d);
+ if (allocated_set)
+ set_free(allocated_set);
+
return ret;
}
assert(controller);
/* This safely kills all processes; first it sends a SIGTERM,
- * then checks 8 times after 50ms whether the group is
- * now empty, and finally kills everything that is left with
- * SIGKILL */
+ * then checks 8 times after 200ms whether the group is now
+ * empty, then kills everything that is left with SIGKILL and
+ * finally checks 5 times after 200ms each whether the group
+ * is finally empty. */
- for (i = 0; i < 10; i++) {
+ for (i = 0; i < 15; i++) {
int sig, r;
if (i <= 0)
sig = SIGTERM;
- else if (i >= 9)
+ else if (i == 9)
sig = SIGKILL;
else
sig = 0;
- if ((r = cg_kill_recursive(controller, path, sig, true, rem)) <= 0)
+ if ((r = cg_kill_recursive(controller, path, sig, true, true, rem, NULL)) <= 0)
return r;
- usleep(50 * USEC_PER_MSEC);
+ usleep(200 * USEC_PER_MSEC);
}
return 0;
my_pid = getpid();
do {
- pid_t pid;
+ pid_t pid = 0;
done = true;
if ((r = cg_enumerate_tasks(controller, from, &f)) < 0) {
- if (ret >= 0)
+ if (ret >= 0 && r != -ENOENT)
ret = r;
goto finish;
continue;
if ((r = cg_attach(controller, to, pid)) < 0) {
- if (ret >= 0)
+ if (ret >= 0 && r != -ESRCH)
ret = r;
} else if (ret == 0)
ret = 1;
ret = cg_migrate(controller, from, to, ignore_self);
if ((r = cg_enumerate_subgroups(controller, from, &d)) < 0) {
- if (ret >= 0)
+ if (ret >= 0 && r != -ENOENT)
ret = r;
goto finish;
}
ret = r;
if (rem)
- if ((r = cg_rmdir(controller, from)) < 0) {
- if (ret >= 0)
+ if ((r = cg_rmdir(controller, from, true)) < 0) {
+ if (ret >= 0 &&
+ r != -ENOENT &&
+ r != -EBUSY)
ret = r;
}
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
const char *p;
- char *mp;
- int r;
+ char *t;
+ static __thread bool good = false;
assert(controller);
assert(fs);
+ if (_unlikely_(!good)) {
+ int r;
+
+ r = path_is_mount_point("/sys/fs/cgroup", false);
+ if (r <= 0)
+ return r < 0 ? r : -ENOENT;
+
+ /* Cache this to save a few stat()s */
+ good = true;
+ }
+
+ if (isempty(controller))
+ return -EINVAL;
+
/* This is a very minimal lookup from controller names to
* paths. Since we have mounted most hierarchies ourselves
* should be kinda safe, but eventually we might want to
else
p = controller;
- if (asprintf(&mp, "/cgroup/%s", p) < 0)
- return -ENOMEM;
-
- if ((r = path_is_mount_point(mp)) <= 0) {
- free(mp);
- return r < 0 ? r : -ENOENT;
- }
-
if (path && suffix)
- r = asprintf(fs, "%s/%s/%s", mp, path, suffix);
+ t = join("/sys/fs/cgroup/", p, "/", path, "/", suffix, NULL);
else if (path)
- r = asprintf(fs, "%s/%s", mp, path);
+ t = join("/sys/fs/cgroup/", p, "/", path, NULL);
else if (suffix)
- r = asprintf(fs, "%s/%s", mp, suffix);
- else {
- path_kill_slashes(mp);
- *fs = mp;
+ t = join("/sys/fs/cgroup/", p, "/", suffix, NULL);
+ else
+ t = join("/sys/fs/cgroup/", p, NULL);
+
+ if (!t)
+ return -ENOMEM;
+
+ path_kill_slashes(t);
+
+ *fs = t;
+ return 0;
+}
+
+/* nftw() callback used by cg_trim() to remove empty sub-cgroups.
+ *
+ * Only entries reported as FTW_DP (directories, visited after their
+ * children) at level 1 or deeper are considered; the walk's starting
+ * directory (level 0) is never removed here. A directory is skipped when
+ * file_is_priv_sticky() returns a positive value for its "tasks" file.
+ * The result of rmdir() is intentionally ignored (best effort).
+ *
+ * Returns 0 to continue the walk. On allocation failure it sets errno to
+ * ENOMEM and returns -1, which nftw() hands back to its caller; the
+ * caller tests for a negative nftw() result, so the failure has to be
+ * reported as a negative value to be noticed. */
+static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
+        char *p;
+        bool is_sticky;
+
+        if (typeflag != FTW_DP)
                 return 0;
+
+        if (ftwbuf->level < 1)
+                return 0;
+
+        p = strappend(path, "/tasks");
+        if (!p) {
+                errno = ENOMEM;
+                return -1;
         }
-        free(mp);
-        path_kill_slashes(*fs);
-        return r < 0 ? -ENOMEM : 0;
+        is_sticky = file_is_priv_sticky(p) > 0;
+        free(p);
+
+        if (is_sticky)
+                return 0;
+
+        rmdir(path);
+        return 0;
 }
int cg_trim(const char *controller, const char *path, bool delete_root) {
char *fs;
- int r;
+ int r = 0;
assert(controller);
assert(path);
- if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
return r;
- r = rm_rf(fs, true, delete_root);
+ errno = 0;
+ if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) < 0)
+ r = errno ? -errno : -EIO;
+
+ if (delete_root) {
+ bool is_sticky;
+ char *p;
+
+ p = strappend(fs, "/tasks");
+ if (!p) {
+ free(fs);
+ return -ENOMEM;
+ }
+
+ is_sticky = file_is_priv_sticky(p) > 0;
+ free(p);
+
+ if (!is_sticky)
+ if (rmdir(fs) < 0 && errno != ENOENT) {
+ if (r == 0)
+ r = -errno;
+ }
+ }
+
free(fs);
return r;
r = cg_migrate_recursive(controller, path, parent, false, true);
free(parent);
- return r;
+ return r == -ENOENT ? 0 : r;
}
int cg_create(const char *controller, const char *path) {
if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
return r;
- r = mkdir_p(fs, 0755);
+ r = mkdir_parents(fs, 0755);
+
+ if (r >= 0) {
+ if (mkdir(fs, 0755) >= 0)
+ r = 1;
+ else if (errno == EEXIST)
+ r = 0;
+ else
+ r = -errno;
+ }
+
free(fs);
return r;
}
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
- int r;
+ int r, q;
assert(controller);
assert(path);
if ((r = cg_create(controller, path)) < 0)
return r;
- if ((r = cg_attach(controller, path, pid)) < 0)
- return r;
+ if ((q = cg_attach(controller, path, pid)) < 0)
+ return q;
/* This does not remove the cgroup on failure */
assert(controller);
assert(path);
- if ((r = cg_get_path(controller, path, NULL, &fs)) < 0)
+ if (mode != (mode_t) -1)
+ mode &= 0777;
+
+ r = cg_get_path(controller, path, NULL, &fs);
+ if (r < 0)
return r;
r = chmod_and_chown(fs, mode, uid, gid);
return r;
}
-int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid) {
+int cg_set_task_access(const char *controller, const char *path, mode_t mode, uid_t uid, gid_t gid, int sticky) {
char *fs;
int r;
assert(controller);
assert(path);
- if ((r = cg_get_path(controller, path, "tasks", &fs)) < 0)
+ if (mode == (mode_t) -1 && uid == (uid_t) -1 && gid == (gid_t) -1 && sticky < 0)
+ return 0;
+
+ if (mode != (mode_t) -1)
+ mode &= 0666;
+
+ r = cg_get_path(controller, path, "tasks", &fs);
+ if (r < 0)
return r;
+ if (sticky >= 0 && mode != (mode_t) -1)
+ /* Both mode and sticky param are passed */
+ mode |= (sticky ? S_ISVTX : 0);
+ else if ((sticky >= 0 && mode == (mode_t) -1) ||
+ (mode != (mode_t) -1 && sticky < 0)) {
+ struct stat st;
+
+ /* Only one param is passed, hence read the current
+ * mode from the file itself */
+
+ r = lstat(fs, &st);
+ if (r < 0) {
+ free(fs);
+ return -errno;
+ }
+
+ if (mode == (mode_t) -1)
+ /* No mode set, we just shall set the sticky bit */
+ mode = (st.st_mode & ~S_ISVTX) | (sticky ? S_ISVTX : 0);
+ else
+ /* Only mode set, leave sticky bit untouched */
+ mode = (st.st_mode & ~0777) | mode;
+ }
+
r = chmod_and_chown(fs, mode, uid, gid);
free(fs);
f = fopen(fs, "re");
free(fs);
+ if (!f)
+ return errno == ENOENT ? -ESRCH : -errno;
+
cs = strlen(controller);
while (!feof(f)) {
free(fs);
fs = NULL;
- if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0) {
- r = -ENOMEM;
+ if ((r = cg_get_path(controller, NULL, "notify_on_release", &fs)) < 0)
goto finish;
- }
free(contents);
contents = NULL;
}
int cg_is_empty(const char *controller, const char *path, bool ignore_self) {
- pid_t pid;
+ pid_t pid = 0;
int r;
- FILE *f;
+ FILE *f = NULL;
bool found = false;
assert(controller);
assert(path);
if ((r = cg_enumerate_tasks(controller, path, &f)) < 0)
- return r;
+ return r == -ENOENT ? 1 : r;
while ((r = cg_read_pid(f, &pid)) > 0) {
return r;
if ((r = cg_enumerate_subgroups(controller, path, &d)) < 0)
- return r;
+ return r == -ENOENT ? 1 : r;
while ((r = cg_read_subgroup(d, &fn)) > 0) {
char *p = NULL;
/* First check if it already is a filesystem path */
if (path_is_absolute(path) &&
- path_startswith(path, "/cgroup") &&
+ path_startswith(path, "/sys/fs/cgroup") &&
access(path, F_OK) >= 0) {
if (!(t = strdup(path)))
return r;
}
+
+/* Compute the cgroup path below which user cgroups are placed and store
+ * a newly allocated copy in *path (to be freed by the caller).
+ *
+ * The path is derived from the cgroup of PID 1 in the
+ * SYSTEMD_CGROUP_CONTROLLER hierarchy: a trailing "/system" is cut off
+ * (a bare "/" becomes the empty string) and "/user" is appended. If the
+ * cgroup of PID 1 cannot be determined, plain "/user" is used.
+ *
+ * Returns 0 on success, -ENOMEM if the result could not be allocated. */
+int cg_get_user_path(char **path) {
+        char *pid1_group, *result;
+
+        assert(path);
+
+        if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &pid1_group) >= 0) {
+
+                /* Strip the "/system" suffix, or reduce the root
+                 * group "/" to an empty prefix, before appending */
+                if (endswith(pid1_group, "/system"))
+                        pid1_group[strlen(pid1_group) - strlen("/system")] = 0;
+                else if (streq(pid1_group, "/"))
+                        pid1_group[0] = 0;
+
+                result = strappend(pid1_group, "/user");
+                free(pid1_group);
+        } else
+                /* No information about PID 1: fall back to the default */
+                result = strdup("/user");
+
+        if (!result)
+                return -ENOMEM;
+
+        *path = result;
+        return 0;
+}