+#include "path-util.h"
+#include "process-util.h"
+//#include "special.h"
+
+#include "cgroup.h"
+
+#define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC) /* value written to cpu.cfs_period_us; cgroup_context_apply() also scales the per-second CPU quota by it */
+
+// UNNEEDED by elogind
+#if 0
+/* Puts the fields of @c that use a non-zero "not configured" marker into
+ * that state. All other fields are left alone, so the caller has to hand
+ * in memory that is already zero-filled. */
+void cgroup_context_init(CGroupContext *c) {
+        assert(c);
+
+        /* CPU: no explicit shares (regular or startup) and no quota. */
+        c->startup_cpu_shares = c->cpu_shares = CGROUP_CPU_SHARES_INVALID;
+        c->cpu_quota_per_sec_usec = USEC_INFINITY;
+
+        /* Block I/O: no explicit weight (regular or startup). */
+        c->startup_blockio_weight = c->blockio_weight = CGROUP_BLKIO_WEIGHT_INVALID;
+
+        /* Memory and task count: (uint64_t) -1 stands for "no limit set". */
+        c->memory_limit = (uint64_t) -1;
+        c->tasks_max = (uint64_t) -1;
+
+        /* Network class: none. */
+        c->netclass_type = CGROUP_NETCLASS_TYPE_NONE;
+}
+
+/* Unlinks @a from the device_allow list of @c, then releases the entry and
+ * the path string it owns. */
+void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
+        char *owned_path;
+
+        assert(c);
+        assert(a);
+
+        /* Remember the string so the entry itself can be freed first. */
+        owned_path = a->path;
+
+        LIST_REMOVE(device_allow, c->device_allow, a);
+
+        free(a);
+        free(owned_path);
+}
+
+/* Unlinks @w from the blockio_device_weights list of @c, then releases the
+ * entry and the path string it owns. */
+void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
+        char *owned_path;
+
+        assert(c);
+        assert(w);
+
+        /* Remember the string so the entry itself can be freed first. */
+        owned_path = w->path;
+
+        LIST_REMOVE(device_weights, c->blockio_device_weights, w);
+
+        free(w);
+        free(owned_path);
+}
+
+/* Unlinks @b from the blockio_device_bandwidths list of @c, then releases
+ * the entry and the path string it owns. */
+void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
+        char *owned_path;
+
+        assert(c);
+        assert(b);
+
+        /* Remember the string so the entry itself can be freed first. */
+        owned_path = b->path;
+
+        LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
+
+        free(b);
+        free(owned_path);
+}
+
+/* Releases every element of the three lists hanging off @c (per-device
+ * block I/O weights, per-device block I/O bandwidths, allowed devices).
+ * @c itself is not freed. */
+void cgroup_context_done(CGroupContext *c) {
+        CGroupBlockIODeviceBandwidth *bandwidth;
+        CGroupBlockIODeviceWeight *weight;
+        CGroupDeviceAllow *allowed;
+
+        assert(c);
+
+        /* Each free helper unlinks the element it is given, so the list
+         * head is re-read on every pass until it becomes NULL. */
+        while ((weight = c->blockio_device_weights))
+                cgroup_context_free_blockio_device_weight(c, weight);
+
+        while ((bandwidth = c->blockio_device_bandwidths))
+                cgroup_context_free_blockio_device_bandwidth(c, bandwidth);
+
+        while ((allowed = c->device_allow))
+                cgroup_context_free_device_allow(c, allowed);
+}
+
+/* Writes the settings stored in @c to @f as "Key=value" lines.
+ *
+ * @prefix is passed through strempty() and then put in front of every
+ * line. The scalar settings come first, followed by one line per
+ * device_allow entry, per block I/O device weight and per block I/O
+ * device bandwidth. */
+void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
+        CGroupBlockIODeviceBandwidth *b;
+        CGroupBlockIODeviceWeight *w;
+        CGroupDeviceAllow *a;
+        char u[FORMAT_TIMESPAN_MAX];
+
+        assert(c);
+        assert(f);
+
+        prefix = strempty(prefix);
+
+        fprintf(f,
+                "%sCPUAccounting=%s\n"
+                "%sBlockIOAccounting=%s\n"
+                "%sMemoryAccounting=%s\n"
+                "%sTasksAccounting=%s\n"
+                "%sCPUShares=%" PRIu64 "\n"
+                "%sStartupCPUShares=%" PRIu64 "\n"
+                "%sCPUQuotaPerSecSec=%s\n"
+                "%sBlockIOWeight=%" PRIu64 "\n"
+                "%sStartupBlockIOWeight=%" PRIu64 "\n"
+                "%sMemoryLimit=%" PRIu64 "\n"
+                "%sTasksMax=%" PRIu64 "\n"
+                "%sDevicePolicy=%s\n"
+                "%sDelegate=%s\n",
+                prefix, yes_no(c->cpu_accounting),
+                prefix, yes_no(c->blockio_accounting),
+                prefix, yes_no(c->memory_accounting),
+                prefix, yes_no(c->tasks_accounting),
+                prefix, c->cpu_shares,
+                prefix, c->startup_cpu_shares,
+                prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
+                prefix, c->blockio_weight,
+                prefix, c->startup_blockio_weight,
+                prefix, c->memory_limit,
+                prefix, c->tasks_max,
+                prefix, cgroup_device_policy_to_string(c->device_policy),
+                prefix, yes_no(c->delegate));
+
+        /* Access flags are printed as the concatenation of "r", "w", "m". */
+        LIST_FOREACH(device_allow, a, c->device_allow)
+                fprintf(f,
+                        "%sDeviceAllow=%s %s%s%s\n",
+                        prefix,
+                        a->path,
+                        a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
+
+        /* Every entry must end in a newline like all other settings,
+         * otherwise consecutive entries and the bandwidth lines below
+         * run together on a single output line. */
+        LIST_FOREACH(device_weights, w, c->blockio_device_weights)
+                fprintf(f,
+                        "%sBlockIODeviceWeight=%s %" PRIu64 "\n",
+                        prefix,
+                        w->path,
+                        w->weight);
+
+        /* The key name depends on whether the entry limits reads or writes. */
+        LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+                char buf[FORMAT_BYTES_MAX];
+
+                fprintf(f,
+                        "%s%s=%s %s\n",
+                        prefix,
+                        b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
+                        b->path,
+                        format_bytes(buf, sizeof(buf), b->bandwidth));
+        }
+}
+
+/* Resolves the path @p to a block device number stored in *@dev.
+ *
+ * A block device node yields its own device number. Any other file yields
+ * the device its file system lives on, which is then handed to
+ * block_get_whole_disk() to try to replace a partition by its parent disk.
+ *
+ * Returns 0 on success, a negative errno-style value if stat() fails, or
+ * -ENODEV if the backing device has major number 0. Both failures are
+ * logged as warnings. */
+static int lookup_blkio_device(const char *p, dev_t *dev) {
+        struct stat st;
+
+        assert(p);
+        assert(dev);
+
+        if (stat(p, &st) < 0)
+                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);
+
+        /* A block device node names the device directly. */
+        if (S_ISBLK(st.st_mode)) {
+                *dev = st.st_rdev;
+                return 0;
+        }
+
+        /* Major 0: there is no usable backing block device to report. */
+        if (major(st.st_dev) == 0) {
+                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
+                return -ENODEV;
+        }
+
+        /* Not a device node: start from the device holding the file ... */
+        *dev = st.st_dev;
+
+        /* ... and, if that is a partition, try to get the originating
+         * block device. The result of this lookup is not checked. */
+        block_get_whole_disk(*dev, dev);
+
+        return 0;
+}
+
+/* Adds the device node @node to devices.allow of the cgroup @path in the
+ * "devices" controller.
+ *
+ * @acc is the access string appended to the rule; the callers in this file
+ * pass combinations of "r", "w" and "m".
+ *
+ * Returns the result of cg_set_attribute(), a negative errno-style value if
+ * @node cannot be stat()ed, or -ENODEV if it is neither a character nor a
+ * block device. */
+static int whitelist_device(const char *path, const char *node, const char *acc) {
+        char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
+        struct stat st;
+        int r;
+
+        assert(path);
+        assert(acc);
+
+        /* Log the reason and take the return value from the logging helper,
+         * as lookup_blkio_device() does, instead of reading errno again
+         * after other calls have run. */
+        if (stat(node, &st) < 0)
+                return log_warning_errno(errno, "Couldn't stat device %s: %m", node);
+
+        if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
+                log_warning("%s is not a device.", node);
+                return -ENODEV;
+        }
+
+        /* Rule format: "<c|b> <major>:<minor> <access>" */
+        sprintf(buf,
+                "%c %u:%u %s",
+                S_ISCHR(st.st_mode) ? 'c' : 'b',
+                major(st.st_rdev), minor(st.st_rdev),
+                acc);
+
+        r = cg_set_attribute("devices", path, "devices.allow", buf);
+        if (r < 0)
+                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                               "Failed to set devices.allow on %s: %m", path);
+
+        return r;
+}
+
+/* Adds a "<type> <major>:* <acc>" rule to devices.allow of the cgroup @path
+ * for every entry in /proc/devices whose name matches the fnmatch() pattern
+ * @name.
+ *
+ * @type selects the section that is searched: 'c' for the lines following
+ * "Character devices:", 'b' for those following "Block devices:". @acc is
+ * the access string appended to each rule.
+ *
+ * A failure to write an individual rule is logged and otherwise ignored.
+ * Returns 0 once the file has been processed, or a negative errno-style
+ * value if /proc/devices cannot be opened or read. */
+static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
+        _cleanup_fclose_ FILE *f = NULL;
+        char line[LINE_MAX];
+        bool good = false;
+        int r;
+
+        assert(path);
+        assert(acc);
+        assert(type == 'b' || type == 'c');
+
+        f = fopen("/proc/devices", "re");
+        if (!f)
+                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
+
+        FOREACH_LINE(line, f, goto fail) {
+                char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
+                unsigned maj;
+
+                truncate_nl(line);
+
+                /* The header of the requested section switches matching on ... */
+                if (type == 'c' && streq(line, "Character devices:")) {
+                        good = true;
+                        continue;
+                }
+
+                if (type == 'b' && streq(line, "Block devices:")) {
+                        good = true;
+                        continue;
+                }
+
+                /* ... and an empty line switches it off again. */
+                if (isempty(line)) {
+                        good = false;
+                        continue;
+                }
+
+                if (!good)
+                        continue;
+
+                /* Split "<major> <name>" at the first whitespace. */
+                p = strstrip(line);
+
+                w = strpbrk(p, WHITESPACE);
+                if (!w)
+                        continue;
+                *w = 0;
+
+                r = safe_atou(p, &maj);
+                if (r < 0)
+                        continue;
+                /* maj is unsigned, so zero is the only value to reject. */
+                if (maj == 0)
+                        continue;
+
+                w++;
+                w += strspn(w, WHITESPACE);
+
+                if (fnmatch(name, w, 0) != 0)
+                        continue;
+
+                sprintf(buf,
+                        "%c %u:* %s",
+                        type,
+                        maj,
+                        acc);
+
+                r = cg_set_attribute("devices", path, "devices.allow", buf);
+                if (r < 0)
+                        log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+                                       "Failed to set devices.allow on %s: %m", path);
+        }
+
+        return 0;
+
+fail:
+        /* Take the return value from the logging helper, as the fopen()
+         * path above does, instead of reading errno again after the log
+         * call has run. */
+        return log_warning_errno(errno, "Failed to read /proc/devices: %m");
+}
+
+void cgroup_context_apply(CGroupContext *c, CGroupMask mask, const char *path, uint32_t netclass, ManagerState state) { /*
+ * Writes the settings held in c to the cgroup at path via cg_set_attribute(),
+ * one controller section per CGROUP_MASK_* bit set in mask (cpu, blkio,
+ * memory, devices, pids, net_cls). netclass is the value written to
+ * net_cls.classid. state selects the startup_* CPU shares / block I/O weight
+ * while it is MANAGER_STARTING or MANAGER_INITIALIZING and such a value is
+ * set. Nothing is returned: each failed write is only logged, at LOG_DEBUG
+ * for the errno values listed in the respective IN_SET() and at LOG_WARNING
+ * otherwise.
+ */
+ bool is_root; /* true when path is empty or equal to "/" */
+ int r;
+
+ assert(c);
+ assert(path);
+
+ if (mask == 0) /* no controller bit set: nothing to write */
+ return;
+
+ /* Some cgroup attributes are not supported on the root cgroup,
+ * hence silently ignore */
+ is_root = isempty(path) || path_equal(path, "/");
+ if (is_root)
+ /* Make sure we don't try to display messages with an empty path. */
+ path = "/";
+
+ /* We generally ignore errors caused by read-only mounted
+ * cgroup trees (assuming we are running in a container then),
+ * and missing cgroups, i.e. EROFS and ENOENT. */
+
+ if ((mask & CGROUP_MASK_CPU) && !is_root) { /* "cpu" controller; skipped for the root cgroup */
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t), DECIMAL_STR_MAX(usec_t)) + 1];
+
+ sprintf(buf, "%" PRIu64 "\n",
+ IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->startup_cpu_shares :
+ c->cpu_shares != CGROUP_CPU_SHARES_INVALID ? c->cpu_shares : CGROUP_CPU_SHARES_DEFAULT);
+ r = cg_set_attribute("cpu", path, "cpu.shares", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.shares on %s: %m", path);
+
+ sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC); /* the period is always the fixed CGROUP_CPU_QUOTA_PERIOD_USEC */
+ r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_period_us on %s: %m", path);
+
+ if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
+ sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC); /* scale the per-second quota down to one period */
+ r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
+ } else
+ r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); /* no quota configured: "-1" is written instead */
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set cpu.cfs_quota_us on %s: %m", path);
+ }
+
+ if (mask & CGROUP_MASK_BLKIO) { /* "blkio" controller; only the weight settings are skipped for the root cgroup */
+ char buf[MAX(DECIMAL_STR_MAX(uint64_t)+1,
+ DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
+ CGroupBlockIODeviceWeight *w;
+ CGroupBlockIODeviceBandwidth *b;
+
+ if (!is_root) {
+ sprintf(buf, "%" PRIu64 "\n",
+ IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->startup_blockio_weight :
+ c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ? c->blockio_weight : CGROUP_BLKIO_WEIGHT_DEFAULT);
+ r = cg_set_attribute("blkio", path, "blkio.weight", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set blkio.weight on %s: %m", path);
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+ dev_t dev;
+
+ r = lookup_blkio_device(w->path, &dev);
+ if (r < 0) /* lookup failed and has logged a warning; skip this entry */
+ continue;
+
+ sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), w->weight);
+ r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set blkio.weight_device on %s: %m", path);
+ }
+ }
+
+ /* FIXME: no way to reset this list */
+ LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
+ const char *a;
+ dev_t dev;
+
+ r = lookup_blkio_device(b->path, &dev);
+ if (r < 0) /* lookup failed and has logged a warning; skip this entry */
+ continue;
+
+ a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
+
+ sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
+ r = cg_set_attribute("blkio", path, a, buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set %s on %s: %m", a, path);
+ }
+ }
+
+ if ((mask & CGROUP_MASK_MEMORY) && !is_root) { /* "memory" controller; skipped for the root cgroup */
+ if (c->memory_limit != (uint64_t) -1) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+
+ sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
+
+ if (cg_unified() <= 0) /* cg_unified() <= 0 selects memory.limit_in_bytes, anything else memory.max */
+ r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
+ else
+ r = cg_set_attribute("memory", path, "memory.max", buf);
+
+ } else { /* no limit configured: write the attribute's "unlimited" value ("-1" or "max") */
+ if (cg_unified() <= 0)
+ r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
+ else
+ r = cg_set_attribute("memory", path, "memory.max", "max");
+ }
+
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set memory.limit_in_bytes/memory.max on %s: %m", path);
+ }
+
+ if ((mask & CGROUP_MASK_DEVICES) && !is_root) { /* "devices" controller; skipped for the root cgroup */
+ CGroupDeviceAllow *a;
+
+ /* Changing the devices list of a populated cgroup
+ * might result in EINVAL, hence ignore EINVAL
+ * here. */
+
+ if (c->device_allow || c->device_policy != CGROUP_AUTO) /* reset step: "a" goes to devices.deny if a list or a non-AUTO policy is set, else to devices.allow */
+ r = cg_set_attribute("devices", path, "devices.deny", "a");
+ else
+ r = cg_set_attribute("devices", path, "devices.allow", "a");
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to reset devices.list on %s: %m", path);
+
+ if (c->device_policy == CGROUP_CLOSED ||
+ (c->device_policy == CGROUP_AUTO && c->device_allow)) { /* CLOSED policy, or AUTO with an explicit list: allow the fixed set of nodes below */
+ static const char auto_devices[] =
+ "/dev/null\0" "rwm\0"
+ "/dev/zero\0" "rwm\0"
+ "/dev/full\0" "rwm\0"
+ "/dev/random\0" "rwm\0"
+ "/dev/urandom\0" "rwm\0"
+ "/dev/tty\0" "rwm\0"
+ "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
+
+ const char *x, *y;
+
+ NULSTR_FOREACH_PAIR(x, y, auto_devices) /* x is the device node, y its access string */
+ whitelist_device(path, x, y);
+
+ whitelist_major(path, "pts", 'c', "rw");
+ whitelist_major(path, "kdbus", 'c', "rw");
+ whitelist_major(path, "kdbus/*", 'c', "rw");
+ }
+
+ LIST_FOREACH(device_allow, a, c->device_allow) {
+ char acc[4]; /* room for "rwm" plus the terminating NUL */
+ unsigned k = 0;
+
+ if (a->r)
+ acc[k++] = 'r';
+ if (a->w)
+ acc[k++] = 'w';
+ if (a->m)
+ acc[k++] = 'm';
+
+ if (k == 0) /* no access flag set: nothing to allow for this entry */
+ continue;
+
+ acc[k++] = 0;
+
+ if (startswith(a->path, "/dev/")) /* a device node path */
+ whitelist_device(path, a->path, acc);
+ else if (startswith(a->path, "block-")) /* "block-<pattern>": match block device names in /proc/devices */
+ whitelist_major(path, a->path + 6, 'b', acc);
+ else if (startswith(a->path, "char-")) /* "char-<pattern>": match character device names in /proc/devices */
+ whitelist_major(path, a->path + 5, 'c', acc);
+ else
+ log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
+ }
+ }
+
+ if ((mask & CGROUP_MASK_PIDS) && !is_root) { /* "pids" controller; skipped for the root cgroup */
+
+ if (c->tasks_max != (uint64_t) -1) {
+ char buf[DECIMAL_STR_MAX(uint64_t) + 2];
+
+ sprintf(buf, "%" PRIu64 "\n", c->tasks_max);
+ r = cg_set_attribute("pids", path, "pids.max", buf);
+ } else
+ r = cg_set_attribute("pids", path, "pids.max", "max"); /* no limit configured: "max" is written instead */
+
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set pids.max on %s: %m", path);
+ }
+
+ if (mask & CGROUP_MASK_NET_CLS) { /* "net_cls" controller; written for the root cgroup as well */
+ char buf[DECIMAL_STR_MAX(uint32_t)];
+
+ sprintf(buf, "%" PRIu32, netclass);
+
+ r = cg_set_attribute("net_cls", path, "net_cls.classid", buf);
+ if (r < 0)
+ log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EACCES) ? LOG_DEBUG : LOG_WARNING, r,
+ "Failed to set net_cls.classid on %s: %m", path);
+ }
+}
+
+CGroupMask cgroup_context_get_mask(CGroupContext *c) {
+ CGroupMask mask = 0;
+
+ /* Figure out which controllers we need */
+
+ if (c->cpu_accounting ||
+ c->cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->startup_cpu_shares != CGROUP_CPU_SHARES_INVALID ||
+ c->cpu_quota_per_sec_usec != USEC_INFINITY)
+ mask |= CGROUP_MASK_CPUACCT | CGROUP_MASK_CPU;
+
+ if (c->blockio_accounting ||
+ c->blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->startup_blockio_weight != CGROUP_BLKIO_WEIGHT_INVALID ||
+ c->blockio_device_weights ||
+ c->blockio_device_bandwidths)
+ mask |= CGROUP_MASK_BLKIO;
+
+ if (c->memory_accounting ||
+ c->memory_limit != (uint64_t) -1)
+ mask |= CGROUP_MASK_MEMORY;