1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
25 #include "path-util.h"
27 #include "cgroup-util.h"
30 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
/* Initialize a CGroupContext to the kernel defaults. The caller must have
 * zero-initialized the structure already; only the non-zero defaults are set
 * here. (unsigned long) -1 / (uint64_t) -1 serve as "unset" sentinels
 * throughout this file. */
32 void cgroup_context_init(CGroupContext *c) {
35         /* Initialize everything to the kernel defaults, assuming the
36          * structure is preinitialized to 0 */
38         c->cpu_shares = (unsigned long) -1;
39         c->startup_cpu_shares = (unsigned long) -1;
40         c->memory_limit = (uint64_t) -1;
41         c->blockio_weight = (unsigned long) -1;
42         c->startup_blockio_weight = (unsigned long) -1;
44         c->cpu_quota_per_sec_usec = USEC_INFINITY;
/* Unlink one DeviceAllow= entry from the context's list.
 * NOTE(review): the lines freeing a->path and a itself are not visible in
 * this excerpt — confirm they follow the LIST_REMOVE. */
47 void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) {
51         LIST_REMOVE(device_allow, c->device_allow, a);
/* Unlink one BlockIODeviceWeight= entry from the context's list.
 * NOTE(review): freeing of w->path and w is presumably below — not visible here. */
56 void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w) {
60         LIST_REMOVE(device_weights, c->blockio_device_weights, w);
/* Unlink one BlockIO{Read,Write}Bandwidth= entry from the context's list.
 * NOTE(review): freeing of b->path and b is presumably below — not visible here. */
65 void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b) {
69         LIST_REMOVE(device_bandwidths, c->blockio_device_bandwidths, b);
/* Release all per-device list entries held by the context. Each helper pops
 * the list head, so the while loops drain the lists completely. */
74 void cgroup_context_done(CGroupContext *c) {
77         while (c->blockio_device_weights)
78                 cgroup_context_free_blockio_device_weight(c, c->blockio_device_weights);
80         while (c->blockio_device_bandwidths)
81                 cgroup_context_free_blockio_device_bandwidth(c, c->blockio_device_bandwidths);
83         while (c->device_allow)
84                 cgroup_context_free_device_allow(c, c->device_allow);
/* Dump the cgroup context in unit-file syntax to f, prefixing every line with
 * @prefix (for indentation in "systemctl show"-style output). */
87 void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
88         CGroupBlockIODeviceBandwidth *b;
89         CGroupBlockIODeviceWeight *w;
/* Scratch buffer for format_timespan() below. */
91         char u[FORMAT_TIMESPAN_MAX];
/* Tolerate a NULL prefix by mapping it to "". */
96         prefix = strempty(prefix);
/* Fixed scalar settings first... */
99                 "%sCPUAccounting=%s\n"
100                 "%sBlockIOAccounting=%s\n"
101                 "%sMemoryAccounting=%s\n"
103                 "%sStartupCPUShares=%lu\n"
104                 "%sCPUQuotaPerSecSec=%s\n"
105                 "%sBlockIOWeight=%lu\n"
106                 "%sStartupBlockIOWeight=%lu\n"
107                 "%sMemoryLimit=%" PRIu64 "\n"
108                 "%sDevicePolicy=%s\n"
110                 prefix, yes_no(c->cpu_accounting),
111                 prefix, yes_no(c->blockio_accounting),
112                 prefix, yes_no(c->memory_accounting),
113                 prefix, c->cpu_shares,
114                 prefix, c->startup_cpu_shares,
115                 prefix, format_timespan(u, sizeof(u), c->cpu_quota_per_sec_usec, 1),
116                 prefix, c->blockio_weight,
117                 prefix, c->startup_blockio_weight,
118                 prefix, c->memory_limit,
119                 prefix, cgroup_device_policy_to_string(c->device_policy),
120                 prefix, yes_no(c->delegate));
/* ...then the per-device list settings. */
122         LIST_FOREACH(device_allow, a, c->device_allow)
124                         "%sDeviceAllow=%s %s%s%s\n",
/* r/w/m flags are concatenated into an access string such as "rwm". */
127                         a->r ? "r" : "", a->w ? "w" : "", a->m ? "m" : "");
129         LIST_FOREACH(device_weights, w, c->blockio_device_weights)
131                         "%sBlockIODeviceWeight=%s %lu",
136         LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
137                 char buf[FORMAT_BYTES_MAX];
/* Read and write limits share one list; b->read selects the key name. */
142                         b->read ? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
144                         format_bytes(buf, sizeof(buf), b->bandwidth));
/* Resolve a path to the dev_t of the block device backing it, for writing
 * "major:minor ..." entries into blkio attributes. If @p is itself a block
 * device node its rdev is used (assignment not visible in this excerpt);
 * otherwise, for a file on a local file system, the device the file lives on
 * (st_dev) is used, reduced to the whole disk if it is a partition. */
148 static int lookup_blkio_device(const char *p, dev_t *dev) {
157                 return log_warning_errno(errno, "Couldn't stat device %s: %m", p);
159         if (S_ISBLK(st.st_mode))
/* major(st_dev) != 0 distinguishes real local file systems from virtual ones. */
161         else if (major(st.st_dev) != 0) {
162                 /* If this is not a device node then find the block
163                  * device this file is stored on */
166                 /* If this is a partition, try to get the originating
/* Best-effort: errors from block_get_whole_disk are deliberately ignored. */
168                 block_get_whole_disk(*dev, dev);
170                 log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
/* Whitelist a single device node @node with access string @acc ("r"/"w"/"m"
 * combination) in the devices cgroup at @path, by writing
 * "<c|b> <major>:<minor> <acc>" to devices.allow. */
177 static int whitelist_device(const char *path, const char *node, const char *acc) {
/* Sized for "c MAJ:MIN acc": type char + two decimal numbers + separators + access string. */
178         char buf[2+DECIMAL_STR_MAX(dev_t)*2+2+4];
185         if (stat(node, &st) < 0) {
186                 log_warning("Couldn't stat device %s", node);
/* Only character and block device nodes make sense for the devices controller. */
190         if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
191                 log_warning("%s is not a device.", node);
197                  S_ISCHR(st.st_mode) ? 'c' : 'b',
198                  major(st.st_rdev), minor(st.st_rdev),
201         r = cg_set_attribute("devices", path, "devices.allow", buf);
/* ENOENT/EROFS/EINVAL are expected in containers / populated cgroups — debug only. */
203                 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r,
204                                "Failed to set devices.allow on %s: %m", path);
/* Whitelist all devices whose driver name matches the glob @name and whose
 * type is @type ('c' or 'b'), by scanning /proc/devices for matching major
 * numbers and writing "<type> <major>:* <acc>" entries to devices.allow. */
209 static int whitelist_major(const char *path, const char *name, char type, const char *acc) {
210         _cleanup_fclose_ FILE *f = NULL;
217         assert(type == 'b' || type == 'c');
219         f = fopen("/proc/devices", "re");
221                 return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s (%c): %m", name, type);
/* /proc/devices has two sections; track which one we are in via the headers. */
223         FOREACH_LINE(line, f, goto fail) {
224                 char buf[2+DECIMAL_STR_MAX(unsigned)+3+4], *p, *w;
229                 if (type == 'c' && streq(line, "Character devices:")) {
234                 if (type == 'b' && streq(line, "Block devices:")) {
/* Each entry line is "<major> <name>": split at the first whitespace run. */
249                 w = strpbrk(p, WHITESPACE);
254                 r = safe_atou(p, &maj);
261                 w += strspn(w, WHITESPACE);
/* fnmatch() so callers can pass globs such as "kdbus/*". */
263                 if (fnmatch(name, w, 0) != 0)
272                 r = cg_set_attribute("devices", path, "devices.allow", buf);
274                         log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r,
275                                        "Failed to set devices.allow on %s: %m", path);
/* FOREACH_LINE jumps here on read errors. */
281         log_warning_errno(errno, "Failed to read /proc/devices: %m");
285 void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const char *path, ManagerState state) {
295 /* Some cgroup attributes are not support on the root cgroup,
296 * hence silently ignore */
297 is_root = isempty(path) || path_equal(path, "/");
299 /* Make sure we don't try to display messages with an empty path. */
302 /* We generally ignore errors caused by read-only mounted
303 * cgroup trees (assuming we are running in a container then),
304 * and missing cgroups, i.e. EROFS and ENOENT. */
306 if ((mask & CGROUP_CPU) && !is_root) {
307 char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1];
309 sprintf(buf, "%lu\n",
310 IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_cpu_shares != (unsigned long) -1 ? c->startup_cpu_shares :
311 c->cpu_shares != (unsigned long) -1 ? c->cpu_shares : 1024);
312 r = cg_set_attribute("cpu", path, "cpu.shares", buf);
314 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
315 "Failed to set cpu.shares on %s: %m", path);
317 sprintf(buf, USEC_FMT "\n", CGROUP_CPU_QUOTA_PERIOD_USEC);
318 r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf);
320 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
321 "Failed to set cpu.cfs_period_us on %s: %m", path);
323 if (c->cpu_quota_per_sec_usec != USEC_INFINITY) {
324 sprintf(buf, USEC_FMT "\n", c->cpu_quota_per_sec_usec * CGROUP_CPU_QUOTA_PERIOD_USEC / USEC_PER_SEC);
325 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf);
327 r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1");
329 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
330 "Failed to set cpu.cfs_quota_us on %s: %m", path);
333 if (mask & CGROUP_BLKIO) {
334 char buf[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
335 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
336 DECIMAL_STR_MAX(dev_t)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
337 CGroupBlockIODeviceWeight *w;
338 CGroupBlockIODeviceBandwidth *b;
341 sprintf(buf, "%lu\n", IN_SET(state, MANAGER_STARTING, MANAGER_INITIALIZING) && c->startup_blockio_weight != (unsigned long) -1 ? c->startup_blockio_weight :
342 c->blockio_weight != (unsigned long) -1 ? c->blockio_weight : 1000);
343 r = cg_set_attribute("blkio", path, "blkio.weight", buf);
345 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
346 "Failed to set blkio.weight on %s: %m", path);
348 /* FIXME: no way to reset this list */
349 LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
352 r = lookup_blkio_device(w->path, &dev);
356 sprintf(buf, "%u:%u %lu", major(dev), minor(dev), w->weight);
357 r = cg_set_attribute("blkio", path, "blkio.weight_device", buf);
359 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
360 "Failed to set blkio.weight_device on %s: %m", path);
364 /* FIXME: no way to reset this list */
365 LIST_FOREACH(device_bandwidths, b, c->blockio_device_bandwidths) {
369 r = lookup_blkio_device(b->path, &dev);
373 a = b->read ? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
375 sprintf(buf, "%u:%u %" PRIu64 "\n", major(dev), minor(dev), b->bandwidth);
376 r = cg_set_attribute("blkio", path, a, buf);
378 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
379 "Failed to set %s on %s: %m", a, path);
383 if ((mask & CGROUP_MEMORY) & !is_root) {
384 if (c->memory_limit != (uint64_t) -1) {
385 char buf[DECIMAL_STR_MAX(uint64_t) + 1];
387 sprintf(buf, "%" PRIu64 "\n", c->memory_limit);
388 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
390 r = cg_set_attribute("memory", path, "memory.limit_in_bytes", "-1");
393 log_full_errno(IN_SET(r, -ENOENT, -EROFS) ? LOG_DEBUG : LOG_WARNING, r,
394 "Failed to set memory.limit_in_bytes on %s: %m", path);
397 if ((mask & CGROUP_DEVICE) && !is_root) {
398 CGroupDeviceAllow *a;
400 /* Changing the devices list of a populated cgroup
401 * might result in EINVAL, hence ignore EINVAL
404 if (c->device_allow || c->device_policy != CGROUP_AUTO)
405 r = cg_set_attribute("devices", path, "devices.deny", "a");
407 r = cg_set_attribute("devices", path, "devices.allow", "a");
409 log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL) ? LOG_DEBUG : LOG_WARNING, r,
410 "Failed to reset devices.list on %s: %m", path);
412 if (c->device_policy == CGROUP_CLOSED ||
413 (c->device_policy == CGROUP_AUTO && c->device_allow)) {
414 static const char auto_devices[] =
415 "/dev/null\0" "rwm\0"
416 "/dev/zero\0" "rwm\0"
417 "/dev/full\0" "rwm\0"
418 "/dev/random\0" "rwm\0"
419 "/dev/urandom\0" "rwm\0"
421 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
425 NULSTR_FOREACH_PAIR(x, y, auto_devices)
426 whitelist_device(path, x, y);
428 whitelist_major(path, "pts", 'c', "rw");
429 whitelist_major(path, "kdbus", 'c', "rw");
430 whitelist_major(path, "kdbus/*", 'c', "rw");
433 LIST_FOREACH(device_allow, a, c->device_allow) {
449 if (startswith(a->path, "/dev/"))
450 whitelist_device(path, a->path, acc);
451 else if (startswith(a->path, "block-"))
452 whitelist_major(path, a->path + 6, 'b', acc);
453 else if (startswith(a->path, "char-"))
454 whitelist_major(path, a->path + 5, 'c', acc);
456 log_debug("Ignoring device %s while writing cgroup attribute.", a->path);
/* Compute which cgroup controllers the given context actually needs, based on
 * which settings deviate from their "unset" sentinels or defaults. The final
 * "return mask;" is below the visible excerpt. */
461 CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) {
462         CGroupControllerMask mask = 0;
464         /* Figure out which controllers we need */
466         if (c->cpu_accounting ||
467             c->cpu_shares != (unsigned long) -1 ||
468             c->startup_cpu_shares != (unsigned long) -1 ||
469             c->cpu_quota_per_sec_usec != USEC_INFINITY)
470                 mask |= CGROUP_CPUACCT | CGROUP_CPU;
472         if (c->blockio_accounting ||
473             c->blockio_weight != (unsigned long) -1 ||
474             c->startup_blockio_weight != (unsigned long) -1 ||
475             c->blockio_device_weights ||
476             c->blockio_device_bandwidths)
477                 mask |= CGROUP_BLKIO;
479         if (c->memory_accounting ||
480             c->memory_limit != (uint64_t) -1)
481                 mask |= CGROUP_MEMORY;
/* Any non-default device policy or explicit allow entry requires the devices controller. */
483         if (c->device_allow ||
484             c->device_policy != CGROUP_AUTO)
485                 mask |= CGROUP_DEVICE;
/* Controller mask a unit needs for itself. With Delegate= on, all controllers
 * are enabled unless the unit's exec context is known to drop privileges. */
490 CGroupControllerMask unit_get_cgroup_mask(Unit *u) {
493         c = unit_get_cgroup_context(u);
497         /* If delegation is turned on, then turn on all cgroups,
498          * unless the process we fork into it is known to drop
499          * privileges anyway, and shouldn't get access to the
500          * controllers anyway. */
505                 e = unit_get_exec_context(u);
506                 if (!e || exec_context_maintains_privileges(e))
507                         return _CGROUP_CONTROLLER_MASK_ALL;
/* Otherwise, just what the resource settings imply. */
510         return cgroup_context_get_mask(c);
/* Controller mask needed by the unit's children (slice members), computed
 * recursively and cached in u->cgroup_members_mask. */
513 CGroupControllerMask unit_get_members_mask(Unit *u) {
/* Fast path: return the cached value when still valid. */
516         if (u->cgroup_members_mask_valid)
517                 return u->cgroup_members_mask;
519         u->cgroup_members_mask = 0;
/* Only slices have cgroup children. */
521         if (u->type == UNIT_SLICE) {
/* Members are found via the UNIT_BEFORE dependency set... */
525                 SET_FOREACH(member, u->dependencies[UNIT_BEFORE], i) {
/* ...but only those whose slice reference actually points at us. */
530                         if (UNIT_DEREF(member->slice) != u)
533                         u->cgroup_members_mask |=
534                                 unit_get_cgroup_mask(member) |
535                                 unit_get_members_mask(member);
539         u->cgroup_members_mask_valid = true;
540         return u->cgroup_members_mask;
/* Controller mask needed by the unit's siblings: everything the containing
 * slice's members need; for a unit without a slice, its own subtree mask. */
543 CGroupControllerMask unit_get_siblings_mask(Unit *u) {
546         if (UNIT_ISSET(u->slice))
547                 return unit_get_members_mask(UNIT_DEREF(u->slice));
549         return unit_get_cgroup_mask(u) | unit_get_members_mask(u);
/* Final mask to realize for a unit: own needs + children + siblings, limited
 * to controllers the kernel actually supports. The "return mask;" is below
 * the visible excerpt. */
552 CGroupControllerMask unit_get_target_mask(Unit *u) {
553         CGroupControllerMask mask;
555         mask = unit_get_cgroup_mask(u) | unit_get_members_mask(u) | unit_get_siblings_mask(u);
556         mask &= u->manager->cgroup_supported;
561 /* Recurse from a unit up through its containing slices, propagating
562  * mask bits upward. A unit is also member of itself. */
563 void unit_update_cgroup_members_masks(Unit *u) {
564         CGroupControllerMask m;
569         /* Calculate subtree mask */
570         m = unit_get_cgroup_mask(u) | unit_get_members_mask(u);
572         /* See if anything changed from the previous invocation. If
573          * not, we're done. */
574         if (u->cgroup_subtree_mask_valid && m == u->cgroup_subtree_mask)
/* The following condition (its lvalue is elided here) is true iff bits were
 * only ADDED relative to the old subtree mask — the cheap propagation case. */
578                 u->cgroup_subtree_mask_valid &&
579                 ((m & ~u->cgroup_subtree_mask) != 0) &&
580                 ((~m & u->cgroup_subtree_mask) == 0);
582         u->cgroup_subtree_mask = m;
583         u->cgroup_subtree_mask_valid = true;
585         if (UNIT_ISSET(u->slice)) {
586                 Unit *s = UNIT_DEREF(u->slice);
589                 /* There's more set now than before. We
590                  * propagate the new mask to the parent's mask
591                  * (not caring if it actually was valid or
594                         s->cgroup_members_mask |= m;
597                 /* There's less set now than before (or we
598                  * don't know), we need to recalculate
599                  * everything, so let's invalidate the
600                  * parent's members mask */
602                         s->cgroup_members_mask_valid = false;
604                 /* And now make sure that this change also hits our
/* Recurse upward through the slice chain. */
606                 unit_update_cgroup_members_masks(s);
/* Callback for cg_migrate_everywhere(): starting from the userdata unit, walk
 * up the slice chain to find the nearest ancestor whose realized cgroup
 * already covers @mask, and return its cgroup path (NULL if none). */
610 static const char *migrate_callback(CGroupControllerMask mask, void *userdata) {
617                 if (u->cgroup_path &&
618                     u->cgroup_realized &&
619                     (u->cgroup_realized_mask & mask) == mask)
620                         return u->cgroup_path;
/* Not sufficient — try the parent slice. */
622                 u = UNIT_DEREF(u->slice);
/* Create the unit's cgroup in all hierarchies selected by @mask, registering
 * the path in the manager's cgroup_unit hashmap, and migrate any stray
 * processes into it (except for slices/delegated units, whose subgroups may
 * legitimately contain processes). */
628 static int unit_create_cgroups(Unit *u, CGroupControllerMask mask) {
634         c = unit_get_cgroup_context(u);
/* Lazily compute and register the default cgroup path on first realization. */
638         if (!u->cgroup_path) {
639                 _cleanup_free_ char *path = NULL;
641                 path = unit_default_cgroup_path(u);
645                 r = hashmap_put(u->manager->cgroup_unit, path, u);
647                         log_error(r == -EEXIST ? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path, strerror(-r));
/* Ownership of path transfers to the unit here (the _cleanup_free_ is
 * presumably neutralized by a "path = NULL;" just below — not visible). */
651                 u->cgroup_path = path;
656         /* First, create our own group */
657         r = cg_create_everywhere(u->manager->cgroup_supported, mask, u->cgroup_path);
659                 return log_error_errno(r, "Failed to create cgroup %s: %m", u->cgroup_path);
661         /* Keep track that this is now realized */
662         u->cgroup_realized = true;
663         u->cgroup_realized_mask = mask;
665         if (u->type != UNIT_SLICE && !c->delegate) {
667                 /* Then, possibly move things over, but not if
668                  * subgroups may contain processes, which is the case
669                  * for slice and delegation units. */
670                 r = cg_migrate_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->cgroup_path, migrate_callback, u);
/* NOTE(review): message reads "from to %s" — looks like a leftover word;
 * probably should be "Failed to migrate cgroup to %s". */
672                         log_warning_errno(r, "Failed to migrate cgroup from to %s: %m", u->cgroup_path);
/* Realize the unit's cgroup (creating parents as needed) and attach all PIDs
 * tracked in u->pids to it, in every supported hierarchy. */
678 int unit_attach_pids_to_cgroup(Unit *u) {
682         r = unit_realize_cgroup(u);
686         r = cg_attach_many_everywhere(u->manager->cgroup_supported, u->cgroup_path, u->pids, migrate_callback, u);
/* True iff the unit's cgroup is realized with exactly the given controller
 * mask (note: equality, not superset — a differing mask forces re-realization). */
693 static bool unit_has_mask_realized(Unit *u, CGroupControllerMask mask) {
696         return u->cgroup_realized && u->cgroup_realized_mask == mask;
699 /* Check if necessary controllers and attributes for a unit are in place.
702  * If not, create paths, move processes over, and set attributes.
704  * Returns 0 on success and < 0 on failure. */
705 static int unit_realize_cgroup_now(Unit *u, ManagerState state) {
706         CGroupControllerMask mask;
/* Being realized synchronously removes the unit from the deferred queue. */
711         if (u->in_cgroup_queue) {
712                 LIST_REMOVE(cgroup_queue, u->manager->cgroup_queue, u);
713                 u->in_cgroup_queue = false;
716         mask = unit_get_target_mask(u);
/* Already realized with exactly this mask — nothing to do. */
718         if (unit_has_mask_realized(u, mask))
721         /* First, realize parents */
722         if (UNIT_ISSET(u->slice)) {
723                 r = unit_realize_cgroup_now(UNIT_DEREF(u->slice), state);
728         /* And then do the real work */
729         r = unit_create_cgroups(u, mask);
733         /* Finally, apply the necessary attributes. */
734         cgroup_context_apply(unit_get_cgroup_context(u), mask, u->cgroup_path, state);
/* Queue a unit for deferred cgroup realization; no-op if already queued. */
739 static void unit_add_to_cgroup_queue(Unit *u) {
741         if (u->in_cgroup_queue)
744         LIST_PREPEND(cgroup_queue, u->manager->cgroup_queue, u);
745         u->in_cgroup_queue = true;
/* Drain the deferred cgroup realization queue, realizing each queued unit.
 * Returns the number of units processed (counting elided in this excerpt). */
748 unsigned manager_dispatch_cgroup_queue(Manager *m) {
754         state = manager_state(m);
/* unit_realize_cgroup_now() removes the unit from the queue, so this
 * head-popping loop terminates. */
756         while ((i = m->cgroup_queue)) {
757                 assert(i->in_cgroup_queue);
759                 r = unit_realize_cgroup_now(i, state);
761                         log_warning_errno(r, "Failed to realize cgroups for queued unit %s: %m", i->id);
769 static void unit_queue_siblings(Unit *u) {
772         /* This adds the siblings of the specified unit and the
773          * siblings of all parent units to the cgroup queue. (But
774          * neither the specified unit itself nor the parents.) */
/* Walk up the slice chain, queueing the members of each slice. */
776         while ((slice = UNIT_DEREF(u->slice))) {
780                 SET_FOREACH(m, slice->dependencies[UNIT_BEFORE], i) {
784                         /* Skip units that have a dependency on the slice
785                          * but aren't actually in it. */
786                         if (UNIT_DEREF(m->slice) != slice)
789                         /* No point in doing cgroup application for units
790                          * without active processes. */
791                         if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m)))
794                         /* If the unit doesn't need any new controllers
795                          * and has current ones realized, it doesn't need
797                         if (unit_has_mask_realized(m, unit_get_target_mask(m)))
800                         unit_add_to_cgroup_queue(m);
/* Public entry point: realize this unit's cgroup now (parents included) and
 * queue its siblings for deferred realization. */
807 int unit_realize_cgroup(Unit *u) {
812         c = unit_get_cgroup_context(u);
816         /* So, here's the deal: when realizing the cgroups for this
817          * unit, we need to first create all parents, but there's more
818          * actually: for the weight-based controllers we also need to
819          * make sure that all our siblings (i.e. units that are in the
820          * same slice as we are) have cgroups, too. Otherwise, things
821          * would become very uneven as each of their processes would
822          * get as much resources as all our group together. This call
823          * will synchronously create the parent cgroups, but will
824          * defer work on the siblings to the next event loop
827         /* Add all sibling slices to the cgroup queue. */
828         unit_queue_siblings(u);
830         /* And realize this one now (and apply the values) */
831         return unit_realize_cgroup_now(u, manager_state(u->manager));
/* Trim (and, unless this is the root slice, delete) the unit's cgroup, then
 * drop all bookkeeping: hashmap registration, path string, realized flags. */
834 void unit_destroy_cgroup_if_empty(Unit *u) {
842         r = cg_trim_everywhere(u->manager->cgroup_supported, u->cgroup_path, !unit_has_name(u, SPECIAL_ROOT_SLICE));
/* Best-effort: a populated cgroup cannot be removed — only log at debug level. */
844                 log_debug_errno(r, "Failed to destroy cgroup %s: %m", u->cgroup_path);
848         hashmap_remove(u->manager->cgroup_unit, u->cgroup_path);
850         free(u->cgroup_path);
851         u->cgroup_path = NULL;
852         u->cgroup_realized = false;
853         u->cgroup_realized_mask = 0;
/* Heuristically determine the unit's main PID by enumerating the processes in
 * its cgroup: our direct children are candidates; returns 0 if the result is
 * ambiguous or the cgroup cannot be read. */
856 pid_t unit_search_main_pid(Unit *u) {
857         _cleanup_fclose_ FILE *f = NULL;
858         pid_t pid = 0, npid, mypid;
865         if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, &f) < 0)
869         while (cg_read_pid(f, &npid) > 0) {
875                 /* Ignore processes that aren't our kids */
876                 if (get_parent_of_pid(npid, &ppid) >= 0 && ppid != mypid)
880                         /* Dang, there's more than one daemonized PID
881                         in this group, so we don't know what process
882                         is the main process. */
/* One-time cgroup setup for the manager: determine our root cgroup, install
 * the release agent (system instance only), attach ourselves to the root,
 * pin the cgroupfs mount, and detect supported controllers. */
893 int manager_setup_cgroup(Manager *m) {
894         _cleanup_free_ char *path = NULL;
899         /* 1. Determine hierarchy */
900         free(m->cgroup_root);
901         m->cgroup_root = NULL;
903         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &m->cgroup_root);
905                 return log_error_errno(r, "Cannot determine cgroup we are running in: %m");
907         /* LEGACY: Already in /system.slice? If so, let's cut this
908          * off. This is to support live upgrades from older systemd
909          * versions where PID 1 was moved there. */
910         if (m->running_as == SYSTEMD_SYSTEM) {
913                 e = endswith(m->cgroup_root, "/" SPECIAL_SYSTEM_SLICE);
/* Even older versions used the plain "/system" cgroup. */
915                         e = endswith(m->cgroup_root, "/system");
920         /* And make sure to store away the root value without trailing
921          * slash, even for the root dir, so that we can easily prepend
923         if (streq(m->cgroup_root, "/"))
924                 m->cgroup_root[0] = 0;
927         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, NULL, &path);
929                 return log_error_errno(r, "Cannot find cgroup mount point: %m");
931         log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER ". File system hierarchy is at %s.", path);
934         /* 3. Install agent */
935         if (m->running_as == SYSTEMD_SYSTEM) {
936                 r = cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER, SYSTEMD_CGROUP_AGENT_PATH);
/* Agent installation is non-fatal — empty-cgroup notification degrades gracefully. */
938                         log_warning_errno(r, "Failed to install release agent, ignoring: %m");
940                         log_debug("Installed release agent.");
942                         log_debug("Release agent already installed.");
945         /* 4. Make sure we are in the root cgroup */
946         r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, 0);
948                 return log_error_errno(r, "Failed to create root cgroup hierarchy: %m");
950         /* 5. And pin it, so that it cannot be unmounted */
951         safe_close(m->pin_cgroupfs_fd);
953         m->pin_cgroupfs_fd = open(path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY|O_NONBLOCK);
954         if (m->pin_cgroupfs_fd < 0)
955                 return log_error_errno(errno, "Failed to open pin file: %m");
957         /* 6. Always enable hierarchial support if it exists... */
/* Best-effort; the attribute may not exist on all kernels, errors ignored. */
958         cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
961         /* 7. Figure out which controllers are supported */
962         m->cgroup_supported = cg_mask_supported();
/* Tear down the manager's cgroup state: optionally trim our root cgroup (we
 * cannot delete it since we are inside it), close the pin fd, free the root. */
967 void manager_shutdown_cgroup(Manager *m, bool delete) {
970         /* We can't really delete the group, since we are in it. But
972         if (delete && m->cgroup_root)
973                 cg_trim(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, false);
975         m->pin_cgroupfs_fd = safe_close(m->pin_cgroupfs_fd);
977         free(m->cgroup_root);
978         m->cgroup_root = NULL;
/* Map a cgroup path to the owning unit: exact hashmap lookup first, then
 * (elided here) walk up parent paths "p" until a registered prefix is found. */
981 Unit* manager_get_unit_by_cgroup(Manager *m, const char *cgroup) {
988         u = hashmap_get(m->cgroup_unit, cgroup);
1002                 u = hashmap_get(m->cgroup_unit, p);
/* Map a PID to its owning unit by resolving the PID's cgroup path and then
 * doing a cgroup → unit lookup. */
1008 Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
1009         _cleanup_free_ char *cgroup = NULL;
1017         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &cgroup);
1021         return manager_get_unit_by_cgroup(m, cgroup);
/* Handle a cgroup-empty notification (from the release agent / D-Bus): find
 * the owning unit, re-verify emptiness recursively, tell the unit type's
 * vtable, and schedule the unit for garbage collection. */
1024 int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
1031         u = manager_get_unit_by_cgroup(m, cgroup);
/* Re-check: the notification may be stale by the time we process it. */
1033         r = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, u->cgroup_path, true);
1035         if (UNIT_VTABLE(u)->notify_cgroup_empty)
1036                 UNIT_VTABLE(u)->notify_cgroup_empty(u);
1038         unit_add_to_gc_queue(u);
/* String names for CGroupDevicePolicy, consumed by the generated
 * cgroup_device_policy_{to,from}_string() lookup functions below. */
1045 static const char* const cgroup_device_policy_table[_CGROUP_DEVICE_POLICY_MAX] = {
1046         [CGROUP_AUTO] = "auto",
1047         [CGROUP_CLOSED] = "closed",
1048         [CGROUP_STRICT] = "strict",
1051 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy, CGroupDevicePolicy);