chiark / gitweb /
core: introduce MemorySwapMax=
[elogind.git] / src / core / cgroup.c
index bf8bbf5801609d1760af9580c515fde3e4f8d042..bc454551b7eaad32331edd478e0bd33631191a7f 100644 (file)
 //#include "special.h"
 #include "string-table.h"
 #include "string-util.h"
+#include "stdio-util.h"
 
 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
 
 #if 0 /// UNNEEDED by elogind
+/* Logs, at warning level, that translation between legacy and unified
+ * hierarchy cgroup settings is active. A function-local static flag makes
+ * every call after the first a no-op. */
+static void cgroup_compat_warn(void) {
+        static bool warned_once = false;
+
+        if (!warned_once) {
+                log_warning("cgroup compatibility translation between legacy and unified hierarchy settings activated. See cgroup-compat debug messages for details.");
+                warned_once = true;
+        }
+}
+
+/* Emits a unit-level debug message prefixed with "cgroup-compat: ", calling
+ * cgroup_compat_warn() first. 'fmt' is concatenated onto the prefix literal,
+ * so it has to be a string literal itself. */
+#define log_cgroup_compat(unit, fmt, ...) do {                                  \
+                cgroup_compat_warn();                                           \
+                log_unit_debug(unit, "cgroup-compat: " fmt, ##__VA_ARGS__);     \
+        } while (false)
+
 void cgroup_context_init(CGroupContext *c) {
         assert(c);
 
@@ -48,6 +64,7 @@ void cgroup_context_init(CGroupContext *c) {
 
         c->memory_high = CGROUP_LIMIT_MAX;
         c->memory_max = CGROUP_LIMIT_MAX;
+       c->memory_swap_max = CGROUP_LIMIT_MAX;
 
         c->memory_limit = CGROUP_LIMIT_MAX;
 
@@ -153,6 +170,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 "%sMemoryLow=%" PRIu64 "\n"
                 "%sMemoryHigh=%" PRIu64 "\n"
                 "%sMemoryMax=%" PRIu64 "\n"
+                "%sMemorySwapMax=%" PRIu64 "\n"
                 "%sMemoryLimit=%" PRIu64 "\n"
                 "%sTasksMax=%" PRIu64 "\n"
                 "%sDevicePolicy=%s\n"
@@ -172,6 +190,7 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) {
                 prefix, c->memory_low,
                 prefix, c->memory_high,
                 prefix, c->memory_max,
+                prefix, c->memory_swap_max,
                 prefix, c->memory_limit,
                 prefix, c->tasks_max,
                 prefix, cgroup_device_policy_to_string(c->device_policy),
@@ -506,7 +525,7 @@ static unsigned cgroup_apply_blkio_device_limit(Unit *u, const char *dev_path, u
 }
 
 static bool cgroup_context_has_unified_memory_config(CGroupContext *c) {
-        return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX;
+        return c->memory_low > 0 || c->memory_high != CGROUP_LIMIT_MAX || c->memory_max != CGROUP_LIMIT_MAX || c->memory_swap_max != CGROUP_LIMIT_MAX;
 }
 
 static void cgroup_apply_unified_memory_limit(Unit *u, const char *file, uint64_t v) {
@@ -587,9 +606,14 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
 
                         if (has_io)
                                 weight = cgroup_context_io_weight(c, state);
-                        else if (has_blockio)
-                                weight = cgroup_weight_blkio_to_io(cgroup_context_blkio_weight(c, state));
-                        else
+                        else if (has_blockio) {
+                                uint64_t blkio_weight = cgroup_context_blkio_weight(c, state);
+
+                                weight = cgroup_weight_blkio_to_io(blkio_weight);
+
+                                log_cgroup_compat(u, "Applying [Startup]BlockIOWeight %" PRIu64 " as [Startup]IOWeight %" PRIu64,
+                                                  blkio_weight, weight);
+                        } else
                                 weight = CGROUP_WEIGHT_DEFAULT;
 
                         xsprintf(buf, "default %" PRIu64 "\n", weight);
@@ -608,8 +632,14 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 CGroupBlockIODeviceWeight *w;
 
                                 /* FIXME: no way to reset this list */
-                                LIST_FOREACH(device_weights, w, c->blockio_device_weights)
-                                        cgroup_apply_io_device_weight(u, w->path, cgroup_weight_blkio_to_io(w->weight));
+                                LIST_FOREACH(device_weights, w, c->blockio_device_weights) {
+                                        weight = cgroup_weight_blkio_to_io(w->weight);
+
+                                        log_cgroup_compat(u, "Applying BlockIODeviceWeight %" PRIu64 " as IODeviceWeight %" PRIu64 " for %s",
+                                                          w->weight, weight, w->path);
+
+                                        cgroup_apply_io_device_weight(u, w->path, weight);
+                                }
                         }
                 }
 
@@ -634,6 +664,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 limits[CGROUP_IO_RBPS_MAX] = b->rbps;
                                 limits[CGROUP_IO_WBPS_MAX] = b->wbps;
 
+                                log_cgroup_compat(u, "Applying BlockIO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as IO{Read|Write}BandwidthMax for %s",
+                                                  b->rbps, b->wbps, b->path);
+
                                 if (!cgroup_apply_io_device_limit(u, b->path, limits))
                                         cgroup_context_free_blockio_device_bandwidth(c, b);
                         }
@@ -650,9 +683,14 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
 
                         if (has_blockio)
                                 weight = cgroup_context_blkio_weight(c, state);
-                        else if (has_io)
-                                weight = cgroup_weight_io_to_blkio(cgroup_context_io_weight(c, state));
-                        else
+                        else if (has_io) {
+                                uint64_t io_weight = cgroup_context_io_weight(c, state);
+
+                                /* Convert the value that gets logged below instead of querying the weight a second time. */
+                                weight = cgroup_weight_io_to_blkio(io_weight);
+                                log_cgroup_compat(u, "Applying [Startup]IOWeight %" PRIu64 " as [Startup]BlockIOWeight %" PRIu64,
+                                                  io_weight, weight);
+                        } else
                                 weight = CGROUP_BLKIO_WEIGHT_DEFAULT;
 
                         xsprintf(buf, "%" PRIu64 "\n", weight);
@@ -671,8 +709,14 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 CGroupIODeviceWeight *w;
 
                                 /* FIXME: no way to reset this list */
-                                LIST_FOREACH(device_weights, w, c->io_device_weights)
-                                        cgroup_apply_blkio_device_weight(u, w->path, cgroup_weight_io_to_blkio(w->weight));
+                                LIST_FOREACH(device_weights, w, c->io_device_weights) {
+                                        weight = cgroup_weight_io_to_blkio(w->weight);
+
+                                        log_cgroup_compat(u, "Applying IODeviceWeight %" PRIu64 " as BlockIODeviceWeight %" PRIu64 " for %s",
+                                                          w->weight, weight, w->path);
+
+                                        cgroup_apply_blkio_device_weight(u, w->path, weight);
+                                }
                         }
                 }
 
@@ -688,6 +732,9 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                         CGroupIODeviceLimit *l, *next;
 
                         LIST_FOREACH_SAFE(device_limits, l, next, c->io_device_limits) {
+                                log_cgroup_compat(u, "Applying IO{Read|Write}Bandwidth %" PRIu64 " %" PRIu64 " as BlockIO{Read|Write}BandwidthMax for %s",
+                                                  l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX], l->path);
+
                                 if (!cgroup_apply_blkio_device_limit(u, l->path, l->limits[CGROUP_IO_RBPS_MAX], l->limits[CGROUP_IO_WBPS_MAX]))
                                         cgroup_context_free_io_device_limit(c, l);
                         }
@@ -697,22 +744,37 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
         if ((mask & CGROUP_MASK_MEMORY) && !is_root) {
                 if (cg_unified() > 0) {
                         uint64_t max = c->memory_max;
-
-                        if (cgroup_context_has_unified_memory_config(c))
-                                max = c->memory_max;
-                        else
+                        uint64_t swap_max = c->memory_swap_max;
+                        if (cgroup_context_has_unified_memory_config(c)) {
+                                 max = c->memory_max;
+                                swap_max = c->memory_swap_max;
+                        } else {
                                 max = c->memory_limit;
 
+                                if (max != CGROUP_LIMIT_MAX)
+                                        log_cgroup_compat(u, "Applying MemoryLimit %" PRIu64 " as MemoryMax", max);
+                        }
+
                         cgroup_apply_unified_memory_limit(u, "memory.low", c->memory_low);
                         cgroup_apply_unified_memory_limit(u, "memory.high", c->memory_high);
                         cgroup_apply_unified_memory_limit(u, "memory.max", max);
+                        cgroup_apply_unified_memory_limit(u, "memory.swap.max", swap_max);
                 } else {
                         char buf[DECIMAL_STR_MAX(uint64_t) + 1];
+                        uint64_t val = c->memory_limit;
+
+                        if (val == CGROUP_LIMIT_MAX) {
+                                val = c->memory_max;
 
-                        if (c->memory_limit != CGROUP_LIMIT_MAX)
-                                xsprintf(buf, "%" PRIu64 "\n", c->memory_limit);
+                                if (val != CGROUP_LIMIT_MAX)
+                                        log_cgroup_compat(u, "Applying MemoryMax %" PRIi64 " as MemoryLimit", c->memory_max);
+                        }
+
+                        if (val == CGROUP_LIMIT_MAX)
+                                strncpy(buf, "-1\n", sizeof(buf));
                         else
-                                xsprintf(buf, "%" PRIu64 "\n", c->memory_max);
+                                xsprintf(buf, "%" PRIu64 "\n", val);
 
                         r = cg_set_attribute("memory", path, "memory.limit_in_bytes", buf);
                         if (r < 0)
@@ -745,7 +807,11 @@ static void cgroup_context_apply(Unit *u, CGroupMask mask, ManagerState state) {
                                 "/dev/random\0" "rwm\0"
                                 "/dev/urandom\0" "rwm\0"
                                 "/dev/tty\0" "rwm\0"
-                                "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
+                                "/dev/pts/ptmx\0" "rw\0" /* /dev/pts/ptmx may not be duplicated, but accessed */
+                                /* Allow /run/elogind/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+                                /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
+                                "/run/systemd/inaccessible/chr\0" "rwm\0"
+                                "/run/systemd/inaccessible/blk\0" "rwm\0";
 
                         const char *x, *y;
 
@@ -1081,7 +1147,7 @@ int unit_watch_cgroup(Unit *u) {
         /* Only applies to the unified hierarchy */
         r = cg_unified();
         if (r < 0)
-                return log_unit_error_errno(u, r, "Failed detect wether the unified hierarchy is used: %m");
+                return log_unit_error_errno(u, r, "Failed detect whether the unified hierarchy is used: %m");
         if (r == 0)
                 return 0;
 
@@ -1608,7 +1674,7 @@ int manager_setup_cgroup(Manager *m) {
                 /* 3. Install agent */
                 if (unified) {
 
-                        /* In the unified hierarchy we can can get
+                        /* In the unified hierarchy we can get
                          * cgroup empty notifications via inotify. */
 
 #if 0 /// elogind does not support the unified hierarchy, yet.
@@ -1673,7 +1739,7 @@ int manager_setup_cgroup(Manager *m) {
 
                 /* also, move all other userspace processes remaining
                  * in the root cgroup into that scope. */
-                r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, false);
+                r = cg_migrate(SYSTEMD_CGROUP_CONTROLLER, m->cgroup_root, SYSTEMD_CGROUP_CONTROLLER, scope_path, 0);
                 if (r < 0)
                         log_warning_errno(r, "Couldn't move remaining userspace processes, ignoring: %m");
 
@@ -1784,7 +1850,9 @@ Unit *manager_get_unit_by_pid(Manager *m, pid_t pid) {
 
         return manager_get_unit_by_pid_cgroup(m, pid);
 }
+#endif // 0
 
+#if 0 /// elogind must substitute this with its own variant
 int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
         Unit *u;
 
@@ -1799,7 +1867,28 @@ int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
 
         return unit_notify_cgroup_empty(u);
 }
+#else
+int manager_notify_cgroup_empty(Manager *m, const char *cgroup) {
+        Session *session;
+
+        assert(m);
+        assert(cgroup);
 
+        log_debug("Got cgroup empty notification for: %s", cgroup);
+
+        /* Look up the session stored under this cgroup string: a hit is finalized and freed, a miss only warns. */
+        session = hashmap_get(m->sessions, cgroup);
+        if (!session) {
+                log_warning("Session not found: %s", cgroup);
+                return 0;
+        }
+        session_finalize(session);
+        session_free(session);
+        return 0;
+}
+#endif // 0
+
+#if 0 /// UNNEEDED by elogind
 int unit_get_memory_current(Unit *u, uint64_t *ret) {
         _cleanup_free_ char *v = NULL;
         int r;