From b2f8b02ec27dfec9cbd23573f47aba494f2e9b5f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 25 Apr 2014 13:27:25 +0200 Subject: [PATCH] core: expose CFS CPU time quota as high-level unit properties --- man/systemd-system.conf.xml | 12 ++++ man/systemd.resource-control.xml | 52 +++++++++++++-- src/core/cgroup.c | 59 +++++++++++++++- src/core/cgroup.h | 6 ++ src/core/dbus-cgroup.c | 96 +++++++++++++++++++++++++++ src/core/load-fragment-gperf.gperf.m4 | 2 + src/core/load-fragment.c | 48 ++++++++++++++ src/core/load-fragment.h | 1 + src/core/main.c | 3 + src/core/manager.c | 1 + src/core/manager.h | 1 + src/core/system.conf | 1 + src/core/unit.c | 1 + src/libsystemd/sd-bus/bus-util.c | 64 ++++++++++++++++++ 14 files changed, 340 insertions(+), 7 deletions(-) diff --git a/man/systemd-system.conf.xml b/man/systemd-system.conf.xml index e2b2bd8b6..3814bd2ad 100644 --- a/man/systemd-system.conf.xml +++ b/man/systemd-system.conf.xml @@ -301,6 +301,18 @@ above. + + DefaultCPUQuotaPeriodSec= + + Sets the default CPU + quota period. Defaults to 100ms. This + controls the global default for the + CPUQuotaPeriodSec= + setting of units, see + systemd.resource-control(5) + for details. + + DefaultTimeoutStartSec= DefaultTimeoutStopSec= diff --git a/man/systemd.resource-control.xml b/man/systemd.resource-control.xml index a51e600eb..5a996fd6d 100644 --- a/man/systemd.resource-control.xml +++ b/man/systemd.resource-control.xml @@ -117,11 +117,53 @@ along with systemd; If not, see . CPUShares=weight - Assign the specified overall CPU time share weight to - the processes executed. Takes an integer value. This - controls the cpu.shares control group - attribute, which defaults to 1024. For details about this - control group attribute, see Assign the specified CPU time share weight to the + processes executed. Takes an integer value. This controls + the cpu.shares control group attribute, + which defaults to 1024. 
For details about this control group + attribute, see sched-design-CFS.txt. + The available CPU time is split up among all units within a + slice relative to their CPU time share weight. + + Implies CPUAccounting=true. + + + + + CPUQuota= + + + Assign the specified CPU time quota to the processes + executed. Takes a percentage value (suffixed with "%") or an + absolute time (suffixed by one of the common time units, us, + ms, s, ...). The percentage specifies how much CPU time the + unit shall get at maximum, relative to the total CPU time + available on one CPU. Use values > 100% for allotting CPU + time on more than one CPU. If an absolute time is specified, + the processes of this unit will get this much absolute time + within each quota period, at maximum. This controls the + cpu.cfs_quota_us control group + attribute. For details about this control group attribute, + see sched-design-CFS.txt. + + Example: CPUQuota=20% ensures that + the executed processes will never get more than 20% CPU time + on one CPU. + + Implies CPUAccounting=true. + + + + + CPUQuotaPeriodSec= + + + Specify the CPU quota period to use. Defaults to + 100ms. This controls the cpu.cfs_period_us + control group attribute. For details about this control + group attribute, see sched-design-CFS.txt. Implies CPUAccounting=true. 
diff --git a/src/core/cgroup.c b/src/core/cgroup.c index 3dd4c9129..1c05c2344 100644 --- a/src/core/cgroup.c +++ b/src/core/cgroup.c @@ -36,6 +36,10 @@ void cgroup_context_init(CGroupContext *c) { c->cpu_shares = 1024; c->memory_limit = (uint64_t) -1; c->blockio_weight = 1000; + + c->cpu_quota_per_sec_usec = (usec_t) -1; + c->cpu_quota_usec = (usec_t) -1; + c->cpu_quota_period_usec = 100*USEC_PER_MSEC; } void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a) { @@ -78,10 +82,37 @@ void cgroup_context_done(CGroupContext *c) { cgroup_context_free_device_allow(c, c->device_allow); } +usec_t cgroup_context_get_cpu_quota_usec(CGroupContext *c) { + assert(c); + + /* Returns the absolute CPU quota */ + + if (c->cpu_quota_usec != (usec_t) -1) + return c->cpu_quota_usec; + else if (c->cpu_quota_per_sec_usec != (usec_t) -1) + return c->cpu_quota_per_sec_usec*c->cpu_quota_period_usec/USEC_PER_SEC; + else + return (usec_t) -1; +} + +usec_t cgroup_context_get_cpu_quota_per_sec_usec(CGroupContext *c) { + assert(c); + + /* Returns the CPU quota relative to 1s */ + + if (c->cpu_quota_usec != (usec_t) -1) + return c->cpu_quota_usec*USEC_PER_SEC/c->cpu_quota_period_usec; + else if (c->cpu_quota_per_sec_usec != (usec_t) -1) + return c->cpu_quota_per_sec_usec; + else + return (usec_t) -1; +} + void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { CGroupBlockIODeviceBandwidth *b; CGroupBlockIODeviceWeight *w; CGroupDeviceAllow *a; + char t[FORMAT_TIMESPAN_MAX], s[FORMAT_TIMESPAN_MAX], u[FORMAT_TIMESPAN_MAX]; assert(c); assert(f); @@ -93,6 +124,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { "%sBlockIOAccounting=%s\n" "%sMemoryAccounting=%s\n" "%sCPUShares=%lu\n" + "%sCPUQuota=%s\n" + "%sCPUQuotaPerSecSec=%s\n" + "%sCPUQuotaPeriodSec=%s\n" "%sBlockIOWeight=%lu\n" "%sMemoryLimit=%" PRIu64 "\n" "%sDevicePolicy=%s\n", @@ -100,6 +134,9 @@ void cgroup_context_dump(CGroupContext *c, FILE* f, const char *prefix) { prefix, 
yes_no(c->blockio_accounting), prefix, yes_no(c->memory_accounting), prefix, c->cpu_shares, + prefix, strna(format_timespan(u, sizeof(u), cgroup_context_get_cpu_quota_usec(c), 1)), + prefix, strna(format_timespan(t, sizeof(t), cgroup_context_get_cpu_quota_per_sec_usec(c), 1)), + prefix, strna(format_timespan(s, sizeof(s), c->cpu_quota_period_usec, 1)), prefix, c->blockio_weight, prefix, c->memory_limit, prefix, cgroup_device_policy_to_string(c->device_policy)); @@ -284,12 +321,27 @@ void cgroup_context_apply(CGroupContext *c, CGroupControllerMask mask, const cha is_root = isempty(path) || path_equal(path, "/"); if ((mask & CGROUP_CPU) && !is_root) { - char buf[DECIMAL_STR_MAX(unsigned long) + 1]; + char buf[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t)) + 1]; + usec_t q; sprintf(buf, "%lu\n", c->cpu_shares); r = cg_set_attribute("cpu", path, "cpu.shares", buf); if (r < 0) log_warning("Failed to set cpu.shares on %s: %s", path, strerror(-r)); + + sprintf(buf, USEC_FMT "\n", c->cpu_quota_period_usec); + r = cg_set_attribute("cpu", path, "cpu.cfs_period_us", buf); + if (r < 0) + log_warning("Failed to set cpu.cfs_period_us on %s: %s", path, strerror(-r)); + + q = cgroup_context_get_cpu_quota_usec(c); + if (q != (usec_t) -1) { + sprintf(buf, USEC_FMT "\n", q); + r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", buf); + } else + r = cg_set_attribute("cpu", path, "cpu.cfs_quota_us", "-1"); + if (r < 0) + log_warning("Failed to set cpu.cfs_quota_us on %s: %s", path, strerror(-r)); } if (mask & CGROUP_BLKIO) { @@ -415,7 +467,10 @@ CGroupControllerMask cgroup_context_get_mask(CGroupContext *c) { /* Figure out which controllers we need */ - if (c->cpu_accounting || c->cpu_shares != 1024) + if (c->cpu_accounting || + c->cpu_shares != 1024 || + c->cpu_quota_usec != (usec_t) -1 || + c->cpu_quota_per_sec_usec != (usec_t) -1) mask |= CGROUP_CPUACCT | CGROUP_CPU; if (c->blockio_accounting || diff --git a/src/core/cgroup.h b/src/core/cgroup.h index 
be717ad87..2b19add79 100644 --- a/src/core/cgroup.h +++ b/src/core/cgroup.h @@ -71,6 +71,9 @@ struct CGroupContext { bool memory_accounting; unsigned long cpu_shares; + usec_t cpu_quota_per_sec_usec; + usec_t cpu_quota_usec; + usec_t cpu_quota_period_usec; unsigned long blockio_weight; LIST_HEAD(CGroupBlockIODeviceWeight, blockio_device_weights); @@ -97,6 +100,9 @@ void cgroup_context_free_device_allow(CGroupContext *c, CGroupDeviceAllow *a); void cgroup_context_free_blockio_device_weight(CGroupContext *c, CGroupBlockIODeviceWeight *w); void cgroup_context_free_blockio_device_bandwidth(CGroupContext *c, CGroupBlockIODeviceBandwidth *b); +usec_t cgroup_context_get_cpu_quota_usec(CGroupContext *c); +usec_t cgroup_context_get_cpu_quota_per_sec_usec(CGroupContext *c); + CGroupControllerMask unit_get_cgroup_mask(Unit *u); CGroupControllerMask unit_get_siblings_mask(Unit *u); CGroupControllerMask unit_get_members_mask(Unit *u); diff --git a/src/core/dbus-cgroup.c b/src/core/dbus-cgroup.c index 5b1c4e37c..e9bdabf93 100644 --- a/src/core/dbus-cgroup.c +++ b/src/core/dbus-cgroup.c @@ -133,10 +133,49 @@ static int property_get_device_allow( return sd_bus_message_close_container(reply); } +static int property_get_cpu_quota_usec( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + + assert(bus); + assert(reply); + assert(c); + + return sd_bus_message_append(reply, "t", cgroup_context_get_cpu_quota_usec(c)); +} + +static int property_get_cpu_quota_per_sec_usec( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + CGroupContext *c = userdata; + + assert(bus); + assert(reply); + assert(c); + + return sd_bus_message_append(reply, "t", cgroup_context_get_cpu_quota_per_sec_usec(c)); +} + const sd_bus_vtable bus_cgroup_vtable[] = { 
SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("CPUAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, cpu_accounting), 0), SD_BUS_PROPERTY("CPUShares", "t", bus_property_get_ulong, offsetof(CGroupContext, cpu_shares), 0), + SD_BUS_PROPERTY("CPUQuotaPerSecUSec", "t", property_get_cpu_quota_per_sec_usec, 0, 0), + SD_BUS_PROPERTY("CPUQuotaUSec", "t", property_get_cpu_quota_usec, 0, 0), + SD_BUS_PROPERTY("CPUQuotaPeriodUSec", "t", bus_property_get_usec, offsetof(CGroupContext, cpu_quota_period_usec), 0), SD_BUS_PROPERTY("BlockIOAccounting", "b", bus_property_get_bool, offsetof(CGroupContext, blockio_accounting), 0), SD_BUS_PROPERTY("BlockIOWeight", "t", bus_property_get_ulong, offsetof(CGroupContext, blockio_weight), 0), SD_BUS_PROPERTY("BlockIODeviceWeight", "a(st)", property_get_blockio_device_weight, 0, 0), @@ -199,6 +238,63 @@ int bus_cgroup_set_property( return 1; + } else if (streq(name, "CPUQuotaPerSecUSec")) { + uint64_t u64; + + r = sd_bus_message_read(message, "t", &u64); + if (r < 0) + return r; + + if (u64 <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "CPUQuotaPerSecUSec value out of range"); + + if (mode != UNIT_CHECK) { + c->cpu_quota_per_sec_usec = u64; + c->cpu_quota_usec = (uint64_t) -1; + u->cgroup_realized_mask &= ~CGROUP_CPU; + unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%0.f%%", (double) (c->cpu_quota_per_sec_usec / 10000)); + } + + return 1; + + } else if (streq(name, "CPUQuotaUSec")) { + uint64_t u64; + + r = sd_bus_message_read(message, "t", &u64); + if (r < 0) + return r; + + if (u64 <= 0) + return sd_bus_error_set_errnof(error, EINVAL, "CPUQuotaUSec value out of range"); + + if (mode != UNIT_CHECK) { + c->cpu_quota_usec = u64; + c->cpu_quota_per_sec_usec = (uint64_t) -1; + u->cgroup_realized_mask &= ~CGROUP_CPU; + unit_write_drop_in_private_format(u, mode, "CPUQuota", "CPUQuota=%" PRIu64 "us", u64); + } + + return 1; + + } else if (streq(name, "CPUQuotaPeriodUSec")) { + + uint64_t u64; + + r = 
sd_bus_message_read(message, "t", &u64); + if (r < 0) + return r; + + if (u64 <= 0 || u64 >= (usec_t) -1) + return sd_bus_error_set_errnof(error, EINVAL, "CPUQuotaPeriodUSec value out of range"); + + if (mode != UNIT_CHECK) { + c->cpu_quota_period_usec = u64; + u->cgroup_realized_mask &= ~CGROUP_CPU; + unit_write_drop_in_private_format(u, mode, name, "CPUQuotaPeriodSec=%" PRIu64 "us", c->cpu_quota_period_usec); + } + + return 1; + } else if (streq(name, "BlockIOAccounting")) { int b; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index b8d873894..21bccbb1c 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -106,6 +106,8 @@ m4_define(`CGROUP_CONTEXT_CONFIG_ITEMS', `$1.Slice, config_parse_unit_slice, 0, 0 $1.CPUAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.cpu_accounting) $1.CPUShares, config_parse_cpu_shares, 0, offsetof($1, cgroup_context) +$1.CPUQuota, config_parse_cpu_quota, 0, offsetof($1, cgroup_context) +$1.CPUQuotaPeriodSec, config_parse_sec, 0, offsetof($1, cgroup_context.cpu_quota_period_usec) $1.MemoryAccounting, config_parse_bool, 0, offsetof($1, cgroup_context.memory_accounting) $1.MemoryLimit, config_parse_memory_limit, 0, offsetof($1, cgroup_context) $1.DeviceAllow, config_parse_device_allow, 0, offsetof($1, cgroup_context) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 6c92935d0..3b36d1568 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -2455,6 +2455,54 @@ int config_parse_cpu_shares( return 0; } +int config_parse_cpu_quota( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + CGroupContext *c = data; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + c->cpu_quota_per_sec_usec = (usec_t) -1; + 
c->cpu_quota_usec = (usec_t) -1; + return 0; + } + + if (endswith(rvalue, "%")) { + double percent; + + if (sscanf(rvalue, "%lf%%", &percent) != 1 || percent <= 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU quota '%s' invalid. Ignoring.", rvalue); + return 0; + } + + c->cpu_quota_per_sec_usec = (usec_t) (percent * USEC_PER_SEC / 100); + c->cpu_quota_usec = (usec_t) -1; + } else { + r = parse_sec(rvalue, &c->cpu_quota_usec); + if (r < 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, "CPU quota '%s' invalid. Ignoring.", rvalue); + return 0; + } + + c->cpu_quota_per_sec_usec = (usec_t) -1; + } + + return 0; +} + int config_parse_memory_limit( const char *unit, const char *filename, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 0f3c0c7a7..242fd271b 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -96,6 +96,7 @@ int config_parse_runtime_directory(const char *unit, const char *filename, unsig int config_parse_set_status(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_no_new_priviliges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/main.c b/src/core/main.c index 
feabead96..a6a5aa70f 100644 --- a/src/core/main.c +++ b/src/core/main.c @@ -109,6 +109,7 @@ static struct rlimit *arg_default_rlimit[_RLIMIT_MAX] = {}; static uint64_t arg_capability_bounding_set_drop = 0; static nsec_t arg_timer_slack_nsec = (nsec_t) -1; static usec_t arg_default_timer_accuracy_usec = 1 * USEC_PER_MINUTE; +static usec_t arg_default_cpu_quota_period_usec = 100 * USEC_PER_MSEC; static Set* arg_syscall_archs = NULL; static FILE* arg_serialization = NULL; static bool arg_default_cpu_accounting = false; @@ -683,6 +684,7 @@ static int parse_config_file(void) { #endif { "Manager", "TimerSlackNSec", config_parse_nsec, 0, &arg_timer_slack_nsec }, { "Manager", "DefaultTimerAccuracySec", config_parse_sec, 0, &arg_default_timer_accuracy_usec }, + { "Manager", "DefaultCPUQuotaPeriodSec", config_parse_sec, 0, &arg_default_cpu_quota_period_usec }, { "Manager", "DefaultStandardOutput", config_parse_output, 0, &arg_default_std_output }, { "Manager", "DefaultStandardError", config_parse_output, 0, &arg_default_std_error }, { "Manager", "DefaultTimeoutStartSec", config_parse_sec, 0, &arg_default_timeout_start_usec }, @@ -1633,6 +1635,7 @@ int main(int argc, char *argv[]) { m->confirm_spawn = arg_confirm_spawn; m->default_timer_accuracy_usec = arg_default_timer_accuracy_usec; + m->default_cpu_quota_period_usec = arg_default_cpu_quota_period_usec; m->default_std_output = arg_default_std_output; m->default_std_error = arg_default_std_error; m->default_restart_usec = arg_default_restart_usec; diff --git a/src/core/manager.c b/src/core/manager.c index ce8759e0c..5772f402b 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -429,6 +429,7 @@ int manager_new(SystemdRunningAs running_as, Manager **_m) { m->running_as = running_as; m->exit_code = _MANAGER_EXIT_CODE_INVALID; m->default_timer_accuracy_usec = USEC_PER_MINUTE; + m->default_cpu_quota_period_usec = 100 * USEC_PER_MSEC; m->idle_pipe[0] = m->idle_pipe[1] = m->idle_pipe[2] = m->idle_pipe[3] = -1; diff --git 
a/src/core/manager.h b/src/core/manager.h index 14cdf8151..a3de351a8 100644 --- a/src/core/manager.h +++ b/src/core/manager.h @@ -240,6 +240,7 @@ struct Manager { bool default_cpu_accounting; bool default_memory_accounting; bool default_blockio_accounting; + usec_t default_cpu_quota_period_usec; usec_t default_timer_accuracy_usec; diff --git a/src/core/system.conf b/src/core/system.conf index 65a35a068..4d775fafb 100644 --- a/src/core/system.conf +++ b/src/core/system.conf @@ -24,6 +24,7 @@ #SystemCallArchitectures= #TimerSlackNSec= #DefaultTimerAccuracySec=1min +#DefaultCPUQuotaPeriodSec=100ms #DefaultStandardOutput=journal #DefaultStandardError=inherit #DefaultTimeoutStartSec=90s diff --git a/src/core/unit.c b/src/core/unit.c index 153b79b3a..6ac359e31 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -121,6 +121,7 @@ static void unit_init(Unit *u) { cc->cpu_accounting = u->manager->default_cpu_accounting; cc->blockio_accounting = u->manager->default_blockio_accounting; cc->memory_accounting = u->manager->default_memory_accounting; + cc->cpu_quota_period_usec = u->manager->default_cpu_quota_period_usec; } ec = unit_get_exec_context(u); diff --git a/src/libsystemd/sd-bus/bus-util.c b/src/libsystemd/sd-bus/bus-util.c index df7ab8a83..84b3fc5af 100644 --- a/src/libsystemd/sd-bus/bus-util.c +++ b/src/libsystemd/sd-bus/bus-util.c @@ -1250,6 +1250,70 @@ int bus_append_unit_property_assignment(sd_bus_message *m, const char *assignmen field = strndupa(assignment, eq - assignment); eq ++; + if (streq(field, "CPUQuota")) { + + if (isempty(eq)) { + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaPerSecUSec"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "v", "t", (usec_t) -1); + + } else if (endswith(eq, "%")) { + double percent; + + if (sscanf(eq, "%lf%%", &percent) != 1 || percent <= 0) { + log_error("CPU quota '%s' invalid.", eq); + return -EINVAL; + } + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, 
"CPUQuotaPerSecUSec"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "v", "t", (usec_t) percent * USEC_PER_SEC / 100); + } else { + usec_t us; + + r = parse_sec(eq, &us); + if (r < 0) { + log_error("CPU quota '%s' invalid.", eq); + return -EINVAL; + } + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaUSec"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "v", "t", us); + } + + if (r < 0) + return bus_log_create_error(r); + + return 0; + + } else if (streq(field, "CPUQuotaPeriodSec")) { + usec_t us; + + r = parse_sec(eq, &us); + if (r < 0) { + log_error("CPU period '%s' invalid.", eq); + return -EINVAL; + } + + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, "CPUQuotaPeriodUSec"); + if (r < 0) + return bus_log_create_error(r); + + r = sd_bus_message_append(m, "v", "t", us); + if (r < 0) + return bus_log_create_error(r); + + return 0; + } + r = sd_bus_message_append_basic(m, SD_BUS_TYPE_STRING, field); if (r < 0) return bus_log_create_error(r); -- 2.30.2