From: Lennart Poettering Date: Wed, 17 Jan 2018 17:40:10 +0000 (+0100) Subject: util-lib: add new procfs-util.[ch] API for dealing with tasks limits X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=commitdiff_plain;h=f3cd1b1561675c3824734f15889d9309c1e58091;p=elogind.git util-lib: add new procfs-util.[ch] API for dealing with tasks limits As it turns out the limit on concurrent tasks on Linux is nasty to determine, hence let's add appropriate helpers for this. --- diff --git a/src/basic/meson.build b/src/basic/meson.build index d4f0503a0..0c593cc71 100644 --- a/src/basic/meson.build +++ b/src/basic/meson.build @@ -36,6 +36,8 @@ # bitmap.c # bitmap.h # blkid-util.h +# blockdev-util.c +# blockdev-util.h # bpf-program.c # bpf-program.h # btrfs-ctree.h @@ -149,6 +151,8 @@ # proc-cmdline.h # process-util.c # process-util.h +# procfs-util.c +# procfs-util.h # random-util.c # random-util.h # ratelimit.c @@ -177,6 +181,8 @@ # smack-util.c # smack-util.h # socket-label.c +# socket-protocol-list.c +# socket-protocol-list.h # socket-util.c # socket-util.h # sparse-endian.h @@ -202,10 +208,10 @@ # time-util.h # umask-util.h # unaligned.h -# unit-name.c -# unit-name.h # unit-def.c # unit-def.h +# unit-name.c +# unit-name.h # user-util.c # user-util.h # utf8.c @@ -404,11 +410,20 @@ errno_list_txt = custom_target( command : [generate_errno_list, cpp], capture : true) +generate_socket_protocol_list = find_program('generate-socket-protocol-list.sh') +socket_protocol_list_txt = custom_target( + 'socket-protocol-list.txt', + output : 'socket-protocol-list.txt', + command : [generate_socket_protocol_list, cpp], + capture : true) + generated_gperf_headers = [] #if 0 /// elogind has only the cap and errno list. 
# foreach item : [['af', af_list_txt, 'af', ''], # ['arphrd', arphrd_list_txt, 'arphrd', 'ARPHRD_'], # ['cap', cap_list_txt, 'capability', ''], +# ['errno', errno_list_txt, 'errno', ''], +# ['socket-protocol', socket_protocol_list_txt, 'socket_protocol', 'IPPROTO_']] #else foreach item : [['cap', cap_list_txt, 'capability', ''], ['errno', errno_list_txt, 'errno', '']] diff --git a/src/basic/procfs-util.c b/src/basic/procfs-util.c new file mode 100644 index 000000000..ca2ec988e --- /dev/null +++ b/src/basic/procfs-util.c @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +//#include + +//#include "alloc-util.h" +//#include "fileio.h" +//#include "parse-util.h" +//#include "process-util.h" +//#include "procfs-util.h" +//#include "stdio-util.h" +//#include "string-util.h" + +int procfs_tasks_get_limit(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + uint64_t pid_max, threads_max; + int r; + + assert(ret); + + /* So there are two sysctl files that control the system limit of processes: + * + * 1. kernel.threads-max: this is probably the sysctl that makes more sense, as it directly puts a limit on + * concurrent tasks. + * + * 2. kernel.pid_max: this limits the numeric range PIDs can take, and thus indirectly also limits the number + * of concurrent threads. AFAICS it's primarily a compatibility concept: some crappy old code used a signed + * 16bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond INT16_MAX by + * default. + * + * By default #2 is set to much lower values than #1, hence the limit people come into contact with first, as + * it's the lowest boundary they need to bump when they want higher number of processes. + * + * Also note the weird definition of #2: PIDs assigned will be kept below this value, which means the number of + * tasks that can be created is one lower, as PID 0 is not a valid process ID. 
*/ + + r = read_one_line_file("/proc/sys/kernel/pid_max", &value); + if (r < 0) + return r; + + r = safe_atou64(value, &pid_max); + if (r < 0) + return r; + + value = mfree(value); + r = read_one_line_file("/proc/sys/kernel/threads-max", &value); + if (r < 0) + return r; + + r = safe_atou64(value, &threads_max); + if (r < 0) + return r; + + /* Subtract one from pid_max, since PID 0 is not a valid PID */ + *ret = MIN(pid_max-1, threads_max); + return 0; +} + +int procfs_tasks_set_limit(uint64_t limit) { + char buffer[DECIMAL_STR_MAX(uint64_t)+1]; + _cleanup_free_ char *value = NULL; + uint64_t pid_max; + int r; + + if (limit == 0) /* This makes no sense, we are userspace and hence count as tasks too, and we want to live, + * hence the limit conceptually has to be above 0. Also, most likely if anyone asks for a zero + * limit he/she probably means "no limit", hence let's better refuse this to avoid + * confusion. */ + return -EINVAL; + + /* The Linux kernel doesn't allow this value to go below 20, hence don't allow this either, higher values than + * TASKS_MAX are not accepted by the pid_max sysctl. We'll treat anything this high as "unbounded" and hence + * set it to the maximum. */ + limit = CLAMP(limit, 20U, TASKS_MAX); + + r = read_one_line_file("/proc/sys/kernel/pid_max", &value); + if (r < 0) + return r; + r = safe_atou64(value, &pid_max); + if (r < 0) + return r; + + /* As pid_max is about the numeric pid_t range we'll bump it if necessary, but only ever increase it, never + * decrease it, as threads-max is the much more relevant sysctl. 
*/ + if (limit > pid_max-1) { + sprintf(buffer, "%" PRIu64, limit+1); /* Add one, since PID 0 is not a valid PID */ + r = write_string_file("/proc/sys/kernel/pid_max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) + return r; + } + + sprintf(buffer, "%" PRIu64, limit); + r = write_string_file("/proc/sys/kernel/threads-max", buffer, WRITE_STRING_FILE_DISABLE_BUFFER); + if (r < 0) { + uint64_t threads_max; + + /* Hmm, we couldn't write this? If so, maybe it was already set properly? In that case let's not + * generate an error */ + + value = mfree(value); + if (read_one_line_file("/proc/sys/kernel/threads-max", &value) < 0) + return r; /* return original error */ + + if (safe_atou64(value, &threads_max) < 0) + return r; /* return original error */ + + if (MIN(pid_max-1, threads_max) != limit) + return r; /* return original error */ + + /* Yay! Value set already matches what we were trying to set, hence consider this a success. */ + } + + return 0; +} + +int procfs_tasks_get_current(uint64_t *ret) { + _cleanup_free_ char *value = NULL; + const char *p, *nr; + size_t n; + int r; + + assert(ret); + + r = read_one_line_file("/proc/loadavg", &value); + if (r < 0) + return r; + + /* Look for the second part of the fourth field, which is separated by a slash from the first part. None of the + * earlier fields use a slash, hence let's use this to find the right spot. 
*/ + p = strchr(value, '/'); + if (!p) + return -EINVAL; + + p++; + n = strspn(p, DIGITS); + nr = strndupa(p, n); + + return safe_atou64(nr, ret); +} diff --git a/src/basic/procfs-util.h b/src/basic/procfs-util.h new file mode 100644 index 000000000..a03891e78 --- /dev/null +++ b/src/basic/procfs-util.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +#pragma once + +//#include + +int procfs_tasks_get_limit(uint64_t *ret); +int procfs_tasks_set_limit(uint64_t limit); +int procfs_tasks_get_current(uint64_t *ret); diff --git a/src/test/meson.build b/src/test/meson.build index 3674740c0..530163aaf 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -409,17 +409,6 @@ tests += [ # libblkid]], #endif // 0 - [['src/test/test-watch-pid.c', - 'src/test/test-helper.c'], - [libcore, - libshared], - [libmount, - threads, - librt, - libseccomp, - libselinux, - libblkid]], - [['src/test/test-hashmap.c', 'src/test/test-hashmap-plain.c', test_hashmap_ordered_c], @@ -445,6 +434,10 @@ tests += [ [], []], + [['src/test/test-procfs-util.c'], + [], + []], + [['src/test/test-unaligned.c'], [], []], diff --git a/src/test/test-procfs-util.c b/src/test/test-procfs-util.c new file mode 100644 index 000000000..6c4d15ec8 --- /dev/null +++ b/src/test/test-procfs-util.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +//#include + +//#include "log.h" +//#include "procfs-util.h" + +int main(int argc, char *argv[]) { + uint64_t v; + int r; + + log_parse_environment(); + log_open(); + + assert_se(procfs_tasks_get_current(&v) >= 0); + log_info("Current number of tasks: %" PRIu64, v); + + assert_se(procfs_tasks_get_limit(&v) >= 0); + log_info("Limit of tasks: %" PRIu64, v); + assert_se(v > 0); + assert_se(procfs_tasks_set_limit(v) >= 0); + + if (v > 100) { + uint64_t w; + r = procfs_tasks_set_limit(v-1); + assert_se(IN_SET(r, 0, -EPERM, -EACCES, -EROFS)); + + assert_se(procfs_tasks_get_limit(&w) >= 0); + assert_se((r == 0 && w == v - 1) || (r < 0 && w == v)); + + 
assert_se(procfs_tasks_set_limit(v) >= 0); + + assert_se(procfs_tasks_get_limit(&w) >= 0); + assert_se(v == w); + } + + return 0; +}