1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
17 //#include <sys/mman.h>
18 #include <sys/prctl.h>
19 #include <sys/statfs.h>
20 #include <sys/sysmacros.h>
21 //#include <sys/types.h>
24 #include "alloc-util.h"
25 //#include "btrfs-util.h"
27 #include "cgroup-util.h"
29 //#include "device-nodes.h"
30 #include "dirent-util.h"
33 //#include "format-util.h"
35 #include "hostname-util.h"
38 //#include "missing.h"
39 #include "parse-util.h"
40 //#include "path-util.h"
41 #include "process-util.h"
42 #include "procfs-util.h"
44 #include "signal-util.h"
45 #include "stat-util.h"
46 #include "string-util.h"
48 #include "time-util.h"
49 #include "umask-util.h"
50 #include "user-util.h"
/* Presumably a saved copy of the process argument vector. NOTE(review): nothing in this excerpt reads or
 * writes it — confirm against the callers. */
55 char **saved_argv = NULL;
/* Cache for in_initrd(): starts out negative ("not determined yet"); in_initrd() returns the stored value
 * as soon as it is >= 0, and in_initrd_force() overwrites it. */
56 static int saved_in_initrd = -1;
/* Page size lookup with a per-thread cache: the value lives in a thread-local static and
 * sysconf(_SC_PAGESIZE) is only reached while that cache is still 0.
 * NOTE(review): the lines that validate the sysconf() result, store it in the cache and return it are
 * elided from this excerpt — confirm against the full file. */
58 size_t page_size(void) {
59 static thread_local size_t pgsz = 0;
/* Fast path: a previous call on this thread already filled the cache. */
62 if (_likely_(pgsz > 0))
65 r = sysconf(_SC_PAGESIZE);
72 #if 0 /// UNNEEDED by elogind
/* Returns true when /run/plymouth/pid exists, i.e. when access() with F_OK succeeds on it. */
73 bool plymouth_running(void) {
74 return access("/run/plymouth/pid", F_OK) >= 0;
/* Predicate on a display string; socket_from_display() refuses any display for which this returns false.
 * NOTE(review): the body is elided from this excerpt, so the exact criteria cannot be stated here. */
78 bool display_is_local(const char *display) {
/* Derives a socket path of the form "/tmp/.X11-unix/X<digits>" from a display string, where <digits> is
 * the run of decimal digits that follows the first character of display.
 * NOTE(review): argument assertions, error returns, NUL termination and the assignment to *path are
 * elided from this excerpt — confirm against the full file. */
87 int socket_from_display(const char *display, char **path) {
/* Only displays accepted by display_is_local() are handled. */
94 if (!display_is_local(display))
/* k = number of leading digits after the first character of display. */
97 k = strspn(display+1, "0123456789");
/* Room for the fixed prefix, the k digits and one extra byte (presumably the trailing NUL). */
99 f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
/* stpcpy() returns a pointer to the end of the copied prefix, which is where the digits go. */
103 c = stpcpy(f, "/tmp/.X11-unix/X");
104 memcpy(c, display+1, k);
112 #if 0 /// UNNEEDED by elogind
/* Reads the single line of /sys/kernel/kexec_loaded into s.
 * NOTE(review): what is returned on read failure and how the line is interpreted are elided from this
 * excerpt — confirm against the full file. */
113 bool kexec_loaded(void) {
/* Declared _cleanup_free_, so the buffer is presumably released automatically on scope exit. */
114 _cleanup_free_ char *s = NULL;
116 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
/* Maps the access-mode bits of open(2)-style flags (flags & O_ACCMODE) to PROT_* protection bits.
 * NOTE(review): the case labels are elided from this excerpt; only the branch that yields
 * PROT_READ|PROT_WRITE is visible (presumably the O_RDWR case). */
122 int prot_from_flags(int flags) {
124 switch (flags & O_ACCMODE) {
133 return PROT_READ|PROT_WRITE;
141 bool in_initrd(void) {
144 if (saved_in_initrd >= 0)
145 return saved_in_initrd;
147 /* We make two checks here:
149 * 1. the flag file /etc/initrd-release must exist
150 * 2. the root file system must be a memory file system
152 * The second check is extra paranoia, since misdetecting an
153 * initrd can have bad consequences due to the initrd
154 * being emptied when transitioning to the main systemd.
157 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
158 statfs("/", &s) >= 0 &&
161 return saved_in_initrd;
164 #if 0 /// UNNEEDED by elogind
/* Overrides the cached initrd state: stores value in saved_in_initrd, which in_initrd() returns without
 * re-probing once it is >= 0. */
165 void in_initrd_force(bool value) {
166 saved_in_initrd = value;
169 /* hey glibc, APIs with callbacks without a user pointer are so useless */
/* Search over the array base (nmemb elements of size bytes each) using a comparator that receives an
 * extra caller-supplied pointer arg as its third argument. Judging by the name this is a bsearch()
 * variant. NOTE(review): the loop that updates the search bounds and the return statements are elided
 * from this excerpt. */
170 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
171 int (*compar) (const void *, const void *, void *), void *arg) {
/* The total array size in bytes must be representable, so that idx * size below cannot wrap. */
176 assert(!size_multiply_overflow(nmemb, size));
/* Address of the element currently being probed. */
182 p = (const uint8_t*) base + idx * size;
183 comparison = compar(key, p, arg);
186 else if (comparison > 0)
/* Determines whether the system runs on AC power by scanning /sys/class/power_supply.
 *
 * Each directory entry is opened as a directory. Its "type" attribute must read exactly "Mains\n" to be
 * considered, and its "online" attribute is expected to be one character followed by a newline: '1' or
 * '0'. The final result is found_online || !found_offline, i.e. "on AC" unless at least one mains
 * supply reports offline and none reports online.
 *
 * Returns a negative errno-style value on failure.
 * NOTE(review): several branches (NULL/negative checks, continue/return statements, the '1' case body)
 * are elided from this excerpt — confirm details against the full file. */
195 int on_ac_power(void) {
196 bool found_offline = false, found_online = false;
197 _cleanup_closedir_ DIR *d = NULL;
200 d = opendir("/sys/class/power_supply");
/* A missing power_supply class directory (ENOENT) is reported as "on AC power"; any other errno is
 * returned negated. Presumably guarded by a check that opendir() failed (elided). */
202 return errno == ENOENT ? true : -errno;
204 FOREACH_DIRENT(de, d, return -errno) {
205 _cleanup_close_ int fd = -1, device = -1;
209 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
/* ENOENT and ENOTDIR get special treatment here (handling elided; presumably the entry is skipped). */
211 if (IN_SET(errno, ENOENT, ENOTDIR))
217 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
225 n = read(fd, contents, sizeof(contents));
/* Only supplies whose type file is exactly the 6 bytes "Mains\n" are of interest. */
229 if (n != 6 || memcmp(contents, "Mains\n", 6))
/* fd is reused for the "online" attribute of the same device directory. */
233 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
241 n = read(fd, contents, sizeof(contents));
/* Expected content: exactly one status character plus a newline. */
245 if (n != 2 || contents[1] != '\n')
248 if (contents[0] == '1') {
251 } else if (contents[0] == '0')
252 found_offline = true;
257 return found_online || !found_offline;
/* Looks up the leader PID of a registered machine. The machine name is validated, the environment-style
 * file /run/systemd/machines/<machine> is parsed for its LEADER and CLASS fields, the class is required to
 * be "container", and LEADER is parsed as a PID.
 * NOTE(review): the error returns and the final store to *pid are elided from this excerpt. */
260 int container_get_leader(const char *machine, pid_t *pid) {
261 _cleanup_free_ char *s = NULL, *class = NULL;
269 if (!machine_name_is_valid(machine))
272 p = strjoina("/run/systemd/machines/", machine);
273 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
/* streq_ptr() is used rather than streq(), presumably because class stays NULL when the file has no
 * CLASS= line. */
281 if (!streq_ptr(class, "container"))
284 r = parse_pid(s, &leader);
/* Opens file descriptors for the mount, PID, network and user namespaces of process pid, plus its root
 * directory, using the per-process procfs entries "ns/mnt", "ns/pid", "ns/net", "ns/user" and "root".
 * All descriptors are opened O_RDONLY|O_NOCTTY|O_CLOEXEC (the root additionally with O_DIRECTORY).
 * NOTE(review): the checks deciding which descriptors are requested, most error returns and most of the
 * output assignments are elided from this excerpt — confirm against the full file. */
294 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
295 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
303 mntns = procfs_file_alloca(pid, "ns/mnt");
304 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
312 pidns = procfs_file_alloca(pid, "ns/pid");
313 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
321 netns = procfs_file_alloca(pid, "ns/net");
322 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
330 userns = procfs_file_alloca(pid, "ns/user");
331 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
/* Unlike the other namespaces, a missing "ns/user" entry (ENOENT) is not matched by this condition, so
 * only other open() failures take this branch. */
332 if (usernsfd < 0 && errno != ENOENT)
339 root = procfs_file_alloca(pid, "root");
340 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
355 *userns_fd = usernsfd;
/* The descriptors now belong to the caller; resetting the locals to -1 presumably keeps the
 * _cleanup_close_ handlers from closing them on return. */
360 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
/* Joins the namespaces referred to by the given descriptors via setns() (PID, mount, network, user — in
 * that order), changes into root_fd with fchdir(), and finally returns the result of reset_uid_gid().
 * NOTE(review): the guards around the individual setns()/fchdir() calls, the error returns and whatever
 * happens between fchdir() and reset_uid_gid() are elided from this excerpt. */
365 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
366 if (userns_fd >= 0) {
367 /* Can't setns to your own userns, since then you could
368 * escalate from non-root to root in your own namespace, so
369 * check if namespaces equal before attempting to enter. */
370 _cleanup_free_ char *userns_fd_path = NULL;
372 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
/* Compare the namespace behind userns_fd with our own user namespace. */
375 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
383 if (setns(pidns_fd, CLONE_NEWPID) < 0)
387 if (setns(mntns_fd, CLONE_NEWNS) < 0)
391 if (setns(netns_fd, CLONE_NEWNET) < 0)
395 if (setns(userns_fd, CLONE_NEWUSER) < 0)
399 if (fchdir(root_fd) < 0)
406 return reset_uid_gid();
/* Returns the amount of usable memory: the smaller of the physical amount (sysconf(_SC_PHYS_PAGES)
 * times the page size ps) and the memory limit read from the root cgroup. Every failure while
 * determining the cgroup limit is only logged at debug level and, per the log messages, the limit is then
 * ignored.
 * NOTE(review): the branch structure (unified vs. legacy hierarchy), the early returns and the rounding
 * of the limit are elided from this excerpt — confirm against the full file. */
409 uint64_t physical_memory(void) {
410 _cleanup_free_ char *root = NULL, *value = NULL;
416 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
419 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
420 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
422 sc = sysconf(_SC_PHYS_PAGES);
/* Both factors are widened to 64 bit before multiplying. */
426 mem = (uint64_t) sc * (uint64_t) ps;
428 r = cg_get_root_path(&root);
430 log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
/* The result presumably selects between the "memory.max" and "memory.limit_in_bytes" attributes below. */
434 r = cg_all_unified();
436 log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
440 r = cg_get_attribute("memory", root, "memory.max", &value);
442 log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
/* The literal string "max" is special-cased (handling elided; presumably it means "no limit"). */
446 if (streq(value, "max"))
449 r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value);
451 log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
456 r = safe_atou64(value, &lim);
458 log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value);
/* UINT64_MAX is special-cased as well (handling elided). */
461 if (lim == UINT64_MAX)
464 /* Make sure the limit is a multiple of our own page size */
468 return MIN(mem, lim);
471 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
472 uint64_t p, m, ps, r;
476 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
477 * the result is a multiple of the page size (rounds down). */
/* p is the physical memory expressed in units of ps (presumably the page size; its assignment is elided
 * from this excerpt, as are the scaling and overflow checks that follow). */
482 p = physical_memory() / ps;
498 uint64_t system_tasks_max(void) {
/* Both candidates default to TASKS_MAX, so a failed lookup below simply leaves the default in place. */
500 uint64_t a = TASKS_MAX, b = TASKS_MAX;
501 _cleanup_free_ char *root = NULL;
503 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
506 * a) the maximum tasks value the kernel allows on this architecture
507 * b) the cgroups pids_max attribute for the system
508 * c) the kernel's configured maximum PID value
510 * And then pick the smallest of the three */
/* Errors are deliberately ignored here (void cast). */
512 (void) procfs_tasks_get_limit(&a);
514 if (cg_get_root_path(&root) >= 0) {
515 _cleanup_free_ char *value = NULL;
/* Errors from parsing "pids.max" are ignored too (void cast on the next line). */
517 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
518 (void) safe_atou64(value, &b);
/* a and b are unsigned, so "<= 0" can only mean "== 0"; a zero value is replaced by TASKS_MAX before the
 * minimum is taken. */
521 return MIN3(TASKS_MAX,
522 a <= 0 ? TASKS_MAX : a,
523 b <= 0 ? TASKS_MAX : b);
526 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
531 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
532 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
534 t = system_tasks_max();
/* Dividing the product back by t must give v again, otherwise the multiplication wrapped around.
 * NOTE(review): the computation of m (presumably t * v) and the final division by max are elided from
 * this excerpt. */
538 if (m / t != v) /* overflow? */
545 puts(PACKAGE_STRING "\n"
550 #if 0 /// UNNEEDED by elogind
551 /* This is a direct translation of str_verscmp from boot.c */
/* Returns true if c is one of the characters '0' through '9'. */
552 static bool is_digit(int c) {
553 return c >= '0' && c <= '9';
/* Assigns a character an ordering value; str_verscmp() compares characters by the difference of these
 * values. NUL and digits share one class and lower-case ASCII letters form another.
 * NOTE(review): the values returned for each class, and any further classes, are elided from this
 * excerpt. */
556 static int c_order(int c) {
557 if (c == 0 || is_digit(c))
560 if ((c >= 'a') && (c <= 'z'))
/* Compares two strings piecewise: runs of non-digit characters are compared via c_order(), runs of digits
 * are handled by a separate loop, and the last visible step falls back to strcmp() on os1/os2
 * (presumably the unmodified input pointers, since s1/s2 are advanced while scanning).
 * NOTE(review): the loop bodies, the intermediate returns and the assignment of os1/os2 are elided from
 * this excerpt — confirm against the full file. */
566 int str_verscmp(const char *s1, const char *s2) {
567 const char *os1, *os2;
/* Runs while at least one of the strings still has a non-digit character at its current position. */
578 while ((*s1 && !is_digit(*s1)) || (*s2 && !is_digit(*s2))) {
581 order = c_order(*s1) - c_order(*s2);
/* Runs while both strings have a digit at their current position. */
594 while (is_digit(*s1) && is_digit(*s2)) {
610 return strcmp(os1, os2);
613 /* Turn off core dumps but only if we're running outside of a container. */
614 void disable_coredumps(void) {
/* A positive detect_container() result means we are inside a container; the handling is elided from this
 * excerpt (per the comment above, nothing is changed in that case). */
617 if (detect_container() > 0)
/* Point the core pattern at the pipe handler "|/bin/false". */
620 r = write_string_file("/proc/sys/kernel/core_pattern", "|/bin/false", 0);
/* Best effort: per the message, a failure is only logged at debug level and otherwise ignored. */
622 log_debug_errno(r, "Failed to turn off coredumps, ignoring: %m");