1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 //#include <sys/mman.h>
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 //#include <sys/types.h>
37 #include "alloc-util.h"
38 //#include "btrfs-util.h"
40 #include "cgroup-util.h"
42 //#include "device-nodes.h"
43 #include "dirent-util.h"
46 //#include "format-util.h"
48 #include "hostname-util.h"
51 //#include "missing.h"
52 #include "parse-util.h"
53 //#include "path-util.h"
54 #include "process-util.h"
56 #include "signal-util.h"
57 #include "stat-util.h"
58 #include "string-util.h"
60 #include "time-util.h"
61 #include "umask-util.h"
62 #include "user-util.h"
/* Process-wide state. saved_in_initrd caches the result of in_initrd(): a negative value means "not
 * determined yet", and in_initrd_force() can overwrite it.
 * NOTE(review): saved_argv is not assigned anywhere in this view; presumably it is set by main() — confirm. */
66 char **saved_argv = NULL;
67 static int saved_in_initrd = -1;
/* Returns the page size as reported by sysconf(_SC_PAGESIZE). The value is kept in a thread-local static
 * (pgsz); the pgsz > 0 check is the fast path for a value that was already looked up.
 * NOTE(review): the return statements and error handling are not visible in this view. */
69 size_t page_size(void) {
70 static thread_local size_t pgsz = 0;
73 if (_likely_(pgsz > 0))
76 r = sysconf(_SC_PAGESIZE);
83 #if 0 /// UNNEEDED by elogind
/* Returns true if /run/plymouth/pid exists, i.e. access() with F_OK succeeds on it. */
84 bool plymouth_running(void) {
85 return access("/run/plymouth/pid", F_OK) >= 0;
/* Predicate on an X11 display string; socket_from_display() checks it before building a socket path.
 * NOTE(review): only the signature is visible in this view, the exact criteria cannot be documented here. */
89 bool display_is_local(const char *display) {
/* Derives the X11 socket path for a display string: the run of decimal digits that follows the first
 * character of 'display' is appended to "/tmp/.X11-unix/X" in a freshly allocated buffer. Displays for which
 * display_is_local() is false are not processed this way.
 * NOTE(review): presumably the buffer is NUL-terminated and handed out through *path (caller frees) — the
 * corresponding lines and the return codes are not visible in this view. */
98 int socket_from_display(const char *display, char **path) {
105 if (!display_is_local(display))
108 k = strspn(display+1, "0123456789");
110 f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
114 c = stpcpy(f, "/tmp/.X11-unix/X");
115 memcpy(c, display+1, k);
123 #if 0 /// UNNEEDED by elogind
/* Resolves the whole-disk device that block device 'd' belongs to, using the device's sysfs attributes:
 *
 *   - a device that has a "queue" entry is considered good enough as it is;
 *   - otherwise the device must have a "partition" entry, in which case the parent's "dev" attribute is read
 *     and parsed as "major:minor";
 *   - the parent is stored in *ret after checking that it has a "queue" entry itself.
 *
 * NOTE(review): the return statements (and hence the exact error codes) are not visible in this view. */
124 int block_get_whole_disk(dev_t d, dev_t *ret) {
125 char p[SYS_BLOCK_PATH_MAX("/partition")];
126 _cleanup_free_ char *s = NULL;
132 /* If it has a queue this is good enough for us */
133 xsprintf_sys_block_path(p, "/queue", d);
134 if (access(p, F_OK) >= 0) {
139 /* If it is a partition find the originating device */
140 xsprintf_sys_block_path(p, "/partition", d);
141 if (access(p, F_OK) < 0)
144 /* Get parent dev_t */
145 xsprintf_sys_block_path(p, "/../dev", d);
146 r = read_one_line_file(p, &s);
150 r = sscanf(s, "%u:%u", &m, &n);
154 /* Only return this if it is really good enough for us. */
155 xsprintf_sys_block_path(p, "/queue", makedev(m, n));
156 if (access(p, F_OK) < 0)
159 *ret = makedev(m, n);
/* Reads the first line of /sys/kernel/kexec_loaded to find out whether a kexec kernel is loaded.
 * NOTE(review): presumably a read failure yields false and the result depends on the line's content — the
 * return statements are not visible in this view. */
163 bool kexec_loaded(void) {
164 _cleanup_free_ char *s = NULL;
166 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
/* Maps the access-mode bits of open(2)-style flags (flags & O_ACCMODE) to PROT_* protection bits; the one
 * result visible here is PROT_READ|PROT_WRITE.
 * NOTE(review): the case labels and the remaining results are not visible in this view. */
172 int prot_from_flags(int flags) {
174 switch (flags & O_ACCMODE) {
183 return PROT_READ|PROT_WRITE;
/* Reports whether we are running inside an initrd. The answer is computed once and cached in saved_in_initrd;
 * a cached (or forced, see in_initrd_force()) value >= 0 is returned without probing the file system again. */
191 bool in_initrd(void) {
194 if (saved_in_initrd >= 0)
195 return saved_in_initrd;
197 /* We make two checks here:
199 * 1. the flag file /etc/initrd-release must exist
200 * 2. the root file system must be a memory file system
202 * The second check is extra paranoia, since misdetecting an
203 * initrd can have bad consequences due to the initrd
204 * emptying when transitioning to the main systemd.
207 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
208 statfs("/", &s) >= 0 &&
211 return saved_in_initrd;
/* Overrides the cached in_initrd() result with 'value'. Since the stored value is then >= 0, subsequent
 * in_initrd() calls return it directly instead of running their checks. */
214 void in_initrd_force(bool value) {
215 saved_in_initrd = value;
218 #if 0 /// UNNEEDED by elogind
/* bsearch()-style lookup of 'key' in the array at 'base' ('nmemb' elements of 'size' bytes each), where the
 * comparison callback additionally receives the user pointer 'arg'. The element at index idx is addressed as
 * base + idx * size and compared as compar(key, element, arg).
 * NOTE(review): loop bounds and return values are not visible in this view. */
219 /* hey glibc, APIs with callbacks without a user pointer are so useless */
220 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
221 int (*compar) (const void *, const void *, void *), void *arg) {
230 p = (const char *) base + idx * size;
231 comparison = compar(key, p, arg);
234 else if (comparison > 0)
/* Determines whether the system runs on AC power by scanning /sys/class/power_supply.
 *
 * For every entry, the "type" attribute is read; only supplies whose type is exactly "Mains\n" are considered.
 * For those, the "online" attribute must be a single character followed by a newline: '0' records an offline
 * supply, '1' presumably records an online one (that assignment is not visible in this view).
 *
 * Returns true if an online mains supply was found or if no offline one was found. If the power_supply
 * directory does not exist (ENOENT) the result is true as well; other opendir() failures and directory
 * enumeration failures return -errno. */
242 int on_ac_power(void) {
243 bool found_offline = false, found_online = false;
244 _cleanup_closedir_ DIR *d = NULL;
247 d = opendir("/sys/class/power_supply");
249 return errno == ENOENT ? true : -errno;
251 FOREACH_DIRENT(de, d, return -errno) {
252 _cleanup_close_ int fd = -1, device = -1;
256 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
258 if (IN_SET(errno, ENOENT, ENOTDIR))
264 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
272 n = read(fd, contents, sizeof(contents));
276 if (n != 6 || memcmp(contents, "Mains\n", 6))
280 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
288 n = read(fd, contents, sizeof(contents));
292 if (n != 2 || contents[1] != '\n')
295 if (contents[0] == '1') {
298 } else if (contents[0] == '0')
299 found_offline = true;
304 return found_online || !found_offline;
/* Looks up the leader PID of the container 'machine' from its state file /run/systemd/machines/<machine>.
 * The machine name is validated first; then the LEADER and CLASS fields are parsed from the file, CLASS is
 * compared against "container", and LEADER is parsed as a PID.
 * NOTE(review): the error codes and the final store into *pid are not visible in this view. */
308 int container_get_leader(const char *machine, pid_t *pid) {
309 _cleanup_free_ char *s = NULL, *class = NULL;
317 if (!machine_name_is_valid(machine))
320 p = strjoina("/run/systemd/machines/", machine);
321 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
329 if (!streq_ptr(class, "container"))
332 r = parse_pid(s, &leader);
/* Opens file descriptors referring to the namespaces of process 'pid' — "ns/mnt", "ns/pid", "ns/net" and
 * "ns/user" below the process' procfs directory — plus its "root" directory (opened with O_DIRECTORY).
 * A failure to open the user namespace file is handled specially when errno is ENOENT.
 *
 * The local descriptors are reset to -1 at the end, presumably so that the _cleanup_close_ handlers do not
 * close descriptors that were handed out through the output parameters.
 * NOTE(review): the conditions guarding each open() and most output assignments are not visible in this
 * view. */
342 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
343 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
351 mntns = procfs_file_alloca(pid, "ns/mnt");
352 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
360 pidns = procfs_file_alloca(pid, "ns/pid");
361 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
369 netns = procfs_file_alloca(pid, "ns/net");
370 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
378 userns = procfs_file_alloca(pid, "ns/user");
379 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
380 if (usernsfd < 0 && errno != ENOENT)
387 root = procfs_file_alloca(pid, "root");
388 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
403 *userns_fd = usernsfd;
408 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
/* Enters the namespaces referred to by the given file descriptors using setns(), in the order PID, mount,
 * network, user, then changes the working directory to root_fd and finally returns the result of
 * reset_uid_gid(). If a user namespace fd is given, it is first compared against /proc/self/ns/user (see the
 * comment below for why).
 * NOTE(review): the conditions guarding each setns() call and the error returns are not visible in this
 * view. */
413 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
414 if (userns_fd >= 0) {
415 /* Can't setns to your own userns, since then you could
416 * escalate from non-root to root in your own namespace, so
417 * check if namespaces equal before attempting to enter. */
418 _cleanup_free_ char *userns_fd_path = NULL;
420 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
423 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
431 if (setns(pidns_fd, CLONE_NEWPID) < 0)
435 if (setns(mntns_fd, CLONE_NEWNS) < 0)
439 if (setns(netns_fd, CLONE_NEWNET) < 0)
443 if (setns(userns_fd, CLONE_NEWUSER) < 0)
447 if (fchdir(root_fd) < 0)
454 return reset_uid_gid();
/* Returns the amount of memory available to the system: the number of physical pages reported by
 * sysconf(_SC_PHYS_PAGES) multiplied by 'ps', and — when the root cgroup's "memory.limit_in_bytes" attribute
 * can be read and parsed — the smaller of that product and the cgroup limit.
 * NOTE(review): 'ps' is presumably page_size(); the fallbacks taken when the cgroup lookups fail are not
 * visible in this view (presumably the uncapped value is returned) — confirm. */
457 uint64_t physical_memory(void) {
458 _cleanup_free_ char *root = NULL, *value = NULL;
463 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
466 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
467 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
469 sc = sysconf(_SC_PHYS_PAGES);
473 mem = (uint64_t) sc * (uint64_t) ps;
475 if (cg_get_root_path(&root) < 0)
478 if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
481 if (safe_atou64(value, &lim) < 0)
484 /* Make sure the limit is a multiple of our own page size */
488 return MIN(mem, lim);
/* Scales the physical memory size by the fraction v/max. The computation starts from the memory size
 * expressed in units of 'ps' (p = physical_memory() / ps).
 * NOTE(review): 'ps' is presumably page_size(); the multiplication, overflow checks and return are not
 * visible in this view. */
491 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
492 uint64_t p, m, ps, r;
496 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
497 * the result is a multiple of the page size (rounds down). */
502 p = physical_memory() / ps;
/* Returns the upper bound on the number of tasks on this system: the minimum of TASKS_MAX (derived from
 * SIZEOF_PID_T), the value of /proc/sys/kernel/pid_max and the root cgroup's "pids.max" attribute. A source
 * that cannot be read or parsed leaves its variable at the TASKS_MAX default, and a parsed value of 0 is
 * replaced by TASKS_MAX as well. */
518 uint64_t system_tasks_max(void) {
520 #if SIZEOF_PID_T == 4
521 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
522 #elif SIZEOF_PID_T == 2
523 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
525 #error "Unknown pid_t size"
528 _cleanup_free_ char *value = NULL, *root = NULL;
529 uint64_t a = TASKS_MAX, b = TASKS_MAX;
531 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
534 * a) the maximum value for the pid_t type
535 * b) the cgroups pids_max attribute for the system
536 * c) the kernel's configured maximum PID value
538 * And then pick the smallest of the three */
540 if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
541 (void) safe_atou64(value, &a);
543 if (cg_get_root_path(&root) >= 0) {
/* 'value' may still hold the pid_max line; release it before it is reused as an output buffer. */
544 value = mfree(value);
546 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
547 (void) safe_atou64(value, &b);
550 return MIN3(TASKS_MAX,
551 a <= 0 ? TASKS_MAX : a,
552 b <= 0 ? TASKS_MAX : b);
/* Scales system_tasks_max() (t) by the fraction v/max. The overflow check visible here divides the product m
 * by t and compares the quotient with v.
 * NOTE(review): the computation of m and the return statements are not visible in this view. */
555 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
560 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
561 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
563 t = system_tasks_max();
567 if (m / t != v) /* overflow? */
573 #if 0 /// UNNEEDED by elogind
/* Updates the reboot parameter file /run/systemd/reboot-param. If 'param' is empty the file is unlinked;
 * otherwise 'param' is written to it (creating the file) while the umask is set to 0022. Failures are logged
 * with log_warning_errno(), whose result is returned.
 * NOTE(review): some branches (e.g. which unlink() errors are tolerated) are not visible in this view. */
574 int update_reboot_parameter_and_warn(const char *param) {
577 if (isempty(param)) {
578 if (unlink("/run/systemd/reboot-param") < 0) {
582 return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
588 RUN_WITH_UMASK(0022) {
589 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
591 return log_warning_errno(r, "Failed to write reboot parameter file: %m");
599 puts(PACKAGE_STRING "\n"
604 #if 0 /// UNNEEDED by elogind
/* Determines the block device that directly backs the file system containing 'path'. The path is lstat()ed;
 * a non-zero major number in st_dev is handled first, otherwise statfs() is consulted and btrfs file systems
 * are delegated to btrfs_get_block_device().
 * NOTE(review): the return values and the store into *dev are not visible in this view. */
605 int get_block_device(const char *path, dev_t *dev) {
612 /* Gets the block device directly backing a file system. If
613 * the block device is encrypted, returns the device mapper
616 if (lstat(path, &st))
619 if (major(st.st_dev) != 0) {
624 if (statfs(path, &sfs) < 0)
627 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
628 return btrfs_get_block_device(path, dev);
/* Like get_block_device(), but tries to look through one layer of stacking: starting from the device
 * returned by get_block_device(), the entries of its sysfs "slaves" directory are enumerated. If a usable
 * entry is found, its "dev" attribute is read, parsed as "major:minor" and stored in *dev. When more than one
 * entry exists, the "../dev" attributes of the two candidates are read and compared (see the comments in
 * the body).
 * NOTE(review): many lines of this function, including its end, are not visible in this view; the fallbacks
 * and return codes are not documented here for that reason. */
633 int get_block_device_harder(const char *path, dev_t *dev) {
634 _cleanup_closedir_ DIR *d = NULL;
635 _cleanup_free_ char *t = NULL;
636 char p[SYS_BLOCK_PATH_MAX("/slaves")];
637 struct dirent *de, *found = NULL;
646 /* Gets the backing block device for a file system, and
647 * handles LUKS encrypted file systems, looking for its
648 * immediate parent, if there is one. */
650 r = get_block_device(path, &dt);
654 xsprintf_sys_block_path(p, "/slaves", dt);
663 FOREACH_DIRENT_ALL(de, d, return -errno) {
665 if (dot_or_dot_dot(de->d_name))
668 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
672 _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
674 /* We found a device backed by multiple other devices. We don't really support automatic
675 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
676 * two backing devices: the data partition and the hash partition. We are fine with such
677 * setups, however, only if both partitions are on the same physical device. Hence, let's
680 u = strjoin(p, "/", de->d_name, "/../dev");
684 v = strjoin(p, "/", found->d_name, "/../dev");
688 r = read_one_line_file(u, &a);
690 log_debug_errno(r, "Failed to read %s: %m", u);
694 r = read_one_line_file(v, &b);
696 log_debug_errno(r, "Failed to read %s: %m", v);
700 /* Check if the parent device is the same. If not, then the two backing devices are on
701 * different physical devices, and we don't support that. */
712 q = strjoina(p, "/", found->d_name, "/dev");
714 r = read_one_line_file(q, &t);
720 if (sscanf(t, "%u:%u", &maj, &min) != 2)
726 *dev = makedev(maj, min);