1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 //#include <sys/mman.h>
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 //#include <sys/types.h>
37 #include "alloc-util.h"
38 //#include "btrfs-util.h"
40 #include "cgroup-util.h"
42 //#include "device-nodes.h"
43 #include "dirent-util.h"
46 //#include "format-util.h"
48 #include "hostname-util.h"
51 //#include "missing.h"
52 #include "parse-util.h"
53 //#include "path-util.h"
54 #include "process-util.h"
56 #include "signal-util.h"
57 #include "stat-util.h"
58 #include "string-util.h"
60 #include "time-util.h"
61 #include "umask-util.h"
62 #include "user-util.h"
66 char **saved_argv = NULL;
67 static int saved_in_initrd = -1;
69 size_t page_size(void) {
70 static thread_local size_t pgsz = 0;
73 if (_likely_(pgsz > 0))
76 r = sysconf(_SC_PAGESIZE);
83 #if 0 /// UNNEEDED by elogind
84 bool plymouth_running(void) {
85 return access("/run/plymouth/pid", F_OK) >= 0;
89 bool display_is_local(const char *display) {
98 int socket_from_display(const char *display, char **path) {
105 if (!display_is_local(display))
108 k = strspn(display+1, "0123456789");
110 f = new(char, STRLEN("/tmp/.X11-unix/X") + k + 1);
114 c = stpcpy(f, "/tmp/.X11-unix/X");
115 memcpy(c, display+1, k);
123 #if 0 /// UNNEEDED by elogind
124 int block_get_whole_disk(dev_t d, dev_t *ret) {
125 char p[SYS_BLOCK_PATH_MAX("/partition")];
126 _cleanup_free_ char *s = NULL;
132 /* If it has a queue this is good enough for us */
133 xsprintf_sys_block_path(p, "/queue", d);
134 if (access(p, F_OK) >= 0) {
139 /* If it is a partition find the originating device */
140 xsprintf_sys_block_path(p, "/partition", d);
141 if (access(p, F_OK) < 0)
144 /* Get parent dev_t */
145 xsprintf_sys_block_path(p, "/../dev", d);
146 r = read_one_line_file(p, &s);
150 r = sscanf(s, "%u:%u", &m, &n);
154 /* Only return this if it is really good enough for us. */
155 xsprintf_sys_block_path(p, "/queue", makedev(m, n));
156 if (access(p, F_OK) < 0)
159 *ret = makedev(m, n);
163 bool kexec_loaded(void) {
164 _cleanup_free_ char *s = NULL;
166 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
172 int prot_from_flags(int flags) {
174 switch (flags & O_ACCMODE) {
183 return PROT_READ|PROT_WRITE;
191 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
192 bool stdout_is_tty, stderr_is_tty;
193 pid_t parent_pid, agent_pid;
194 sigset_t ss, saved_ss;
202 /* Spawns a temporary TTY agent, making sure it goes away when
205 parent_pid = getpid_cached();
207 /* First we temporarily block all signals, so that the new
208 * child has them blocked initially. This way, we can be sure
209 * that SIGTERMs we might send to the agent are not lost. */
210 assert_se(sigfillset(&ss) >= 0);
211 assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
215 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
219 if (agent_pid != 0) {
220 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
227 * Make sure the agent goes away when the parent dies */
228 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
231 /* Make sure we actually can kill the agent, if we need to, in
232 * case somebody invoked us from a shell script that trapped
233 * SIGTERM or so... */
234 (void) reset_all_signal_handlers();
235 (void) reset_signal_mask();
237 /* Check whether our parent died before we were able
238 * to set the death signal and unblock the signals */
239 if (getppid() != parent_pid)
242 /* Don't leak fds to the agent */
243 close_all_fds(except, n_except);
245 stdout_is_tty = isatty(STDOUT_FILENO);
246 stderr_is_tty = isatty(STDERR_FILENO);
248 if (!stdout_is_tty || !stderr_is_tty) {
251 /* Detach from stdout/stderr and reopen
252 * /dev/tty for them. This is important to
253 * ensure that when systemctl is started via
254 * popen() or a similar call that expects to
255 * read EOF we actually do generate EOF and
256 * not delay this indefinitely because we
257 * keep an unused copy of stdin around. */
258 fd = open("/dev/tty", O_WRONLY);
260 log_error_errno(errno, "Failed to open /dev/tty: %m");
264 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
265 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
269 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
270 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
274 if (fd > STDERR_FILENO)
278 /* Count arguments */
280 for (n = 0; va_arg(ap, char*); n++)
285 l = alloca(sizeof(char *) * (n + 1));
287 /* Fill in arguments */
289 for (i = 0; i <= n; i++)
290 l[i] = va_arg(ap, char*);
297 bool in_initrd(void) {
300 if (saved_in_initrd >= 0)
301 return saved_in_initrd;
303 /* We make two checks here:
305 * 1. the flag file /etc/initrd-release must exist
306 * 2. the root file system must be a memory file system
308 * The second check is extra paranoia, since misdetecting an
309 * initrd can have bad consequences due to the initrd
310 * emptying when transitioning to the main systemd.
313 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
314 statfs("/", &s) >= 0 &&
317 return saved_in_initrd;
320 void in_initrd_force(bool value) {
321 saved_in_initrd = value;
324 #if 0 /// UNNEEDED by elogind
325 /* hey glibc, APIs with callbacks without a user pointer are so useless */
326 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
327 int (*compar) (const void *, const void *, void *), void *arg) {
336 p = (const char *) base + idx * size;
337 comparison = compar(key, p, arg);
340 else if (comparison > 0)
348 int on_ac_power(void) {
349 bool found_offline = false, found_online = false;
350 _cleanup_closedir_ DIR *d = NULL;
353 d = opendir("/sys/class/power_supply");
355 return errno == ENOENT ? true : -errno;
357 FOREACH_DIRENT(de, d, return -errno) {
358 _cleanup_close_ int fd = -1, device = -1;
362 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
364 if (IN_SET(errno, ENOENT, ENOTDIR))
370 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
378 n = read(fd, contents, sizeof(contents));
382 if (n != 6 || memcmp(contents, "Mains\n", 6))
386 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
394 n = read(fd, contents, sizeof(contents));
398 if (n != 2 || contents[1] != '\n')
401 if (contents[0] == '1') {
404 } else if (contents[0] == '0')
405 found_offline = true;
410 return found_online || !found_offline;
414 int container_get_leader(const char *machine, pid_t *pid) {
415 _cleanup_free_ char *s = NULL, *class = NULL;
423 if (!machine_name_is_valid(machine))
426 p = strjoina("/run/systemd/machines/", machine);
427 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
435 if (!streq_ptr(class, "container"))
438 r = parse_pid(s, &leader);
448 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
449 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
457 mntns = procfs_file_alloca(pid, "ns/mnt");
458 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
466 pidns = procfs_file_alloca(pid, "ns/pid");
467 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
475 netns = procfs_file_alloca(pid, "ns/net");
476 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
484 userns = procfs_file_alloca(pid, "ns/user");
485 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
486 if (usernsfd < 0 && errno != ENOENT)
493 root = procfs_file_alloca(pid, "root");
494 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
509 *userns_fd = usernsfd;
514 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
519 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
520 if (userns_fd >= 0) {
521 /* Can't setns to your own userns, since then you could
522 * escalate from non-root to root in your own namespace, so
523 * check if namespaces equal before attempting to enter. */
524 _cleanup_free_ char *userns_fd_path = NULL;
526 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
529 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
537 if (setns(pidns_fd, CLONE_NEWPID) < 0)
541 if (setns(mntns_fd, CLONE_NEWNS) < 0)
545 if (setns(netns_fd, CLONE_NEWNET) < 0)
549 if (setns(userns_fd, CLONE_NEWUSER) < 0)
553 if (fchdir(root_fd) < 0)
560 return reset_uid_gid();
563 uint64_t physical_memory(void) {
564 _cleanup_free_ char *root = NULL, *value = NULL;
569 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
572 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
573 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
575 sc = sysconf(_SC_PHYS_PAGES);
579 mem = (uint64_t) sc * (uint64_t) ps;
581 if (cg_get_root_path(&root) < 0)
584 if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
587 if (safe_atou64(value, &lim) < 0)
590 /* Make sure the limit is a multiple of our own page size */
594 return MIN(mem, lim);
597 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
598 uint64_t p, m, ps, r;
602 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
603 * the result is a multiple of the page size (rounds down). */
608 p = physical_memory() / ps;
624 uint64_t system_tasks_max(void) {
626 #if SIZEOF_PID_T == 4
627 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
628 #elif SIZEOF_PID_T == 2
629 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
631 #error "Unknown pid_t size"
634 _cleanup_free_ char *value = NULL, *root = NULL;
635 uint64_t a = TASKS_MAX, b = TASKS_MAX;
637 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
640 * a) the maximum value for the pid_t type
641 * b) the cgroups pids_max attribute for the system
642 * c) the kernel's configured maximum PID value
644 * And then pick the smallest of the three */
646 if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
647 (void) safe_atou64(value, &a);
649 if (cg_get_root_path(&root) >= 0) {
650 value = mfree(value);
652 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
653 (void) safe_atou64(value, &b);
656 return MIN3(TASKS_MAX,
657 a <= 0 ? TASKS_MAX : a,
658 b <= 0 ? TASKS_MAX : b);
661 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
666 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
667 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
669 t = system_tasks_max();
673 if (m / t != v) /* overflow? */
679 #if 0 /// UNNEEDED by elogind
680 int update_reboot_parameter_and_warn(const char *param) {
683 if (isempty(param)) {
684 if (unlink("/run/systemd/reboot-param") < 0) {
688 return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
694 RUN_WITH_UMASK(0022) {
695 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
697 return log_warning_errno(r, "Failed to write reboot parameter file: %m");
705 puts(PACKAGE_STRING "\n"
710 #if 0 /// UNNEEDED by elogind
711 int get_block_device(const char *path, dev_t *dev) {
718 /* Gets the block device directly backing a file system. If
719 * the block device is encrypted, returns the device mapper
722 if (lstat(path, &st))
725 if (major(st.st_dev) != 0) {
730 if (statfs(path, &sfs) < 0)
733 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
734 return btrfs_get_block_device(path, dev);
739 int get_block_device_harder(const char *path, dev_t *dev) {
740 _cleanup_closedir_ DIR *d = NULL;
741 _cleanup_free_ char *t = NULL;
742 char p[SYS_BLOCK_PATH_MAX("/slaves")];
743 struct dirent *de, *found = NULL;
752 /* Gets the backing block device for a file system, and
753 * handles LUKS encrypted file systems, looking for its
754 * immediate parent, if there is one. */
756 r = get_block_device(path, &dt);
760 xsprintf_sys_block_path(p, "/slaves", dt);
769 FOREACH_DIRENT_ALL(de, d, return -errno) {
771 if (dot_or_dot_dot(de->d_name))
774 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
778 _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
780 /* We found a device backed by multiple other devices. We don't really support automatic
781 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
782 * two backing devices: the data partition and the hash partition. We are fine with such
783 * setups, however, only if both partitions are on the same physical device. Hence, let's
786 u = strjoin(p, "/", de->d_name, "/../dev");
790 v = strjoin(p, "/", found->d_name, "/../dev");
794 r = read_one_line_file(u, &a);
796 log_debug_errno(r, "Failed to read %s: %m", u);
800 r = read_one_line_file(v, &b);
802 log_debug_errno(r, "Failed to read %s: %m", v);
806 /* Check if the parent device is the same. If not, then the two backing devices are on
807 * different physical devices, and we don't support that. */
818 q = strjoina(p, "/", found->d_name, "/dev");
820 r = read_one_line_file(q, &t);
826 if (sscanf(t, "%u:%u", &maj, &min) != 2)
832 *dev = makedev(maj, min);