2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 //#include <sys/mman.h>
30 #include <sys/prctl.h>
31 #include <sys/statfs.h>
32 #include <sys/sysmacros.h>
33 //#include <sys/types.h>
36 #include "alloc-util.h"
37 //#include "btrfs-util.h"
39 #include "cgroup-util.h"
41 #include "dirent-util.h"
44 //#include "format-util.h"
46 #include "hostname-util.h"
49 //#include "missing.h"
50 #include "parse-util.h"
51 //#include "path-util.h"
52 #include "process-util.h"
54 #include "signal-util.h"
55 #include "stat-util.h"
56 #include "string-util.h"
58 #include "time-util.h"
59 #include "umask-util.h"
60 #include "user-util.h"
/* NOTE(review): going by its name this holds the argument vector the process was started with;
 * presumably it is filled in by the program's entry point — confirm against the callers. */
64 char **saved_argv = NULL;
/* Cached answer of in_initrd(): a negative value means "not determined yet", any value >= 0 is
 * handed back by in_initrd() as-is. in_initrd_force() overwrites it. */
65 static int saved_in_initrd = -1;
/* Returns the system page size. The value lives in a per-thread static (pgsz), so
 * sysconf(_SC_PAGESIZE) only needs to be consulted while pgsz is still 0. */
67 size_t page_size(void) {
68 static thread_local size_t pgsz = 0;
/* Fast path: an earlier call on this thread already stored a positive page size. */
71 if (_likely_(pgsz > 0))
/* Slow path: ask the C library for the page size. */
74 r = sysconf(_SC_PAGESIZE);
81 #if 0 /// UNNEEDED by elogind
/* Returns true if the file /run/plymouth/pid exists (access() with F_OK succeeds), false otherwise. */
82 bool plymouth_running(void) {
83 return access("/run/plymouth/pid", F_OK) >= 0;
/* Predicate on an X11 display string; socket_from_display() refuses displays for which this
 * returns false. NOTE(review): presumably it tests for the ":<digits>" form of a local display —
 * confirm against the implementation. */
87 bool display_is_local(const char *display) {
/* Derives the X11 socket path for a display string: "/tmp/.X11-unix/X" followed by the run of
 * decimal digits that starts at the second character of display.
 *
 * display: display string; must satisfy display_is_local().
 * path:    NOTE(review): presumably receives the newly allocated path string — confirm. */
96 int socket_from_display(const char *display, char **path) {
103 if (!display_is_local(display))
/* k = number of leading decimal digits after the first character of display. */
106 k = strspn(display+1, "0123456789");
/* Room for the fixed prefix, the k digits and the terminating NUL. */
108 f = new(char, strlen("/tmp/.X11-unix/X") + k + 1);
/* stpcpy() returns the end of the copied prefix, which is where the digits are appended. */
112 c = stpcpy(f, "/tmp/.X11-unix/X");
113 memcpy(c, display+1, k);
121 #if 0 /// UNNEEDED by elogind
/* Finds the whole-disk block device belonging to block device d by inspecting /sys/dev/block.
 *
 * A device that has a "queue" entry is considered good enough by itself. For a partition (it has
 * a "partition" entry) the parent's major:minor is read from "../dev", and the parent is only
 * accepted if it has a "queue" entry in turn.
 *
 * d:   device number to look up.
 * ret: receives the parent device number built with makedev(). */
122 int block_get_whole_disk(dev_t d, dev_t *ret) {
129 /* If it has a queue this is good enough for us */
130 if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
141 /* If it is a partition find the originating device */
142 if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
151 /* Get parent dev_t */
152 if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
155 r = read_one_line_file(p, &s);
/* The "dev" attribute is parsed as "<major>:<minor>". */
161 r = sscanf(s, "%u:%u", &m, &n);
167 /* Only return this if it is really good enough for us. */
168 if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0)
175 *ret = makedev(m, n);
/* Reports whether a kexec kernel is loaded, based on the first line of /sys/kernel/kexec_loaded.
 * NOTE(review): how the line's content maps to the boolean result should be confirmed against the
 * rest of the function. */
182 bool kexec_loaded(void) {
183 _cleanup_free_ char *s = NULL;
/* A failure to read the file is handled as a separate case. */
185 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) < 0)
/* Translates the access mode part of open(2)-style flags (flags & O_ACCMODE) into PROT_* bits.
 * NOTE(review): PROT_READ|PROT_WRITE is presumably the result for O_RDWR — confirm. */
191 int prot_from_flags(int flags) {
193 switch (flags & O_ACCMODE) {
202 return PROT_READ|PROT_WRITE;
/* Starts a helper ("agent") process on behalf of the calling process.
 *
 * In order: all signals are blocked while the agent is created, so that the child starts out with
 * them blocked; the side that sees agent_pid != 0 restores its previous signal mask; the agent
 * side requests SIGTERM on parent death (PR_SET_PDEATHSIG), resets its signal handlers and signal
 * mask, checks that its parent is still the original one, closes file descriptors through
 * close_all_fds(except, n_except), makes sure stdout and stderr refer to a TTY (reopening
 * /dev/tty for whichever does not), and gathers the variadic arguments into a NULL-terminated
 * array of char pointers.
 *
 * pid:             NOTE(review): presumably receives the agent's PID — confirm.
 * except/n_except: passed unchanged to close_all_fds(); presumably the fds to keep open — confirm.
 * path:            NOTE(review): presumably the agent binary to execute — confirm.
 * ...:             char* arguments, terminated by a NULL pointer. */
210 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
211 bool stdout_is_tty, stderr_is_tty;
212 pid_t parent_pid, agent_pid;
213 sigset_t ss, saved_ss;
221 /* Spawns a temporary TTY agent, making sure it goes away when
224 parent_pid = getpid_cached();
226 /* First we temporarily block all signals, so that the new
227 * child has them blocked initially. This way, we can be sure
228 * that SIGTERMs we might send to the agent are not lost. */
229 assert_se(sigfillset(&ss) >= 0);
230 assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
234 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
238 if (agent_pid != 0) {
239 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
246 * Make sure the agent goes away when the parent dies */
247 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
250 /* Make sure we actually can kill the agent, if we need to, in
251 * case somebody invoked us from a shell script that trapped
252 * SIGTERM or so... */
253 (void) reset_all_signal_handlers();
254 (void) reset_signal_mask();
256 /* Check whether our parent died before we were able
257 * to set the death signal and unblock the signals */
258 if (getppid() != parent_pid)
261 /* Don't leak fds to the agent */
262 close_all_fds(except, n_except);
264 stdout_is_tty = isatty(STDOUT_FILENO);
265 stderr_is_tty = isatty(STDERR_FILENO);
267 if (!stdout_is_tty || !stderr_is_tty) {
270 /* Detach from stdout/stderr and reopen
271 * /dev/tty for them. This is important to
272 * ensure that when systemctl is started via
273 * popen() or a similar call that expects to
274 * read EOF we actually do generate EOF and
275 * not delay this indefinitely because we
276 * keep an unused copy of stdin around. */
277 fd = open("/dev/tty", O_WRONLY);
279 log_error_errno(errno, "Failed to open /dev/tty: %m");
283 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
284 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
288 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
289 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
/* fd is only a temporary descriptor if it is not one of the standard streams itself. */
293 if (fd > STDERR_FILENO)
297 /* Count arguments */
299 for (n = 0; va_arg(ap, char*); n++)
/* n + 1 slots: the n arguments plus the terminating NULL, which the copy loop below
 * fetches as well (note the i <= n bound). */
304 l = alloca(sizeof(char *) * (n + 1));
306 /* Fill in arguments */
308 for (i = 0; i <= n; i++)
309 l[i] = va_arg(ap, char*);
/* Returns whether we are running inside an initrd. The answer is computed once and cached in
 * saved_in_initrd; the probe requires /etc/initrd-release to exist and statfs("/") to succeed,
 * combined with the root file system check described in the comment in the body. */
316 bool in_initrd(void) {
/* Already determined by an earlier call, or forced through in_initrd_force(). */
319 if (saved_in_initrd >= 0)
320 return saved_in_initrd;
322 /* We make two checks here:
324 * 1. the flag file /etc/initrd-release must exist
325 * 2. the root file system must be a memory file system
327 * The second check is extra paranoia, since misdetecting an
328 * initrd can have bad consequences due the initrd
329 * emptying when transititioning to the main systemd.
332 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
333 statfs("/", &s) >= 0 &&
336 return saved_in_initrd;
// Overrides the cached in_initrd() answer: saved_in_initrd is set to value (0 or 1), so that
// subsequent in_initrd() calls return it without probing the file system.
339 void in_initrd_force(bool value) {
340 saved_in_initrd = value;
343 #if 0 /// UNNEEDED by elogind
344 /* hey glibc, APIs with callbacks without a user pointer are so useless */
/* Search helper in the style of bsearch(3) whose comparison callback additionally receives a
 * caller supplied pointer.
 *
 * key:    element to look for; passed as first argument to compar.
 * base:   start of the array; elements are addressed as base + idx * size.
 * nmemb:  NOTE(review): presumably the number of array elements — confirm.
 * size:   size of one element in bytes.
 * compar: called as compar(key, element, arg); a result > 0 is handled as its own case.
 * arg:    opaque pointer forwarded to compar. */
345 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
346 int (*compar) (const void *, const void *, void *), void *arg) {
/* Address of the element currently being probed. */
355 p = (const char *) base + idx * size;
356 comparison = compar(key, p, arg);
359 else if (comparison > 0)
/* Determines whether the system runs on AC power by scanning /sys/class/power_supply.
 *
 * Only entries whose "type" attribute reads exactly "Mains\n" are considered; for those the
 * "online" attribute is read, where '1' counts as online and '0' as offline.
 *
 * Returns found_online || !found_offline, i.e. also true when no mains supply reported itself
 * offline. Returns true if the power_supply directory does not exist at all, and a negative
 * errno-style value if opening or enumerating it fails otherwise. */
367 int on_ac_power(void) {
368 bool found_offline = false, found_online = false;
369 _cleanup_closedir_ DIR *d = NULL;
372 d = opendir("/sys/class/power_supply");
/* No power_supply class at all is treated as "on AC power". */
374 return errno == ENOENT ? true : -errno;
376 FOREACH_DIRENT(de, d, return -errno) {
377 _cleanup_close_ int fd = -1, device = -1;
381 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
/* Entries that vanished or are not directories get special treatment. */
383 if (IN_SET(errno, ENOENT, ENOTDIR))
389 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
397 n = read(fd, contents, sizeof(contents));
/* Anything but a mains supply is of no interest here. */
401 if (n != 6 || memcmp(contents, "Mains\n", 6))
405 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
413 n = read(fd, contents, sizeof(contents));
/* Expect exactly one character followed by a newline. */
417 if (n != 2 || contents[1] != '\n')
420 if (contents[0] == '1') {
423 } else if (contents[0] == '0')
424 found_offline = true;
429 return found_online || !found_offline;
/* Looks up the leader process of a registered machine.
 *
 * The machine name is validated with machine_name_is_valid(), then the LEADER and CLASS fields
 * are parsed from /run/systemd/machines/<machine>. Only machines whose CLASS is "container" are
 * accepted; LEADER is converted with parse_pid().
 *
 * machine: name of the machine.
 * pid:     NOTE(review): presumably receives the parsed leader PID — confirm. */
433 int container_get_leader(const char *machine, pid_t *pid) {
434 _cleanup_free_ char *s = NULL, *class = NULL;
442 if (!machine_name_is_valid(machine))
445 p = strjoina("/run/systemd/machines/", machine);
446 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
/* streq_ptr() is used as class may still be NULL at this point. */
454 if (!streq_ptr(class, "container"))
457 r = parse_pid(s, &leader);
/* Opens file descriptors referring to the namespaces and the root directory of process pid.
 *
 * The paths are built with procfs_file_alloca(pid, ...) for "ns/mnt", "ns/pid", "ns/net",
 * "ns/user" and "root" (presumably below /proc/<pid>/ — confirm). All are opened with
 * O_RDONLY|O_NOCTTY|O_CLOEXEC, the root additionally with O_DIRECTORY. A missing user namespace
 * file (ENOENT) is not treated as an error.
 *
 * pidns_fd, mntns_fd, netns_fd, userns_fd, root_fd: output parameters for the opened
 * descriptors. NOTE(review): which of them may be NULL should be confirmed with the callers. */
467 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
468 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
476 mntns = procfs_file_alloca(pid, "ns/mnt");
477 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
485 pidns = procfs_file_alloca(pid, "ns/pid");
486 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
494 netns = procfs_file_alloca(pid, "ns/net");
495 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
503 userns = procfs_file_alloca(pid, "ns/user");
504 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
/* Only failures other than "no such file" count as errors for the user namespace. */
505 if (usernsfd < 0 && errno != ENOENT)
512 root = procfs_file_alloca(pid, "root");
513 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
528 *userns_fd = usernsfd;
/* The descriptors now belong to the caller; reset the locals so that the _cleanup_close_
 * handling (presumably closing them when the scope is left) no longer applies to them. */
533 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
/* Joins the namespaces referred to by the given descriptors using setns(), in the order PID,
 * mount, network, user, then changes the working directory to root_fd with fchdir() and finally
 * returns the result of reset_uid_gid().
 *
 * If userns_fd is valid (>= 0), it is first compared against the caller's own user namespace
 * (/proc/self/ns/user) with files_same(); see the comment in the body for the reason.
 *
 * NOTE(review): the conditions under which the individual setns() calls are skipped should be
 * confirmed against the complete function. */
538 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
539 if (userns_fd >= 0) {
540 /* Can't setns to your own userns, since then you could
541 * escalate from non-root to root in your own namespace, so
542 * check if namespaces equal before attempting to enter. */
543 _cleanup_free_ char *userns_fd_path = NULL;
545 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
548 r = files_same(userns_fd_path, "/proc/self/ns/user", 0);
556 if (setns(pidns_fd, CLONE_NEWPID) < 0)
560 if (setns(mntns_fd, CLONE_NEWNS) < 0)
564 if (setns(netns_fd, CLONE_NEWNET) < 0)
568 if (setns(userns_fd, CLONE_NEWUSER) < 0)
572 if (fchdir(root_fd) < 0)
579 return reset_uid_gid();
/* Returns the amount of memory available to the system in bytes: the number of physical pages
 * (sysconf(_SC_PHYS_PAGES)) times ps, capped by the "memory.limit_in_bytes" attribute of the
 * root cgroup when that attribute can be obtained and parsed. */
582 uint64_t physical_memory(void) {
583 _cleanup_free_ char *root = NULL, *value = NULL;
588 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
591 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
592 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
594 sc = sysconf(_SC_PHYS_PAGES);
/* Both factors are widened to 64 bit before multiplying. */
598 mem = (uint64_t) sc * (uint64_t) ps;
600 if (cg_get_root_path(&root) < 0)
/* NOTE(review): unlike system_tasks_max(), this tests for any non-zero return value rather
 * than "< 0" — confirm that cg_get_attribute() never returns a positive value on success. */
603 if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
606 if (safe_atou64(value, &lim) < 0)
609 /* Make sure the limit is a multiple of our own page size */
613 return MIN(mem, lim);
/* Scales the physical memory size by the fraction v/max.
 *
 * v:   numerator of the fraction.
 * max: denominator of the fraction. */
616 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
617 uint64_t p, m, ps, r;
621 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
622 * the result is a multiple of the page size (rounds down). */
/* p is the physical memory expressed in units of ps rather than bytes. */
627 p = physical_memory() / ps;
/* Returns the maximum number of tasks that may run on this system: the smallest of TASKS_MAX
 * (derived from the size of pid_t), the value of /proc/sys/kernel/pid_max and the "pids.max"
 * attribute of the root cgroup. Sources that cannot be read or parsed, or that yield 0, do not
 * constrain the result. */
643 uint64_t system_tasks_max(void) {
645 #if SIZEOF_PID_T == 4
646 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
647 #elif SIZEOF_PID_T == 2
648 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
650 #error "Unknown pid_t size"
653 _cleanup_free_ char *value = NULL, *root = NULL;
654 uint64_t a = TASKS_MAX, b = TASKS_MAX;
656 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
659 * a) the maximum value for the pid_t type
660 * b) the cgroups pids_max attribute for the system
661 * c) the kernel's configured maximum PID value
663 * And then pick the smallest of the three */
/* Variable a: the kernel's pid_max. A parse failure is ignored and leaves a at TASKS_MAX. */
665 if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
666 (void) safe_atou64(value, &a);
/* Variable b: pids.max of the root cgroup. value is released first since it is reused. */
668 if (cg_get_root_path(&root) >= 0) {
669 value = mfree(value);
671 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
672 (void) safe_atou64(value, &b);
/* a and b are unsigned, so "<= 0" amounts to "== 0": a zero value is replaced by TASKS_MAX. */
675 return MIN3(TASKS_MAX,
676 a <= 0 ? TASKS_MAX : a,
677 b <= 0 ? TASKS_MAX : b);
/* Scales the value of system_tasks_max() by the fraction v/max.
 *
 * v:   numerator of the fraction.
 * max: denominator of the fraction. */
680 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
685 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
686 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
688 t = system_tasks_max();
/* Dividing the product back by t must give v again, otherwise the multiplication wrapped. */
692 if (m / t != v) /* overflow? */
698 #if 0 /// UNNEEDED by elogind
/* Updates the reboot parameter file /run/systemd/reboot-param and logs a warning on failure.
 *
 * param: new reboot parameter. If it is empty (isempty()), the file is removed instead;
 *        otherwise param is written to the file with WRITE_STRING_FILE_CREATE while the umask
 *        is 0022.
 *
 * Failures are reported through log_warning_errno(), whose result is returned. */
699 int update_reboot_parameter_and_warn(const char *param) {
702 if (isempty(param)) {
703 if (unlink("/run/systemd/reboot-param") < 0) {
707 return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
713 RUN_WITH_UMASK(0022) {
714 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
716 return log_warning_errno(r, "Failed to write reboot parameter file: %m");
724 puts(PACKAGE_STRING "\n"
729 #if 0 /// UNNEEDED by elogind
/* Determines the block device backing the file system that path lives on.
 *
 * path is examined with lstat(); a non-zero major number in st_dev is handled as its own case.
 * Otherwise the file system type is queried with statfs(), and for btrfs the lookup is delegated
 * to btrfs_get_block_device().
 *
 * path: file system object to examine.
 * dev:  output parameter; passed on to btrfs_get_block_device() in the btrfs case.
 *       NOTE(review): presumably set from st.st_dev when its major number is non-zero — confirm. */
730 int get_block_device(const char *path, dev_t *dev) {
737 /* Get's the block device directly backing a file system. If
738 * the block device is encrypted, returns the device mapper
741 if (lstat(path, &st))
744 if (major(st.st_dev) != 0) {
749 if (statfs(path, &sfs) < 0)
752 if (F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC))
753 return btrfs_get_block_device(path, dev);
// Like get_block_device(), but additionally walks one level up through the sysfs "slaves"
// directory of the device found (/sys/dev/block/<major>:<minor>/slaves) to locate the device it
// is backed by. The "dev" attribute of the entry remembered in found is parsed as
// "<major>:<minor>" and stored in *dev.
//
// path: file system object to examine; handed to get_block_device().
// dev:  receives the device number built with makedev().
758 int get_block_device_harder(const char *path, dev_t *dev) {
759 _cleanup_closedir_ DIR *d = NULL;
760 _cleanup_free_ char *p = NULL, *t = NULL;
761 struct dirent *de, *found = NULL;
770 /* Gets the backing block device for a file system, and
771 * handles LUKS encrypted file systems, looking for its
772 * immediate parent, if there is one. */
774 r = get_block_device(path, &dt);
/* p names the directory that lists the devices dt is backed by. */
778 if (asprintf(&p, "/sys/dev/block/%u:%u/slaves", major(dt), minor(dt)) < 0)
789 FOREACH_DIRENT_ALL(de, d, return -errno) {
/* The "." and ".." entries are handled separately from real entries. */
791 if (dot_or_dot_dot(de->d_name))
/* Entries of a type other than symlink or unknown are handled separately. */
794 if (!IN_SET(de->d_type, DT_LNK, DT_UNKNOWN))
798 _cleanup_free_ char *u = NULL, *v = NULL, *a = NULL, *b = NULL;
800 /* We found a device backed by multiple other devices. We don't really support automatic
801 * discovery on such setups, with the exception of dm-verity partitions. In this case there are
802 * two backing devices: the data partition and the hash partition. We are fine with such
803 * setups, however, only if both partitions are on the same physical device. Hence, let's
806 u = strjoin(p, "/", de->d_name, "/../dev");
810 v = strjoin(p, "/", found->d_name, "/../dev");
814 r = read_one_line_file(u, &a);
816 log_debug_errno(r, "Failed to read %s: %m", u);
820 r = read_one_line_file(v, &b);
822 log_debug_errno(r, "Failed to read %s: %m", v);
826 /* Check if the parent device is the same. If not, then the two backing devices are on
827 * different physical devices, and we don't support that. */
/* Read the "dev" attribute of the entry remembered in found. */
838 q = strjoina(p, "/", found->d_name, "/dev");
840 r = read_one_line_file(q, &t);
/* Both numbers must be present for the attribute to be usable. */
846 if (sscanf(t, "%u:%u", &maj, &min) != 2)
852 *dev = makedev(maj, min);