2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 //#include <sys/mman.h>
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 //#include <sys/types.h>
37 #include "alloc-util.h"
39 #include "cgroup-util.h"
41 #include "dirent-util.h"
44 //#include "formats-util.h"
46 #include "hostname-util.h"
49 //#include "missing.h"
50 #include "parse-util.h"
51 //#include "path-util.h"
52 #include "process-util.h"
54 #include "signal-util.h"
55 #include "stat-util.h"
56 #include "string-util.h"
58 #include "time-util.h"
59 #include "umask-util.h"
60 #include "user-util.h"
64 char **saved_argv = NULL;
65 static int saved_in_initrd = -1;
67 size_t page_size(void) {
68 static thread_local size_t pgsz = 0;
71 if (_likely_(pgsz > 0))
74 r = sysconf(_SC_PAGESIZE);
81 #if 0 /// UNNEEDED by elogind
82 bool plymouth_running(void) {
83 return access("/run/plymouth/pid", F_OK) >= 0;
87 bool display_is_local(const char *display) {
96 int socket_from_display(const char *display, char **path) {
103 if (!display_is_local(display))
106 k = strspn(display+1, "0123456789");
108 f = new(char, strlen("/tmp/.X11-unix/X") + k + 1);
112 c = stpcpy(f, "/tmp/.X11-unix/X");
113 memcpy(c, display+1, k);
121 #if 0 /// UNNEEDED by elogind
122 int block_get_whole_disk(dev_t d, dev_t *ret) {
129 /* If it has a queue this is good enough for us */
130 if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
141 /* If it is a partition find the originating device */
142 if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
151 /* Get parent dev_t */
152 if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
155 r = read_one_line_file(p, &s);
161 r = sscanf(s, "%u:%u", &m, &n);
167 /* Only return this if it is really good enough for us. */
168 if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0)
175 *ret = makedev(m, n);
182 bool kexec_loaded(void) {
186 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) {
194 int prot_from_flags(int flags) {
196 switch (flags & O_ACCMODE) {
205 return PROT_READ|PROT_WRITE;
213 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
214 bool stdout_is_tty, stderr_is_tty;
215 pid_t parent_pid, agent_pid;
216 sigset_t ss, saved_ss;
224 /* Spawns a temporary TTY agent, making sure it goes away when
227 parent_pid = getpid();
229 /* First we temporarily block all signals, so that the new
230 * child has them blocked initially. This way, we can be sure
231 * that SIGTERMs we might send to the agent are not lost. */
232 assert_se(sigfillset(&ss) >= 0);
233 assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
237 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
241 if (agent_pid != 0) {
242 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
249 * Make sure the agent goes away when the parent dies */
250 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
253 /* Make sure we actually can kill the agent, if we need to, in
254 * case somebody invoked us from a shell script that trapped
255 * SIGTERM or so... */
256 (void) reset_all_signal_handlers();
257 (void) reset_signal_mask();
259 /* Check whether our parent died before we were able
260 * to set the death signal and unblock the signals */
261 if (getppid() != parent_pid)
264 /* Don't leak fds to the agent */
265 close_all_fds(except, n_except);
267 stdout_is_tty = isatty(STDOUT_FILENO);
268 stderr_is_tty = isatty(STDERR_FILENO);
270 if (!stdout_is_tty || !stderr_is_tty) {
273 /* Detach from stdout/stderr and reopen
274 * /dev/tty for them. This is important to
275 * ensure that when systemctl is started via
276 * popen() or a similar call that expects to
277 * read EOF we actually do generate EOF and
278 * not delay this indefinitely because we
279 * keep an unused copy of stdin around. */
280 fd = open("/dev/tty", O_WRONLY);
282 log_error_errno(errno, "Failed to open /dev/tty: %m");
286 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
287 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
291 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
292 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
296 if (fd > STDERR_FILENO)
300 /* Count arguments */
302 for (n = 0; va_arg(ap, char*); n++)
307 l = alloca(sizeof(char *) * (n + 1));
309 /* Fill in arguments */
311 for (i = 0; i <= n; i++)
312 l[i] = va_arg(ap, char*);
319 bool in_initrd(void) {
322 if (saved_in_initrd >= 0)
323 return saved_in_initrd;
325 /* We make two checks here:
327 * 1. the flag file /etc/initrd-release must exist
328 * 2. the root file system must be a memory file system
330 * The second check is extra paranoia, since misdetecting an
331 * initrd can have bad consequences due to the initrd
332 * emptying when transitioning to the main systemd.
335 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
336 statfs("/", &s) >= 0 &&
339 return saved_in_initrd;
342 void in_initrd_force(bool value) {
343 saved_in_initrd = value;
346 #if 0 /// UNNEEDED by elogind
347 /* hey glibc, APIs with callbacks without a user pointer are so useless */
348 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
349 int (*compar) (const void *, const void *, void *), void *arg) {
358 p = (const char *) base + idx * size;
359 comparison = compar(key, p, arg);
362 else if (comparison > 0)
370 int on_ac_power(void) {
371 bool found_offline = false, found_online = false;
372 _cleanup_closedir_ DIR *d = NULL;
375 d = opendir("/sys/class/power_supply");
377 return errno == ENOENT ? true : -errno;
379 FOREACH_DIRENT(de, d, return -errno) {
380 _cleanup_close_ int fd = -1, device = -1;
384 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
386 if (errno == ENOENT || errno == ENOTDIR)
392 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
400 n = read(fd, contents, sizeof(contents));
404 if (n != 6 || memcmp(contents, "Mains\n", 6))
408 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
416 n = read(fd, contents, sizeof(contents));
420 if (n != 2 || contents[1] != '\n')
423 if (contents[0] == '1') {
426 } else if (contents[0] == '0')
427 found_offline = true;
432 return found_online || !found_offline;
436 int container_get_leader(const char *machine, pid_t *pid) {
437 _cleanup_free_ char *s = NULL, *class = NULL;
445 if (!machine_name_is_valid(machine))
448 p = strjoina("/run/systemd/machines/", machine);
449 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
457 if (!streq_ptr(class, "container"))
460 r = parse_pid(s, &leader);
470 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
471 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
479 mntns = procfs_file_alloca(pid, "ns/mnt");
480 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
488 pidns = procfs_file_alloca(pid, "ns/pid");
489 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
497 netns = procfs_file_alloca(pid, "ns/net");
498 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
506 userns = procfs_file_alloca(pid, "ns/user");
507 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
508 if (usernsfd < 0 && errno != ENOENT)
515 root = procfs_file_alloca(pid, "root");
516 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
531 *userns_fd = usernsfd;
536 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
541 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
542 if (userns_fd >= 0) {
543 /* Can't setns to your own userns, since then you could
544 * escalate from non-root to root in your own namespace, so
545 * check if namespaces equal before attempting to enter. */
546 _cleanup_free_ char *userns_fd_path = NULL;
548 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
551 r = files_same(userns_fd_path, "/proc/self/ns/user");
559 if (setns(pidns_fd, CLONE_NEWPID) < 0)
563 if (setns(mntns_fd, CLONE_NEWNS) < 0)
567 if (setns(netns_fd, CLONE_NEWNET) < 0)
571 if (setns(userns_fd, CLONE_NEWUSER) < 0)
575 if (fchdir(root_fd) < 0)
582 return reset_uid_gid();
585 uint64_t physical_memory(void) {
586 _cleanup_free_ char *root = NULL, *value = NULL;
591 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
594 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
595 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
597 sc = sysconf(_SC_PHYS_PAGES);
601 mem = (uint64_t) sc * (uint64_t) ps;
603 if (cg_get_root_path(&root) < 0)
606 if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
609 if (safe_atou64(value, &lim) < 0)
612 /* Make sure the limit is a multiple of our own page size */
616 return MIN(mem, lim);
619 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
620 uint64_t p, m, ps, r;
624 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
625 * the result is a multiple of the page size (rounds down). */
630 p = physical_memory() / ps;
646 uint64_t system_tasks_max(void) {
648 #if SIZEOF_PID_T == 4
649 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
650 #elif SIZEOF_PID_T == 2
651 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
653 #error "Unknown pid_t size"
656 _cleanup_free_ char *value = NULL, *root = NULL;
657 uint64_t a = TASKS_MAX, b = TASKS_MAX;
659 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
662 * a) the maximum value for the pid_t type
663 * b) the cgroups pids_max attribute for the system
664 * c) the kernel's configured maximum PID value
666 * And then pick the smallest of the three */
668 if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
669 (void) safe_atou64(value, &a);
671 if (cg_get_root_path(&root) >= 0) {
672 value = mfree(value);
674 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
675 (void) safe_atou64(value, &b);
678 return MIN3(TASKS_MAX,
679 a <= 0 ? TASKS_MAX : a,
680 b <= 0 ? TASKS_MAX : b);
683 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
688 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
689 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
691 t = system_tasks_max();
695 if (m / t != v) /* overflow? */
701 #if 0 /// UNNEEDED by elogind
702 int update_reboot_parameter_and_warn(const char *param) {
705 if (isempty(param)) {
706 if (unlink("/run/systemd/reboot-param") < 0) {
710 return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
716 RUN_WITH_UMASK(0022) {
717 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
719 return log_warning_errno(r, "Failed to write reboot parameter file: %m");
727 puts(PACKAGE_STRING "\n"