2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 //#include <sys/mman.h>
31 #include <sys/prctl.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 //#include <sys/types.h>
37 #include "alloc-util.h"
39 #include "cgroup-util.h"
41 #include "dirent-util.h"
44 //#include "formats-util.h"
46 #include "hostname-util.h"
49 //#include "missing.h"
50 #include "parse-util.h"
51 //#include "path-util.h"
52 #include "process-util.h"
54 #include "signal-util.h"
55 #include "stat-util.h"
56 #include "string-util.h"
58 #include "time-util.h"
59 #include "umask-util.h"
60 #include "user-util.h"
63 /* Put this test here for lack of a better place */
/* File-scope check that EAGAIN and EWOULDBLOCK are the same value. */
64 assert_cc(EAGAIN == EWOULDBLOCK);
/* Saved argument vector, NULL until somebody assigns it.
 * NOTE(review): nothing in this part of the file reads or writes it;
 * confirm who owns it. */
67 char **saved_argv = NULL;
/* Cached answer of in_initrd(): negative means "not determined yet",
 * any non-negative value is returned as-is by in_initrd().
 * in_initrd_force() overwrites it. */
68 static int saved_in_initrd = -1;
/* Returns the system page size, queried through sysconf(_SC_PAGESIZE).
 * pgsz is a thread-local static cache; the _likely_(pgsz > 0) check
 * guards the case where it has already been filled in. */
70 size_t page_size(void) {
71 static thread_local size_t pgsz = 0;
74 if (_likely_(pgsz > 0))
77 r = sysconf(_SC_PAGESIZE);
/* Worker for execute_directories(), meant to run in the forked executor
 * process.
 *
 * Walks every directory in `directories`, and for each file entry whose
 * name has not been seen in an earlier directory spawns a child for it,
 * remembering pid -> path in `pids`. Entries for which
 * null_or_empty_path() is true are treated as masks and only logged.
 * Afterwards every spawned child is waited for.
 *
 * directories: string vector of directories to scan, in priority order.
 * timeout:     overall time limit in usec; USEC_INFINITY disables it.
 *              Otherwise it is rounded up to whole seconds for alarm().
 * argv:        NOTE(review): not referenced on the lines visible here;
 *              presumably the argument vector for the spawned
 *              executables, confirm.
 *
 * Error paths visible here return the negative value produced by
 * log_error_errno(). */
84 static int do_execute(char **directories, usec_t timeout, char *argv[]) {
85 _cleanup_hashmap_free_free_ Hashmap *pids = NULL;
86 _cleanup_set_free_free_ Set *seen = NULL;
89 /* We fork this all off from a child process so that we can
90 * somewhat cleanly make use of SIGALRM to set a time limit */
92 (void) reset_all_signal_handlers();
93 (void) reset_signal_mask();
/* Ask the kernel to send us SIGTERM when our parent dies. */
95 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
/* pids: spawned child PID -> path it was started from. */
97 pids = hashmap_new(NULL);
/* seen: file names handled so far, so that the earliest directory wins. */
101 seen = set_new(&string_hash_ops);
105 STRV_FOREACH(directory, directories) {
106 _cleanup_closedir_ DIR *d;
109 d = opendir(*directory);
114 return log_error_errno(errno, "Failed to open directory %s: %m", *directory);
117 FOREACH_DIRENT(de, d, break) {
118 _cleanup_free_ char *path = NULL;
122 if (!dirent_is_file(de))
125 if (set_contains(seen, de->d_name)) {
126 log_debug("%1$s/%2$s skipped (%2$s was already seen).", *directory, de->d_name);
130 r = set_put_strdup(seen, de->d_name);
134 path = strjoin(*directory, "/", de->d_name, NULL);
138 if (null_or_empty_path(path)) {
139 log_debug("%s is empty (a mask).", path);
145 log_error_errno(errno, "Failed to fork: %m");
147 } else if (pid == 0) {
/* In the per-file child: same parent-death signal as above. */
150 assert_se(prctl(PR_SET_PDEATHSIG, SIGTERM) == 0);
160 return log_error_errno(errno, "Failed to execute %s: %m", path);
163 log_debug("Spawned %s as " PID_FMT ".", path, pid);
165 r = hashmap_put(pids, PID_TO_PTR(pid), path);
172 /* Abort execution of this process after the timeout. We simply
173 * rely on SIGALRM as default action terminating the process,
174 * and turn on alarm(). */
176 if (timeout != USEC_INFINITY)
177 alarm((timeout + USEC_PER_SEC - 1) / USEC_PER_SEC);
/* Drain the map one child at a time; the path string comes back out of
 * the map via hashmap_remove() into a _cleanup_free_ variable. */
179 while (!hashmap_isempty(pids)) {
180 _cleanup_free_ char *path = NULL;
183 pid = PTR_TO_PID(hashmap_first_key(pids));
186 path = hashmap_remove(pids, PID_TO_PTR(pid));
189 wait_for_terminate_and_warn(path, pid, true);
/* Public entry point: forks an "executor" process that runs
 * do_execute(dirs, timeout, argv) and _exit()s with EXIT_FAILURE when
 * that returned a negative value, EXIT_SUCCESS otherwise. The caller's
 * process waits for the executor with wait_for_terminate_and_warn().
 *
 * directories: non-empty string vector (asserted); the basename of the
 *              first entry is used as the name passed to
 *              wait_for_terminate_and_warn() and must be non-empty
 *              (asserted).
 * timeout:     forwarded unchanged to do_execute().
 * argv:        forwarded unchanged to do_execute().
 *
 * A failing fork() is logged; no value is returned to the caller. */
195 void execute_directories(const char* const* directories, usec_t timeout, char *argv[]) {
/* Constness is cast away only to match do_execute()'s parameter type. */
199 char **dirs = (char**) directories;
201 assert(!strv_isempty(dirs));
203 name = basename(dirs[0]);
204 assert(!isempty(name));
206 /* Executes all binaries in the directories in parallel and waits
207 * for them to finish. Optionally a timeout is applied. If a file
208 * with the same name exists in more than one directory, the
209 * earliest one wins. */
211 executor_pid = fork();
212 if (executor_pid < 0) {
213 log_error_errno(errno, "Failed to fork: %m");
216 } else if (executor_pid == 0) {
217 r = do_execute(dirs, timeout, argv);
218 _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
221 wait_for_terminate_and_warn(name, executor_pid, true);
224 #if 0 /// UNNEEDED by elogind
/* Returns true when /run/plymouth/pid exists, i.e. when
 * access(..., F_OK) on it succeeds. */
225 bool plymouth_running(void) {
226 return access("/run/plymouth/pid", F_OK) >= 0;
/* Predicate on a display string; socket_from_display() consults it
 * before parsing the display number.
 * NOTE(review): only the signature is visible here; confirm the exact
 * criteria for "local". */
230 bool display_is_local(const char *display) {
/* Builds the X11 UNIX socket path belonging to `display`.
 *
 * The display is first checked with display_is_local(). Then k, the
 * length of the run of decimal digits starting at display[1], is
 * measured, a buffer for "/tmp/.X11-unix/X" + k digits + NUL is
 * allocated, the prefix is copied with stpcpy() and the k digits are
 * appended with memcpy().
 *
 * NOTE(review): termination of the string and the hand-over through
 * `path` are not on the lines visible here; presumably *path receives
 * the newly allocated string, confirm. */
239 int socket_from_display(const char *display, char **path) {
246 if (!display_is_local(display))
/* Skip the first character of display and count the digits after it. */
249 k = strspn(display+1, "0123456789");
251 f = new(char, strlen("/tmp/.X11-unix/X") + k + 1);
255 c = stpcpy(f, "/tmp/.X11-unix/X");
256 memcpy(c, display+1, k);
264 #if 0 /// UNNEEDED by elogind
/* Resolves block device `d` to a device number written to *ret, using
 * the sysfs tree below /sys/dev/block/<major>:<minor>/.
 *
 * Steps visible here, in order:
 *   1. build the path of the device's own "queue" entry;
 *   2. build the path of its "partition" entry;
 *   3. read the single line in "../dev" and parse it as "%u:%u" into
 *      m and n (the parent's major/minor);
 *   4. build the path of the parent's "queue" entry;
 *   5. store makedev(m, n) in *ret.
 *
 * NOTE(review): what is done with each built path, and the return
 * values, are not visible on these lines. */
265 int block_get_whole_disk(dev_t d, dev_t *ret) {
272 /* If it has a queue this is good enough for us */
273 if (asprintf(&p, "/sys/dev/block/%u:%u/queue", major(d), minor(d)) < 0)
284 /* If it is a partition find the originating device */
285 if (asprintf(&p, "/sys/dev/block/%u:%u/partition", major(d), minor(d)) < 0)
294 /* Get parent dev_t */
295 if (asprintf(&p, "/sys/dev/block/%u:%u/../dev", major(d), minor(d)) < 0)
298 r = read_one_line_file(p, &s);
304 r = sscanf(s, "%u:%u", &m, &n);
310 /* Only return this if it is really good enough for us. */
311 if (asprintf(&p, "/sys/dev/block/%u:%u/queue", m, n) < 0)
318 *ret = makedev(m, n);
/* Reads the first line of /sys/kernel/kexec_loaded.
 * NOTE(review): how the line is turned into the boolean result is not
 * visible here; confirm. */
325 bool kexec_loaded(void) {
329 if (read_one_line_file("/sys/kernel/kexec_loaded", &s) >= 0) {
/* Translates the access-mode bits of open()-style flags
 * (flags & O_ACCMODE) into PROT_* bits. One of the cases yields
 * PROT_READ|PROT_WRITE.
 * NOTE(review): the case labels are not visible here; presumably that
 * one is O_RDWR, confirm. */
337 int prot_from_flags(int flags) {
339 switch (flags & O_ACCMODE) {
348 return PROT_READ|PROT_WRITE;
/* Starts a helper ("agent") process.
 *
 * pid:      NOTE(review): not touched on the lines visible here;
 *           presumably receives the agent's PID, confirm.
 * except:   array of file descriptors that close_all_fds() must leave
 *           open in the agent.
 * n_except: number of entries in `except`.
 * path:     NOTE(review): not referenced on the lines visible here;
 *           presumably the program to execute, confirm.
 * ...:      NULL-terminated list of char* arguments; they are counted
 *           and then copied, including the terminating NULL, into the
 *           alloca()ed array l.
 *
 * Sequence visible here: remember our own PID, block all signals, then
 * restore the saved mask on the agent_pid != 0 path. On the remaining
 * path: request SIGTERM on parent death, reset signal handlers and
 * mask, compare getppid() with the remembered PID, close all fds except
 * `except`, and if stdout or stderr is not a tty open /dev/tty
 * write-only and dup2() it over whichever of the two is not a tty. */
356 int fork_agent(pid_t *pid, const int except[], unsigned n_except, const char *path, ...) {
357 bool stdout_is_tty, stderr_is_tty;
358 pid_t parent_pid, agent_pid;
359 sigset_t ss, saved_ss;
367 /* Spawns a temporary TTY agent, making sure it goes away when
370 parent_pid = getpid();
372 /* First we temporarily block all signals, so that the new
373 * child has them blocked initially. This way, we can be sure
374 * that SIGTERMs are not lost we might send to the agent. */
375 assert_se(sigfillset(&ss) >= 0);
376 assert_se(sigprocmask(SIG_SETMASK, &ss, &saved_ss) >= 0);
380 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
384 if (agent_pid != 0) {
385 assert_se(sigprocmask(SIG_SETMASK, &saved_ss, NULL) >= 0);
392 * Make sure the agent goes away when the parent dies */
393 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
396 /* Make sure we actually can kill the agent, if we need to, in
397 * case somebody invoked us from a shell script that trapped
398 * SIGTERM or so... */
399 (void) reset_all_signal_handlers();
400 (void) reset_signal_mask();
402 /* Check whether our parent died before we were able
403 * to set the death signal and unblock the signals */
404 if (getppid() != parent_pid)
407 /* Don't leak fds to the agent */
408 close_all_fds(except, n_except);
410 stdout_is_tty = isatty(STDOUT_FILENO);
411 stderr_is_tty = isatty(STDERR_FILENO);
413 if (!stdout_is_tty || !stderr_is_tty) {
416 /* Detach from stdout/stderr and reopen
417 * /dev/tty for them. This is important to
418 * ensure that when systemctl is started via
419 * popen() or a similar call that expects to
420 * read EOF we actually do generate EOF and
421 * not delay this indefinitely because we
422 * keep an unused copy of stdin around. */
423 fd = open("/dev/tty", O_WRONLY);
425 log_error_errno(errno, "Failed to open /dev/tty: %m");
429 if (!stdout_is_tty && dup2(fd, STDOUT_FILENO) < 0) {
430 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
434 if (!stderr_is_tty && dup2(fd, STDERR_FILENO) < 0) {
435 log_error_errno(errno, "Failed to dup2 /dev/tty: %m");
439 if (fd > STDERR_FILENO)
443 /* Count arguments */
445 for (n = 0; va_arg(ap, char*); n++)
/* n + 1 slots: the n arguments plus the terminating NULL. */
450 l = alloca(sizeof(char *) * (n + 1));
452 /* Fill in arguments */
/* i <= n is intentional: the last iteration copies the NULL terminator. */
454 for (i = 0; i <= n; i++)
455 l[i] = va_arg(ap, char*);
/* Tells whether we are running in an initrd.
 *
 * The answer is cached in saved_in_initrd: a non-negative cached value
 * (from an earlier call or from in_initrd_force()) is returned
 * directly. Otherwise it is computed as the conjunction of
 * /etc/initrd-release being accessible, statfs("/") succeeding, and a
 * further condition that is not visible here, then stored and
 * returned. */
462 bool in_initrd(void) {
465 if (saved_in_initrd >= 0)
466 return saved_in_initrd;
468 /* We make two checks here:
470 * 1. the flag file /etc/initrd-release must exist
471 * 2. the root file system must be a memory file system
473 * The second check is extra paranoia, since misdetecting an
474 * initrd can have bad bad consequences due the initrd
475 * emptying when transititioning to the main systemd.
478 saved_in_initrd = access("/etc/initrd-release", F_OK) >= 0 &&
479 statfs("/", &s) >= 0 &&
482 return saved_in_initrd;
/* Overrides the cached in_initrd() result with `value`. Because the
 * stored value is then non-negative, later in_initrd() calls return it
 * without probing the system. */
485 void in_initrd_force(bool value) {
486 saved_in_initrd = value;
489 #if 0 /// UNNEEDED by elogind
490 /* hey glibc, APIs with callbacks without a user pointer are so useless */
/* Search over the array at `base` (nmemb elements of `size` bytes each)
 * with a comparator that takes an extra caller-supplied pointer.
 *
 * key:    object to look for; passed as first argument to compar.
 * compar: called as compar(key, element, arg); its result is tested
 *         for being greater than zero on a line visible here.
 * arg:    opaque pointer handed through to compar unchanged.
 *
 * The element probed is the one at byte offset idx * size from base.
 * NOTE(review): how idx is chosen and what is returned are not visible
 * on these lines; presumably a bsearch()-style binary search, confirm. */
491 void *xbsearch_r(const void *key, const void *base, size_t nmemb, size_t size,
492 int (*compar) (const void *, const void *, void *), void *arg) {
501 p = (void *)(((const char *) base) + (idx * size));
502 comparison = compar(key, p, arg);
505 else if (comparison > 0)
/* Determines whether the system runs on AC power by scanning
 * /sys/class/power_supply.
 *
 * For each entry that is not a hidden or backup file, the entry is
 * opened as a directory and its "type" file is read; only content that
 * is exactly "Mains\n" (6 bytes) passes that check. The "online" file
 * is then read and must be exactly two bytes, a digit followed by
 * '\n': '0' records an offline supply, '1' is handled by the other
 * branch (presumably recording an online supply; that line is not
 * visible here).
 *
 * Returns found_online || !found_offline, i.e. true when some supply is
 * online or when none was found offline. If the directory cannot be
 * opened, the value is true for ENOENT and -errno otherwise. */
513 int on_ac_power(void) {
514 bool found_offline = false, found_online = false;
515 _cleanup_closedir_ DIR *d = NULL;
517 d = opendir("/sys/class/power_supply");
519 return errno == ENOENT ? true : -errno;
523 _cleanup_close_ int fd = -1, device = -1;
529 if (!de && errno > 0)
535 if (hidden_or_backup_file(de->d_name))
538 device = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY|O_CLOEXEC|O_NOCTTY);
540 if (errno == ENOENT || errno == ENOTDIR)
546 fd = openat(device, "type", O_RDONLY|O_CLOEXEC|O_NOCTTY);
554 n = read(fd, contents, sizeof(contents));
/* Content other than exactly "Mains\n" fails this check. */
558 if (n != 6 || memcmp(contents, "Mains\n", 6))
562 fd = openat(device, "online", O_RDONLY|O_CLOEXEC|O_NOCTTY);
570 n = read(fd, contents, sizeof(contents));
/* Expect exactly one character followed by a newline. */
574 if (n != 2 || contents[1] != '\n')
577 if (contents[0] == '1') {
580 } else if (contents[0] == '0')
581 found_offline = true;
586 return found_online || !found_offline;
/* Looks up the leader process of the machine named `machine`.
 *
 * The name is validated with machine_name_is_valid(); then the LEADER
 * and CLASS keys are read from /run/systemd/machines/<machine>. CLASS
 * is compared against "container" and LEADER is parsed with
 * parse_pid().
 *
 * NOTE(review): the store into *pid and the error codes are not visible
 * on these lines; presumably *pid receives the parsed leader, confirm. */
590 int container_get_leader(const char *machine, pid_t *pid) {
591 _cleanup_free_ char *s = NULL, *class = NULL;
599 if (!machine_name_is_valid(machine))
602 p = strjoina("/run/systemd/machines/", machine);
603 r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
611 if (!streq_ptr(class, "container"))
614 r = parse_pid(s, &leader);
/* Opens file descriptors for the namespaces and root directory of
 * process `pid`.
 *
 * Paths are produced by procfs_file_alloca(pid, ...) for "ns/mnt",
 * "ns/pid", "ns/net", "ns/user" and "root"; all are opened
 * O_RDONLY|O_NOCTTY|O_CLOEXEC, the root additionally with O_DIRECTORY.
 * A failure to open the user namespace is only treated as an error
 * when errno is not ENOENT.
 *
 * The four namespace fds live in _cleanup_close_ variables; at the end
 * those are reset to -1, after the result has been handed out (the
 * store to *userns_fd is visible here, the other stores are not). */
624 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
625 _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
633 mntns = procfs_file_alloca(pid, "ns/mnt");
634 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
642 pidns = procfs_file_alloca(pid, "ns/pid");
643 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
651 netns = procfs_file_alloca(pid, "ns/net");
652 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
660 userns = procfs_file_alloca(pid, "ns/user");
661 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
/* A missing ns/user entry (ENOENT) is tolerated. */
662 if (usernsfd < 0 && errno != ENOENT)
669 root = procfs_file_alloca(pid, "root");
670 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
685 *userns_fd = usernsfd;
/* Set the local copies to -1 so they no longer refer to the opened fds. */
690 pidnsfd = mntnsfd = netnsfd = usernsfd = -1;
/* Joins the namespaces referred to by the given file descriptors.
 *
 * When userns_fd is non-negative, /proc/self/fd/<userns_fd> is first
 * compared with /proc/self/ns/user through files_same() (see the
 * comment in the body for the reason). Then setns() is applied in the
 * order PID, mount, network, user namespace, followed by
 * fchdir(root_fd). The function ends by returning reset_uid_gid().
 *
 * NOTE(review): the conditions guarding the individual setns()/fchdir()
 * calls and the error returns are not visible on these lines. */
695 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
696 if (userns_fd >= 0) {
697 /* Can't setns to your own userns, since then you could
698 * escalate from non-root to root in your own namespace, so
699 * check if namespaces equal before attempting to enter. */
700 _cleanup_free_ char *userns_fd_path = NULL;
702 if (asprintf(&userns_fd_path, "/proc/self/fd/%d", userns_fd) < 0)
705 r = files_same(userns_fd_path, "/proc/self/ns/user");
713 if (setns(pidns_fd, CLONE_NEWPID) < 0)
717 if (setns(mntns_fd, CLONE_NEWNS) < 0)
721 if (setns(netns_fd, CLONE_NEWNET) < 0)
725 if (setns(userns_fd, CLONE_NEWUSER) < 0)
729 if (fchdir(root_fd) < 0)
736 return reset_uid_gid();
/* Returns the amount of usable physical memory in bytes.
 *
 * mem is sysconf(_SC_PHYS_PAGES) multiplied by ps, both widened to
 * uint64_t before multiplying. The root cgroup path is looked up and
 * its "memory.limit_in_bytes" attribute of the "memory" controller is
 * parsed into lim; the result is MIN(mem, lim). */
739 uint64_t physical_memory(void) {
740 _cleanup_free_ char *root = NULL, *value = NULL;
745 /* We return this as uint64_t in case we are running as 32bit process on a 64bit kernel with huge amounts of
748 * In order to support containers nicely that have a configured memory limit we'll take the minimum of the
749 * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */
751 sc = sysconf(_SC_PHYS_PAGES);
755 mem = (uint64_t) sc * (uint64_t) ps;
757 if (cg_get_root_path(&root) < 0)
/* NOTE(review): this tests for any non-zero result, whereas the
 * neighbouring calls test for < 0; confirm that is intended. */
760 if (cg_get_attribute("memory", root, "memory.limit_in_bytes", &value))
763 if (safe_atou64(value, &lim) < 0)
766 /* Make sure the limit is a multiple of our own page size */
770 return MIN(mem, lim);
/* Scales the physical memory size by the fraction v / max; the
 * contract is spelled out in the comment inside the body. The
 * computation starts from p, the result of physical_memory() divided
 * by ps. */
773 uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
774 uint64_t p, m, ps, r;
778 /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success
779 * the result is a multiple of the page size (rounds down). */
784 p = physical_memory() / ps;
/* Returns the maximum number of tasks that may run on this system:
 * MIN3 of TASKS_MAX, a and b, where
 *   TASKS_MAX is derived from the size of pid_t (INT32_MAX-1 or
 *             INT16_MAX-1),
 *   a         is the number read from /proc/sys/kernel/pid_max,
 *   b         is the "pids.max" attribute of the root cgroup in the
 *             "pids" controller.
 * a and b start out as TASKS_MAX and keep that value when the source
 * cannot be read; a value of zero is replaced by TASKS_MAX as well.
 *
 * Note that the local variables a and b do not line up with the a)/b)/c)
 * labels of the list in the comment below. */
800 uint64_t system_tasks_max(void) {
802 #if SIZEOF_PID_T == 4
803 #define TASKS_MAX ((uint64_t) (INT32_MAX-1))
804 #elif SIZEOF_PID_T == 2
805 #define TASKS_MAX ((uint64_t) (INT16_MAX-1))
807 #error "Unknown pid_t size"
810 _cleanup_free_ char *value = NULL, *root = NULL;
811 uint64_t a = TASKS_MAX, b = TASKS_MAX;
813 /* Determine the maximum number of tasks that may run on this system. We check three sources to determine this
816 * a) the maximum value for the pid_t type
817 * b) the cgroups pids_max attribute for the system
818 * c) the kernel's configured maximum PID value
820 * And then pick the smallest of the three */
822 if (read_one_line_file("/proc/sys/kernel/pid_max", &value) >= 0)
823 (void) safe_atou64(value, &a);
825 if (cg_get_root_path(&root) >= 0) {
/* value is reused for the second read, so drop the first string. */
826 value = mfree(value);
828 if (cg_get_attribute("pids", root, "pids.max", &value) >= 0)
829 (void) safe_atou64(value, &b);
/* a and b are unsigned, so "<= 0" is simply a test for zero. */
832 return MIN3(TASKS_MAX,
833 a <= 0 ? TASKS_MAX : a,
834 b <= 0 ? TASKS_MAX : b);
/* Scales system_tasks_max() by the fraction v / max; see the comment
 * in the body for the contract. Overflow is detected by dividing m by
 * t and comparing the quotient with v. */
837 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
842 /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages
843 * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */
845 t = system_tasks_max();
849 if (m / t != v) /* overflow? */
855 #if 0 /// UNNEEDED by elogind
/* Updates /run/systemd/reboot-param from `param`.
 *
 * When isempty(param) is true the file is unlinked; otherwise `param`
 * is written to it with WRITE_STRING_FILE_CREATE while the umask is
 * 0022. Failures on the lines visible here are logged at warning level
 * and the value of log_warning_errno() is returned. */
856 int update_reboot_parameter_and_warn(const char *param) {
859 if (isempty(param)) {
860 if (unlink("/run/systemd/reboot-param") < 0) {
864 return log_warning_errno(errno, "Failed to unlink reboot parameter file: %m");
870 RUN_WITH_UMASK(0022) {
871 r = write_string_file("/run/systemd/reboot-param", param, WRITE_STRING_FILE_CREATE);
873 return log_warning_errno(r, "Failed to write reboot parameter file: %m");
881 puts(PACKAGE_STRING "\n"