1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <linux/oom.h>
29 #include <stdio_ext.h>
33 #include <sys/personality.h>
34 #include <sys/prctl.h>
35 #include <sys/types.h>
39 #if HAVE_VALGRIND_VALGRIND_H
40 #include <valgrind/valgrind.h>
43 #include "alloc-util.h"
44 //#include "architecture.h"
53 #include "process-util.h"
54 //#include "raw-clone.h"
55 #include "signal-util.h"
56 //#include "stat-util.h"
57 #include "string-table.h"
58 #include "string-util.h"
59 #include "user-util.h"
62 int get_process_state(pid_t pid) {
66 _cleanup_free_ char *line = NULL;
70 p = procfs_file_alloca(pid, "stat");
72 r = read_one_line_file(p, &line);
78 p = strrchr(line, ')');
84 if (sscanf(p, " %c", &state) != 1)
87 return (unsigned char) state;
90 int get_process_comm(pid_t pid, char **name) {
97 p = procfs_file_alloca(pid, "comm");
99 r = read_one_line_file(p, name);
106 int get_process_cmdline(pid_t pid, size_t max_length, bool comm_fallback, char **line) {
107 _cleanup_fclose_ FILE *f = NULL;
109 char *k, *ans = NULL;
116 /* Retrieves a process' command line. Replaces unprintable characters while doing so by whitespace (coalescing
117 * multiple sequential ones into one). If max_length is != 0 will return a string of the specified size at most
118 * (the trailing NUL byte does count towards the length here!), abbreviated with a "..." ellipsis. If
119 * comm_fallback is true and the process has no command line set (the case for kernel threads), or has a
120 * command line that resolves to the empty string will return the "comm" name of the process instead.
122 * Returns -ESRCH if the process doesn't exist, and -ENOENT if the process has no command line (and
123 * comm_fallback is false). Returns 0 and sets *line otherwise. */
125 p = procfs_file_alloca(pid, "cmdline");
134 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
136 if (max_length == 1) {
138 /* If there's only room for one byte, return the empty string */
146 } else if (max_length == 0) {
147 size_t len = 0, allocated = 0;
149 while ((c = getc(f)) != EOF) {
151 if (!GREEDY_REALLOC(ans, allocated, len+3)) {
173 bool dotdotdot = false;
176 ans = new(char, max_length);
182 while ((c = getc(f)) != EOF) {
209 if (max_length <= 4) {
213 k = ans + max_length - 4;
216 /* Eat up final spaces */
217 while (k > ans && isspace(k[-1])) {
223 strncpy(k, "...", left-1);
229 /* Kernel threads have no argv[] */
231 _cleanup_free_ char *t = NULL;
239 h = get_process_comm(pid, &t);
244 ans = strjoin("[", t, "]");
250 if (l + 3 <= max_length)
251 ans = strjoin("[", t, "]");
252 else if (max_length <= 6) {
254 ans = new(char, max_length);
258 memcpy(ans, "[...]", max_length-1);
259 ans[max_length-1] = 0;
263 t[max_length - 6] = 0;
265 /* Chop off final spaces */
267 while (e > t && isspace(e[-1]))
271 ans = strjoin("[", t, "...]");
282 #if 0 /// UNNEEDED by elogind
283 int rename_process(const char name[]) {
284 static size_t mm_size = 0;
285 static char *mm = NULL;
286 bool truncated = false;
289 /* This is like a poor man's setproctitle(). It changes the comm field, argv[0], and also the glibc's
290 * internally used name of the process. For the first one a limit of 16 chars (including the trailing NUL byte) applies; to the second one in
291 * many cases one of 10 (i.e. length of "/sbin/init") — however if we have CAP_SYS_RESOURCE it is unbounded;
292 * to the third one 7 (i.e. the length of "systemd"). If you pass a longer string it will likely be
295 * Returns 0 if a name was set but truncated, > 0 if it was set but not truncated. */
298 return -EINVAL; /* let's not confuse users unnecessarily with an empty name */
302 /* First step, change the comm field. */
303 (void) prctl(PR_SET_NAME, name);
304 if (l > 15) /* Linux process names can be 15 chars at max */
307 /* Second step, change glibc's ID of the process name. */
308 if (program_invocation_name) {
311 k = strlen(program_invocation_name);
312 strncpy(program_invocation_name, name, k);
317 /* Third step, completely replace the argv[] array the kernel maintains for us. This requires privileges, but
318 * has the advantage that the argv[] array is exactly what we want it to be, and not filled up with zeros at
319 * the end. This is the best option for changing /proc/self/cmdline. */
321 /* Let's not bother with this if we don't have euid == 0. Strictly speaking we should check for the
322 * CAP_SYS_RESOURCE capability which is independent of the euid. In our own code the capability generally is
323 * present only for euid == 0, hence let's use this as quick bypass check, to avoid calling mmap() if
324 * PR_SET_MM_ARG_{START,END} fails with EPERM later on anyway. After all geteuid() is dead cheap to call, but
327 log_debug("Skipping PR_SET_MM, as we don't have privileges.");
328 else if (mm_size < l+1) {
332 nn_size = PAGE_ALIGN(l+1);
333 nn = mmap(NULL, nn_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
334 if (nn == MAP_FAILED) {
335 log_debug_errno(errno, "mmap() failed: %m");
339 strncpy(nn, name, nn_size);
341 /* Now, let's tell the kernel about this new memory */
342 if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, (unsigned long) nn, 0, 0) < 0) {
343 log_debug_errno(errno, "PR_SET_MM_ARG_START failed, proceeding without: %m");
344 (void) munmap(nn, nn_size);
348 /* And update the end pointer to the new end, too. If this fails, we don't really know what to do, it's
349 * pretty unlikely that we can rollback, hence we'll just accept the failure, and continue. */
350 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) nn + l + 1, 0, 0) < 0)
351 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
354 (void) munmap(mm, mm_size);
359 strncpy(mm, name, mm_size);
361 /* Update the end pointer, continuing regardless of any failure. */
362 if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, (unsigned long) mm + l + 1, 0, 0) < 0)
363 log_debug_errno(errno, "PR_SET_MM_ARG_END failed, proceeding without: %m");
367 /* Fourth step: in all cases we'll also update the original argv[], so that our own code gets it right too if
368 * it still looks here */
370 if (saved_argc > 0) {
376 k = strlen(saved_argv[0]);
377 strncpy(saved_argv[0], name, k);
382 for (i = 1; i < saved_argc; i++) {
386 memzero(saved_argv[i], strlen(saved_argv[i]));
394 int is_kernel_thread(pid_t pid) {
401 if (IN_SET(pid, 0, 1) || pid == getpid_cached()) /* pid 1, and we ourselves certainly aren't a kernel thread */
406 p = procfs_file_alloca(pid, "cmdline");
414 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
416 count = fread(&c, 1, 1, f);
420 /* Kernel threads have an empty cmdline */
423 return eof ? 1 : -errno;
428 #if 0 /// UNNEEDED by elogind
429 int get_process_capeff(pid_t pid, char **capeff) {
436 p = procfs_file_alloca(pid, "status");
438 r = get_proc_field(p, "CapEff", WHITESPACE, capeff);
446 static int get_process_link_contents(const char *proc_file, char **name) {
452 r = readlink_malloc(proc_file, name);
461 int get_process_exe(pid_t pid, char **name) {
468 p = procfs_file_alloca(pid, "exe");
469 r = get_process_link_contents(p, name);
473 d = endswith(*name, " (deleted)");
480 #if 0 /// UNNEEDED by elogind
481 static int get_process_id(pid_t pid, const char *field, uid_t *uid) {
482 _cleanup_fclose_ FILE *f = NULL;
492 p = procfs_file_alloca(pid, "status");
500 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
502 FOREACH_LINE(line, f, return -errno) {
507 if (startswith(l, field)) {
509 l += strspn(l, WHITESPACE);
511 l[strcspn(l, WHITESPACE)] = 0;
513 return parse_uid(l, uid);
520 int get_process_uid(pid_t pid, uid_t *uid) {
522 if (pid == 0 || pid == getpid_cached()) {
527 return get_process_id(pid, "Uid:", uid);
530 int get_process_gid(pid_t pid, gid_t *gid) {
532 if (pid == 0 || pid == getpid_cached()) {
537 assert_cc(sizeof(uid_t) == sizeof(gid_t));
538 return get_process_id(pid, "Gid:", gid);
541 int get_process_cwd(pid_t pid, char **cwd) {
546 p = procfs_file_alloca(pid, "cwd");
548 return get_process_link_contents(p, cwd);
551 int get_process_root(pid_t pid, char **root) {
556 p = procfs_file_alloca(pid, "root");
558 return get_process_link_contents(p, root);
561 int get_process_environ(pid_t pid, char **env) {
562 _cleanup_fclose_ FILE *f = NULL;
563 _cleanup_free_ char *outcome = NULL;
566 size_t allocated = 0, sz = 0;
571 p = procfs_file_alloca(pid, "environ");
580 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
582 while ((c = fgetc(f)) != EOF) {
583 if (!GREEDY_REALLOC(outcome, allocated, sz + 5))
587 outcome[sz++] = '\n';
589 sz += cescape_char(c, outcome + sz);
593 outcome = strdup("");
605 int get_process_ppid(pid_t pid, pid_t *_ppid) {
607 _cleanup_free_ char *line = NULL;
614 if (pid == 0 || pid == getpid_cached()) {
619 p = procfs_file_alloca(pid, "stat");
620 r = read_one_line_file(p, &line);
626 /* Let's skip the pid and comm fields. The latter is enclosed
627 * in () but does not escape any () in its value, so let's
628 * skip over it manually */
630 p = strrchr(line, ')');
642 if ((long unsigned) (pid_t) ppid != ppid)
645 *_ppid = (pid_t) ppid;
651 int wait_for_terminate(pid_t pid, siginfo_t *status) {
662 if (waitid(P_PID, pid, status, WEXITED) < 0) {
667 return negative_errno();
676 * < 0 : wait_for_terminate() failed to get the state of the
677 * process, the process was terminated by a signal, or
678 * failed for an unknown reason.
679 * >=0 : The process terminated normally, and its exit code is
682 * That is, success is indicated by a return value of zero, and an
683 * error is indicated by a non-zero value.
685 * A warning is emitted if the process terminates abnormally, and also if it
686 * returns non-zero while check_exit_code is true (otherwise that is only logged at debug level).
688 int wait_for_terminate_and_warn(const char *name, pid_t pid, bool check_exit_code) {
695 r = wait_for_terminate(pid, &status);
697 return log_warning_errno(r, "Failed to wait for %s: %m", name);
699 if (status.si_code == CLD_EXITED) {
700 if (status.si_status != 0)
701 log_full(check_exit_code ? LOG_WARNING : LOG_DEBUG,
702 "%s failed with error code %i.", name, status.si_status);
704 log_debug("%s succeeded.", name);
706 return status.si_status;
707 } else if (IN_SET(status.si_code, CLD_KILLED, CLD_DUMPED)) {
709 log_warning("%s terminated by signal %s.", name, signal_to_string(status.si_status));
713 log_warning("%s failed due to unknown reason.", name);
717 #if 0 /// UNNEEDED by elogind
718 void sigkill_wait(pid_t pid) {
721 if (kill(pid, SIGKILL) > 0)
722 (void) wait_for_terminate(pid, NULL);
725 void sigkill_waitp(pid_t *pid) {
/* Sends signal "sig" to process "pid" and, if that worked, follows up with a SIGCONT.
 *
 * Returns 0 if the initial kill() succeeded, and the negated errno of that kill() otherwise. The result of the
 * SIGCONT follow-up does not influence the return value. */
int kill_and_sigcont(pid_t pid, int sig) {
        if (kill(pid, sig) < 0)
                return -errno;

        /* The follow-up is pointless if SIGCONT is what we just sent, and equally so for SIGKILL, which isn't
         * affected by the process being suspended anyway. */
        if (sig != SIGCONT && sig != SIGKILL)
                (void) kill(pid, SIGCONT);

        return 0;
}
748 int getenv_for_pid(pid_t pid, const char *field, char **_value) {
749 _cleanup_fclose_ FILE *f = NULL;
760 path = procfs_file_alloca(pid, "environ");
762 f = fopen(path, "re");
769 (void) __fsetlocking(f, FSETLOCKING_BYCALLER);
778 for (i = 0; i < sizeof(line)-1; i++) {
782 if (_unlikely_(c == EOF)) {
792 if (strneq(line, field, l) && line[l] == '=') {
793 value = strdup(line + l + 1);
807 bool pid_is_unwaited(pid_t pid) {
808 /* Checks whether a PID is still valid at all, including a zombie */
813 if (pid <= 1) /* If we or PID 1 would be dead and have been waited for, this code would not be running */
816 if (pid == getpid_cached())
819 if (kill(pid, 0) >= 0)
822 return errno != ESRCH;
825 bool pid_is_alive(pid_t pid) {
828 /* Checks whether a PID is still valid and not a zombie */
833 if (pid <= 1) /* If we or PID 1 would be a zombie, this code would not be running */
836 if (pid == getpid_cached())
839 r = get_process_state(pid);
840 if (IN_SET(r, -ESRCH, 'Z'))
846 #if 0 /// UNNEEDED by elogind
847 int pid_from_same_root_fs(pid_t pid) {
853 if (pid == 0 || pid == getpid_cached())
856 root = procfs_file_alloca(pid, "root");
858 return files_same(root, "/proc/1/root", 0);
862 bool is_main_thread(void) {
863 static thread_local int cached = 0;
865 if (_unlikely_(cached == 0))
866 cached = getpid_cached() == gettid() ? 1 : -1;
871 #if 0 /// UNNEEDED by elogind
872 noreturn void freeze(void) {
876 /* Make sure nobody waits for us on a socket anymore */
877 close_all_fds(NULL, 0);
/* Checks whether "oa" lies within the inclusive range OOM_SCORE_ADJ_MIN…OOM_SCORE_ADJ_MAX. */
bool oom_score_adjust_is_valid(int oa) {
        if (oa < OOM_SCORE_ADJ_MIN)
                return false;

        if (oa > OOM_SCORE_ADJ_MAX)
                return false;

        return true;
}
889 unsigned long personality_from_string(const char *p) {
893 return PERSONALITY_INVALID;
895 /* Parse a personality specifier. We use our own identifiers that indicate specific ABIs, rather than just
896 * hints regarding the register size, since we want to keep things open for multiple locally supported ABIs for
897 * the same register size. */
899 architecture = architecture_from_string(p);
900 if (architecture < 0)
901 return PERSONALITY_INVALID;
903 if (architecture == native_architecture())
905 #ifdef SECONDARY_ARCHITECTURE
906 if (architecture == SECONDARY_ARCHITECTURE)
910 return PERSONALITY_INVALID;
913 const char* personality_to_string(unsigned long p) {
914 int architecture = _ARCHITECTURE_INVALID;
917 architecture = native_architecture();
918 #ifdef SECONDARY_ARCHITECTURE
919 else if (p == PER_LINUX32)
920 architecture = SECONDARY_ARCHITECTURE;
923 if (architecture < 0)
926 return architecture_to_string(architecture);
929 int safe_personality(unsigned long p) {
932 /* So here's the deal, personality() is weirdly defined by glibc. In some cases it returns a failure via errno,
933 * and in others as negative return value containing an errno-like value. Let's work around this: this is a
934 * wrapper that uses errno if it is set, and uses the return value otherwise. And then it sets both errno and
935 * the return value indicating the same issue, so that we are definitely on the safe side.
937 * See https://github.com/systemd/systemd/issues/6737 */
940 ret = personality(p);
951 int opinionated_personality(unsigned long *ret) {
954 /* Returns the current personality, or PERSONALITY_INVALID if we can't determine it. This function is a bit
955 * opinionated though, and ignores all the finer-grained bits and exotic personalities, only distinguishing the
956 * two most relevant personalities: PER_LINUX and PER_LINUX32. */
958 current = safe_personality(PERSONALITY_INVALID);
962 if (((unsigned long) current & 0xffff) == PER_LINUX32)
970 void valgrind_summary_hack(void) {
971 #if HAVE_VALGRIND_VALGRIND_H
972 if (getpid_cached() == 1 && RUNNING_ON_VALGRIND) {
974 pid = raw_clone(SIGCHLD);
976 log_emergency_errno(errno, "Failed to fork off valgrind helper: %m");
980 log_info("Spawned valgrind helper as PID "PID_FMT".", pid);
981 (void) wait_for_terminate(pid, NULL);
/* Three-way comparison of the two pid_t values that "a" and "b" point to. Returns a negative value, zero or a
 * positive value if the first PID is smaller than, equal to or larger than the second one. */
int pid_compare_func(const void *a, const void *b) {
        const pid_t *left = a, *right = b;

        /* Suitable for usage in qsort() */

        /* Compare explicitly instead of subtracting, so that the result cannot overflow. */
        if (*left == *right)
                return 0;

        return *left < *right ? -1 : 1;
}
999 int ioprio_parse_priority(const char *s, int *ret) {
1005 r = safe_atoi(s, &i);
1009 if (!ioprio_priority_is_valid(i))
1017 /* The cached PID, possible values:
1019 * == UNSET [0] → cache not initialized yet
1020 * == BUSY [-1] → some thread is initializing it at the moment
1021 * any other → the cached PID
1024 #define CACHED_PID_UNSET ((pid_t) 0)
1025 #define CACHED_PID_BUSY ((pid_t) -1)
1027 static pid_t cached_pid = CACHED_PID_UNSET;
1029 static void reset_cached_pid(void) {
1030 /* Invoked in the child after a fork(), i.e. at the first moment the PID changed */
1031 cached_pid = CACHED_PID_UNSET;
1034 /* We use glibc __register_atfork() + __dso_handle directly here, as they are not included in the glibc
1035 * headers. __register_atfork() is mostly equivalent to pthread_atfork(), but doesn't require us to link against
1036 * libpthread, as it is part of glibc anyway. */
1038 extern int __register_atfork(void (*prepare) (void), void (*parent) (void), void (*child) (void), void * __dso_handle);
1039 extern void* __dso_handle __attribute__ ((__weak__));
1040 #endif // ifdef __GLIBC__
1042 pid_t getpid_cached(void) {
1043 pid_t current_value;
1045 /* getpid_cached() is much like getpid(), but caches the value in local memory, to avoid having to invoke a
1046 * system call each time. This restores glibc behaviour from before 2.24, when getpid() was unconditionally
1047 * cached. Starting with 2.24 getpid() started to become prohibitively expensive when used for detecting when
1048 * objects were used across fork()s. With this caching the old behaviour is somewhat restored.
1050 * https://bugzilla.redhat.com/show_bug.cgi?id=1443976
1051 * https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=c579f48edba88380635ab98cb612030e3ed8691e
1054 current_value = __sync_val_compare_and_swap(&cached_pid, CACHED_PID_UNSET, CACHED_PID_BUSY);
1056 switch (current_value) {
1058 case CACHED_PID_UNSET: { /* Not initialized yet, then do so now */
1063 if (__register_atfork(NULL, NULL, reset_cached_pid, __dso_handle) != 0) {
1064 /* OOM? Let's try again later */
1065 cached_pid = CACHED_PID_UNSET;
1069 cached_pid = new_pid;
1073 case CACHED_PID_BUSY: /* Somebody else is currently initializing */
1076 default: /* Properly initialized */
1077 return current_value;
1081 int must_be_root(void) {
1086 log_error("Need to be root.");
1090 #if 0 /// UNNEEDED by elogind
1091 static const char *const ioprio_class_table[] = {
1092 [IOPRIO_CLASS_NONE] = "none",
1093 [IOPRIO_CLASS_RT] = "realtime",
1094 [IOPRIO_CLASS_BE] = "best-effort",
1095 [IOPRIO_CLASS_IDLE] = "idle"
1098 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(ioprio_class, int, INT_MAX);
1100 static const char *const sigchld_code_table[] = {
1101 [CLD_EXITED] = "exited",
1102 [CLD_KILLED] = "killed",
1103 [CLD_DUMPED] = "dumped",
1104 [CLD_TRAPPED] = "trapped",
1105 [CLD_STOPPED] = "stopped",
1106 [CLD_CONTINUED] = "continued",
1109 DEFINE_STRING_TABLE_LOOKUP(sigchld_code, int);
1111 static const char* const sched_policy_table[] = {
1112 [SCHED_OTHER] = "other",
1113 [SCHED_BATCH] = "batch",
1114 [SCHED_IDLE] = "idle",
1115 [SCHED_FIFO] = "fifo",
1119 DEFINE_STRING_TABLE_LOOKUP_WITH_FALLBACK(sched_policy, int, INT_MAX);