1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
74 #include "utmp-wtmp.h"
76 #include "path-util.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-endpoint.h"
92 #include "seccomp-util.h"
95 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
96 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
98 /* This assumes there is a 'tty' group */
101 #define SNDBUF_SIZE (8*1024*1024)
// shift_fds: relocates the passed descriptors so that fds[i] is aimed at
// descriptor number i+3, duplicating with fcntl(F_DUPFD). When a wanted slot
// is not free, the first such index is remembered and the pass is restarted
// from there. The fds array itself is modified.
103 static int shift_fds(int fds[], unsigned n_fds) {
104 int start, restart_from;
109 /* Modifies the fds array! (sorts it) */
119 for (i = start; i < (int) n_fds; i++) {
122 /* Already at right index? */
// F_DUPFD yields the lowest free descriptor that is >= i+3.
126 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
132 /* Hmm, the fd we wanted isn't free? Then
133 * let's remember that and try again from here */
// Only the first conflicting index of a pass is recorded.
134 if (nfd != i+3 && restart_from < 0)
138 if (restart_from < 0)
141 start = restart_from;
// flags_fds: for every descriptor in fds, applies the requested O_NONBLOCK
// state through fd_nonblock() and clears FD_CLOEXEC through fd_cloexec().
// Both helper results are captured in r and tested for a negative value.
147 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
156 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
158 for (i = 0; i < n_fds; i++) {
160 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
163 /* We unconditionally drop FD_CLOEXEC from the fds,
164 * since after all we want to pass these fds to our
167 if ((r = fd_cloexec(fds[i], false)) < 0)
// tty_path: returns the TTY path configured in the context, or the literal
// path of the system console when none is configured.
174 _pure_ static const char *tty_path(const ExecContext *context) {
177 if (context->tty_path)
178 return context->tty_path;
180 return "/dev/console";
// exec_context_tty_reset: applies the tty_vhangup, tty_reset and
// tty_vt_disallocate settings of the context. Hangup and reset act on
// tty_path(context), so they fall back to the console path. VT disallocation
// is only attempted when an explicit context->tty_path is set.
183 static void exec_context_tty_reset(const ExecContext *context) {
186 if (context->tty_vhangup)
187 terminal_vhangup(tty_path(context));
189 if (context->tty_reset)
190 reset_terminal(tty_path(context));
192 if (context->tty_vt_disallocate && context->tty_path)
193 vt_disallocate(context->tty_path);
// is_terminal_output: predicate over an ExecOutput value. The visible
// expression matches EXEC_OUTPUT_TTY and the three AND_CONSOLE variants
// (syslog, kmsg, journal).
196 static bool is_terminal_output(ExecOutput o) {
198 o == EXEC_OUTPUT_TTY ||
199 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
201 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
// open_null_as: opens /dev/null with the given flags (O_NOCTTY is always
// added) and duplicates the result onto descriptor nfd.
204 static int open_null_as(int flags, int nfd) {
209 fd = open("/dev/null", flags|O_NOCTTY);
// r is -errno when dup2() fails, otherwise the target descriptor nfd.
214 r = dup2(fd, nfd) < 0 ? -errno : nfd;
// connect_journal_socket: connects fd to the AF_UNIX socket at
// /run/systemd/journal/stdout. uid and gid may be UID_INVALID / GID_INVALID.
// When they are valid, the effective ids are put back to olduid / oldgid
// after the connect attempt.
// NOTE(review): the bodies of the two branches before connect() are not
// visible here; presumably they save the old ids and switch the effective
// ids to uid/gid - confirm.
222 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
223 union sockaddr_union sa = {
224 .un.sun_family = AF_UNIX,
225 .un.sun_path = "/run/systemd/journal/stdout",
227 uid_t olduid = UID_INVALID;
228 gid_t oldgid = GID_INVALID;
231 if (gid != GID_INVALID) {
239 if (uid != UID_INVALID) {
// The address length covers the header plus the path string without its
// trailing NUL byte.
249 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
253 /* If we fail to restore the uid or gid, things will likely
254 fail later on. This should only happen if an LSM interferes. */
// Restoration results are deliberately discarded (cast to void).
256 if (uid != UID_INVALID)
257 (void) seteuid(olduid);
260 if (gid != GID_INVALID)
261 (void) setegid(oldgid);
// connect_logger_as: creates an AF_UNIX stream socket, connects it through
// connect_journal_socket() using the given uid/gid, shuts down its read
// side, enlarges its send buffer to SNDBUF_SIZE and duplicates it onto nfd.
// NOTE(review): the argument list in the middle (identifier, priority,
// level-prefix flag and three output-dependent booleans) belongs to a call
// whose head is not visible here; presumably it writes the stream header
// for the journal - confirm.
266 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
270 assert(output < _EXEC_OUTPUT_MAX);
274 fd = socket(AF_UNIX, SOCK_STREAM, 0);
278 r = connect_journal_socket(fd, uid, gid);
// The socket is only written to, so the receiving direction is closed.
282 if (shutdown(fd, SHUT_RD) < 0) {
287 fd_inc_sndbuf(fd, SNDBUF_SIZE);
// The configured syslog identifier takes precedence over ident.
297 context->syslog_identifier ? context->syslog_identifier : ident,
299 context->syslog_priority,
300 !!context->syslog_level_prefix,
301 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
302 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
303 is_terminal_output(output));
// r is -errno when dup2() fails, otherwise the target descriptor nfd.
306 r = dup2(fd, nfd) < 0 ? -errno : nfd;
// open_terminal_as: opens the terminal at path via open_terminal() with the
// given mode plus O_NOCTTY and duplicates it onto descriptor nfd.
313 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
319 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
// r is -errno when dup2() fails, otherwise the target descriptor nfd.
323 r = dup2(fd, nfd) < 0 ? -errno : nfd;
// is_terminal_input: predicate over an ExecInput value. The visible
// expression matches the three TTY input modes (plain, FORCE and FAIL).
331 static bool is_terminal_input(ExecInput i) {
333 i == EXEC_INPUT_TTY ||
334 i == EXEC_INPUT_TTY_FORCE ||
335 i == EXEC_INPUT_TTY_FAIL;
// fixup_input: downgrades the configured input to EXEC_INPUT_NULL when it
// cannot be honoured: a terminal input while apply_tty_stdin is false, or a
// socket input without a valid socket_fd.
338 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
340 if (is_terminal_input(std_input) && !apply_tty_stdin)
341 return EXEC_INPUT_NULL;
343 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
344 return EXEC_INPUT_NULL;
// fixup_output: falls back to EXEC_OUTPUT_INHERIT when socket output is
// requested but no valid socket_fd was passed.
349 static int fixup_output(ExecOutput std_output, int socket_fd) {
351 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
352 return EXEC_OUTPUT_INHERIT;
// setup_input: connects STDIN_FILENO according to the context. The
// configured input is first normalised by fixup_input(); the visible cases
// then use /dev/null, an acquired terminal, or the passed socket.
357 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
362 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366 case EXEC_INPUT_NULL:
367 return open_null_as(O_RDONLY, STDIN_FILENO);
370 case EXEC_INPUT_TTY_FORCE:
371 case EXEC_INPUT_TTY_FAIL: {
// The second and third arguments are derived from the FAIL and FORCE modes
// respectively.
374 fd = acquire_terminal(tty_path(context),
375 i == EXEC_INPUT_TTY_FAIL,
376 i == EXEC_INPUT_TTY_FORCE,
// Only duplicate when the terminal did not already land on stdin.
382 if (fd != STDIN_FILENO) {
383 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
391 case EXEC_INPUT_SOCKET:
392 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
395 assert_not_reached("Unknown input type");
// setup_output: connects fileno (stdout or stderr) according to the context.
// Input and output settings are first normalised with fixup_input() and
// fixup_output(). For stderr, a setting equal to stdout or an inherited one
// is served by duplicating STDOUT_FILENO. For an inherited stdout, the
// terminal, stdin or /dev/null is used depending on the effective input.
// Otherwise the switch dispatches on the output type: /dev/null, terminal,
// one of the logger targets, or the passed socket.
// uid and gid are handed through to connect_logger_as().
399 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
407 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
408 o = fixup_output(context->std_output, socket_fd);
410 if (fileno == STDERR_FILENO) {
412 e = fixup_output(context->std_error, socket_fd);
414 /* This expects the input and output are already set up */
416 /* Don't change the stderr file descriptor if we inherit all
417 * the way and are not on a tty */
418 if (e == EXEC_OUTPUT_INHERIT &&
419 o == EXEC_OUTPUT_INHERIT &&
420 i == EXEC_INPUT_NULL &&
421 !is_terminal_input(context->std_input) &&
425 /* Duplicate from stdout if possible */
426 if (e == o || e == EXEC_OUTPUT_INHERIT)
427 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
431 } else if (o == EXEC_OUTPUT_INHERIT) {
432 /* If input got downgraded, inherit the original value */
433 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
434 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
436 /* If the input is connected to anything that's not a /dev/null, inherit that... */
437 if (i != EXEC_INPUT_NULL)
438 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
440 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
444 /* We need to open /dev/null here anew, to get the right access mode. */
445 return open_null_as(O_WRONLY, fileno);
450 case EXEC_OUTPUT_NULL:
451 return open_null_as(O_WRONLY, fileno);
453 case EXEC_OUTPUT_TTY:
// Reuse the terminal already set up on stdin when the input is a terminal.
454 if (is_terminal_input(i))
455 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
457 /* We don't reset the terminal if this is just about output */
458 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
460 case EXEC_OUTPUT_SYSLOG:
461 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
462 case EXEC_OUTPUT_KMSG:
463 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
464 case EXEC_OUTPUT_JOURNAL:
465 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
466 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
// NOTE(review): the condition guarding the log call and the /dev/null
// fallback below is not visible here; presumably it is the failure of
// connect_logger_as() - confirm.
468 log_unit_struct(unit_id,
470 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
471 fileno == STDOUT_FILENO ? "stdout" : "stderr",
472 unit_id, strerror(-r)),
475 r = open_null_as(O_WRONLY, fileno);
479 case EXEC_OUTPUT_SOCKET:
// fixup_output() has already replaced SOCKET by INHERIT for an invalid fd.
480 assert(socket_fd >= 0);
481 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
// NOTE(review): this switch is over output types, yet the message below
// says error type - confirm whether that wording is intended.
484 assert_not_reached("Unknown error type");
// chown_terminal: tries to hand the terminal behind fd over to uid with
// mode TTY_MODE. The fchown/fchmod results are ignored on purpose; instead
// the outcome is verified through fstat() by comparing the owner and the
// permission bits (masked with 0777).
488 static int chown_terminal(int fd, uid_t uid) {
493 /* This might fail. What matters are the results. */
// The group is left unchanged (-1).
494 (void) fchown(fd, uid, -1);
495 (void) fchmod(fd, TTY_MODE);
497 if (fstat(fd, &st) < 0)
500 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
// setup_confirm_stdio: saves the current stdin and stdout as duplicates
// numbered 3 or higher, acquires a terminal, makes the calling user its
// owner through chown_terminal() and installs it as stdin and stdout.
// The saved descriptors are handed back through the two out parameters;
// the safe_close() calls at the end release them again (presumably on the
// failure path, which is not fully visible here - confirm).
506 static int setup_confirm_stdio(int *_saved_stdin,
507 int *_saved_stdout) {
508 int fd = -1, saved_stdin, saved_stdout = -1, r;
510 assert(_saved_stdin);
511 assert(_saved_stdout);
// F_DUPFD with 3 keeps the copies clear of the standard descriptors.
513 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
517 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
518 if (saved_stdout < 0) {
523 fd = acquire_terminal(
528 DEFAULT_CONFIRM_USEC);
534 r = chown_terminal(fd, getuid());
538 if (dup2(fd, STDIN_FILENO) < 0) {
543 if (dup2(fd, STDOUT_FILENO) < 0) {
551 *_saved_stdin = saved_stdin;
552 *_saved_stdout = saved_stdout;
557 safe_close(saved_stdout);
558 safe_close(saved_stdin);
// write_confirm_message: printf-style helper that opens /dev/console for
// writing and prints the formatted message to it with vdprintf(). The
// descriptor is declared with _cleanup_close_.
564 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
565 _cleanup_close_ int fd = -1;
570 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
574 va_start(ap, format);
575 vdprintf(fd, format, ap);
// restore_confirm_stdio: counterpart of setup_confirm_stdio(). Each saved
// descriptor that is valid (>= 0) is duplicated back onto stdin or stdout,
// and both saved descriptors are then closed with safe_close().
581 static int restore_confirm_stdio(int *saved_stdin,
587 assert(saved_stdout);
591 if (*saved_stdin >= 0)
592 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
595 if (*saved_stdout >= 0)
596 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
599 safe_close(*saved_stdin);
600 safe_close(*saved_stdout);
// ask_for_confirmation: switches stdin/stdout to the terminal, renders argv
// as a single command line and asks the user to choose between yes, no and
// skip. The chosen character is stored in *response by ask_char(). The
// original stdin/stdout are restored afterwards.
605 static int ask_for_confirmation(char *response, char **argv) {
606 int saved_stdout = -1, saved_stdin = -1, r;
607 _cleanup_free_ char *line = NULL;
609 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
613 line = exec_command_line(argv);
// The accepted answers are the characters y, n and s.
617 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
619 restore_confirm_stdio(&saved_stdin, &saved_stdout);
// enforce_groups: applies the group identity for the process. The visible
// steps: resolve context->group (when set) through get_group_creds(),
// initialise the group list with initgroups() when a username is given and
// gid is non-zero, apply gid with setresgid(), and finally build a gid
// list bounded by _SC_NGROUPS_MAX that includes the entries of
// context->supplementary_groups and install it with setgroups().
624 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
625 bool keep_groups = false;
630 /* Lookup and set GID and supplementary group list. Here too
631 * we avoid NSS lookups for gid=0. */
633 if (context->group || username) {
635 if (context->group) {
636 const char *g = context->group;
638 if ((r = get_group_creds(&g, &gid)) < 0)
642 /* First step, initialize groups from /etc/group */
643 if (username && gid != 0) {
644 if (initgroups(username, gid) < 0)
650 /* Second step, set our gids */
651 if (setresgid(gid, gid, gid) < 0)
655 if (context->supplementary_groups) {
660 /* Final step, initialize any manually set supplementary groups */
661 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
663 if (!(gids = new(gid_t, ngroups_max)))
// k is the number of gids currently held in the array.
667 if ((k = getgroups(ngroups_max, gids)) < 0) {
674 STRV_FOREACH(i, context->supplementary_groups) {
// Guard against running past the allocated ngroups_max slots.
677 if (k >= ngroups_max) {
// The resolved gid is written directly into slot k of the array.
683 r = get_group_creds(&g, gids+k);
692 if (setgroups(k, gids) < 0) {
// enforce_user: switches the process to uid with setresuid(). When the
// context carries a capability set, the keep-caps secure bit is enabled
// first and a copy of the capability set, extended by CAP_SETUID and
// CAP_SETPCAP in the effective and permitted flags, is applied so that the
// uid change and a later securebits change remain possible.
703 static int enforce_user(const ExecContext *context, uid_t uid) {
706 /* Sets (but doesn't lookup) the uid and make sure we keep the
707 * capabilities while doing so. */
709 if (context->capabilities) {
710 _cleanup_cap_free_ cap_t d = NULL;
711 static const cap_value_t bits[] = {
712 CAP_SETUID, /* Necessary so that we can run setresuid() below */
713 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
716 /* First step: If we need to keep capabilities but
717 * drop privileges we need to make sure we keep our
718 * caps, while we drop privileges. */
720 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
// Only write the secure bits when they differ from the current value.
722 if (prctl(PR_GET_SECUREBITS) != sb)
723 if (prctl(PR_SET_SECUREBITS, sb) < 0)
727 /* Second step: set the capabilities. This will reduce
728 * the capabilities to the minimum we need. */
// Work on a duplicate so the set stored in the context stays untouched.
730 d = cap_dup(context->capabilities);
734 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
735 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
738 if (cap_set_proc(d) < 0)
742 /* Third step: actually set the uids */
743 if (setresuid(uid, uid, uid) < 0)
746 /* At this point we should have all necessary capabilities but
747 are otherwise a normal user. However, the caps might have got
748 corrupted due to the setresuid() so we need clean them up
749 later. This is done outside of this call. */
// null_conv: PAM conversation callback. Per the comment in its body it does
// not support conversations; the value it returns is not visible here.
756 static int null_conv(
758 const struct pam_message **msg,
759 struct pam_response **resp,
762 /* We don't support conversations */
// setup_pam: opens a PAM session for the service. The visible sequence is
// pam_start(), pam_set_item(PAM_TTY), pam_acct_mgmt(), pam_open_session()
// and pam_getenvlist(). SIGTERM is then blocked and a helper child named
// (sd-pam) is set up which closes the passed fds, drops to uid, asks for
// SIGTERM on parent death, waits for that signal and closes the PAM session
// once the parent is gone. The parent restores its signal mask afterwards.
// On failure the session is closed again when it had been opened, and the
// helper (pam_pid) is sent SIGTERM and SIGCONT.
767 static int setup_pam(
773 int fds[], unsigned n_fds) {
775 static const struct pam_conv conv = {
780 pam_handle_t *handle = NULL;
782 int pam_code = PAM_SUCCESS;
785 bool close_session = false;
786 pid_t pam_pid = 0, parent_pid;
793 /* We set up PAM in the parent process, then fork. The child
794 * will then stay around until killed via PR_SET_PDEATHSIG or
795 * systemd via the cgroup logic. It will then remove the PAM
796 * session again. The parent process will exec() the actual
797 * daemon. We do things this way to ensure that the main PID
798 * of the daemon is the one we initially fork()ed. */
800 if (log_get_max_level() < LOG_DEBUG)
803 pam_code = pam_start(name, user, &conv, &handle);
804 if (pam_code != PAM_SUCCESS) {
810 pam_code = pam_set_item(handle, PAM_TTY, tty);
811 if (pam_code != PAM_SUCCESS)
815 pam_code = pam_acct_mgmt(handle, flags);
816 if (pam_code != PAM_SUCCESS)
819 pam_code = pam_open_session(handle, flags);
820 if (pam_code != PAM_SUCCESS)
// From here on a failure has to close the session again.
823 close_session = true;
825 e = pam_getenvlist(handle);
827 pam_code = PAM_BUF_ERR;
831 /* Block SIGTERM, so that we know that it won't get lost in
833 if (sigemptyset(&ss) < 0 ||
834 sigaddset(&ss, SIGTERM) < 0 ||
835 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
// Remembered so the child can later detect that its parent has gone.
838 parent_pid = getpid();
848 /* The child's job is to reset the PAM session on
851 /* This string must fit in 10 chars (i.e. the length
852 * of "/sbin/init"), to look pretty in /bin/ps */
853 rename_process("(sd-pam)");
855 /* Make sure we don't keep open the passed fds in this
856 child. We assume that otherwise only those fds are
857 open here that have been opened by PAM. */
858 close_many(fds, n_fds);
860 /* Drop privileges - we don't need any to pam_close_session
861 * and this will make PR_SET_PDEATHSIG work in most cases.
862 * If this fails, ignore the error - but expect sd-pam threads
863 * to fail to exit normally */
// NOTE(review): setresuid() reports failure through errno, but r is what
// gets passed to log_error_errno() below, and no assignment to r for this
// call is visible - confirm whether errno was intended.
864 if (setresuid(uid, uid, uid) < 0)
865 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
867 /* Wait until our parent died. This will only work if
868 * the above setresuid() succeeds, otherwise the kernel
869 * will not allow unprivileged parents to kill their privileged
870 * children this way. We rely on the control groups kill logic
871 * to do the rest for us. */
872 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
875 /* Check if our parent process might already have
877 if (getppid() == parent_pid) {
// SIGTERM is blocked, so it is collected synchronously here.
879 if (sigwait(&ss, &sig) < 0) {
886 assert(sig == SIGTERM);
891 /* If our parent died we'll end the session */
892 if (getppid() != parent_pid) {
893 pam_code = pam_close_session(handle, flags);
894 if (pam_code != PAM_SUCCESS)
901 pam_end(handle, pam_code | flags);
905 /* If the child was forked off successfully it will do all the
906 * cleanups, so forget about the handle here. */
909 /* Unblock SIGTERM again in the parent */
910 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
913 /* We close the log explicitly here, since the PAM modules
914 * might have opened it, but we don't want this fd around. */
923 if (pam_code != PAM_SUCCESS) {
924 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
925 err = -EPERM; /* PAM errors do not map to errno */
927 log_error_errno(errno, "PAM failed: %m");
933 pam_code = pam_close_session(handle, flags);
935 pam_end(handle, pam_code | flags);
// Terminate the helper; SIGCONT follows the SIGTERM.
943 kill(pam_pid, SIGTERM);
944 kill(pam_pid, SIGCONT);
// rename_process_from_path: gives the process a short name derived from
// path, wrapped in parentheses. The name buffer holds 11 bytes: the two
// parentheses, the copied part and the terminating NUL. A fallback name of
// (...) is also visible; the condition selecting it is not shown here.
951 static void rename_process_from_path(const char *path) {
952 char process_name[11];
956 /* This resulting string must fit in 10 chars (i.e. the length
957 * of "/sbin/init") to look pretty in /bin/ps */
961 rename_process("(...)");
967 /* The end of the process name is usually more
968 * interesting, since the first bit might just be
// Layout: opening parenthesis, l bytes from p, closing parenthesis, NUL.
974 process_name[0] = '(';
975 memcpy(process_name+1, p, l);
976 process_name[1+l] = ')';
977 process_name[1+l+1] = 0;
979 rename_process(process_name);
// apply_seccomp: builds and loads a seccomp system call filter from the
// context. The default action is negative_action for a whitelist and
// SCMP_ACT_ALLOW otherwise; every entry of syscall_filter gets one rule
// with the opposite action. negative_action is SCMP_ACT_KILL unless
// syscall_errno is non-zero, in which case that errno is returned instead.
// The filter context is released at the end.
984 static int apply_seccomp(const ExecContext *c) {
985 uint32_t negative_action, action;
986 scmp_filter_ctx *seccomp;
993 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
995 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
999 if (c->syscall_archs) {
1001 SET_FOREACH(id, c->syscall_archs, i) {
// Set entries are decoded with - 1, so they are apparently stored
// offset by one.
1002 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1010 r = seccomp_add_secondary_archs(seccomp);
1015 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1016 SET_FOREACH(id, c->syscall_filter, i) {
1017 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
// The filter attribute SCMP_FLTATR_CTL_NNP is set to 0 before loading.
1022 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1026 r = seccomp_load(seccomp);
1029 seccomp_release(seccomp);
// apply_address_families: builds and loads a seccomp filter that makes
// selected address families fail with EPROTONOSUPPORT. The default action
// is SCMP_ACT_ALLOW. In whitelist mode everything below the lowest and
// above the highest listed family is blocked, plus every unlisted family
// in the range 1 .. af_max()-1. In blacklist mode one rule is added per
// listed family. The filter context is released at the end.
// NOTE(review): the syscall each rule is attached to is not visible here;
// the rules compare its first argument (SCMP_A0).
1033 static int apply_address_families(const ExecContext *c) {
1034 scmp_filter_ctx *seccomp;
1040 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1044 r = seccomp_add_secondary_archs(seccomp);
1048 if (c->address_families_whitelist) {
// first/last stay 0 while no valid family has been seen.
1049 int af, first = 0, last = 0;
1052 /* If this is a whitelist, we first block the address
1053 * families that are out of range and then everything
1054 * that is not in the set. First, we find the lowest
1055 * and highest address family in the set. */
1057 SET_FOREACH(afp, c->address_families, i) {
1058 af = PTR_TO_INT(afp);
// Entries outside the valid range are tested for here.
1060 if (af <= 0 || af >= af_max())
1063 if (first == 0 || af < first)
1066 if (last == 0 || af > last)
// Either both bounds were found or neither was.
1070 assert((first == 0) == (last == 0));
1074 /* No entries in the valid range, block everything */
1075 r = seccomp_rule_add(
1077 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1085 /* Block everything below the first entry */
1086 r = seccomp_rule_add(
1088 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1091 SCMP_A0(SCMP_CMP_LT, first));
1095 /* Block everything above the last entry */
1096 r = seccomp_rule_add(
1098 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1101 SCMP_A0(SCMP_CMP_GT, last));
1105 /* Block everything between the first and last
1107 for (af = 1; af < af_max(); af++) {
// Membership in the configured set is tested here.
1109 if (set_contains(c->address_families, INT_TO_PTR(af)))
1112 r = seccomp_rule_add(
1114 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1117 SCMP_A0(SCMP_CMP_EQ, af));
1126 /* If this is a blacklist, then generate one rule for
1127 * each address family that are then combined in OR
1130 SET_FOREACH(af, c->address_families, i) {
1132 r = seccomp_rule_add(
1134 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1137 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
// The filter attribute SCMP_FLTATR_CTL_NNP is set to 0 before loading.
1143 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1147 r = seccomp_load(seccomp);
1150 seccomp_release(seccomp);
// do_idle_pipe_dance: idle synchronisation with the manager through four
// pipe descriptors. Entries 1 and 2 are closed right away. If entry 0 is
// valid, the function waits up to IDLE_TIMEOUT_USEC for a hangup on it; on
// timeout it writes one byte to entry 3 (when valid) and waits up to
// IDLE_TIMEOUT2_USEC more. Entries 0 and 3 are closed afterwards.
1156 static void do_idle_pipe_dance(int idle_pipe[4]) {
1160 safe_close(idle_pipe[1]);
1161 safe_close(idle_pipe[2]);
1163 if (idle_pipe[0] >= 0) {
1166 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1168 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1169 /* Signal systemd that we are bored and want to continue. */
// The result of this write is not checked.
1170 write(idle_pipe[3], "x", 1);
1172 /* Wait for systemd to react to the signal above. */
1173 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1176 safe_close(idle_pipe[0]);
1180 safe_close(idle_pipe[3]);
// build_environment: assembles the environment entries generated for the
// child in a freshly allocated, NULL-terminated string array of 10 slots.
// The visible entries are LISTEN_PID, LISTEN_FDS, WATCHDOG_PID,
// WATCHDOG_USEC, HOME, LOGNAME, USER, SHELL and a terminal type entry, which
// is nine strings plus the terminator. Most of the conditions guarding the
// individual entries are not visible here.
1183 static int build_environment(
1184 const ExecContext *c,
1186 usec_t watchdog_usec,
1188 const char *username,
1192 _cleanup_strv_free_ char **our_env = NULL;
// Zero-initialised, so partially filled arrays stay NULL-terminated.
1199 our_env = new0(char*, 10);
1204 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1206 our_env[n_env++] = x;
1208 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1210 our_env[n_env++] = x;
1213 if (watchdog_usec > 0) {
1214 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1216 our_env[n_env++] = x;
1218 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1220 our_env[n_env++] = x;
1224 x = strappend("HOME=", home);
1227 our_env[n_env++] = x;
1231 x = strappend("LOGNAME=", username);
1234 our_env[n_env++] = x;
1236 x = strappend("USER=", username);
1239 our_env[n_env++] = x;
1243 x = strappend("SHELL=", shell);
1246 our_env[n_env++] = x;
// A terminal type is only exported when some standard stream is a TTY.
1249 if (is_terminal_input(c->std_input) ||
1250 c->std_output == EXEC_OUTPUT_TTY ||
1251 c->std_error == EXEC_OUTPUT_TTY ||
1254 x = strdup(default_term_for_tty(tty_path(c)));
1257 our_env[n_env++] = x;
1260 our_env[n_env++] = NULL;
// Must match the slot count passed to new0() above.
1261 assert(n_env <= 10);
// exec_child: prepares the process and finally calls execve() on
// command->path. The visible steps, in order: rename the process, reset
// signal handling, idle pipe wait, close unneeded fds, terminal reset,
// optional confirmation prompt, resolve user credentials, wire up stdin,
// stdout and stderr, cgroup attach, OOM score, nice level, CPU scheduler,
// CPU affinity, I/O priority, timer slack, personality, utmp record,
// terminal ownership, bus endpoint policy, cgroup delegation, runtime
// directories, groups, umask, PAM, network namespace, mount namespace,
// chroot and chdir, SELinux label lookup, fd closing and shifting, rlimits,
// capability bounding set, SMACK label, uid switch, secure bits,
// capabilities, no_new_privs, address family and syscall filters, SELinux
// and AppArmor exec labels, environment assembly and argv expansion.
// On the failure paths that are visible, *error receives an EXIT_ code
// naming the step that failed.
1269 static int exec_child(ExecCommand *command,
1270 const ExecContext *context,
1271 const ExecParameters *params,
1272 ExecRuntime *runtime,
1275 int *fds, unsigned n_fds,
1279 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1280 _cleanup_free_ char *mac_selinux_context_net = NULL;
1281 const char *username = NULL, *home = NULL, *shell = NULL;
1282 unsigned n_dont_close = 0;
// Room for the n_fds passed descriptors plus up to four extras collected
// below: socket_fd, the bus endpoint fd and the two netns sockets.
1283 int dont_close[n_fds + 4];
1284 uid_t uid = UID_INVALID;
1285 gid_t gid = GID_INVALID;
1293 rename_process_from_path(command->path);
1295 /* We reset exactly these signals, since they are the
1296 * only ones we set to SIG_IGN in the main daemon. All
1297 * others we leave untouched because we set them to
1298 * SIG_DFL or a valid handler initially, both of which
1299 * will be demoted to SIG_DFL. */
1300 default_signals(SIGNALS_CRASH_HANDLER,
1301 SIGNALS_IGNORE, -1);
1303 if (context->ignore_sigpipe)
1304 ignore_signals(SIGPIPE, -1);
1306 err = reset_signal_mask();
1308 *error = EXIT_SIGNAL_MASK;
1312 if (params->idle_pipe)
1313 do_idle_pipe_dance(params->idle_pipe);
1315 /* Close sockets very early to make sure we don't
1316 * block init reexecution because it cannot bind its
1321 dont_close[n_dont_close++] = socket_fd;
1323 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324 n_dont_close += n_fds;
1326 if (params->bus_endpoint_fd >= 0)
1327 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1329 if (runtime->netns_storage_socket[0] >= 0)
1330 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331 if (runtime->netns_storage_socket[1] >= 0)
1332 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1335 err = close_all_fds(dont_close, n_dont_close);
1341 if (!context->same_pgrp)
1343 *error = EXIT_SETSID;
1347 exec_context_tty_reset(context);
1349 if (params->confirm_spawn) {
1352 err = ask_for_confirmation(&response, argv);
1353 if (err == -ETIMEDOUT)
1354 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1356 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1357 else if (response == 's') {
1358 write_confirm_message("Skipping execution.\n");
1359 *error = EXIT_CONFIRM;
1361 } else if (response == 'n') {
1362 write_confirm_message("Failing execution.\n");
1368 if (context->user) {
1369 username = context->user;
1370 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1377 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1378 * must make sure to drop O_NONBLOCK */
1380 fd_nonblock(socket_fd, false);
1382 err = setup_input(context, socket_fd, params->apply_tty_stdin);
1384 *error = EXIT_STDIN;
1388 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1390 *error = EXIT_STDOUT;
1394 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1396 *error = EXIT_STDERR;
1400 if (params->cgroup_path) {
1401 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1403 *error = EXIT_CGROUP;
1408 if (context->oom_score_adjust_set) {
1411 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1414 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1415 *error = EXIT_OOM_ADJUST;
1420 if (context->nice_set)
1421 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1426 if (context->cpu_sched_set) {
1427 struct sched_param param = {
1428 .sched_priority = context->cpu_sched_priority,
1431 err = sched_setscheduler(0,
1432 context->cpu_sched_policy |
1433 (context->cpu_sched_reset_on_fork ?
1434 SCHED_RESET_ON_FORK : 0),
1437 *error = EXIT_SETSCHEDULER;
1442 if (context->cpuset)
1443 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1444 *error = EXIT_CPUAFFINITY;
1448 if (context->ioprio_set)
1449 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1450 *error = EXIT_IOPRIO;
1454 if (context->timer_slack_nsec != NSEC_INFINITY)
1455 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1456 *error = EXIT_TIMERSLACK;
1460 if (context->personality != 0xffffffffUL)
1461 if (personality(context->personality) < 0) {
1462 *error = EXIT_PERSONALITY;
1466 if (context->utmp_id)
1467 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1469 if (context->user && is_terminal_input(context->std_input)) {
1470 err = chown_terminal(STDIN_FILENO, uid);
1472 *error = EXIT_STDIN;
1478 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
// Without a configured user the endpoint policy is set for uid 0.
1479 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1481 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1483 *error = EXIT_BUS_ENDPOINT;
1489 /* If delegation is enabled we'll pass ownership of the cgroup
1490 * (but only in systemd's own controller hierarchy!) to the
1491 * user of the new process. */
1492 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1493 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1495 *error = EXIT_CGROUP;
1500 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1502 *error = EXIT_CGROUP;
1507 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1510 STRV_FOREACH(rt, context->runtime_directory) {
1511 _cleanup_free_ char *p;
1513 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1515 *error = EXIT_RUNTIME_DIRECTORY;
1519 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1521 *error = EXIT_RUNTIME_DIRECTORY;
1527 if (params->apply_permissions) {
1528 err = enforce_groups(context, username, gid);
1530 *error = EXIT_GROUP;
1535 umask(context->umask);
1538 if (params->apply_permissions && context->pam_name && username) {
1539 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1547 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1548 err = setup_netns(runtime->netns_storage_socket);
1550 *error = EXIT_NETWORK;
1555 if (!strv_isempty(context->read_write_dirs) ||
1556 !strv_isempty(context->read_only_dirs) ||
1557 !strv_isempty(context->inaccessible_dirs) ||
1558 context->mount_flags != 0 ||
1559 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1560 params->bus_endpoint_path ||
1561 context->private_devices ||
1562 context->protect_system != PROTECT_SYSTEM_NO ||
1563 context->protect_home != PROTECT_HOME_NO) {
1565 char *tmp = NULL, *var = NULL;
1567 /* The runtime struct only contains the parent
1568 * of the private /tmp, which is
1569 * non-accessible to world users. Inside of it
1570 * there's a /tmp that is sticky, and that's
1571 * the one we want to use here. */
1573 if (context->private_tmp && runtime) {
1574 if (runtime->tmp_dir)
1575 tmp = strappenda(runtime->tmp_dir, "/tmp");
1576 if (runtime->var_tmp_dir)
1577 var = strappenda(runtime->var_tmp_dir, "/tmp");
1580 err = setup_namespace(
1581 context->read_write_dirs,
1582 context->read_only_dirs,
1583 context->inaccessible_dirs,
1586 params->bus_endpoint_path,
1587 context->private_devices,
1588 context->protect_home,
1589 context->protect_system,
1590 context->mount_flags);
1593 log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1595 *error = EXIT_NAMESPACE;
1600 if (params->apply_chroot) {
1601 if (context->root_directory)
1602 if (chroot(context->root_directory) < 0) {
1603 *error = EXIT_CHROOT;
1607 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1608 *error = EXIT_CHDIR;
1612 _cleanup_free_ char *d = NULL;
// Without chroot the working directory is resolved below the root directory.
1614 if (asprintf(&d, "%s/%s",
1615 context->root_directory ? context->root_directory : "",
1616 context->working_directory ? context->working_directory : "") < 0) {
1617 *error = EXIT_MEMORY;
1622 *error = EXIT_CHDIR;
1628 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1629 err = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1631 *error = EXIT_SELINUX_CONTEXT;
1637 /* We repeat the fd closing here, to make sure that
1638 * nothing is leaked from the PAM modules. Note that
1639 * we are more aggressive this time since socket_fd
1640 * and the netns fds we don't need anymore. The custom
1641 * endpoint fd was needed to upload the policy and can
1642 * now be closed as well. */
1643 err = close_all_fds(fds, n_fds);
1645 err = shift_fds(fds, n_fds);
1647 err = flags_fds(fds, n_fds, context->non_blocking);
1653 if (params->apply_permissions) {
1655 for (i = 0; i < _RLIMIT_MAX; i++) {
1656 if (!context->rlimit[i])
1659 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1660 *error = EXIT_LIMITS;
1665 if (context->capability_bounding_set_drop) {
1666 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1668 *error = EXIT_CAPABILITIES;
1674 if (context->smack_process_label) {
1675 err = mac_smack_apply_pid(0, context->smack_process_label);
1677 *error = EXIT_SMACK_PROCESS_LABEL;
1683 if (context->user) {
1684 err = enforce_user(context, uid);
1691 /* PR_GET_SECUREBITS is not privileged, while
1692 * PR_SET_SECUREBITS is. So to suppress
1693 * potential EPERMs we'll try not to call
1694 * PR_SET_SECUREBITS unless necessary. */
1695 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1696 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1697 *error = EXIT_SECUREBITS;
1701 if (context->capabilities)
1702 if (cap_set_proc(context->capabilities) < 0) {
1703 *error = EXIT_CAPABILITIES;
1707 if (context->no_new_privileges)
1708 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1709 *error = EXIT_NO_NEW_PRIVILEGES;
1714 if (context->address_families_whitelist ||
1715 !set_isempty(context->address_families)) {
1716 err = apply_address_families(context);
1718 *error = EXIT_ADDRESS_FAMILIES;
1723 if (context->syscall_whitelist ||
1724 !set_isempty(context->syscall_filter) ||
1725 !set_isempty(context->syscall_archs)) {
1726 err = apply_seccomp(context);
1728 *error = EXIT_SECCOMP;
1735 if (mac_selinux_use()) {
// The label computed from the socket takes precedence over the configured one.
1736 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1739 err = setexeccon(exec_context);
1741 *error = EXIT_SELINUX_CONTEXT;
1748 #ifdef HAVE_APPARMOR
1749 if (context->apparmor_profile && mac_apparmor_use()) {
1750 err = aa_change_onexec(context->apparmor_profile);
1751 if (err < 0 && !context->apparmor_profile_ignore) {
1752 *error = EXIT_APPARMOR_PROFILE;
1759 err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1761 *error = EXIT_MEMORY;
1765 final_env = strv_env_merge(5,
1766 params->environment,
1768 context->environment,
1773 *error = EXIT_MEMORY;
1777 final_argv = replace_env_argv(argv, final_env);
1779 *error = EXIT_MEMORY;
1783 final_env = strv_env_clean(final_env);
1785 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1786 _cleanup_free_ char *line;
1788 line = exec_command_line(final_argv);
1791 log_unit_struct(params->unit_id,
1793 "EXECUTABLE=%s", command->path,
1794 LOG_MESSAGE("Executing: %s", line),
1799 execve(command->path, final_argv, final_env);
/* Starts one ExecCommand on behalf of a unit: checks the fds handed in via
 * params, loads the environment files of the context, calls exec_child() and
 * finally does the bookkeeping for the new PID (cgroup attachment and start
 * timestamp).
 * NOTE(review): this excerpt does not show every line of the function (the
 * fork itself and the return paths are among the missing ones) -- confirm
 * details against the full file. */
1804 int exec_spawn(ExecCommand *command,
1805 const ExecContext *context,
1806 const ExecParameters *params,
1807 ExecRuntime *runtime,
1810 _cleanup_strv_free_ char **files_env = NULL;
1811 int *fds = NULL; unsigned n_fds = 0;
/* A non-zero fd count is only valid together with an fd array */
1821 assert(params->fds || params->n_fds <= 0);
/* If any of stdin/stdout/stderr is connected to a socket, exactly one fd
 * must have been passed; that fd is then used as the socket */
1823 if (context->std_input == EXEC_INPUT_SOCKET ||
1824 context->std_output == EXEC_OUTPUT_SOCKET ||
1825 context->std_error == EXEC_OUTPUT_SOCKET) {
1827 if (params->n_fds != 1)
1830 socket_fd = params->fds[0];
1834 n_fds = params->n_fds;
1837 err = exec_context_load_environment(context, params->unit_id, &files_env);
1839 log_unit_struct(params->unit_id,
1841 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
/* An argv supplied through params takes precedence over the command's own */
1847 argv = params->argv ?: command->argv;
1849 line = exec_command_line(argv);
1853 log_unit_struct(params->unit_id,
1855 "EXECUTABLE=%s", command->path,
1856 LOG_MESSAGE("About to execute: %s", line),
1867 err = exec_child(command,
1879 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1880 "EXECUTABLE=%s", command->path,
1881 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1882 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1883 command->path, strerror(-err)),
1892 log_unit_struct(params->unit_id,
1894 LOG_MESSAGE("Forked %s as "PID_FMT,
1895 command->path, pid),
1898 /* We add the new process to the cgroup both in the child (so
1899 * that we can be sure that no user code is ever executed
1900 * outside of the cgroup) and in the parent (so that we can be
1901 * sure that when we kill the cgroup the process will be
1903 if (params->cgroup_path)
1904 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1906 exec_status_start(&command->exec_status, pid);
/* Fills in the ExecContext settings whose default is not plain zero. */
1912 void exec_context_init(ExecContext *c) {
1916 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1917 c->cpu_sched_policy = SCHED_OTHER;
1918 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1919 c->syslog_level_prefix = true;
1920 c->ignore_sigpipe = true;
/* NSEC_INFINITY and 0xffffffffUL serve as "not configured" markers:
 * exec_context_dump() prints these two settings only when they differ
 * from these values */
1921 c->timer_slack_nsec = NSEC_INFINITY;
1922 c->personality = 0xffffffffUL;
1923 c->runtime_directory_mode = 0755;
/* Releases the members owned by an ExecContext (string vectors, strings,
 * capability set, CPU set, sets, bus endpoint). Pointers are reset to NULL
 * after being freed, so a second call does not free them twice.
 * NOTE(review): not every line of the function is visible in this excerpt. */
1926 void exec_context_done(ExecContext *c) {
1931 strv_free(c->environment);
1932 c->environment = NULL;
1934 strv_free(c->environment_files);
1935 c->environment_files = NULL;
/* One optional entry per resource limit */
1937 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1939 c->rlimit[l] = NULL;
1942 free(c->working_directory);
1943 c->working_directory = NULL;
1944 free(c->root_directory);
1945 c->root_directory = NULL;
1950 free(c->syslog_identifier);
1951 c->syslog_identifier = NULL;
1959 strv_free(c->supplementary_groups);
1960 c->supplementary_groups = NULL;
/* The capability set is freed with cap_free() rather than free() */
1965 if (c->capabilities) {
1966 cap_free(c->capabilities);
1967 c->capabilities = NULL;
1970 strv_free(c->read_only_dirs);
1971 c->read_only_dirs = NULL;
1973 strv_free(c->read_write_dirs);
1974 c->read_write_dirs = NULL;
1976 strv_free(c->inaccessible_dirs);
1977 c->inaccessible_dirs = NULL;
1980 CPU_FREE(c->cpuset);
1985 free(c->selinux_context);
1986 c->selinux_context = NULL;
1988 free(c->apparmor_profile);
1989 c->apparmor_profile = NULL;
1991 set_free(c->syscall_filter);
1992 c->syscall_filter = NULL;
1994 set_free(c->syscall_archs);
1995 c->syscall_archs = NULL;
1997 set_free(c->address_families);
1998 c->address_families = NULL;
2000 strv_free(c->runtime_directory);
2001 c->runtime_directory = NULL;
2003 bus_endpoint_free(c->bus_endpoint);
2004 c->bus_endpoint = NULL;
/* Removes the runtime directories of a context: for each entry of
 * c->runtime_directory the path "<runtime_prefix>/<entry>" is built and
 * removed with rm_rf(). The case of a NULL runtime_prefix is handled
 * separately before the loop (the handling itself is not visible in this
 * excerpt). */
2007 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2012 if (!runtime_prefix)
2015 STRV_FOREACH(i, c->runtime_directory) {
2016 _cleanup_free_ char *p;
2018 p = strjoin(runtime_prefix, "/", *i, NULL);
2022 /* We execute this synchronously, since we need to be
2023 * sure this is gone when we start the service
2025 rm_rf(p, false, true, false);
/* Per-command cleanup, invoked by exec_command_done_array() and
 * exec_command_free_list().
 * NOTE(review): the body is not visible in this excerpt; presumably it frees
 * the command's path and argv -- confirm. */
2031 void exec_command_done(ExecCommand *c) {
/* Runs exec_command_done() on each of the n commands stored in the array c. */
2041 void exec_command_done_array(ExecCommand *c, unsigned n) {
2044 for (i = 0; i < n; i++)
2045 exec_command_done(c+i);
/* Tears down the command list headed by c: entries are unlinked from the
 * list and passed to exec_command_done().
 * NOTE(review): the loop header, the freeing of the entries and the return
 * statement are not visible in this excerpt; exec_command_free_array() stores
 * the return value back into the list head, so it is presumably NULL. */
2048 ExecCommand* exec_command_free_list(ExecCommand *c) {
2052 LIST_REMOVE(command, c, i);
2053 exec_command_done(i);
/* Frees each of the n command lists in c with exec_command_free_list() and
 * stores that function's return value back into the respective slot. */
2060 void exec_command_free_array(ExecCommand **c, unsigned n) {
2063 for (i = 0; i < n; i++)
2064 c[i] = exec_command_free_list(c[i]);
/* Context passed as userdata to invalid_env(): identifies the unit to log for
 * (and, via a "path" member used by invalid_env(), the file being parsed). */
2067 typedef struct InvalidEnvInfo {
2068 const char *unit_id;
/* Callback handed to strv_env_clean_with_callback(): logs the rejected
 * assignment p for the unit, together with the path of the file it came
 * from. userdata must point to an InvalidEnvInfo. */
2072 static void invalid_env(const char *p, void *userdata) {
2073 InvalidEnvInfo *info = userdata;
2075 log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
/* Reads the environment files configured in c->environment_files. Each entry
 * is expanded with glob(); every matching file is parsed with
 * load_env_file(), cleaned up (invalid assignments are logged through
 * invalid_env()) and merged into the accumulated result r.
 * NOTE(review): parts of the function, including how the result is handed
 * back through l, are not visible in this excerpt. */
2078 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2079 char **i, **r = NULL;
2084 STRV_FOREACH(i, c->environment_files) {
2087 bool ignore = false;
2089 _cleanup_globfree_ glob_t pglob = {};
2099 if (!path_is_absolute(fn)) {
2107 /* Filename supports globbing, take all matching files */
2109 if (glob(fn, 0, NULL, &pglob) != 0) {
/* Fall back to -EINVAL when errno carries no error code */
2114 return errno ? -errno : -EINVAL;
2116 count = pglob.gl_pathc;
2124 for (n = 0; n < count; n++) {
2125 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2133 /* Log invalid environment variables with filename */
2135 InvalidEnvInfo info = {
2137 .path = pglob.gl_pathv[n]
2140 p = strv_env_clean_with_callback(p, invalid_env, &info);
/* Merge the entries of this file into what was collected so far */
2148 m = strv_env_merge(2, r, p);
/* Tells whether the given tty might be the device behind /dev/console.
 * The answer is conservative: the existing comments below state that an
 * unresolvable console counts as a possible match, and "tty0" as console
 * matches any tty for which tty_is_vc() is true. */
2164 static bool tty_may_match_dev_console(const char *tty) {
2165 _cleanup_free_ char *active = NULL;
2168 if (startswith(tty, "/dev/"))
2171 /* trivial identity? */
2172 if (streq(tty, "console"))
2175 console = resolve_dev_console(&active);
2176 /* if we could not resolve, assume it may */
2180 /* "tty0" means the active VC, so it may be the same sometimes */
2181 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true if the context does anything with its TTY (reset, vhangup,
 * VT disallocation, or terminal-based stdin/stdout/stderr) and that TTY may
 * be the same device as /dev/console. */
2184 bool exec_context_may_touch_console(ExecContext *ec) {
2185 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2186 is_terminal_input(ec->std_input) ||
2187 is_terminal_output(ec->std_output) ||
2188 is_terminal_output(ec->std_error)) &&
2189 tty_may_match_dev_console(tty_path(ec));
/* Writes the strings of l to f, each one preceded by a single space (no
 * trailing newline is written by the visible code). */
2192 static void strv_fprintf(FILE *f, char **l) {
2198 fprintf(f, " %s", *g);
/* Writes a human-readable dump of the execution context to f, as
 * "Key: value" lines that each start with prefix (a NULL prefix is treated
 * as the empty string). Optional settings are only printed when configured.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt. */
2201 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2208 prefix = strempty(prefix);
2212 "%sWorkingDirectory: %s\n"
2213 "%sRootDirectory: %s\n"
2214 "%sNonBlocking: %s\n"
2215 "%sPrivateTmp: %s\n"
2216 "%sPrivateNetwork: %s\n"
2217 "%sPrivateDevices: %s\n"
2218 "%sProtectHome: %s\n"
2219 "%sProtectSystem: %s\n"
2220 "%sIgnoreSIGPIPE: %s\n",
2222 prefix, c->working_directory ? c->working_directory : "/",
2223 prefix, c->root_directory ? c->root_directory : "/",
2224 prefix, yes_no(c->non_blocking),
2225 prefix, yes_no(c->private_tmp),
2226 prefix, yes_no(c->private_network),
2227 prefix, yes_no(c->private_devices),
2228 prefix, protect_home_to_string(c->protect_home),
2229 prefix, protect_system_to_string(c->protect_system),
2230 prefix, yes_no(c->ignore_sigpipe));
2232 STRV_FOREACH(e, c->environment)
2233 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2235 STRV_FOREACH(e, c->environment_files)
2236 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2243 if (c->oom_score_adjust_set)
2245 "%sOOMScoreAdjust: %i\n",
2246 prefix, c->oom_score_adjust);
/* For resource limits only the rlim_max value is printed */
2248 for (i = 0; i < RLIM_NLIMITS; i++)
2250 fprintf(f, "%s%s: "RLIM_FMT"\n",
2251 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2253 if (c->ioprio_set) {
2254 _cleanup_free_ char *class_str = NULL;
2256 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2258 "%sIOSchedulingClass: %s\n"
2259 "%sIOPriority: %i\n",
2260 prefix, strna(class_str),
2261 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2264 if (c->cpu_sched_set) {
2265 _cleanup_free_ char *policy_str = NULL;
2267 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2269 "%sCPUSchedulingPolicy: %s\n"
2270 "%sCPUSchedulingPriority: %i\n"
2271 "%sCPUSchedulingResetOnFork: %s\n",
2272 prefix, strna(policy_str),
2273 prefix, c->cpu_sched_priority,
2274 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* List the index of every CPU that is set in the affinity mask */
2278 fprintf(f, "%sCPUAffinity:", prefix);
2279 for (i = 0; i < c->cpuset_ncpus; i++)
2280 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2281 fprintf(f, " %u", i);
2285 if (c->timer_slack_nsec != NSEC_INFINITY)
2286 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2289 "%sStandardInput: %s\n"
2290 "%sStandardOutput: %s\n"
2291 "%sStandardError: %s\n",
2292 prefix, exec_input_to_string(c->std_input),
2293 prefix, exec_output_to_string(c->std_output),
2294 prefix, exec_output_to_string(c->std_error));
2300 "%sTTYVHangup: %s\n"
2301 "%sTTYVTDisallocate: %s\n",
2302 prefix, c->tty_path,
2303 prefix, yes_no(c->tty_reset),
2304 prefix, yes_no(c->tty_vhangup),
2305 prefix, yes_no(c->tty_vt_disallocate));
/* The syslog facility/level are only shown if stdout or stderr goes to
 * syslog, kmsg or the journal (with or without console) */
2307 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2308 c->std_output == EXEC_OUTPUT_KMSG ||
2309 c->std_output == EXEC_OUTPUT_JOURNAL ||
2310 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2311 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2312 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2313 c->std_error == EXEC_OUTPUT_SYSLOG ||
2314 c->std_error == EXEC_OUTPUT_KMSG ||
2315 c->std_error == EXEC_OUTPUT_JOURNAL ||
2316 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2317 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2318 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2320 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
/* syslog_priority combines both values: the facility is taken from the
 * bits above the low three, the level via LOG_PRI() */
2322 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2323 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2326 "%sSyslogFacility: %s\n"
2327 "%sSyslogLevel: %s\n",
2328 prefix, strna(fac_str),
2329 prefix, strna(lvl_str));
2332 if (c->capabilities) {
2333 _cleanup_cap_free_charp_ char *t;
2335 t = cap_to_text(c->capabilities, NULL);
2337 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* List the secure bits that are set, by name. Every name carries its own
 * leading space so that the flags stay separated from the "Secure Bits:"
 * label and from each other. */
2341 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2343 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2344 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2345 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2346 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2347 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2348 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* capability_bounding_set_drop is a bit mask of capabilities to drop; the
 * line printed here lists the capabilities whose bit is NOT set, i.e. the
 * ones that remain in the bounding set */
2350 if (c->capability_bounding_set_drop) {
2352 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2354 for (l = 0; l <= cap_last_cap(); l++)
2355 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2356 fprintf(f, " %s", strna(capability_to_name(l)));
2362 fprintf(f, "%sUser: %s\n", prefix, c->user);
2364 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2366 if (strv_length(c->supplementary_groups) > 0) {
2367 fprintf(f, "%sSupplementaryGroups:", prefix);
2368 strv_fprintf(f, c->supplementary_groups);
2373 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2375 if (strv_length(c->read_write_dirs) > 0) {
2376 fprintf(f, "%sReadWriteDirs:", prefix);
2377 strv_fprintf(f, c->read_write_dirs);
2381 if (strv_length(c->read_only_dirs) > 0) {
2382 fprintf(f, "%sReadOnlyDirs:", prefix);
2383 strv_fprintf(f, c->read_only_dirs);
2387 if (strv_length(c->inaccessible_dirs) > 0) {
2388 fprintf(f, "%sInaccessibleDirs:", prefix);
2389 strv_fprintf(f, c->inaccessible_dirs);
2395 "%sUtmpIdentifier: %s\n",
2396 prefix, c->utmp_id);
/* A "-" in front of the SELinux context (and of the AppArmor profile
 * further down) reflects the respective *_ignore flag */
2398 if (c->selinux_context)
2400 "%sSELinuxContext: %s%s\n",
2401 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2403 if (c->personality != 0xffffffffUL)
2405 "%sPersonality: %s\n",
2406 prefix, strna(personality_to_string(c->personality)));
2408 if (c->syscall_filter) {
2416 "%sSystemCallFilter: ",
2419 if (!c->syscall_whitelist)
2423 SET_FOREACH(id, c->syscall_filter, j) {
2424 _cleanup_free_ char *name = NULL;
/* Set entries hold the syscall number plus one, hence the "- 1" when
 * resolving the name (presumably so that syscall 0 is not stored as a
 * NULL pointer -- confirm) */
2431 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2432 fputs(strna(name), f);
2439 if (c->syscall_archs) {
2446 "%sSystemCallArchitectures:",
/* Architecture entries are stored with the same "+ 1" offset */
2450 SET_FOREACH(id, c->syscall_archs, j)
2451 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2456 if (c->syscall_errno != 0)
2458 "%sSystemCallErrorNumber: %s\n",
2459 prefix, strna(errno_to_name(c->syscall_errno)));
2461 if (c->apparmor_profile)
2463 "%sAppArmorProfile: %s%s\n",
2464 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2467 bool exec_context_maintains_privileges(ExecContext *c) {
2470 /* Returns true if the process forked off would run under
2471 * an unchanged UID or as root. */
/* A user configured as "root" or "0" is handled as the root case */
2476 if (streq(c->user, "root") || streq(c->user, "0"))
/* Marks the start of process pid in the status record s; the visible part
 * takes the start timestamp.
 * NOTE(review): the remaining lines (presumably resetting s and storing pid)
 * are not visible in this excerpt. */
2482 void exec_status_start(ExecStatus *s, pid_t pid) {
2487 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in s: takes the exit timestamp,
 * writes a utmp "dead process" record if the context has a utmp_id, and
 * resets the TTY of the context.
 * NOTE(review): storing of code/status and the guards around the context
 * accesses are not visible in this excerpt. */
2490 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record currently describes a different PID */
2493 if (s->pid && s->pid != pid)
2497 dual_timestamp_get(&s->exit_timestamp);
2503 if (context->utmp_id)
2504 utmp_put_dead_process(context->utmp_id, pid, code, status);
2506 exec_context_tty_reset(context);
/* Prints the status record s to f, every line starting with prefix (NULL is
 * treated as empty): the PID, then the start timestamp and the exit
 * timestamp/code/status, each only if the respective realtime timestamp is
 * set (> 0). */
2510 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer reused for formatting both timestamps */
2511 char buf[FORMAT_TIMESTAMP_MAX];
2519 prefix = strempty(prefix);
2522 "%sPID: "PID_FMT"\n",
2525 if (s->start_timestamp.realtime > 0)
2527 "%sStart Timestamp: %s\n",
2528 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2530 if (s->exit_timestamp.realtime > 0)
2532 "%sExit Timestamp: %s\n"
2534 "%sExit Status: %i\n",
2535 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2536 prefix, sigchld_code_to_string(s->code),
/* Formats argv as one newly allocated string. Works in two passes over
 * argv: the first determines the buffer size k, the second fills the buffer.
 * Arguments containing whitespace are treated specially in the second pass.
 * Callers in this file treat a NULL result as out-of-memory. */
2540 char *exec_command_line(char **argv) {
2548 STRV_FOREACH(a, argv)
2551 if (!(n = new(char, k)))
2555 STRV_FOREACH(a, argv) {
2562 if (strpbrk(*a, WHITESPACE)) {
2573 /* FIXME: this doesn't really handle arguments that have
2574 * spaces and ticks in them */
/* Prints one command to f: its command line, followed by its exec status
 * indented by one additional tab. If the command line cannot be built, the
 * error text for ENOMEM is printed in its place. */
2579 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2580 _cleanup_free_ char *cmd = NULL;
2581 const char *prefix2;
2586 prefix = strempty(prefix);
2587 prefix2 = strappenda(prefix, "\t");
2589 cmd = exec_command_line(c->argv);
2591 "%sCommand Line: %s\n",
2592 prefix, cmd ? cmd : strerror(ENOMEM));
2594 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at c with exec_command_dump().
 * A NULL prefix is treated as the empty string. */
2597 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2600 prefix = strempty(prefix);
2602 LIST_FOREACH(command, c, c)
2603 exec_command_dump(c, f, prefix);
/* Appends command e at the end of the list *l: the current tail is looked
 * up first and e is inserted right after it. */
2606 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2613 /* It's kind of important, that we keep the order here */
2614 LIST_FIND_TAIL(command, *l, end);
2615 LIST_INSERT_AFTER(command, *l, end, e);
/* Variadic: builds a string vector from path and the arguments following it
 * (strv_new_ap()).
 * NOTE(review): the rest of the body is not visible in this excerpt;
 * presumably the vector replaces the command's current path/argv -- confirm. */
2620 int exec_command_set(ExecCommand *c, const char *path, ...) {
2628 l = strv_new_ap(path, ap);
/* Variadic: collects path and the arguments following it into a temporary
 * string vector and appends that vector to c->argv. The temporary vector is
 * released automatically (_cleanup_strv_free_). */
2649 int exec_command_append(ExecCommand *c, const char *path, ...) {
2650 _cleanup_strv_free_ char **l = NULL;
2658 l = strv_new_ap(path, ap);
2664 r = strv_extend_strv(&c->argv, l);
/* Allocates a zero-initialized ExecRuntime into *rt and marks both ends of
 * the netns storage socket pair as "no fd" (-1).
 * NOTE(review): callers in this file invoke this before every use of *rt, so
 * it presumably returns early if *rt is already set; that check and the
 * error handling are not visible in this excerpt. */
2672 static int exec_runtime_allocate(ExecRuntime **rt) {
2677 *rt = new0(ExecRuntime, 1);
2682 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepares the runtime object for a context that uses PrivateNetwork or
 * PrivateTmp: makes sure *rt is allocated, creates the netns storage socket
 * pair if private networking is requested and no socket exists yet, and sets
 * up the private tmp directories if requested and not yet present. */
2687 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
/* Neither feature in use: no runtime object is needed */
2697 if (!c->private_network && !c->private_tmp)
2700 r = exec_runtime_allocate(rt);
2704 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2705 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2709 if (c->private_tmp && !(*rt)->tmp_dir) {
2710 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Takes a reference on r; the object must already be referenced (n_ref > 0).
 * NOTE(review): the increment and the return statement are not visible in
 * this excerpt. */
2718 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2720 assert(r->n_ref > 0);
/* Drops a reference on r. Once the count has reached zero the object's
 * resources are released (the var-tmp path string is freed and the netns
 * storage sockets are closed). This only releases memory and fds; removing
 * the directories from disk is what exec_runtime_destroy() does.
 * NOTE(review): the decrement, the remaining frees and the return value are
 * not visible in this excerpt. */
2726 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2731 assert(r->n_ref > 0);
2734 if (r->n_ref <= 0) {
2736 free(r->var_tmp_dir);
2737 safe_close_pair(r->netns_storage_socket);
/* Serializes the runtime object as unit items written to f: the keys
 * "tmp-dir" and "var-tmp-dir" carry the directory paths, "netns-socket-0"
 * and "netns-socket-1" carry fd numbers. The sockets are duplicated into the
 * fd set fds, and the number of the duplicate is what gets written. */
2744 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2753 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2755 if (rt->var_tmp_dir)
2756 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2758 if (rt->netns_storage_socket[0] >= 0) {
2761 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2765 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2768 if (rt->netns_storage_socket[1] >= 0) {
2771 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2775 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Counterpart of exec_runtime_serialize() for a single key/value pair.
 * Recognized keys are "tmp-dir", "var-tmp-dir", "netns-socket-0" and
 * "netns-socket-1". For each of them *rt is allocated on demand, then the
 * value replaces the previous one: path strings are duplicated (the old
 * string is freed), sockets are taken over from the fd set fds (the old fd
 * is closed).
 * NOTE(review): error returns and the handling of unknown keys are not
 * visible in this excerpt. */
2781 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2788 if (streq(key, "tmp-dir")) {
2791 r = exec_runtime_allocate(rt);
2795 copy = strdup(value);
2799 free((*rt)->tmp_dir);
2800 (*rt)->tmp_dir = copy;
2802 } else if (streq(key, "var-tmp-dir")) {
2805 r = exec_runtime_allocate(rt);
2809 copy = strdup(value);
2813 free((*rt)->var_tmp_dir);
2814 (*rt)->var_tmp_dir = copy;
2816 } else if (streq(key, "netns-socket-0")) {
2819 r = exec_runtime_allocate(rt);
/* The value must parse as an integer and name an fd that is part of the
 * fd set; otherwise a debug message is logged */
2823 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2824 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2826 safe_close((*rt)->netns_storage_socket[0]);
2827 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2829 } else if (streq(key, "netns-socket-1")) {
2832 r = exec_runtime_allocate(rt);
2836 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2837 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2839 safe_close((*rt)->netns_storage_socket[1]);
2840 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job function passed to asynchronous_job() by exec_runtime_destroy(): p is
 * a path string. The path is removed with rm_rf_dangerous() and the string
 * itself is freed when the function returns (_cleanup_free_), i.e. this
 * function takes ownership of p. */
2848 static void *remove_tmpdir_thread(void *p) {
2849 _cleanup_free_ char *path = p;
2851 rm_rf_dangerous(path, false, true, false);
/* Removes the private tmp directories of the runtime object from disk and
 * closes its netns storage sockets. The removal runs as an asynchronous job
 * (remove_tmpdir_thread()), which takes ownership of the path string; only
 * if starting the job fails is the string freed here. */
2855 void exec_runtime_destroy(ExecRuntime *rt) {
2861 /* If there are multiple users of this, let's leave the stuff around */
2866 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2868 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2870 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2877 if (rt->var_tmp_dir) {
2878 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2880 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2882 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2883 free(rt->var_tmp_dir);
/* Forget the pointer in either case: the string is owned by the job now,
 * or has just been freed */
2886 rt->var_tmp_dir = NULL;
2889 safe_close_pair(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by enum value. The table is handed
 * to DEFINE_STRING_TABLE_LOOKUP(), which presumably generates
 * exec_input_to_string() (used in exec_context_dump()) and the matching
 * reverse lookup -- confirm against the macro definition. */
2892 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2893 [EXEC_INPUT_NULL] = "null",
2894 [EXEC_INPUT_TTY] = "tty",
2895 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2896 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2897 [EXEC_INPUT_SOCKET] = "socket"
2900 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum value. The table is handed
 * to DEFINE_STRING_TABLE_LOOKUP(), which presumably generates
 * exec_output_to_string() (used in exec_context_dump()) and the matching
 * reverse lookup -- confirm against the macro definition. */
2902 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2903 [EXEC_OUTPUT_INHERIT] = "inherit",
2904 [EXEC_OUTPUT_NULL] = "null",
2905 [EXEC_OUTPUT_TTY] = "tty",
2906 [EXEC_OUTPUT_SYSLOG] = "syslog",
2907 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2908 [EXEC_OUTPUT_KMSG] = "kmsg",
2909 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2910 [EXEC_OUTPUT_JOURNAL] = "journal",
2911 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2912 [EXEC_OUTPUT_SOCKET] = "socket"
2915 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);