1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
73 #include "exit-status.h"
75 #include "utmp-wtmp.h"
77 #include "path-util.h"
82 #include "selinux-util.h"
83 #include "errno-list.h"
85 #include "apparmor-util.h"
88 #include "seccomp-util.h"
/* Timeout passed to the first fd_wait_for_event() call in do_idle_pipe_dance(). */
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Timeout passed to the second wait in do_idle_pipe_dance(), after systemd
 * has been signalled that we want to continue. */
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94 /* This assumes there is a 'tty' group */
/* Size handed to fd_inc_sndbuf() for the logger socket in connect_logger_as(). */
97 #define SNDBUF_SIZE (8*1024*1024)
/* Relocates the passed file descriptors towards consecutive numbers starting
 * at 3: entry i is duplicated with F_DUPFD asking for descriptor i+3 and the
 * old descriptor is closed. When the kernel hands back a different number
 * (the wanted slot was still occupied) the first such index is remembered in
 * restart_from and the pass is repeated from there.
 * The fds array is modified in place.
 * NOTE(review): the exact return values are not visible in this excerpt. */
99 static int shift_fds(int fds[], unsigned n_fds) {
100 int start, restart_from;
105 /* Modifies the fds array! (sorts it) */
115 for (i = start; i < (int) n_fds; i++) {
118 /* Already at right index? */
122 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
125 close_nointr_nofail(fds[i]);
128 /* Hmm, the fd we wanted isn't free? Then
129 * let's remember that and try again from here*/
130 if (nfd != i+3 && restart_from < 0)
134 if (restart_from < 0)
137 start = restart_from;
/* Adjusts the flags of every descriptor in fds before it is handed to the
 * executed program: O_NONBLOCK is set or cleared according to nonblock, and
 * FD_CLOEXEC is always cleared so the descriptors survive exec.
 * A negative result from fd_nonblock() or fd_cloexec() is treated as failure. */
143 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
152 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
154 for (i = 0; i < n_fds; i++) {
156 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
159 /* We unconditionally drop FD_CLOEXEC from the fds,
160 * since after all we want to pass these fds to our
163 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when
 * none is configured. The returned string is not a copy. */
170 _pure_ static const char *tty_path(const ExecContext *context) {
173 if (context->tty_path)
174 return context->tty_path;
176 return "/dev/console";
/* Applies the TTY clean-up options of the context: hang up the terminal
 * (tty_vhangup), reset it (tty_reset) and deallocate the VT
 * (tty_vt_disallocate). The first two act on tty_path(context); the
 * deallocation only happens when an explicit tty_path is configured. */
179 static void exec_context_tty_reset(const ExecContext *context) {
182 if (context->tty_vhangup)
183 terminal_vhangup(tty_path(context));
185 if (context->tty_reset)
186 reset_terminal(tty_path(context));
188 if (context->tty_vt_disallocate && context->tty_path)
189 vt_disallocate(context->tty_path);
/* Tells whether the output type writes to a terminal: the plain TTY output
 * and the syslog/kmsg/journal variants that additionally go to the console. */
192 static bool is_terminal_output(ExecOutput o) {
194 o == EXEC_OUTPUT_TTY ||
195 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given access flags (plus O_NOCTTY) and installs
 * it as descriptor nfd via dup2(); the temporary descriptor is closed again.
 * The dup2() path yields nfd on success and -errno on failure. */
200 static int open_null_as(int flags, int nfd) {
205 fd = open("/dev/null", flags|O_NOCTTY);
210 r = dup2(fd, nfd) < 0 ? -errno : nfd;
211 close_nointr_nofail(fd);
/* Connects descriptor nfd to the journal's stream socket
 * (/run/systemd/journal/stdout).
 *
 * A fresh AF_UNIX stream socket is connected, its read side is shut down and
 * its send buffer is enlarged to SNDBUF_SIZE. Stream parameters are then
 * passed on: the identifier (context->syslog_identifier if set, else ident),
 * the syslog priority, the level-prefix flag, and three booleans derived from
 * output (syslog forwarding, kmsg forwarding, console forwarding). Finally
 * the socket is moved onto nfd with dup2().
 *
 * The dup2() path yields nfd on success and -errno on failure.
 * NOTE(review): the call that transmits the parameters is not visible in
 * this excerpt. */
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220 union sockaddr_union sa = {
221 .un.sun_family = AF_UNIX,
222 .un.sun_path = "/run/systemd/journal/stdout",
226 assert(output < _EXEC_OUTPUT_MAX);
230 fd = socket(AF_UNIX, SOCK_STREAM, 0);
234 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
236 close_nointr_nofail(fd);
240 if (shutdown(fd, SHUT_RD) < 0) {
241 close_nointr_nofail(fd);
245 fd_inc_sndbuf(fd, SNDBUF_SIZE);
255 context->syslog_identifier ? context->syslog_identifier : ident,
257 context->syslog_priority,
258 !!context->syslog_level_prefix,
259 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261 is_terminal_output(output));
264 r = dup2(fd, nfd) < 0 ? -errno : nfd;
265 close_nointr_nofail(fd);
/* Opens the terminal at path with the given mode (plus O_NOCTTY, so it does
 * not become the controlling terminal) and installs it as descriptor nfd via
 * dup2(). The dup2() path yields nfd on success and -errno on failure. */
271 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
277 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
281 r = dup2(fd, nfd) < 0 ? -errno : nfd;
282 close_nointr_nofail(fd);
/* Tells whether the input type reads from a terminal (TTY, TTY_FORCE or
 * TTY_FAIL). */
289 static bool is_terminal_input(ExecInput i) {
291 i == EXEC_INPUT_TTY ||
292 i == EXEC_INPUT_TTY_FORCE ||
293 i == EXEC_INPUT_TTY_FAIL;
/* Computes the effective stdin type. The configured type is downgraded to
 * EXEC_INPUT_NULL when it is a terminal input but apply_tty_stdin is false,
 * or when it is socket input but no socket was passed (socket_fd < 0).
 * NOTE(review): the fall-through return is not visible here; presumably
 * std_input is returned unchanged. */
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298 if (is_terminal_input(std_input) && !apply_tty_stdin)
299 return EXEC_INPUT_NULL;
301 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302 return EXEC_INPUT_NULL;
/* Computes the effective output type: socket output without a socket
 * (socket_fd < 0) is downgraded to EXEC_OUTPUT_INHERIT.
 * NOTE(review): the fall-through return is not visible here; presumably
 * std_output is returned unchanged. */
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
309 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the effective input type computed by
 * fixup_input():
 *  - NULL: /dev/null opened read-only;
 *  - TTY_FORCE / TTY_FAIL: the terminal from tty_path() is acquired, with the
 *    two boolean arguments selecting fail and force behaviour, and moved
 *    onto stdin;
 *  - SOCKET: socket_fd is duplicated onto stdin.
 * The visible return paths yield STDIN_FILENO on success and -errno on
 * failure. */
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
320 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324 case EXEC_INPUT_NULL:
325 return open_null_as(O_RDONLY, STDIN_FILENO);
328 case EXEC_INPUT_TTY_FORCE:
329 case EXEC_INPUT_TTY_FAIL: {
332 fd = acquire_terminal(tty_path(context),
333 i == EXEC_INPUT_TTY_FAIL,
334 i == EXEC_INPUT_TTY_FORCE,
340 if (fd != STDIN_FILENO) {
341 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
342 close_nointr_nofail(fd);
349 case EXEC_INPUT_SOCKET:
350 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
353 assert_not_reached("Unknown input type");
/* Sets up stdout or stderr (selected by fileno) according to the context.
 *
 * The effective input and output types are computed with fixup_input() and
 * fixup_output(). For stderr, the descriptor is duplicated from stdout when
 * stderr is configured identically to stdout or is set to inherit. For an
 * inheriting stdout, the descriptor follows stdin: the terminal is reopened
 * when a terminal input was downgraded, stdin is duplicated when it is
 * connected to something other than /dev/null, otherwise /dev/null is
 * opened write-only. All other output types are handled explicitly: NULL,
 * TTY, the syslog/kmsg/journal family (connected via connect_logger_as();
 * a failure is logged and /dev/null is opened instead) and SOCKET.
 *
 * The visible return paths yield fileno on success and -errno on failure.
 * Expects stdin (and, for stderr, stdout) to be set up already. */
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
365 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366 o = fixup_output(context->std_output, socket_fd);
368 if (fileno == STDERR_FILENO) {
370 e = fixup_output(context->std_error, socket_fd);
372 /* This expects the input and output are already set up */
374 /* Don't change the stderr file descriptor if we inherit all
375 * the way and are not on a tty */
376 if (e == EXEC_OUTPUT_INHERIT &&
377 o == EXEC_OUTPUT_INHERIT &&
378 i == EXEC_INPUT_NULL &&
379 !is_terminal_input(context->std_input) &&
383 /* Duplicate from stdout if possible */
384 if (e == o || e == EXEC_OUTPUT_INHERIT)
385 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389 } else if (o == EXEC_OUTPUT_INHERIT) {
390 /* If input got downgraded, inherit the original value */
391 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395 if (i != EXEC_INPUT_NULL)
396 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402 /* We need to open /dev/null here anew, to get the right access mode. */
403 return open_null_as(O_WRONLY, fileno);
408 case EXEC_OUTPUT_NULL:
409 return open_null_as(O_WRONLY, fileno);
411 case EXEC_OUTPUT_TTY:
412 if (is_terminal_input(i))
413 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415 /* We don't reset the terminal if this is just about output */
416 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418 case EXEC_OUTPUT_SYSLOG:
419 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420 case EXEC_OUTPUT_KMSG:
421 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422 case EXEC_OUTPUT_JOURNAL:
423 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424 r = connect_logger_as(context, o, ident, unit_id, fileno);
426 log_struct_unit(LOG_CRIT, unit_id,
427 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428 fileno == STDOUT_FILENO ? "out" : "err",
429 unit_id, strerror(-r),
432 r = open_null_as(O_WRONLY, fileno);
436 case EXEC_OUTPUT_SOCKET:
437 assert(socket_fd >= 0);
438 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
/* The cases above are output types, so name them as such in the
 * diagnostic (mirrors "Unknown input type" in setup_input()). */
441 assert_not_reached("Unknown output type");
/* Hands the terminal behind fd over to uid: ownership and mode (TTY_MODE)
 * are changed on a best-effort basis, then fstat() is used to check whether
 * the owner and the permission bits actually have the wanted values. Only
 * the verified end state matters, not whether fchown()/fchmod() succeeded.
 * NOTE(review): the return values are not visible in this excerpt. */
445 static int chown_terminal(int fd, uid_t uid) {
450 /* This might fail. What matters are the results. */
451 (void) fchown(fd, uid, -1);
452 (void) fchmod(fd, TTY_MODE);
454 if (fstat(fd, &st) < 0)
457 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily redirects stdin and stdout to a terminal so a confirmation
 * question can be asked.
 *
 * The current stdin and stdout are first duplicated to descriptors >= 3.
 * Then a terminal is acquired (waiting at most DEFAULT_CONFIRM_USEC), chowned
 * to the current uid and installed as both stdin and stdout. On success the
 * saved descriptors are handed back through _saved_stdin / _saved_stdout so
 * that restore_confirm_stdio() can undo the redirection. The trailing
 * statements close whatever saved descriptors and terminal fd are open.
 * NOTE(review): the error labels and return values are not visible here. */
463 static int setup_confirm_stdio(int *_saved_stdin,
464 int *_saved_stdout) {
465 int fd = -1, saved_stdin, saved_stdout = -1, r;
467 assert(_saved_stdin);
468 assert(_saved_stdout);
470 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
474 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475 if (saved_stdout < 0) {
480 fd = acquire_terminal(
485 DEFAULT_CONFIRM_USEC);
491 r = chown_terminal(fd, getuid());
495 if (dup2(fd, STDIN_FILENO) < 0) {
500 if (dup2(fd, STDOUT_FILENO) < 0) {
506 close_nointr_nofail(fd);
508 *_saved_stdin = saved_stdin;
509 *_saved_stdout = saved_stdout;
514 if (saved_stdout >= 0)
515 close_nointr_nofail(saved_stdout);
517 if (saved_stdin >= 0)
518 close_nointr_nofail(saved_stdin);
521 close_nointr_nofail(fd);
/* Writes a printf-style formatted message to /dev/console. The console is
 * opened write-only (O_NOCTTY, O_CLOEXEC) for just this message and closed
 * again afterwards. */
526 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
532 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
536 va_start(ap, format);
537 vdprintf(fd, format, ap);
540 close_nointr_nofail(fd);
/* Undoes setup_confirm_stdio(): each saved descriptor that is valid (>= 0)
 * is duplicated back onto STDIN_FILENO / STDOUT_FILENO and then closed.
 * NOTE(review): how dup2() failures are reported is not visible here. */
545 static int restore_confirm_stdio(int *saved_stdin,
551 assert(saved_stdout);
555 if (*saved_stdin >= 0)
556 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
559 if (*saved_stdout >= 0)
560 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
563 if (*saved_stdin >= 0)
564 close_nointr_nofail(*saved_stdin);
566 if (*saved_stdout >= 0)
567 close_nointr_nofail(*saved_stdout);
/* Asks on the terminal whether the command given by argv shall be executed.
 * stdin/stdout are redirected with setup_confirm_stdio(), the command line is
 * rendered with exec_command_line(), and ask() stores one of the accepted
 * characters 'y', 'n' or 's' in *response. The original stdin/stdout are
 * restored afterwards. */
572 static int ask_for_confirmation(char *response, char **argv) {
573 int saved_stdout = -1, saved_stdin = -1, r;
576 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
580 line = exec_command_line(argv);
584 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
587 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group configuration of the context to the calling process.
 *
 * If a group is configured it is resolved with get_group_creds(), replacing
 * the passed gid. When a username is known and the gid is not 0, the
 * supplementary groups are initialized with initgroups(). The real, effective
 * and saved gid are then set with setresgid(). Finally, if the context lists
 * supplementary groups, they are resolved and appended to the process' current
 * group list (bounded by _SC_NGROUPS_MAX) and installed with setgroups().
 * NOTE(review): the return values are not visible in this excerpt. */
592 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
593 bool keep_groups = false;
598 /* Lookup and set GID and supplementary group list. Here too
599 * we avoid NSS lookups for gid=0. */
601 if (context->group || username) {
603 if (context->group) {
604 const char *g = context->group;
606 if ((r = get_group_creds(&g, &gid)) < 0)
610 /* First step, initialize groups from /etc/group */
611 if (username && gid != 0) {
612 if (initgroups(username, gid) < 0)
618 /* Second step, set our gids */
619 if (setresgid(gid, gid, gid) < 0)
623 if (context->supplementary_groups) {
628 /* Final step, initialize any manually set supplementary groups */
629 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
631 if (!(gids = new(gid_t, ngroups_max)))
635 if ((k = getgroups(ngroups_max, gids)) < 0) {
642 STRV_FOREACH(i, context->supplementary_groups) {
645 if (k >= ngroups_max) {
651 r = get_group_creds(&g, gids+k);
660 if (setgroups(k, gids) < 0) {
/* Switches the calling process to uid (no user lookup is done here).
 *
 * When the context carries a capability set, two preparations are made first
 * so the capabilities survive the uid change: SECURE_KEEP_CAPS is added to
 * the secure bits (only written if they differ from the current value), and
 * a copy of the configured capabilities with CAP_SETUID and CAP_SETPCAP
 * raised in the effective and permitted sets is installed. Then the real,
 * effective and saved uid are set with setresuid().
 * NOTE(review): the return values are not visible in this excerpt. */
671 static int enforce_user(const ExecContext *context, uid_t uid) {
674 /* Sets (but doesn't lookup) the uid and make sure we keep the
675 * capabilities while doing so. */
677 if (context->capabilities) {
678 _cleanup_cap_free_ cap_t d = NULL;
679 static const cap_value_t bits[] = {
680 CAP_SETUID, /* Necessary so that we can run setresuid() below */
681 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
684 /* First step: If we need to keep capabilities but
685 * drop privileges we need to make sure we keep our
686 * caps, while we drop privileges. */
688 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
690 if (prctl(PR_GET_SECUREBITS) != sb)
691 if (prctl(PR_SET_SECUREBITS, sb) < 0)
695 /* Second step: set the capabilities. This will reduce
696 * the capabilities to the minimum we need. */
698 d = cap_dup(context->capabilities);
702 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
703 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
706 if (cap_set_proc(d) < 0)
710 /* Third step: actually set the uids */
711 if (setresuid(uid, uid, uid) < 0)
714 /* At this point we should have all necessary capabilities but
715 are otherwise a normal user. However, the caps might have been
716 corrupted due to the setresuid() so we need to clean them up
717 later. This is done outside of this call. */
/* PAM conversation callback for setup_pam(). Conversations are not
 * supported.
 * NOTE(review): the returned PAM code is not visible in this excerpt. */
724 static int null_conv(
726 const struct pam_message **msg,
727 struct pam_response **resp,
730 /* We don't support conversations */
/* Opens a PAM session for the service and arranges for it to be closed
 * again once the service's main process is gone.
 *
 * Visible steps: pam_start() with the non-interactive conversation, PAM_TTY
 * is set, account management is run, the session is opened and the PAM
 * environment list is fetched. SIGTERM is blocked and the current PID is
 * remembered. A helper child named "(sd-pam)" then closes the passed fds,
 * drops to uid, requests SIGTERM on parent death and waits for it; once the
 * parent is gone it closes the PAM session and ends the PAM transaction. The
 * parent restores its signal mask and carries on. On failure the error is
 * logged, an opened session is closed, the handle is released and a started
 * helper is signalled with SIGTERM/SIGCONT.
 *
 * fds/n_fds are the descriptors to be passed to the service; they are only
 * closed in the helper. PAM failures are mapped to -EPERM. */
735 static int setup_pam(
741 int fds[], unsigned n_fds) {
743 static const struct pam_conv conv = {
748 pam_handle_t *handle = NULL;
750 int pam_code = PAM_SUCCESS;
753 bool close_session = false;
754 pid_t pam_pid = 0, parent_pid;
761 /* We set up PAM in the parent process, then fork. The child
762 * will then stay around until killed via PR_SET_PDEATHSIG or
763 * systemd via the cgroup logic. It will then remove the PAM
764 * session again. The parent process will exec() the actual
765 * daemon. We do things this way to ensure that the main PID
766 * of the daemon is the one we initially fork()ed. */
768 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
771 pam_code = pam_start(name, user, &conv, &handle);
772 if (pam_code != PAM_SUCCESS) {
778 pam_code = pam_set_item(handle, PAM_TTY, tty);
779 if (pam_code != PAM_SUCCESS)
783 pam_code = pam_acct_mgmt(handle, flags);
784 if (pam_code != PAM_SUCCESS)
787 pam_code = pam_open_session(handle, flags);
788 if (pam_code != PAM_SUCCESS)
791 close_session = true;
793 e = pam_getenvlist(handle);
795 pam_code = PAM_BUF_ERR;
799 /* Block SIGTERM, so that we know that it won't get lost in
801 if (sigemptyset(&ss) < 0 ||
802 sigaddset(&ss, SIGTERM) < 0 ||
803 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
806 parent_pid = getpid();
816 /* The child's job is to reset the PAM session on
819 /* This string must fit in 10 chars (i.e. the length
820 * of "/sbin/init"), to look pretty in /bin/ps */
821 rename_process("(sd-pam)");
823 /* Make sure we don't keep open the passed fds in this
824 child. We assume that otherwise only those fds are
825 open here that have been opened by PAM. */
826 close_many(fds, n_fds);
828 /* Drop privileges - we don't need any to pam_close_session
829 * and this will make PR_SET_PDEATHSIG work in most cases.
830 * If this fails, ignore the error - but expect sd-pam threads
831 * to fail to exit normally */
832 if (setresuid(uid, uid, uid) < 0)
/* setresuid() reports its failure reason in errno; log that via %m. */
833 log_error("Error: Failed to setresuid() in sd-pam: %m");
835 /* Wait until our parent died. This will only work if
836 * the above setresuid() succeeds, otherwise the kernel
837 * will not allow unprivileged parents to kill their privileged
838 * children this way. We rely on the control groups kill logic
839 * to do the rest for us. */
840 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
843 /* Check if our parent process might already have
845 if (getppid() == parent_pid) {
847 if (sigwait(&ss, &sig) < 0) {
854 assert(sig == SIGTERM);
859 /* If our parent died we'll end the session */
860 if (getppid() != parent_pid) {
861 pam_code = pam_close_session(handle, flags);
862 if (pam_code != PAM_SUCCESS)
869 pam_end(handle, pam_code | flags);
873 /* If the child was forked off successfully it will do all the
874 * cleanups, so forget about the handle here. */
877 /* Unblock SIGTERM again in the parent */
878 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
881 /* We close the log explicitly here, since the PAM modules
882 * might have opened it, but we don't want this fd around. */
891 if (pam_code != PAM_SUCCESS) {
892 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
893 err = -EPERM; /* PAM errors do not map to errno */
895 log_error("PAM failed: %m");
901 pam_code = pam_close_session(handle, flags);
903 pam_end(handle, pam_code | flags);
911 kill(pam_pid, SIGTERM);
912 kill(pam_pid, SIGCONT);
/* Renames the current process to a short, parenthesized name derived from
 * path. The name is assembled in an 11-byte buffer as '(' + l bytes taken
 * from p + ')' + NUL, so at most 8 bytes of the name fit between the
 * parentheses. "(...)" is used as a fallback name.
 * NOTE(review): how p and l are derived from path, and when the fallback
 * applies, is not visible in this excerpt. */
919 static void rename_process_from_path(const char *path) {
920 char process_name[11];
924 /* This resulting string must fit in 10 chars (i.e. the length
925 * of "/sbin/init") to look pretty in /bin/ps */
929 rename_process("(...)");
935 /* The end of the process name is usually more
936 * interesting, since the first bit might just be
942 process_name[0] = '(';
943 memcpy(process_name+1, p, l);
944 process_name[1+l] = ')';
945 process_name[1+l+1] = 0;
947 rename_process(process_name);
/* Builds and loads the system call filter configured in the context.
 *
 * The "negative" action is SCMP_ACT_KILL, or an errno return when
 * c->syscall_errno is set. In whitelist mode the filter defaults to the
 * negative action and the listed calls are allowed; otherwise the filter
 * defaults to allow and the listed calls get the negative action.
 * Architectures come from c->syscall_archs when set; set entries are stored
 * offset by one, hence the "- 1" when decoding. The filter attribute
 * SCMP_FLTATR_CTL_NNP is set to 0 before loading, and the filter context is
 * released afterwards. */
952 static int apply_seccomp(ExecContext *c) {
953 uint32_t negative_action, action;
954 scmp_filter_ctx *seccomp;
961 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
963 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
967 if (c->syscall_archs) {
969 SET_FOREACH(id, c->syscall_archs, i) {
970 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
978 r = seccomp_add_secondary_archs(seccomp);
983 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
984 SET_FOREACH(id, c->syscall_filter, i) {
985 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
990 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
994 r = seccomp_load(seccomp);
997 seccomp_release(seccomp);
/* Builds and loads a seccomp filter restricting address families; blocked
 * families fail with EPROTONOSUPPORT. The filter defaults to allow.
 *
 * Whitelist mode: the lowest and highest valid family in the set are found
 * (valid means 0 < af < af_max()). If there is none, everything is blocked.
 * Otherwise everything below the first and above the last entry is blocked
 * with range comparisons on argument 0, and the families in the loop from 1
 * to af_max() that are not in the set are blocked one by one.
 * Blacklist mode: one equality rule per listed family.
 *
 * The filter attribute SCMP_FLTATR_CTL_NNP is set to 0 before loading, and
 * the filter context is released afterwards.
 * NOTE(review): the system call the rules are attached to is not visible in
 * this excerpt. */
1001 static int apply_address_families(ExecContext *c) {
1002 scmp_filter_ctx *seccomp;
1008 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1012 r = seccomp_add_secondary_archs(seccomp);
1016 if (c->address_families_whitelist) {
1017 int af, first = 0, last = 0;
1020 /* If this is a whitelist, we first block the address
1021 * families that are out of range and then everything
1022 * that is not in the set. First, we find the lowest
1023 * and highest address family in the set. */
1025 SET_FOREACH(afp, c->address_families, i) {
1026 af = PTR_TO_INT(afp);
1028 if (af <= 0 || af >= af_max())
1031 if (first == 0 || af < first)
1034 if (last == 0 || af > last)
1038 assert((first == 0) == (last == 0));
1042 /* No entries in the valid range, block everything */
1043 r = seccomp_rule_add(
1045 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1053 /* Block everything below the first entry */
1054 r = seccomp_rule_add(
1056 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1059 SCMP_A0(SCMP_CMP_LT, first));
1063 /* Block everything above the last entry */
1064 r = seccomp_rule_add(
1066 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1069 SCMP_A0(SCMP_CMP_GT, last));
1073 /* Block everything between the first and last
1075 for (af = 1; af < af_max(); af++) {
1077 if (set_contains(c->address_families, INT_TO_PTR(af)))
1080 r = seccomp_rule_add(
1082 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1085 SCMP_A0(SCMP_CMP_EQ, af));
1094 /* If this is a blacklist, then generate one rule for
1095 * each address family that are then combined in OR
1098 SET_FOREACH(af, c->address_families, i) {
1100 r = seccomp_rule_add(
1102 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1105 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1111 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1115 r = seccomp_load(seccomp);
1118 seccomp_release(seccomp);
/* Runs the child's side of the idle pipe protocol and closes all four idle
 * pipe descriptors.
 *
 * Descriptors 1 and 2 are closed right away. If descriptor 0 is valid, the
 * child waits up to IDLE_TIMEOUT_USEC for a hang-up on it. On timeout, and if
 * descriptor 3 is valid, one byte is written there to tell systemd we want to
 * continue, followed by a second wait of at most IDLE_TIMEOUT2_USEC. The
 * results of the write and of the second wait are deliberately not checked. */
1124 static void do_idle_pipe_dance(int idle_pipe[4]) {
1127 if (idle_pipe[1] >= 0)
1128 close_nointr_nofail(idle_pipe[1]);
1129 if (idle_pipe[2] >= 0)
1130 close_nointr_nofail(idle_pipe[2]);
1132 if (idle_pipe[0] >= 0) {
1135 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1137 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1138 /* Signal systemd that we are bored and want to continue. */
1139 write(idle_pipe[3], "x", 1);
1141 /* Wait for systemd to react to the signal above. */
1142 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1145 close_nointr_nofail(idle_pipe[0]);
1149 if (idle_pipe[3] >= 0)
1150 close_nointr_nofail(idle_pipe[3]);
/* Builds the environment variables that the manager itself contributes to
 * the executed program.
 *
 * The block is a NULL-terminated string vector with room for 10 pointers:
 * LISTEN_PID, LISTEN_FDS, WATCHDOG_PID, WATCHDOG_USEC, HOME, LOGNAME, USER,
 * SHELL and TERM (nine entries at most) plus the terminator. WATCHDOG_* is
 * only added when watchdog_usec > 0; TERM is derived from the tty when the
 * unit is connected to a terminal. Keep the allocation size and the final
 * assertion in sync when adding variables.
 * NOTE(review): the guards for the remaining variables and the way the
 * result is handed back are not visible in this excerpt. */
1153 static int build_environment(
1156 usec_t watchdog_usec,
1158 const char *username,
1162 _cleanup_strv_free_ char **our_env = NULL;
1169 our_env = new0(char*, 10);
1174 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1176 our_env[n_env++] = x;
1178 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1180 our_env[n_env++] = x;
1183 if (watchdog_usec > 0) {
1184 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1186 our_env[n_env++] = x;
1188 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1190 our_env[n_env++] = x;
1194 x = strappend("HOME=", home);
1197 our_env[n_env++] = x;
1201 x = strappend("LOGNAME=", username);
1204 our_env[n_env++] = x;
1206 x = strappend("USER=", username);
1209 our_env[n_env++] = x;
1213 x = strappend("SHELL=", shell);
1216 our_env[n_env++] = x;
1219 if (is_terminal_input(c->std_input) ||
1220 c->std_output == EXEC_OUTPUT_TTY ||
1221 c->std_error == EXEC_OUTPUT_TTY ||
1224 x = strdup(default_term_for_tty(tty_path(c)));
1227 our_env[n_env++] = x;
1230 our_env[n_env++] = NULL;
1231 assert(n_env <= 10);
/* Spawns the given command with the execution parameters of the context.
 *
 * Before the child is set up, the environment files of the context are
 * loaded and the command line is logged. The child then performs, in this
 * order: process rename, signal disposition and mask reset, idle pipe
 * handling, closing of all fds except the ones to pass on, TCP wrapper
 * checks, TTY reset, the optional confirmation prompt, stdin/stdout/stderr
 * setup, cgroup attachment, OOM score / nice / CPU scheduler / CPU affinity /
 * IO priority / timer slack / personality, utmp registration, user lookup and
 * terminal ownership, cgroup access rights, group enforcement, umask, PAM
 * session setup, network and mount namespaces, chroot and working directory,
 * a second fd cleanup plus fd shifting and flag fixing, resource limits,
 * capability bounding set, user switch, secure bits, capabilities,
 * no-new-privileges, address family and system call filters, SELinux and
 * AppArmor labels, environment assembly and argv variable expansion, and
 * finally execve(). A failing step is logged together with the name of the
 * step. The parent logs the fork, attaches the new PID to the cgroup as well
 * and records the start in command->exec_status.
 *
 * fds/n_fds are descriptors to pass to the program (fds may be NULL only if
 * n_fds is 0); they are moved to descriptor 3 and up by shift_fds().
 * NOTE(review): several parameters and the return convention are not visible
 * in this excerpt. */
1239 int exec_spawn(ExecCommand *command,
1241 ExecContext *context,
1242 int fds[], unsigned n_fds,
1244 bool apply_permissions,
1246 bool apply_tty_stdin,
1248 CGroupControllerMask cgroup_supported,
1249 const char *cgroup_path,
1250 const char *unit_id,
1251 usec_t watchdog_usec,
1253 ExecRuntime *runtime,
1256 _cleanup_strv_free_ char **files_env = NULL;
1265 assert(fds || n_fds <= 0);
1267 if (context->std_input == EXEC_INPUT_SOCKET ||
1268 context->std_output == EXEC_OUTPUT_SOCKET ||
1269 context->std_error == EXEC_OUTPUT_SOCKET) {
1281 r = exec_context_load_environment(context, &files_env);
1283 log_struct_unit(LOG_ERR,
1285 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1292 argv = command->argv;
1294 line = exec_command_line(argv);
1298 log_struct_unit(LOG_DEBUG,
1300 "EXECUTABLE=%s", command->path,
1301 "MESSAGE=About to execute: %s", line,
1310 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1311 const char *username = NULL, *home = NULL, *shell = NULL;
1312 unsigned n_dont_close = 0;
1313 int dont_close[n_fds + 3];
1314 uid_t uid = (uid_t) -1;
1315 gid_t gid = (gid_t) -1;
1321 rename_process_from_path(command->path);
1323 /* We reset exactly these signals, since they are the
1324 * only ones we set to SIG_IGN in the main daemon. All
1325 * others we leave untouched because we set them to
1326 * SIG_DFL or a valid handler initially, both of which
1327 * will be demoted to SIG_DFL. */
1328 default_signals(SIGNALS_CRASH_HANDLER,
1329 SIGNALS_IGNORE, -1);
1331 if (context->ignore_sigpipe)
1332 ignore_signals(SIGPIPE, -1);
1334 assert_se(sigemptyset(&ss) == 0);
1335 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1337 r = EXIT_SIGNAL_MASK;
1342 do_idle_pipe_dance(idle_pipe);
1344 /* Close sockets very early to make sure we don't
1345 * block init reexecution because it cannot bind its
1350 dont_close[n_dont_close++] = socket_fd;
1352 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1353 n_dont_close += n_fds;
1356 if (runtime->netns_storage_socket[0] >= 0)
1357 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1358 if (runtime->netns_storage_socket[1] >= 0)
1359 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1362 err = close_all_fds(dont_close, n_dont_close);
1368 if (!context->same_pgrp)
1375 if (context->tcpwrap_name) {
1377 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1383 for (i = 0; i < (int) n_fds; i++) {
1384 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1392 exec_context_tty_reset(context);
1394 if (confirm_spawn) {
1397 err = ask_for_confirmation(&response, argv);
1398 if (err == -ETIMEDOUT)
1399 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1401 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1402 else if (response == 's') {
1403 write_confirm_message("Skipping execution.\n");
1407 } else if (response == 'n') {
1408 write_confirm_message("Failing execution.\n");
1414 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1415 * must make sure to drop O_NONBLOCK */
1417 fd_nonblock(socket_fd, false);
1419 err = setup_input(context, socket_fd, apply_tty_stdin);
1425 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1431 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1438 err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1445 if (context->oom_score_adjust_set) {
1448 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1451 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1453 r = EXIT_OOM_ADJUST;
1458 if (context->nice_set)
1459 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1465 if (context->cpu_sched_set) {
1466 struct sched_param param = {
1467 .sched_priority = context->cpu_sched_priority,
1470 r = sched_setscheduler(0,
1471 context->cpu_sched_policy |
1472 (context->cpu_sched_reset_on_fork ?
1473 SCHED_RESET_ON_FORK : 0),
1477 r = EXIT_SETSCHEDULER;
1482 if (context->cpuset)
1483 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1485 r = EXIT_CPUAFFINITY;
1489 if (context->ioprio_set)
1490 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1496 if (context->timer_slack_nsec != (nsec_t) -1)
1497 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1499 r = EXIT_TIMERSLACK;
1503 if (context->personality != 0xffffffffUL)
1504 if (personality(context->personality) < 0) {
1506 r = EXIT_PERSONALITY;
1510 if (context->utmp_id)
1511 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1513 if (context->user) {
1514 username = context->user;
1515 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1521 if (is_terminal_input(context->std_input)) {
1522 err = chown_terminal(STDIN_FILENO, uid);
1531 if (cgroup_path && context->user && context->pam_name) {
1532 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1539 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1547 if (apply_permissions) {
1548 err = enforce_groups(context, username, gid);
1555 umask(context->umask);
1558 if (apply_permissions && context->pam_name && username) {
1559 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1566 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1567 err = setup_netns(runtime->netns_storage_socket);
1574 if (!strv_isempty(context->read_write_dirs) ||
1575 !strv_isempty(context->read_only_dirs) ||
1576 !strv_isempty(context->inaccessible_dirs) ||
1577 context->mount_flags != 0 ||
1578 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1579 context->private_devices) {
1581 char *tmp = NULL, *var = NULL;
1583 /* The runtime struct only contains the parent
1584 * of the private /tmp, which is
1585 * non-accessible to world users. Inside of it
1586 * there's a /tmp that is sticky, and that's
1587 * the one we want to use here. */
1589 if (context->private_tmp && runtime) {
1590 if (runtime->tmp_dir)
1591 tmp = strappenda(runtime->tmp_dir, "/tmp");
1592 if (runtime->var_tmp_dir)
1593 var = strappenda(runtime->var_tmp_dir, "/tmp");
1596 err = setup_namespace(
1597 context->read_write_dirs,
1598 context->read_only_dirs,
1599 context->inaccessible_dirs,
1602 context->private_devices,
1603 context->mount_flags);
1612 if (context->root_directory)
1613 if (chroot(context->root_directory) < 0) {
1619 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1625 _cleanup_free_ char *d = NULL;
1627 if (asprintf(&d, "%s/%s",
1628 context->root_directory ? context->root_directory : "",
1629 context->working_directory ? context->working_directory : "") < 0) {
1642 /* We repeat the fd closing here, to make sure that
1643 * nothing is leaked from the PAM modules */
1644 err = close_all_fds(fds, n_fds);
1646 err = shift_fds(fds, n_fds);
1648 err = flags_fds(fds, n_fds, context->non_blocking);
1654 if (apply_permissions) {
1656 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1657 if (!context->rlimit[i])
1660 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1667 if (context->capability_bounding_set_drop) {
1668 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1670 r = EXIT_CAPABILITIES;
1675 if (context->user) {
1676 err = enforce_user(context, uid);
1683 /* PR_GET_SECUREBITS is not privileged, while
1684 * PR_SET_SECUREBITS is. So to suppress
1685 * potential EPERMs we'll try not to call
1686 * PR_SET_SECUREBITS unless necessary. */
1687 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1688 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1690 r = EXIT_SECUREBITS;
1694 if (context->capabilities)
1695 if (cap_set_proc(context->capabilities) < 0) {
1697 r = EXIT_CAPABILITIES;
1701 if (context->no_new_privileges)
1702 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1704 r = EXIT_NO_NEW_PRIVILEGES;
1709 if (context->address_families_whitelist ||
1710 !set_isempty(context->address_families)) {
1711 err = apply_address_families(context);
1713 r = EXIT_ADDRESS_FAMILIES;
1718 if (context->syscall_whitelist ||
1719 !set_isempty(context->syscall_filter) ||
1720 !set_isempty(context->syscall_archs)) {
1721 err = apply_seccomp(context);
1730 if (context->selinux_context && use_selinux()) {
1731 err = setexeccon(context->selinux_context);
1732 if (err < 0 && !context->selinux_context_ignore) {
1733 r = EXIT_SELINUX_CONTEXT;
1739 #ifdef HAVE_APPARMOR
1740 if (context->apparmor_profile && use_apparmor()) {
1741 err = aa_change_onexec(context->apparmor_profile);
1742 if (err < 0 && !context->apparmor_profile_ignore) {
1743 r = EXIT_APPARMOR_PROFILE;
1750 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1756 final_env = strv_env_merge(5,
1759 context->environment,
1769 final_argv = replace_env_argv(argv, final_env);
1776 final_env = strv_env_clean(final_env);
1778 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1779 line = exec_command_line(final_argv);
1782 log_struct_unit(LOG_DEBUG,
1784 "EXECUTABLE=%s", command->path,
1785 "MESSAGE=Executing: %s", line,
1792 execve(command->path, final_argv, final_env);
1799 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1800 "EXECUTABLE=%s", command->path,
1801 "MESSAGE=Failed at step %s spawning %s: %s",
1802 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1803 command->path, strerror(-err),
1812 log_struct_unit(LOG_DEBUG,
1814 "MESSAGE=Forked %s as "PID_FMT,
1818 /* We add the new process to the cgroup both in the child (so
1819 * that we can be sure that no user code is ever executed
1820 * outside of the cgroup) and in the parent (so that we can be
1821 * sure that when we kill the cgroup the process will be
1824 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1826 exec_status_start(&command->exec_status, pid);
/* Fills in the non-zero defaults of an execution context: best-effort IO
 * class at priority 0, SCHED_OTHER CPU policy, daemon/info syslog priority
 * with level prefix parsing on, SIGPIPE ignored, and the sentinel values
 * that exec_spawn() treats as "unset" for timer slack ((nsec_t) -1) and
 * personality (0xffffffffUL). */
1832 void exec_context_init(ExecContext *c) {
1836 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1837 c->cpu_sched_policy = SCHED_OTHER;
1838 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1839 c->syslog_level_prefix = true;
1840 c->ignore_sigpipe = true;
1841 c->timer_slack_nsec = (nsec_t) -1;
1842 c->personality = 0xffffffffUL;
/* Releases the dynamically allocated members of an execution context.
 * Strings, string vectors, the rlimit entries, the capability set and the
 * seccomp-related sets are released, and the visible pointer members are
 * reset to NULL after release. The context structure itself is not freed
 * here. */
1845 void exec_context_done(ExecContext *c) {
1850 strv_free(c->environment);
1851 c->environment = NULL;
1853 strv_free(c->environment_files);
1854 c->environment_files = NULL;
1856 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1858 c->rlimit[l] = NULL;
1861 free(c->working_directory);
1862 c->working_directory = NULL;
1863 free(c->root_directory);
1864 c->root_directory = NULL;
1869 free(c->tcpwrap_name);
1870 c->tcpwrap_name = NULL;
1872 free(c->syslog_identifier);
1873 c->syslog_identifier = NULL;
1881 strv_free(c->supplementary_groups);
1882 c->supplementary_groups = NULL;
1887 if (c->capabilities) {
1888 cap_free(c->capabilities);
1889 c->capabilities = NULL;
1892 strv_free(c->read_only_dirs);
1893 c->read_only_dirs = NULL;
1895 strv_free(c->read_write_dirs);
1896 c->read_write_dirs = NULL;
1898 strv_free(c->inaccessible_dirs);
1899 c->inaccessible_dirs = NULL;
1902 CPU_FREE(c->cpuset);
1907 free(c->selinux_context);
1908 c->selinux_context = NULL;
1910 free(c->apparmor_profile);
1911 c->apparmor_profile = NULL;
1913 set_free(c->syscall_filter);
1914 c->syscall_filter = NULL;
1916 set_free(c->syscall_archs);
1917 c->syscall_archs = NULL;
1919 set_free(c->address_families);
1920 c->address_families = NULL;
/* Per-command teardown hook: exec_command_done_array() calls it on every
 * array element and exec_command_free_list() calls it on every list entry.
 * NOTE(review): the body is not visible in this excerpt, so what exactly
 * is released cannot be stated from here. */
1923 void exec_command_done(ExecCommand *c) {
/* Runs exec_command_done() on each of the n ExecCommand elements stored
 * contiguously starting at c. */
1933 void exec_command_done_array(ExecCommand *c, unsigned n) {
1936 for (i = 0; i < n; i++)
1937 exec_command_done(c+i);
/* Tears down the command list headed by c. The visible lines unlink an
 * entry i from the list with LIST_REMOVE() and then run
 * exec_command_done() on it.
 * NOTE(review): the loop that selects i, and any freeing of the entry
 * itself, are on lines missing from this excerpt. */
1940 void exec_command_free_list(ExecCommand *c) {
1944 LIST_REMOVE(command, c, i);
1945 exec_command_done(i);
/* Treats c as an array of n command-list heads and passes each one to
 * exec_command_free_list().
 * NOTE(review): the rest of the loop body is not visible here. */
1950 void exec_command_free_array(ExecCommand **c, unsigned n) {
1953 for (i = 0; i < n; i++) {
1954 exec_command_free_list(c[i]);
/* Reads the environment files listed in c->environment_files.
 *
 * Each entry is expanded with glob(); every matching path is parsed with
 * load_env_file(), filtered with strv_env_clean_log() (which is given the
 * file name for its log output) and merged with the list accumulated so
 * far in r via strv_env_merge().
 *
 * A failing glob() yields -errno, or -EINVAL when errno is zero.
 * NOTE(review): how the ignore flag is derived, when it suppresses that
 * error, and how the result is returned through l are on lines missing
 * from this excerpt. */
1959 int exec_context_load_environment(const ExecContext *c, char ***l) {
1960 char **i, **r = NULL;
1965 STRV_FOREACH(i, c->environment_files) {
1968 bool ignore = false;
/* pglob is released automatically when it goes out of scope */
1970 _cleanup_globfree_ glob_t pglob = {};
1980 if (!path_is_absolute(fn)) {
1988 /* Filename supports globbing, take all matching files */
1990 if (glob(fn, 0, NULL, &pglob) != 0) {
1995 return errno ? -errno : -EINVAL;
1997 count = pglob.gl_pathc;
2005 for (n = 0; n < count; n++) {
2006 k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2014 /* Log invalid environment variables with filename */
2016 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
/* combine what was loaded so far (r) with this file (p) */
2023 m = strv_env_merge(2, r, p);
/* Decides whether the terminal named by tty may be the device behind
 * /dev/console. The name is checked for a /dev/ prefix, compared against
 * the literal console, and finally compared against the name reported by
 * resolve_dev_console().
 * NOTE(review): the statements guarded by the first two checks and the
 * return statements are not visible in this excerpt. */
2039 static bool tty_may_match_dev_console(const char *tty) {
2040 char *active = NULL, *console;
2043 if (startswith(tty, "/dev/"))
2046 /* trivial identity? */
2047 if (streq(tty, "console"))
2050 console = resolve_dev_console(&active);
2051 /* if we could not resolve, assume it may */
2055 /* "tty0" means the active VC, so it may be the same sometimes */
2056 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when both of the following hold:
 *  - the context does something with a terminal: one of tty_reset,
 *    tty_vhangup or tty_vt_disallocate is set, or stdin, stdout or stderr
 *    is configured as a terminal;
 *  - the terminal path of the context may be the same device as
 *    /dev/console, according to tty_may_match_dev_console(). */
2062 bool exec_context_may_touch_console(ExecContext *ec) {
2063 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2064 is_terminal_input(ec->std_input) ||
2065 is_terminal_output(ec->std_output) ||
2066 is_terminal_output(ec->std_error)) &&
2067 tty_may_match_dev_console(tty_path(ec));
/* Writes strings from l to f, each preceded by a single space and with no
 * trailing newline. exec_context_dump() uses it for list-valued settings.
 * NOTE(review): the loop that advances g over l is not visible here. */
2070 static void strv_fprintf(FILE *f, char **l) {
2076 fprintf(f, " %s", *g);
/* Writes a human-readable description of c to f as Key: value lines, each
 * one starting with prefix (which is first passed through strempty()).
 *
 * A fixed group of settings is always printed; most others are guarded by
 * a check and appear only when the corresponding field is set or differs
 * from its default.
 * NOTE(review): many lines of this function, including several of the
 * guarding conditions and fprintf() openers, are missing from this
 * excerpt. */
2079 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2086 prefix = strempty(prefix);
2090 "%sWorkingDirectory: %s\n"
2091 "%sRootDirectory: %s\n"
2092 "%sNonBlocking: %s\n"
2093 "%sPrivateTmp: %s\n"
2094 "%sPrivateNetwork: %s\n"
2095 "%sPrivateDevices: %s\n"
2096 "%sIgnoreSIGPIPE: %s\n",
2098 prefix, c->working_directory ? c->working_directory : "/",
2099 prefix, c->root_directory ? c->root_directory : "/",
2100 prefix, yes_no(c->non_blocking),
2101 prefix, yes_no(c->private_tmp),
2102 prefix, yes_no(c->private_network),
2103 prefix, yes_no(c->private_devices),
2104 prefix, yes_no(c->ignore_sigpipe));
2106 STRV_FOREACH(e, c->environment)
2107 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2109 STRV_FOREACH(e, c->environment_files)
2110 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2112 if (c->tcpwrap_name)
2114 "%sTCPWrapName: %s\n",
2115 prefix, c->tcpwrap_name);
2122 if (c->oom_score_adjust_set)
2124 "%sOOMScoreAdjust: %i\n",
2125 prefix, c->oom_score_adjust);
/* one line per resource limit; only rlim_max of each entry is printed */
2127 for (i = 0; i < RLIM_NLIMITS; i++)
2129 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
2131 if (c->ioprio_set) {
2132 _cleanup_free_ char *class_str = NULL;
2134 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2136 "%sIOSchedulingClass: %s\n"
2137 "%sIOPriority: %i\n",
2138 prefix, strna(class_str),
2139 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2142 if (c->cpu_sched_set) {
2143 _cleanup_free_ char *policy_str = NULL;
2145 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2147 "%sCPUSchedulingPolicy: %s\n"
2148 "%sCPUSchedulingPriority: %i\n"
2149 "%sCPUSchedulingResetOnFork: %s\n",
2150 prefix, strna(policy_str),
2151 prefix, c->cpu_sched_priority,
2152 prefix, yes_no(c->cpu_sched_reset_on_fork));
2156 fprintf(f, "%sCPUAffinity:", prefix);
2157 for (i = 0; i < c->cpuset_ncpus; i++)
2158 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2159 fprintf(f, " %u", i);
2163 if (c->timer_slack_nsec != (nsec_t) -1)
2164 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2167 "%sStandardInput: %s\n"
2168 "%sStandardOutput: %s\n"
2169 "%sStandardError: %s\n",
2170 prefix, exec_input_to_string(c->std_input),
2171 prefix, exec_output_to_string(c->std_output),
2172 prefix, exec_output_to_string(c->std_error));
2178 "%sTTYVHangup: %s\n"
2179 "%sTTYVTDisallocate: %s\n",
2180 prefix, c->tty_path,
2181 prefix, yes_no(c->tty_reset),
2182 prefix, yes_no(c->tty_vhangup),
2183 prefix, yes_no(c->tty_vt_disallocate));
/* syslog facility and level are shown only when stdout or stderr is routed
 * to syslog, kmsg or the journal (with or without the console) */
2185 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2186 c->std_output == EXEC_OUTPUT_KMSG ||
2187 c->std_output == EXEC_OUTPUT_JOURNAL ||
2188 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2189 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2190 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2191 c->std_error == EXEC_OUTPUT_SYSLOG ||
2192 c->std_error == EXEC_OUTPUT_KMSG ||
2193 c->std_error == EXEC_OUTPUT_JOURNAL ||
2194 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2195 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2196 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2198 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
/* split the combined priority: the bits above the low three are the
 * facility, LOG_PRI() extracts the level */
2200 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2201 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2204 "%sSyslogFacility: %s\n"
2205 "%sSyslogLevel: %s\n",
2206 prefix, strna(fac_str),
2207 prefix, strna(lvl_str));
2210 if (c->capabilities) {
2211 _cleanup_cap_free_charp_ char *t;
2213 t = cap_to_text(c->capabilities, NULL);
2215 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* NOTE(review): the format string has no separators between the %s
 * fields, so each flag name has to bring its own leading space. The
 * noroot-locked literal below lacks one and will be printed fused to the
 * preceding flag; this looks like a typo and should be fixed. */
2219 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2221 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2222 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2223 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2224 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2225 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2226 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2228 if (c->capability_bounding_set_drop) {
2230 fprintf(f, "%sCapabilityBoundingSet:", prefix);
/* the field stores the capabilities to drop, so the ones listed are those
 * whose bit is NOT set in the mask */
2232 for (l = 0; l <= cap_last_cap(); l++)
2233 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2234 _cleanup_cap_free_charp_ char *t;
2238 fprintf(f, " %s", t);
2245 fprintf(f, "%sUser: %s\n", prefix, c->user);
2247 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2249 if (strv_length(c->supplementary_groups) > 0) {
2250 fprintf(f, "%sSupplementaryGroups:", prefix);
2251 strv_fprintf(f, c->supplementary_groups);
2256 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2258 if (strv_length(c->read_write_dirs) > 0) {
2259 fprintf(f, "%sReadWriteDirs:", prefix);
2260 strv_fprintf(f, c->read_write_dirs);
2264 if (strv_length(c->read_only_dirs) > 0) {
2265 fprintf(f, "%sReadOnlyDirs:", prefix);
2266 strv_fprintf(f, c->read_only_dirs);
2270 if (strv_length(c->inaccessible_dirs) > 0) {
2271 fprintf(f, "%sInaccessibleDirs:", prefix);
2272 strv_fprintf(f, c->inaccessible_dirs);
2278 "%sUtmpIdentifier: %s\n",
2279 prefix, c->utmp_id);
2281 if (c->selinux_context)
2283 "%sSELinuxContext: %s%s\n",
2284 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2286 if (c->personality != 0xffffffffUL)
2288 "%sPersonality: %s\n",
2289 prefix, strna(personality_to_string(c->personality)));
2291 if (c->syscall_filter) {
2299 "%sSystemCallFilter: ",
2302 if (!c->syscall_whitelist)
2306 SET_FOREACH(id, c->syscall_filter, j) {
2307 _cleanup_free_ char *name = NULL;
/* presumably ids are stored in the set offset by one so that syscall 0
 * is not a NULL pointer; TODO confirm against the code filling the set */
2314 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2315 fputs(strna(name), f);
2322 if (c->syscall_archs) {
2329 "%sSystemCallArchitectures:",
2333 SET_FOREACH(id, c->syscall_archs, j)
2334 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2339 if (c->syscall_errno != 0)
2341 "%sSystemCallErrorNumber: %s\n",
2342 prefix, strna(errno_to_name(c->syscall_errno)));
2344 if (c->apparmor_profile)
2346 "%sAppArmorProfile: %s%s\n",
2347 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* Marks in s that process pid has been started. The visible line fills
 * s->start_timestamp through dual_timestamp_get().
 * NOTE(review): the remaining lines, including how pid is stored, are not
 * visible in this excerpt. */
2350 void exec_status_start(ExecStatus *s, pid_t pid) {
2355 dual_timestamp_get(&s->start_timestamp);
/* Records in s that process pid has exited with the given code and status,
 * then performs exit-time side effects for the context: a utmp dead-process
 * record when a utmp id is configured, and exec_context_tty_reset().
 * NOTE(review): the action taken when s already tracks a different pid,
 * the storing of code and status, and any NULL check on context are on
 * lines missing from this excerpt. */
2358 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* s already refers to some other, non-zero pid */
2361 if (s->pid && s->pid != pid)
2365 dual_timestamp_get(&s->exit_timestamp);
2371 if (context->utmp_id)
2372 utmp_put_dead_process(context->utmp_id, pid, code, status);
2374 exec_context_tty_reset(context);
/* Writes the contents of s to f as prefixed Key: value lines: the PID,
 * the start timestamp when its realtime part is non-zero, and the exit
 * timestamp, exit code and exit status when the exit timestamp is
 * non-zero.
 * NOTE(review): some lines (early checks, fprintf() openers and trailing
 * arguments) are missing from this excerpt. */
2378 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* scratch buffer for format_timestamp(); each fprintf() call below formats
 * at most one timestamp into it, so sharing it is fine */
2379 char buf[FORMAT_TIMESTAMP_MAX];
2391 "%sPID: "PID_FMT"\n",
2394 if (s->start_timestamp.realtime > 0)
2396 "%sStart Timestamp: %s\n",
2397 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2399 if (s->exit_timestamp.realtime > 0)
2401 "%sExit Timestamp: %s\n"
2403 "%sExit Status: %i\n",
2404 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2405 prefix, sigchld_code_to_string(s->code),
/* Builds one newly allocated string out of the argument vector argv.
 * The function makes two passes over argv: one before the buffer of k
 * chars is allocated and one after it. Arguments that contain whitespace
 * are handled by a separate branch in the second pass.
 * NOTE(review): the size computation, the copying and the whitespace
 * branch body (presumably quoting) are on lines missing from this
 * excerpt. exec_command_dump() treats a NULL result as out-of-memory. */
2409 char *exec_command_line(char **argv) {
2417 STRV_FOREACH(a, argv)
2420 if (!(n = new(char, k)))
2424 STRV_FOREACH(a, argv) {
2431 if (strpbrk(*a, WHITESPACE)) {
2442 /* FIXME: this doesn't really handle arguments that have
2443 * spaces and ticks in them */
/* Writes a description of command c to f: its command line, followed by
 * its exec status printed with a deeper prefix.
 * NOTE(review): some lines (declarations, the fprintf() opener, cleanup)
 * are missing from this excerpt. */
2448 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2450 const char *prefix2;
/* the nested prefix is the caller prefix plus a tab; if building it fails
 * the plain prefix is used instead */
2459 p2 = strappend(prefix, "\t");
2460 prefix2 = p2 ? p2 : prefix;
2462 cmd = exec_command_line(c->argv);
/* a NULL command line is reported as the ENOMEM error text */
2465 "%sCommand Line: %s\n",
2466 prefix, cmd ? cmd : strerror(ENOMEM));
2470 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at c to f by calling
 * exec_command_dump() on each entry with the same prefix. The parameter c
 * itself is reused as the iteration cursor. */
2475 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2481 LIST_FOREACH(command, c, c)
2482 exec_command_dump(c, f, prefix);
/* Adds command e to the list whose head pointer is *l. The visible lines
 * locate the current tail and insert e after it, so existing order is
 * kept and e becomes the last entry.
 * NOTE(review): handling of an empty list is on lines missing from this
 * excerpt. */
2485 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2492 /* It's kind of important, that we keep the order here */
2493 LIST_FIND_TAIL(command, *l, end);
2494 LIST_INSERT_AFTER(command, *l, end, e);
/* Configures command c from path and the variadic arguments that follow
 * it. The visible line turns path plus the va_list into a string vector
 * with strv_new_ap().
 * NOTE(review): va_start/va_end, error handling and the assignment into c
 * are on lines missing from this excerpt. */
2499 int exec_command_set(ExecCommand *c, const char *path, ...) {
2507 l = strv_new_ap(path, ap);
/* Allocates a zero-initialized ExecRuntime into *rt and marks both ends of
 * its netns storage socket pair as unset (-1).
 * NOTE(review): callers in this file invoke it before touching *rt even
 * when an object may already exist, so a check that skips allocation for a
 * non-NULL *rt presumably sits on the missing lines; confirm. */
2528 static int exec_runtime_allocate(ExecRuntime **rt) {
2533 *rt = new0(ExecRuntime, 1);
2538 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepares the runtime object *rt for context c. When the context asks for
 * a private network and no storage socket exists yet, a datagram
 * socketpair is created; when it asks for a private tmp and no tmp
 * directory is recorded yet, setup_tmp_dirs() is called with id.
 * NOTE(review): return statements and error paths are on lines missing
 * from this excerpt. */
2543 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
/* neither feature requested */
2553 if (!c->private_network && !c->private_tmp)
2556 r = exec_runtime_allocate(rt);
2560 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2561 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2565 if (c->private_tmp && !(*rt)->tmp_dir) {
2566 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Takes a reference on r. The object must already hold at least one
 * reference, which the assertion enforces.
 * NOTE(review): the increment and the return statement are on lines
 * missing from this excerpt. */
2574 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2576 assert(r->n_ref > 0);
/* Drops a reference on r. Once the count is zero or below, the visible
 * lines free var_tmp_dir and close both ends of the netns storage socket.
 * NOTE(review): the decrement, the freeing of other members and of r
 * itself, and the return value are on lines missing from this excerpt. */
2582 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2587 assert(r->n_ref > 0);
2590 if (r->n_ref <= 0) {
2592 free(r->var_tmp_dir);
2593 close_pipe(r->netns_storage_socket);
/* Writes the state of rt for unit u to f using the keys tmp-dir,
 * var-tmp-dir, netns-socket-0 and netns-socket-1. These are the same keys
 * exec_runtime_deserialize_item() reads back.
 *
 * For each open socket a duplicate is placed in fds with fdset_put_dup()
 * and the number of that duplicate, not of the original fd, is written.
 * NOTE(review): early returns and error checks are on lines missing from
 * this excerpt. */
2600 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2609 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2611 if (rt->var_tmp_dir)
2612 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2614 if (rt->netns_storage_socket[0] >= 0) {
2617 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2621 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2624 if (rt->netns_storage_socket[1] >= 0) {
2627 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2631 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Applies one serialized key/value pair to *rt. Recognized keys are
 * tmp-dir, var-tmp-dir, netns-socket-0 and netns-socket-1, matching what
 * exec_runtime_serialize() writes. Every branch first calls
 * exec_runtime_allocate() so that *rt exists before it is modified.
 *
 * Directory values are duplicated and replace any previous string. Socket
 * values are parsed as an fd number that must be contained in fds; a value
 * that fails this is only logged at debug level.
 * NOTE(review): return statements, allocation-failure handling and the
 * else lines between the visible statements are missing from this
 * excerpt. */
2637 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2644 if (streq(key, "tmp-dir")) {
2647 r = exec_runtime_allocate(rt);
2651 copy = strdup(value);
/* drop the previous value before storing the new copy */
2655 free((*rt)->tmp_dir);
2656 (*rt)->tmp_dir = copy;
2658 } else if (streq(key, "var-tmp-dir")) {
2661 r = exec_runtime_allocate(rt);
2665 copy = strdup(value);
2669 free((*rt)->var_tmp_dir);
2670 (*rt)->var_tmp_dir = copy;
2672 } else if (streq(key, "netns-socket-0")) {
2675 r = exec_runtime_allocate(rt);
/* accept the number only if it parses and that fd is present in fds */
2679 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2680 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
/* close a previously stored socket before replacing it */
2682 if ((*rt)->netns_storage_socket[0] >= 0)
2683 close_nointr_nofail((*rt)->netns_storage_socket[0]);
2685 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2687 } else if (streq(key, "netns-socket-1")) {
2690 r = exec_runtime_allocate(rt);
2694 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2695 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2697 if ((*rt)->netns_storage_socket[1] >= 0)
2698 close_nointr_nofail((*rt)->netns_storage_socket[1]);
2700 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job function handed to asynchronous_job() by exec_runtime_destroy().
 * p is a heap-allocated path; the function takes ownership of it (the
 * _cleanup_free_ attribute frees it when the function returns) and
 * removes the path with rm_rf_dangerous(). */
2708 static void *remove_tmpdir_thread(void *p) {
2709 _cleanup_free_ char *path = p;
2711 rm_rf_dangerous(path, false, true, false);
/* Removes the resources behind rt: the tmp and var-tmp directories are
 * deleted in the background via asynchronous_job(), and the netns storage
 * socket pair is closed.
 * NOTE(review): the check behind the multiple-users comment, the guard
 * around the tmp_dir branch, and the reset of rt->tmp_dir are on lines
 * missing from this excerpt. */
2715 void exec_runtime_destroy(ExecRuntime *rt) {
2719 /* If there are multiple users of this, let's leave the stuff around */
2724 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2725 asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2729 if (rt->var_tmp_dir) {
2730 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2731 asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
/* remove_tmpdir_thread() frees the string it receives, so the pointer is
 * cleared here to give up ownership */
2732 rt->var_tmp_dir = NULL;
2735 close_pipe(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by the enum value.
 * DEFINE_STRING_TABLE_LOOKUP presumably generates exec_input_to_string()
 * (called by exec_context_dump()) and the reverse lookup from this table;
 * confirm against the macro definition. */
2738 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2739 [EXEC_INPUT_NULL] = "null",
2740 [EXEC_INPUT_TTY] = "tty",
2741 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2742 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2743 [EXEC_INPUT_SOCKET] = "socket"
2746 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by the enum value. The
 * AND_CONSOLE variants use the base name with a +console suffix.
 * DEFINE_STRING_TABLE_LOOKUP presumably generates exec_output_to_string()
 * (called by exec_context_dump()) and the reverse lookup from this table;
 * confirm against the macro definition. */
2748 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2749 [EXEC_OUTPUT_INHERIT] = "inherit",
2750 [EXEC_OUTPUT_NULL] = "null",
2751 [EXEC_OUTPUT_TTY] = "tty",
2752 [EXEC_OUTPUT_SYSLOG] = "syslog",
2753 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2754 [EXEC_OUTPUT_KMSG] = "kmsg",
2755 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2756 [EXEC_OUTPUT_JOURNAL] = "journal",
2757 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2758 [EXEC_OUTPUT_SOCKET] = "socket"
2761 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);