1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
74 #include "utmp-wtmp.h"
76 #include "path-util.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
85 #include "apparmor-util.h"
88 #include "seccomp-util.h"
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94 /* This assumes there is a 'tty' group */
97 #define SNDBUF_SIZE (8*1024*1024)
99 static int shift_fds(int fds[], unsigned n_fds) {
100 int start, restart_from;
105 /* Modifies the fds array! (sorts it) */
115 for (i = start; i < (int) n_fds; i++) {
118 /* Already at right index? */
122 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
128 /* Hmm, the fd we wanted isn't free? Then
129 * let's remember that and try again from here */
130 if (nfd != i+3 && restart_from < 0)
134 if (restart_from < 0)
137 start = restart_from;
143 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
152 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
154 for (i = 0; i < n_fds; i++) {
156 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
159 /* We unconditionally drop FD_CLOEXEC from the fds,
160 * since after all we want to pass these fds to our
163 if ((r = fd_cloexec(fds[i], false)) < 0)
170 _pure_ static const char *tty_path(const ExecContext *context) {
173 if (context->tty_path)
174 return context->tty_path;
176 return "/dev/console";
179 static void exec_context_tty_reset(const ExecContext *context) {
182 if (context->tty_vhangup)
183 terminal_vhangup(tty_path(context));
185 if (context->tty_reset)
186 reset_terminal(tty_path(context));
188 if (context->tty_vt_disallocate && context->tty_path)
189 vt_disallocate(context->tty_path);
192 static bool is_terminal_output(ExecOutput o) {
194 o == EXEC_OUTPUT_TTY ||
195 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
200 static int open_null_as(int flags, int nfd) {
205 fd = open("/dev/null", flags|O_NOCTTY);
210 r = dup2(fd, nfd) < 0 ? -errno : nfd;
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220 union sockaddr_union sa = {
221 .un.sun_family = AF_UNIX,
222 .un.sun_path = "/run/systemd/journal/stdout",
226 assert(output < _EXEC_OUTPUT_MAX);
230 fd = socket(AF_UNIX, SOCK_STREAM, 0);
234 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
240 if (shutdown(fd, SHUT_RD) < 0) {
245 fd_inc_sndbuf(fd, SNDBUF_SIZE);
255 context->syslog_identifier ? context->syslog_identifier : ident,
257 context->syslog_priority,
258 !!context->syslog_level_prefix,
259 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261 is_terminal_output(output));
264 r = dup2(fd, nfd) < 0 ? -errno : nfd;
271 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
277 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
281 r = dup2(fd, nfd) < 0 ? -errno : nfd;
289 static bool is_terminal_input(ExecInput i) {
291 i == EXEC_INPUT_TTY ||
292 i == EXEC_INPUT_TTY_FORCE ||
293 i == EXEC_INPUT_TTY_FAIL;
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298 if (is_terminal_input(std_input) && !apply_tty_stdin)
299 return EXEC_INPUT_NULL;
301 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302 return EXEC_INPUT_NULL;
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
309 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310 return EXEC_OUTPUT_INHERIT;
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
320 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324 case EXEC_INPUT_NULL:
325 return open_null_as(O_RDONLY, STDIN_FILENO);
328 case EXEC_INPUT_TTY_FORCE:
329 case EXEC_INPUT_TTY_FAIL: {
332 fd = acquire_terminal(tty_path(context),
333 i == EXEC_INPUT_TTY_FAIL,
334 i == EXEC_INPUT_TTY_FORCE,
340 if (fd != STDIN_FILENO) {
341 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
349 case EXEC_INPUT_SOCKET:
350 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
353 assert_not_reached("Unknown input type");
/* Connects the file descriptor `fileno` (STDOUT_FILENO or STDERR_FILENO) to
 * the destination selected by the context's std_output / std_error settings,
 * after both have been passed through fixup_output() and the input setting
 * through fixup_input().
 *
 * The visible return paths yield `fileno` on success and a negative errno
 * value when dup2() fails. If connecting to the journal socket fails, the
 * failure is logged and /dev/null is opened on `fileno` instead.
 *
 * Any output type not handled by the switch trips assert_not_reached(); the
 * message says "output" because this function deals with output types for
 * both stdout and stderr. */
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
365 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366 o = fixup_output(context->std_output, socket_fd);
368 if (fileno == STDERR_FILENO) {
370 e = fixup_output(context->std_error, socket_fd);
372 /* This expects the input and output are already set up */
374 /* Don't change the stderr file descriptor if we inherit all
375 * the way and are not on a tty */
376 if (e == EXEC_OUTPUT_INHERIT &&
377 o == EXEC_OUTPUT_INHERIT &&
378 i == EXEC_INPUT_NULL &&
379 !is_terminal_input(context->std_input) &&
383 /* Duplicate from stdout if possible */
384 if (e == o || e == EXEC_OUTPUT_INHERIT)
385 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389 } else if (o == EXEC_OUTPUT_INHERIT) {
390 /* If input got downgraded, inherit the original value */
391 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395 if (i != EXEC_INPUT_NULL)
396 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402 /* We need to open /dev/null here anew, to get the right access mode. */
403 return open_null_as(O_WRONLY, fileno);
408 case EXEC_OUTPUT_NULL:
409 return open_null_as(O_WRONLY, fileno);
411 case EXEC_OUTPUT_TTY:
412 if (is_terminal_input(i))
413 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415 /* We don't reset the terminal if this is just about output */
416 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418 case EXEC_OUTPUT_SYSLOG:
419 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420 case EXEC_OUTPUT_KMSG:
421 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422 case EXEC_OUTPUT_JOURNAL:
423 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424 r = connect_logger_as(context, o, ident, unit_id, fileno);
426 log_struct_unit(LOG_CRIT, unit_id,
427 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428 fileno == STDOUT_FILENO ? "out" : "err",
429 unit_id, strerror(-r),
432 r = open_null_as(O_WRONLY, fileno);
436 case EXEC_OUTPUT_SOCKET:
437 assert(socket_fd >= 0);
438 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
441 assert_not_reached("Unknown output type");
445 static int chown_terminal(int fd, uid_t uid) {
450 /* This might fail. What matters are the results. */
451 (void) fchown(fd, uid, -1);
452 (void) fchmod(fd, TTY_MODE);
454 if (fstat(fd, &st) < 0)
457 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
463 static int setup_confirm_stdio(int *_saved_stdin,
464 int *_saved_stdout) {
465 int fd = -1, saved_stdin, saved_stdout = -1, r;
467 assert(_saved_stdin);
468 assert(_saved_stdout);
470 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
474 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475 if (saved_stdout < 0) {
480 fd = acquire_terminal(
485 DEFAULT_CONFIRM_USEC);
491 r = chown_terminal(fd, getuid());
495 if (dup2(fd, STDIN_FILENO) < 0) {
500 if (dup2(fd, STDOUT_FILENO) < 0) {
508 *_saved_stdin = saved_stdin;
509 *_saved_stdout = saved_stdout;
514 safe_close(saved_stdout);
515 safe_close(saved_stdin);
521 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
522 _cleanup_close_ int fd = -1;
527 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
531 va_start(ap, format);
532 vdprintf(fd, format, ap);
538 static int restore_confirm_stdio(int *saved_stdin,
544 assert(saved_stdout);
548 if (*saved_stdin >= 0)
549 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
552 if (*saved_stdout >= 0)
553 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
556 safe_close(*saved_stdin);
557 safe_close(*saved_stdout);
562 static int ask_for_confirmation(char *response, char **argv) {
563 int saved_stdout = -1, saved_stdin = -1, r;
566 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
570 line = exec_command_line(argv);
574 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
577 restore_confirm_stdio(&saved_stdin, &saved_stdout);
582 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
583 bool keep_groups = false;
588 /* Lookup and set GID and supplementary group list. Here too
589 * we avoid NSS lookups for gid=0. */
591 if (context->group || username) {
593 if (context->group) {
594 const char *g = context->group;
596 if ((r = get_group_creds(&g, &gid)) < 0)
600 /* First step, initialize groups from /etc/group */
601 if (username && gid != 0) {
602 if (initgroups(username, gid) < 0)
608 /* Second step, set our gids */
609 if (setresgid(gid, gid, gid) < 0)
613 if (context->supplementary_groups) {
618 /* Final step, initialize any manually set supplementary groups */
619 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
621 if (!(gids = new(gid_t, ngroups_max)))
625 if ((k = getgroups(ngroups_max, gids)) < 0) {
632 STRV_FOREACH(i, context->supplementary_groups) {
635 if (k >= ngroups_max) {
641 r = get_group_creds(&g, gids+k);
650 if (setgroups(k, gids) < 0) {
661 static int enforce_user(const ExecContext *context, uid_t uid) {
664 /* Sets (but doesn't lookup) the uid and make sure we keep the
665 * capabilities while doing so. */
667 if (context->capabilities) {
668 _cleanup_cap_free_ cap_t d = NULL;
669 static const cap_value_t bits[] = {
670 CAP_SETUID, /* Necessary so that we can run setresuid() below */
671 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
674 /* First step: If we need to keep capabilities but
675 * drop privileges we need to make sure we keep our
676 * caps, while we drop privileges. */
678 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
680 if (prctl(PR_GET_SECUREBITS) != sb)
681 if (prctl(PR_SET_SECUREBITS, sb) < 0)
685 /* Second step: set the capabilities. This will reduce
686 * the capabilities to the minimum we need. */
688 d = cap_dup(context->capabilities);
692 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
693 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
696 if (cap_set_proc(d) < 0)
700 /* Third step: actually set the uids */
701 if (setresuid(uid, uid, uid) < 0)
704 /* At this point we should have all necessary capabilities but
705 are otherwise a normal user. However, the caps might have been
706 corrupted due to the setresuid() so we need to clean them up
707 later. This is done outside of this call. */
714 static int null_conv(
716 const struct pam_message **msg,
717 struct pam_response **resp,
720 /* We don't support conversations */
/* Opens a PAM session for service `name` and user `user`, and leaves behind a
 * helper process named "(sd-pam)" that closes the session again once the
 * calling process is gone.
 *
 * Visible sequence: pam_start(), pam_set_item(PAM_TTY, tty), pam_acct_mgmt(),
 * pam_open_session(), pam_getenvlist(). SIGTERM is blocked before the helper
 * is created. The helper closes the passed `fds`, tries to drop to `uid` with
 * setresuid(), asks for SIGTERM on parent death via PR_SET_PDEATHSIG, waits
 * for that signal with sigwait(), and calls pam_close_session() only if its
 * parent has actually changed, followed by pam_end().
 *
 * A setresuid() failure in the helper is logged and otherwise ignored.
 * setresuid() reports its error through errno, so the message uses %m
 * rather than an unrelated local variable.
 *
 * On the failure path the PAM error (or errno) is logged, the session is
 * closed if it had been opened, and the helper, if any, is sent SIGTERM and
 * SIGCONT. PAM error codes are reported to the caller as -EPERM. */
725 static int setup_pam(
731 int fds[], unsigned n_fds) {
733 static const struct pam_conv conv = {
738 pam_handle_t *handle = NULL;
740 int pam_code = PAM_SUCCESS;
743 bool close_session = false;
744 pid_t pam_pid = 0, parent_pid;
751 /* We set up PAM in the parent process, then fork. The child
752 * will then stay around until killed via PR_SET_PDEATHSIG or
753 * systemd via the cgroup logic. It will then remove the PAM
754 * session again. The parent process will exec() the actual
755 * daemon. We do things this way to ensure that the main PID
756 * of the daemon is the one we initially fork()ed. */
758 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
761 pam_code = pam_start(name, user, &conv, &handle);
762 if (pam_code != PAM_SUCCESS) {
768 pam_code = pam_set_item(handle, PAM_TTY, tty);
769 if (pam_code != PAM_SUCCESS)
773 pam_code = pam_acct_mgmt(handle, flags);
774 if (pam_code != PAM_SUCCESS)
777 pam_code = pam_open_session(handle, flags);
778 if (pam_code != PAM_SUCCESS)
781 close_session = true;
783 e = pam_getenvlist(handle);
785 pam_code = PAM_BUF_ERR;
789 /* Block SIGTERM, so that we know that it won't get lost in
791 if (sigemptyset(&ss) < 0 ||
792 sigaddset(&ss, SIGTERM) < 0 ||
793 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
796 parent_pid = getpid();
806 /* The child's job is to reset the PAM session on
809 /* This string must fit in 10 chars (i.e. the length
810 * of "/sbin/init"), to look pretty in /bin/ps */
811 rename_process("(sd-pam)");
813 /* Make sure we don't keep open the passed fds in this
814 child. We assume that otherwise only those fds are
815 open here that have been opened by PAM. */
816 close_many(fds, n_fds);
818 /* Drop privileges - we don't need any to pam_close_session
819 * and this will make PR_SET_PDEATHSIG work in most cases.
820 * If this fails, ignore the error - but expect sd-pam threads
821 * to fail to exit normally */
822 if (setresuid(uid, uid, uid) < 0)
823 log_error("Error: Failed to setresuid() in sd-pam: %m");
825 /* Wait until our parent died. This will only work if
826 * the above setresuid() succeeds, otherwise the kernel
827 * will not allow unprivileged parents to kill their privileged
828 * children this way. We rely on the control groups kill logic
829 * to do the rest for us. */
830 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
833 /* Check if our parent process might already have
835 if (getppid() == parent_pid) {
837 if (sigwait(&ss, &sig) < 0) {
844 assert(sig == SIGTERM);
849 /* If our parent died we'll end the session */
850 if (getppid() != parent_pid) {
851 pam_code = pam_close_session(handle, flags);
852 if (pam_code != PAM_SUCCESS)
859 pam_end(handle, pam_code | flags);
863 /* If the child was forked off successfully it will do all the
864 * cleanups, so forget about the handle here. */
867 /* Unblock SIGTERM again in the parent */
868 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
871 /* We close the log explicitly here, since the PAM modules
872 * might have opened it, but we don't want this fd around. */
881 if (pam_code != PAM_SUCCESS) {
882 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
883 err = -EPERM; /* PAM errors do not map to errno */
885 log_error("PAM failed: %m");
891 pam_code = pam_close_session(handle, flags);
893 pam_end(handle, pam_code | flags);
901 kill(pam_pid, SIGTERM);
902 kill(pam_pid, SIGCONT);
909 static void rename_process_from_path(const char *path) {
910 char process_name[11];
914 /* This resulting string must fit in 10 chars (i.e. the length
915 * of "/sbin/init") to look pretty in /bin/ps */
919 rename_process("(...)");
925 /* The end of the process name is usually more
926 * interesting, since the first bit might just be
932 process_name[0] = '(';
933 memcpy(process_name+1, p, l);
934 process_name[1+l] = ')';
935 process_name[1+l+1] = 0;
937 rename_process(process_name);
942 static int apply_seccomp(ExecContext *c) {
943 uint32_t negative_action, action;
944 scmp_filter_ctx *seccomp;
951 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
953 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
957 if (c->syscall_archs) {
959 SET_FOREACH(id, c->syscall_archs, i) {
960 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
968 r = seccomp_add_secondary_archs(seccomp);
973 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
974 SET_FOREACH(id, c->syscall_filter, i) {
975 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
980 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
984 r = seccomp_load(seccomp);
987 seccomp_release(seccomp);
991 static int apply_address_families(ExecContext *c) {
992 scmp_filter_ctx *seccomp;
998 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1002 r = seccomp_add_secondary_archs(seccomp);
1006 if (c->address_families_whitelist) {
1007 int af, first = 0, last = 0;
1010 /* If this is a whitelist, we first block the address
1011 * families that are out of range and then everything
1012 * that is not in the set. First, we find the lowest
1013 * and highest address family in the set. */
1015 SET_FOREACH(afp, c->address_families, i) {
1016 af = PTR_TO_INT(afp);
1018 if (af <= 0 || af >= af_max())
1021 if (first == 0 || af < first)
1024 if (last == 0 || af > last)
1028 assert((first == 0) == (last == 0));
1032 /* No entries in the valid range, block everything */
1033 r = seccomp_rule_add(
1035 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1043 /* Block everything below the first entry */
1044 r = seccomp_rule_add(
1046 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1049 SCMP_A0(SCMP_CMP_LT, first));
1053 /* Block everything above the last entry */
1054 r = seccomp_rule_add(
1056 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1059 SCMP_A0(SCMP_CMP_GT, last));
1063 /* Block everything between the first and last
1065 for (af = 1; af < af_max(); af++) {
1067 if (set_contains(c->address_families, INT_TO_PTR(af)))
1070 r = seccomp_rule_add(
1072 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1075 SCMP_A0(SCMP_CMP_EQ, af));
1084 /* If this is a blacklist, then generate one rule for
1085 * each address family that are then combined in OR
1088 SET_FOREACH(af, c->address_families, i) {
1090 r = seccomp_rule_add(
1092 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1095 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1101 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1105 r = seccomp_load(seccomp);
1108 seccomp_release(seccomp);
1114 static void do_idle_pipe_dance(int idle_pipe[4]) {
1118 safe_close(idle_pipe[1]);
1119 safe_close(idle_pipe[2]);
1121 if (idle_pipe[0] >= 0) {
1124 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1126 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1127 /* Signal systemd that we are bored and want to continue. */
1128 write(idle_pipe[3], "x", 1);
1130 /* Wait for systemd to react to the signal above. */
1131 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1134 safe_close(idle_pipe[0]);
1138 safe_close(idle_pipe[3]);
1141 static int build_environment(
1144 usec_t watchdog_usec,
1146 const char *username,
1150 _cleanup_strv_free_ char **our_env = NULL;
1157 our_env = new0(char*, 10);
1162 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1164 our_env[n_env++] = x;
1166 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1168 our_env[n_env++] = x;
1171 if (watchdog_usec > 0) {
1172 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1174 our_env[n_env++] = x;
1176 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1178 our_env[n_env++] = x;
1182 x = strappend("HOME=", home);
1185 our_env[n_env++] = x;
1189 x = strappend("LOGNAME=", username);
1192 our_env[n_env++] = x;
1194 x = strappend("USER=", username);
1197 our_env[n_env++] = x;
1201 x = strappend("SHELL=", shell);
1204 our_env[n_env++] = x;
1207 if (is_terminal_input(c->std_input) ||
1208 c->std_output == EXEC_OUTPUT_TTY ||
1209 c->std_error == EXEC_OUTPUT_TTY ||
1212 x = strdup(default_term_for_tty(tty_path(c)));
1215 our_env[n_env++] = x;
1218 our_env[n_env++] = NULL;
1219 assert(n_env <= 10);
1227 int exec_spawn(ExecCommand *command,
1229 ExecContext *context,
1230 int fds[], unsigned n_fds,
1232 bool apply_permissions,
1234 bool apply_tty_stdin,
1236 CGroupControllerMask cgroup_supported,
1237 const char *cgroup_path,
1238 const char *runtime_prefix,
1239 const char *unit_id,
1240 usec_t watchdog_usec,
1242 ExecRuntime *runtime,
1245 _cleanup_strv_free_ char **files_env = NULL;
1254 assert(fds || n_fds <= 0);
1256 if (context->std_input == EXEC_INPUT_SOCKET ||
1257 context->std_output == EXEC_OUTPUT_SOCKET ||
1258 context->std_error == EXEC_OUTPUT_SOCKET) {
1270 r = exec_context_load_environment(context, &files_env);
1272 log_struct_unit(LOG_ERR,
1274 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1281 argv = command->argv;
1283 line = exec_command_line(argv);
1287 log_struct_unit(LOG_DEBUG,
1289 "EXECUTABLE=%s", command->path,
1290 "MESSAGE=About to execute: %s", line,
1299 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1300 const char *username = NULL, *home = NULL, *shell = NULL;
1301 unsigned n_dont_close = 0;
1302 int dont_close[n_fds + 3];
1303 uid_t uid = (uid_t) -1;
1304 gid_t gid = (gid_t) -1;
1310 rename_process_from_path(command->path);
1312 /* We reset exactly these signals, since they are the
1313 * only ones we set to SIG_IGN in the main daemon. All
1314 * others we leave untouched because we set them to
1315 * SIG_DFL or a valid handler initially, both of which
1316 * will be demoted to SIG_DFL. */
1317 default_signals(SIGNALS_CRASH_HANDLER,
1318 SIGNALS_IGNORE, -1);
1320 if (context->ignore_sigpipe)
1321 ignore_signals(SIGPIPE, -1);
1323 assert_se(sigemptyset(&ss) == 0);
1324 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1326 r = EXIT_SIGNAL_MASK;
1331 do_idle_pipe_dance(idle_pipe);
1333 /* Close sockets very early to make sure we don't
1334 * block init reexecution because it cannot bind its
1339 dont_close[n_dont_close++] = socket_fd;
1341 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1342 n_dont_close += n_fds;
1345 if (runtime->netns_storage_socket[0] >= 0)
1346 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1347 if (runtime->netns_storage_socket[1] >= 0)
1348 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1351 err = close_all_fds(dont_close, n_dont_close);
1357 if (!context->same_pgrp)
1364 exec_context_tty_reset(context);
1366 if (confirm_spawn) {
1369 err = ask_for_confirmation(&response, argv);
1370 if (err == -ETIMEDOUT)
1371 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1373 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1374 else if (response == 's') {
1375 write_confirm_message("Skipping execution.\n");
1379 } else if (response == 'n') {
1380 write_confirm_message("Failing execution.\n");
1386 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1387 * must be sure to drop O_NONBLOCK */
1389 fd_nonblock(socket_fd, false);
1391 err = setup_input(context, socket_fd, apply_tty_stdin);
1397 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1403 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1410 err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1417 if (context->oom_score_adjust_set) {
1420 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1423 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1425 r = EXIT_OOM_ADJUST;
1430 if (context->nice_set)
1431 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1437 if (context->cpu_sched_set) {
1438 struct sched_param param = {
1439 .sched_priority = context->cpu_sched_priority,
1442 r = sched_setscheduler(0,
1443 context->cpu_sched_policy |
1444 (context->cpu_sched_reset_on_fork ?
1445 SCHED_RESET_ON_FORK : 0),
1449 r = EXIT_SETSCHEDULER;
1454 if (context->cpuset)
1455 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1457 r = EXIT_CPUAFFINITY;
1461 if (context->ioprio_set)
1462 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1468 if (context->timer_slack_nsec != (nsec_t) -1)
1469 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1471 r = EXIT_TIMERSLACK;
1475 if (context->personality != 0xffffffffUL)
1476 if (personality(context->personality) < 0) {
1478 r = EXIT_PERSONALITY;
1482 if (context->utmp_id)
1483 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1485 if (context->user) {
1486 username = context->user;
1487 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1493 if (is_terminal_input(context->std_input)) {
1494 err = chown_terminal(STDIN_FILENO, uid);
1503 if (cgroup_path && context->user && context->pam_name) {
1504 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1511 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1519 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1522 STRV_FOREACH(rt, context->runtime_directory) {
1523 _cleanup_free_ char *p;
1525 p = strjoin(runtime_prefix, "/", *rt, NULL);
1527 r = EXIT_RUNTIME_DIRECTORY;
1532 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1534 r = EXIT_RUNTIME_DIRECTORY;
1540 if (apply_permissions) {
1541 err = enforce_groups(context, username, gid);
1548 umask(context->umask);
1551 if (apply_permissions && context->pam_name && username) {
1552 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1559 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1560 err = setup_netns(runtime->netns_storage_socket);
1567 if (!strv_isempty(context->read_write_dirs) ||
1568 !strv_isempty(context->read_only_dirs) ||
1569 !strv_isempty(context->inaccessible_dirs) ||
1570 context->mount_flags != 0 ||
1571 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1572 context->private_devices ||
1573 context->protect_system != PROTECT_SYSTEM_NO ||
1574 context->protect_home != PROTECT_HOME_NO) {
1576 char *tmp = NULL, *var = NULL;
1578 /* The runtime struct only contains the parent
1579 * of the private /tmp, which is
1580 * non-accessible to world users. Inside of it
1581 * there's a /tmp that is sticky, and that's
1582 * the one we want to use here. */
1584 if (context->private_tmp && runtime) {
1585 if (runtime->tmp_dir)
1586 tmp = strappenda(runtime->tmp_dir, "/tmp");
1587 if (runtime->var_tmp_dir)
1588 var = strappenda(runtime->var_tmp_dir, "/tmp");
1591 err = setup_namespace(
1592 context->read_write_dirs,
1593 context->read_only_dirs,
1594 context->inaccessible_dirs,
1597 context->private_devices,
1598 context->protect_home,
1599 context->protect_system,
1600 context->mount_flags);
1608 if (context->root_directory)
1609 if (chroot(context->root_directory) < 0) {
1615 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1621 _cleanup_free_ char *d = NULL;
1623 if (asprintf(&d, "%s/%s",
1624 context->root_directory ? context->root_directory : "",
1625 context->working_directory ? context->working_directory : "") < 0) {
1638 /* We repeat the fd closing here, to make sure that
1639 * nothing is leaked from the PAM modules */
1640 err = close_all_fds(fds, n_fds);
1642 err = shift_fds(fds, n_fds);
1644 err = flags_fds(fds, n_fds, context->non_blocking);
1650 if (apply_permissions) {
1652 for (i = 0; i < _RLIMIT_MAX; i++) {
1653 if (!context->rlimit[i])
1656 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1663 if (context->capability_bounding_set_drop) {
1664 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1666 r = EXIT_CAPABILITIES;
1671 if (context->user) {
1672 err = enforce_user(context, uid);
1679 /* PR_GET_SECUREBITS is not privileged, while
1680 * PR_SET_SECUREBITS is. So to suppress
1681 * potential EPERMs we'll try not to call
1682 * PR_SET_SECUREBITS unless necessary. */
1683 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1684 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1686 r = EXIT_SECUREBITS;
1690 if (context->capabilities)
1691 if (cap_set_proc(context->capabilities) < 0) {
1693 r = EXIT_CAPABILITIES;
1697 if (context->no_new_privileges)
1698 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1700 r = EXIT_NO_NEW_PRIVILEGES;
1705 if (context->address_families_whitelist ||
1706 !set_isempty(context->address_families)) {
1707 err = apply_address_families(context);
1709 r = EXIT_ADDRESS_FAMILIES;
1714 if (context->syscall_whitelist ||
1715 !set_isempty(context->syscall_filter) ||
1716 !set_isempty(context->syscall_archs)) {
1717 err = apply_seccomp(context);
1726 if (context->selinux_context && use_selinux()) {
1727 err = setexeccon(context->selinux_context);
1728 if (err < 0 && !context->selinux_context_ignore) {
1729 r = EXIT_SELINUX_CONTEXT;
1735 #ifdef HAVE_APPARMOR
1736 if (context->apparmor_profile && use_apparmor()) {
1737 err = aa_change_onexec(context->apparmor_profile);
1738 if (err < 0 && !context->apparmor_profile_ignore) {
1739 r = EXIT_APPARMOR_PROFILE;
1746 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1752 final_env = strv_env_merge(5,
1755 context->environment,
1765 final_argv = replace_env_argv(argv, final_env);
1772 final_env = strv_env_clean(final_env);
1774 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1775 line = exec_command_line(final_argv);
1778 log_struct_unit(LOG_DEBUG,
1780 "EXECUTABLE=%s", command->path,
1781 "MESSAGE=Executing: %s", line,
1788 execve(command->path, final_argv, final_env);
1795 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1796 "EXECUTABLE=%s", command->path,
1797 "MESSAGE=Failed at step %s spawning %s: %s",
1798 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1799 command->path, strerror(-err),
1808 log_struct_unit(LOG_DEBUG,
1810 "MESSAGE=Forked %s as "PID_FMT,
1814 /* We add the new process to the cgroup both in the child (so
1815 * that we can be sure that no user code is ever executed
1816 * outside of the cgroup) and in the parent (so that we can be
1817 * sure that when we kill the cgroup the process will be
1820 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1822 exec_status_start(&command->exec_status, pid);
1828 void exec_context_init(ExecContext *c) {
1832 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1833 c->cpu_sched_policy = SCHED_OTHER;
1834 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1835 c->syslog_level_prefix = true;
1836 c->ignore_sigpipe = true;
1837 c->timer_slack_nsec = (nsec_t) -1;
1838 c->personality = 0xffffffffUL;
1839 c->runtime_directory_mode = 0755;
/* Releases the dynamically allocated members of an ExecContext. String
 * vectors go through strv_free(), plain strings through free(), the
 * capability set through cap_free() and the sets through set_free();
 * each of those pointers is reset to NULL right after being released.
 * NOTE(review): no NULL reset is visible after CPU_FREE(c->cpuset),
 * unlike every other member here -- confirm whether that is intended. */
1842 void exec_context_done(ExecContext *c) {
1847 strv_free(c->environment);
1848 c->environment = NULL;
1850 strv_free(c->environment_files);
1851 c->environment_files = NULL;
/* Walk all resource limit slots and clear each one. */
1853 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1855 c->rlimit[l] = NULL;
1858 free(c->working_directory);
1859 c->working_directory = NULL;
1860 free(c->root_directory);
1861 c->root_directory = NULL;
1866 free(c->syslog_identifier);
1867 c->syslog_identifier = NULL;
1875 strv_free(c->supplementary_groups);
1876 c->supplementary_groups = NULL;
1881 if (c->capabilities) {
1882 cap_free(c->capabilities);
1883 c->capabilities = NULL;
1886 strv_free(c->read_only_dirs);
1887 c->read_only_dirs = NULL;
1889 strv_free(c->read_write_dirs);
1890 c->read_write_dirs = NULL;
1892 strv_free(c->inaccessible_dirs);
1893 c->inaccessible_dirs = NULL;
1896 CPU_FREE(c->cpuset);
1901 free(c->selinux_context);
1902 c->selinux_context = NULL;
1904 free(c->apparmor_profile);
1905 c->apparmor_profile = NULL;
1907 set_free(c->syscall_filter);
1908 c->syscall_filter = NULL;
1910 set_free(c->syscall_archs);
1911 c->syscall_archs = NULL;
1913 set_free(c->address_families);
1914 c->address_families = NULL;
1916 strv_free(c->runtime_directory);
1917 c->runtime_directory = NULL;
/* Removes the runtime directories of a context. For every entry of
 * c->runtime_directory the path "<runtime_prefix>/<entry>" is built with
 * strjoin() and handed to rm_rf_dangerous(). The removal runs
 * synchronously in the caller, not in a background thread. A NULL
 * runtime_prefix is checked for before the loop. */
1920 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1925 if (!runtime_prefix)
1928 STRV_FOREACH(i, c->runtime_directory) {
1929 _cleanup_free_ char *p;
1931 p = strjoin(runtime_prefix, "/", *i, NULL);
1935 /* We execute this synchronously, since we need to be
1936 * sure this is gone when we start the service
1938 rm_rf_dangerous(p, false, true, false);
/* Per-command cleanup hook: exec_command_done_array() calls it for every
 * array element and exec_command_free_list() for every list entry.
 * NOTE(review): the body is not shown here; presumably it releases the
 * command's owned members -- confirm. */
1944 void exec_command_done(ExecCommand *c) {
/* Runs exec_command_done() on each of the n ExecCommand objects stored
 * contiguously starting at c. */
1954 void exec_command_done_array(ExecCommand *c, unsigned n) {
1957 for (i = 0; i < n; i++)
1958 exec_command_done(c+i);
/* Tears down a linked list of commands headed at c: an entry i is
 * unlinked through the "command" list field and then passed to
 * exec_command_done(). */
1961 void exec_command_free_list(ExecCommand *c) {
1965 LIST_REMOVE(command, c, i);
1966 exec_command_done(i);
/* Treats c as an array of n command-list heads and releases each list
 * with exec_command_free_list(). */
1971 void exec_command_free_array(ExecCommand **c, unsigned n) {
1974 for (i = 0; i < n; i++) {
1975 exec_command_free_list(c[i]);
/* Loads the environment defined by the files in c->environment_files.
 * Each entry is checked for being an absolute path and then expanded
 * with glob(); every matching file is read with load_env_file(), cleaned
 * with strv_env_clean_log() (which receives the file name for its log
 * output) and merged into the accumulated vector r with
 * strv_env_merge(). A glob() failure can end the function with -errno,
 * or -EINVAL when errno is 0.
 * NOTE(review): how the result reaches *l and what the "ignore" flag
 * skips is not visible here -- confirm against the full body. */
1980 int exec_context_load_environment(const ExecContext *c, char ***l) {
1981 char **i, **r = NULL;
1986 STRV_FOREACH(i, c->environment_files) {
1989 bool ignore = false;
1991 _cleanup_globfree_ glob_t pglob = {};
2001 if (!path_is_absolute(fn)) {
2009 /* Filename supports globbing, take all matching files */
2011 if (glob(fn, 0, NULL, &pglob) != 0) {
2016 return errno ? -errno : -EINVAL;
2018 count = pglob.gl_pathc;
2026 for (n = 0; n < count; n++) {
2027 k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2035 /* Log invalid environment variables with filename */
2037 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2044 m = strv_env_merge(2, r, p);
/* Tells whether the terminal named by tty may be the device behind
 * /dev/console. A "/dev/" prefix on tty is checked for first, and the
 * name "console" itself is the trivial case. Otherwise the console is
 * resolved with resolve_dev_console(); tty matches when it equals the
 * resolved name, or when the console resolves to "tty0" (the active VC)
 * and tty is a virtual console. */
2060 static bool tty_may_match_dev_console(const char *tty) {
2061 char *active = NULL, *console;
2064 if (startswith(tty, "/dev/"))
2067 /* trivial identity? */
2068 if (streq(tty, "console"))
2071 console = resolve_dev_console(&active);
2072 /* if we could not resolve, assume it may */
2076 /* "tty0" means the active VC, so it may be the same sometimes */
2077 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when the context both uses a terminal at all (TTY reset,
 * vhangup or VT disallocation requested, or stdin/stdout/stderr
 * connected to a terminal) and that terminal's path may match
 * /dev/console according to tty_may_match_dev_console(). */
2083 bool exec_context_may_touch_console(ExecContext *ec) {
2084 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2085 is_terminal_input(ec->std_input) ||
2086 is_terminal_output(ec->std_output) ||
2087 is_terminal_output(ec->std_error)) &&
2088 tty_may_match_dev_console(tty_path(ec));
/* Prints string list entries to f, each one preceded by a single space.
 * No newline is written here; exec_context_dump() uses it right after
 * printing a "<Name>:" label. */
2091 static void strv_fprintf(FILE *f, char **l) {
2097 fprintf(f, " %s", *g);
/* Writes a human-readable dump of the execution context c to f, one
 * "<prefix><Name>: <value>" line per setting. prefix is passed through
 * strempty() before use. Optional settings (OOM score adjust, I/O and
 * CPU scheduling, timer slack, capabilities, SELinux and AppArmor
 * labels, personality, syscall filter/architectures/error number) are
 * only printed when their guarding field is set. */
2100 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2107 prefix = strempty(prefix);
2111 "%sWorkingDirectory: %s\n"
2112 "%sRootDirectory: %s\n"
2113 "%sNonBlocking: %s\n"
2114 "%sPrivateTmp: %s\n"
2115 "%sPrivateNetwork: %s\n"
2116 "%sPrivateDevices: %s\n"
2117 "%sProtectHome: %s\n"
2118 "%sProtectSystem: %s\n"
2119 "%sIgnoreSIGPIPE: %s\n",
2121 prefix, c->working_directory ? c->working_directory : "/",
2122 prefix, c->root_directory ? c->root_directory : "/",
2123 prefix, yes_no(c->non_blocking),
2124 prefix, yes_no(c->private_tmp),
2125 prefix, yes_no(c->private_network),
2126 prefix, yes_no(c->private_devices),
2127 prefix, protect_home_to_string(c->protect_home),
2128 prefix, protect_system_to_string(c->protect_system),
2129 prefix, yes_no(c->ignore_sigpipe));
2131 STRV_FOREACH(e, c->environment)
2132 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2134 STRV_FOREACH(e, c->environment_files)
2135 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2142 if (c->oom_score_adjust_set)
2144 "%sOOMScoreAdjust: %i\n",
2145 prefix, c->oom_score_adjust);
2147 for (i = 0; i < RLIM_NLIMITS; i++)
2149 fprintf(f, "%s%s: "RLIM_FMT"\n",
2150 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2152 if (c->ioprio_set) {
2153 _cleanup_free_ char *class_str = NULL;
2155 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2157 "%sIOSchedulingClass: %s\n"
2158 "%sIOPriority: %i\n",
2159 prefix, strna(class_str),
2160 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2163 if (c->cpu_sched_set) {
2164 _cleanup_free_ char *policy_str = NULL;
2166 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2168 "%sCPUSchedulingPolicy: %s\n"
2169 "%sCPUSchedulingPriority: %i\n"
2170 "%sCPUSchedulingResetOnFork: %s\n",
2171 prefix, strna(policy_str),
2172 prefix, c->cpu_sched_priority,
2173 prefix, yes_no(c->cpu_sched_reset_on_fork));
2177 fprintf(f, "%sCPUAffinity:", prefix);
2178 for (i = 0; i < c->cpuset_ncpus; i++)
2179 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2180 fprintf(f, " %u", i);
/* (nsec_t) -1 is the value exec_context_init() stores for "not set". */
2184 if (c->timer_slack_nsec != (nsec_t) -1)
2185 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2188 "%sStandardInput: %s\n"
2189 "%sStandardOutput: %s\n"
2190 "%sStandardError: %s\n",
2191 prefix, exec_input_to_string(c->std_input),
2192 prefix, exec_output_to_string(c->std_output),
2193 prefix, exec_output_to_string(c->std_error));
2199 "%sTTYVHangup: %s\n"
2200 "%sTTYVTDisallocate: %s\n",
2201 prefix, c->tty_path,
2202 prefix, yes_no(c->tty_reset),
2203 prefix, yes_no(c->tty_vhangup),
2204 prefix, yes_no(c->tty_vt_disallocate));
/* The syslog facility and level are only shown when stdout or stderr is
 * connected to syslog, kmsg or the journal (with or without console). */
2206 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2207 c->std_output == EXEC_OUTPUT_KMSG ||
2208 c->std_output == EXEC_OUTPUT_JOURNAL ||
2209 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2210 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2211 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2212 c->std_error == EXEC_OUTPUT_SYSLOG ||
2213 c->std_error == EXEC_OUTPUT_KMSG ||
2214 c->std_error == EXEC_OUTPUT_JOURNAL ||
2215 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2216 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2217 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2219 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2221 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2222 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2225 "%sSyslogFacility: %s\n"
2226 "%sSyslogLevel: %s\n",
2227 prefix, strna(fac_str),
2228 prefix, strna(lvl_str));
2231 if (c->capabilities) {
2232 _cleanup_cap_free_charp_ char *t;
2234 t = cap_to_text(c->capabilities, NULL);
2236 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Every flag name starts with a space so that consecutive names stay
 * separated after the "Secure Bits:" label. */
2240 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2242 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2243 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2244 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2245 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2246 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2247 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* The field stores the capabilities to drop, so the bounding set that
 * gets listed is every capability whose bit is NOT set in it. */
2249 if (c->capability_bounding_set_drop) {
2251 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2253 for (l = 0; l <= cap_last_cap(); l++)
2254 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2255 _cleanup_cap_free_charp_ char *t;
2259 fprintf(f, " %s", t);
2266 fprintf(f, "%sUser: %s\n", prefix, c->user);
2268 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2270 if (strv_length(c->supplementary_groups) > 0) {
2271 fprintf(f, "%sSupplementaryGroups:", prefix);
2272 strv_fprintf(f, c->supplementary_groups);
2277 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2279 if (strv_length(c->read_write_dirs) > 0) {
2280 fprintf(f, "%sReadWriteDirs:", prefix);
2281 strv_fprintf(f, c->read_write_dirs);
2285 if (strv_length(c->read_only_dirs) > 0) {
2286 fprintf(f, "%sReadOnlyDirs:", prefix);
2287 strv_fprintf(f, c->read_only_dirs);
2291 if (strv_length(c->inaccessible_dirs) > 0) {
2292 fprintf(f, "%sInaccessibleDirs:", prefix);
2293 strv_fprintf(f, c->inaccessible_dirs);
2299 "%sUtmpIdentifier: %s\n",
2300 prefix, c->utmp_id);
/* A leading "-" is printed when the corresponding *_ignore flag is set. */
2302 if (c->selinux_context)
2304 "%sSELinuxContext: %s%s\n",
2305 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2307 if (c->personality != 0xffffffffUL)
2309 "%sPersonality: %s\n",
2310 prefix, strna(personality_to_string(c->personality)));
2312 if (c->syscall_filter) {
2320 "%sSystemCallFilter: ",
2323 if (!c->syscall_whitelist)
2327 SET_FOREACH(id, c->syscall_filter, j) {
2328 _cleanup_free_ char *name = NULL;
/* Set entries are stored as syscall number + 1, hence the "- 1". */
2335 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2336 fputs(strna(name), f);
2343 if (c->syscall_archs) {
2350 "%sSystemCallArchitectures:",
2354 SET_FOREACH(id, c->syscall_archs, j)
2355 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2360 if (c->syscall_errno != 0)
2362 "%sSystemCallErrorNumber: %s\n",
2363 prefix, strna(errno_to_name(c->syscall_errno)));
2365 if (c->apparmor_profile)
2367 "%sAppArmorProfile: %s%s\n",
2368 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* Marks the start of a process in the status record s by stamping
 * s->start_timestamp with dual_timestamp_get(). exec_spawn() calls this
 * with the pid of the freshly forked child. */
2371 void exec_status_start(ExecStatus *s, pid_t pid) {
2376 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in s. The record is checked
 * for belonging to a different, non-zero pid first; then
 * s->exit_timestamp is stamped. If the context carries a utmp_id, a
 * dead-process utmp entry is written with the exit code and status, and
 * the context's TTY is reset via exec_context_tty_reset(). */
2379 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2382 if (s->pid && s->pid != pid)
2386 dual_timestamp_get(&s->exit_timestamp);
2392 if (context->utmp_id)
2393 utmp_put_dead_process(context->utmp_id, pid, code, status);
2395 exec_context_tty_reset(context);
/* Prints the status record s to f using prefix on every line: the PID,
 * and -- only when their realtime component is greater than zero -- the
 * start timestamp and the exit timestamp. The exit entry also carries
 * the exit code (as text from sigchld_code_to_string()) and the exit
 * status. buf is scratch space for format_timestamp(). */
2399 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2400 char buf[FORMAT_TIMESTAMP_MAX];
2412 "%sPID: "PID_FMT"\n",
2415 if (s->start_timestamp.realtime > 0)
2417 "%sStart Timestamp: %s\n",
2418 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2420 if (s->exit_timestamp.realtime > 0)
2422 "%sExit Timestamp: %s\n"
2424 "%sExit Status: %i\n",
2425 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2426 prefix, sigchld_code_to_string(s->code),
/* Builds one string out of the argument vector argv. A first pass over
 * argv determines the buffer size k, the buffer is allocated with
 * new(char, k), and a second pass copies the arguments. Arguments that
 * contain a WHITESPACE character take a separate branch (presumably
 * quoting -- the branch body is not shown here). As the FIXME below
 * says, arguments containing both spaces and ticks are not handled
 * properly. */
2430 char *exec_command_line(char **argv) {
2438 STRV_FOREACH(a, argv)
2441 if (!(n = new(char, k)))
2445 STRV_FOREACH(a, argv) {
2452 if (strpbrk(*a, WHITESPACE)) {
2463 /* FIXME: this doesn't really handle arguments that have
2464 * spaces and ticks in them */
/* Prints command c to f: its command line followed by its exec_status.
 * The status lines use prefix2, which is prefix with a tab appended; if
 * building that string fails, the plain prefix is used instead. If the
 * command line cannot be built, the ENOMEM error text is printed in its
 * place. */
2469 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2471 const char *prefix2;
2480 p2 = strappend(prefix, "\t");
2481 prefix2 = p2 ? p2 : prefix;
2483 cmd = exec_command_line(c->argv);
2486 "%sCommand Line: %s\n",
2487 prefix, cmd ? cmd : strerror(ENOMEM));
2491 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at c to f by calling
 * exec_command_dump() on each entry, following the "command" links. */
2496 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2502 LIST_FOREACH(command, c, c)
2503 exec_command_dump(c, f, prefix);
/* Adds command e to the list *l. The current tail is looked up first and
 * e is inserted after it, so commands keep the order in which they were
 * added. */
2506 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2513 /* It's kind of important, that we keep the order here */
2514 LIST_FIND_TAIL(command, *l, end);
2515 LIST_INSERT_AFTER(command, *l, end, e);
/* Sets up command c from a path and a variadic list of further
 * arguments: strv_new_ap() turns path plus the va_list into a string
 * vector.
 * NOTE(review): how the vector and path are stored in c, and the return
 * values, are not visible here -- confirm against the full body. */
2520 int exec_command_set(ExecCommand *c, const char *path, ...) {
2528 l = strv_new_ap(path, ap);
/* Allocates one ExecRuntime with new0() and stores it in *rt. Both
 * netns_storage_socket entries are set to -1; the serialization code
 * treats only values >= 0 as open sockets. */
2549 static int exec_runtime_allocate(ExecRuntime **rt) {
2554 *rt = new0(ExecRuntime, 1);
2559 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepares the runtime objects that context c requires. A context with
 * neither private_network nor private_tmp is singled out first.
 * Otherwise an ExecRuntime is allocated and then:
 *  - with private_network and no storage socket yet, an AF_UNIX
 *    SOCK_DGRAM socketpair is created for the network namespace;
 *  - with private_tmp and no tmp_dir yet, setup_tmp_dirs() creates the
 *    tmp and var-tmp directories for the given id. */
2564 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2574 if (!c->private_network && !c->private_tmp)
2577 r = exec_runtime_allocate(rt);
2581 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2582 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2586 if (c->private_tmp && !(*rt)->tmp_dir) {
2587 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Takes a reference on r. The object must already be referenced
 * (n_ref > 0 is asserted).
 * NOTE(review): the counter update and return statement are not shown
 * here; presumably n_ref is incremented and r returned -- confirm. */
2595 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2597 assert(r->n_ref > 0);
/* Drops a reference on r (n_ref > 0 is asserted on entry). Once n_ref is
 * no longer positive, the object's resources are released: var_tmp_dir
 * is freed and both netns storage sockets are closed. */
2603 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2608 assert(r->n_ref > 0);
2611 if (r->n_ref <= 0) {
2613 free(r->var_tmp_dir);
2614 safe_close_pair(r->netns_storage_socket);
/* Serializes the runtime state rt of unit u to f. The directories are
 * written under the keys "tmp-dir" and "var-tmp-dir". For each open
 * netns storage socket (fd >= 0) a duplicate is added to fds with
 * fdset_put_dup(), and the number of that duplicate is written under
 * "netns-socket-0" / "netns-socket-1". These are the keys that
 * exec_runtime_deserialize_item() understands. */
2621 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2630 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2632 if (rt->var_tmp_dir)
2633 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2635 if (rt->netns_storage_socket[0] >= 0) {
2638 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2642 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2645 if (rt->netns_storage_socket[1] >= 0) {
2648 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2652 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Applies one serialized key/value pair to the runtime object *rt. The
 * recognized keys are the ones exec_runtime_serialize() writes:
 *  - "tmp-dir" / "var-tmp-dir": value is duplicated with strdup() and
 *    replaces the previously stored path, which is freed;
 *  - "netns-socket-0" / "netns-socket-1": value is parsed as an fd
 *    number that must be a member of fds. A bad value is only logged at
 *    debug level. A good one is taken out of fds with fdset_remove() and
 *    stored, after the previously stored socket has been closed.
 * Every branch calls exec_runtime_allocate() before touching *rt. */
2658 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2665 if (streq(key, "tmp-dir")) {
2668 r = exec_runtime_allocate(rt);
2672 copy = strdup(value);
2676 free((*rt)->tmp_dir);
2677 (*rt)->tmp_dir = copy;
2679 } else if (streq(key, "var-tmp-dir")) {
2682 r = exec_runtime_allocate(rt);
2686 copy = strdup(value);
2690 free((*rt)->var_tmp_dir);
2691 (*rt)->var_tmp_dir = copy;
2693 } else if (streq(key, "netns-socket-0")) {
2696 r = exec_runtime_allocate(rt);
2700 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2701 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2703 safe_close((*rt)->netns_storage_socket[0]);
2704 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2706 } else if (streq(key, "netns-socket-1")) {
2709 r = exec_runtime_allocate(rt);
2713 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2714 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2716 safe_close((*rt)->netns_storage_socket[1]);
2717 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job body handed to asynchronous_job() by exec_runtime_destroy(). p is
 * the path string of the directory to delete; it is bound to a
 * _cleanup_free_ variable, so this function owns the string, and the
 * directory is removed with rm_rf_dangerous(). */
2725 static void *remove_tmpdir_thread(void *p) {
2726 _cleanup_free_ char *path = p;
2728 rm_rf_dangerous(path, false, true, false);
/* Destroys the on-disk and socket resources of rt, unless other users
 * still hold the object. The tmp and var-tmp directories are removed by
 * remove_tmpdir_thread() jobs started through asynchronous_job(), with
 * the path string passed as the job argument; a failure to start the
 * job is logged as a warning. Finally the netns storage sockets are
 * closed.
 * NOTE(review): free(rt->var_tmp_dir) presumably runs only on the
 * failure path, since the job owns the string otherwise; the guarding
 * lines are not visible here -- confirm. */
2732 void exec_runtime_destroy(ExecRuntime *rt) {
2738 /* If there are multiple users of this, let's leave the stuff around */
2743 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2745 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2747 log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2754 if (rt->var_tmp_dir) {
2755 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2757 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2759 log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2760 free(rt->var_tmp_dir);
2763 rt->var_tmp_dir = NULL;
2766 safe_close_pair(rt->netns_storage_socket);
/* Textual names of the ExecInput values, indexed by enum value.
 * DEFINE_STRING_TABLE_LOOKUP presumably generates the lookup helpers
 * from this table, among them the exec_input_to_string() used by
 * exec_context_dump() -- confirm against the macro definition. */
2769 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2770 [EXEC_INPUT_NULL] = "null",
2771 [EXEC_INPUT_TTY] = "tty",
2772 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2773 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2774 [EXEC_INPUT_SOCKET] = "socket"
2777 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Textual names of the ExecOutput values, indexed by enum value. The
 * "+console" variants name the *_AND_CONSOLE outputs.
 * DEFINE_STRING_TABLE_LOOKUP presumably generates the lookup helpers
 * from this table, among them the exec_output_to_string() used by
 * exec_context_dump() -- confirm against the macro definition. */
2779 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2780 [EXEC_OUTPUT_INHERIT] = "inherit",
2781 [EXEC_OUTPUT_NULL] = "null",
2782 [EXEC_OUTPUT_TTY] = "tty",
2783 [EXEC_OUTPUT_SYSLOG] = "syslog",
2784 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2785 [EXEC_OUTPUT_KMSG] = "kmsg",
2786 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2787 [EXEC_OUTPUT_JOURNAL] = "journal",
2788 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2789 [EXEC_OUTPUT_SOCKET] = "socket"
2792 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);