1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
73 #include "exit-status.h"
75 #include "utmp-wtmp.h"
77 #include "path-util.h"
82 #include "selinux-util.h"
83 #include "errno-list.h"
86 #include "apparmor-util.h"
89 #include "seccomp-util.h"
/* Timeouts used by do_idle_pipe_dance(): the first bounds the initial wait
 * on the idle pipe, the second bounds the follow-up wait after signalling. */
92 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
93 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
95 /* This assumes there is a 'tty' group */
/* Send buffer size requested for the journal socket in connect_logger_as(). */
98 #define SNDBUF_SIZE (8*1024*1024)
/* Rearranges the passed file descriptors so that entry i is duplicated to
 * descriptor number i+3 (using F_DUPFD). When the wanted number is not free
 * the index is remembered and the pass is restarted from there. The fds
 * array is modified in place. */
100 static int shift_fds(int fds[], unsigned n_fds) {
101 int start, restart_from;
106 /* Modifies the fds array! (sorts it) */
116 for (i = start; i < (int) n_fds; i++) {
119 /* Already at right index? */
123 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
129 /* Hmm, the fd we wanted isn't free? Then
130 * let's remember that and try again from here */
131 if (nfd != i+3 && restart_from < 0)
135 if (restart_from < 0)
138 start = restart_from;
/* For every descriptor in fds: applies the requested O_NONBLOCK state via
 * fd_nonblock() and clears FD_CLOEXEC via fd_cloexec(). A negative result
 * from either helper is treated as failure. */
144 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
153 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
155 for (i = 0; i < n_fds; i++) {
157 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
160 /* We unconditionally drop FD_CLOEXEC from the fds,
161 * since after all we want to pass these fds to our
164 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, falling back to
 * /dev/console when none is set. The returned string is not a copy. */
171 _pure_ static const char *tty_path(const ExecContext *context) {
174 if (context->tty_path)
175 return context->tty_path;
177 return "/dev/console";
/* Runs the terminal cleanups enabled in the context: vhangup and reset act
 * on tty_path(context); VT disallocation is only done when an explicit
 * tty_path is configured. Results of the helpers are not inspected here. */
180 static void exec_context_tty_reset(const ExecContext *context) {
183 if (context->tty_vhangup)
184 terminal_vhangup(tty_path(context));
186 if (context->tty_reset)
187 reset_terminal(tty_path(context));
189 if (context->tty_vt_disallocate && context->tty_path)
190 vt_disallocate(context->tty_path);
/* True when the output mode is EXEC_OUTPUT_TTY or one of the
 * *_AND_CONSOLE variants compared below. */
193 static bool is_terminal_output(ExecOutput o) {
195 o == EXEC_OUTPUT_TTY ||
196 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
198 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given flags (O_NOCTTY is always added) and
 * duplicates it onto descriptor nfd. r becomes nfd on success or a
 * negative errno value when dup2() fails. */
201 static int open_null_as(int flags, int nfd) {
206 fd = open("/dev/null", flags|O_NOCTTY);
211 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Connects a stream socket to /run/systemd/journal/stdout, shuts down its
 * read side, asks for a larger send buffer and finally duplicates the
 * socket onto descriptor nfd. The identifier, priority and boolean flags
 * listed near the end are presumably the stream header sent to the journal;
 * the call that transmits them is on lines not shown here. */
219 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
221 union sockaddr_union sa = {
222 .un.sun_family = AF_UNIX,
223 .un.sun_path = "/run/systemd/journal/stdout",
227 assert(output < _EXEC_OUTPUT_MAX);
231 fd = socket(AF_UNIX, SOCK_STREAM, 0);
235 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
241 if (shutdown(fd, SHUT_RD) < 0) {
246 fd_inc_sndbuf(fd, SNDBUF_SIZE);
256 context->syslog_identifier ? context->syslog_identifier : ident,
258 context->syslog_priority,
259 !!context->syslog_level_prefix,
260 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
261 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
262 is_terminal_output(output));
265 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * duplicates it onto descriptor nfd; r is nfd on success or a negative
 * errno value when dup2() fails. */
272 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
278 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
282 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* True when the input mode is one of the three TTY variants. */
290 static bool is_terminal_input(ExecInput i) {
292 i == EXEC_INPUT_TTY ||
293 i == EXEC_INPUT_TTY_FORCE ||
294 i == EXEC_INPUT_TTY_FAIL;
/* Computes the effective input mode: downgrades to EXEC_INPUT_NULL when a
 * terminal input is configured but apply_tty_stdin is false, or when socket
 * input is configured without a valid socket_fd. */
297 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
299 if (is_terminal_input(std_input) && !apply_tty_stdin)
300 return EXEC_INPUT_NULL;
302 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
303 return EXEC_INPUT_NULL;
/* Computes the effective output mode: socket output without a valid
 * socket_fd is downgraded to EXEC_OUTPUT_INHERIT. */
308 static int fixup_output(ExecOutput std_output, int socket_fd) {
310 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
311 return EXEC_OUTPUT_INHERIT;
/* Installs standard input according to the effective input mode (see
 * fixup_input()): /dev/null, an acquired terminal, or the passed socket.
 * The visible return paths yield STDIN_FILENO on success or a negative
 * errno value when dup2() fails. */
316 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
321 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
325 case EXEC_INPUT_NULL:
326 return open_null_as(O_RDONLY, STDIN_FILENO);
329 case EXEC_INPUT_TTY_FORCE:
330 case EXEC_INPUT_TTY_FAIL: {
333 fd = acquire_terminal(tty_path(context),
334 i == EXEC_INPUT_TTY_FAIL,
335 i == EXEC_INPUT_TTY_FORCE,
341 if (fd != STDIN_FILENO) {
342 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
350 case EXEC_INPUT_SOCKET:
351 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
354 assert_not_reached("Unknown input type");
/* Installs the descriptor given by fileno (stdout or stderr) according to
 * the effective input/output modes. For stderr the stdout descriptor is
 * duplicated when both use the same mode or stderr inherits. An inherited
 * stdout follows stdin unless stdin is /dev/null. The logging modes connect
 * to the journal via connect_logger_as(); the visible code logs a failure
 * of that connection and then opens /dev/null instead. */
358 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
366 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
367 o = fixup_output(context->std_output, socket_fd);
369 if (fileno == STDERR_FILENO) {
371 e = fixup_output(context->std_error, socket_fd);
373 /* This expects the input and output are already set up */
375 /* Don't change the stderr file descriptor if we inherit all
376 * the way and are not on a tty */
377 if (e == EXEC_OUTPUT_INHERIT &&
378 o == EXEC_OUTPUT_INHERIT &&
379 i == EXEC_INPUT_NULL &&
380 !is_terminal_input(context->std_input) &&
384 /* Duplicate from stdout if possible */
385 if (e == o || e == EXEC_OUTPUT_INHERIT)
386 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
390 } else if (o == EXEC_OUTPUT_INHERIT) {
391 /* If input got downgraded, inherit the original value */
392 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
393 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
395 /* If the input is connected to anything that's not a /dev/null, inherit that... */
396 if (i != EXEC_INPUT_NULL)
397 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
399 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
403 /* We need to open /dev/null here anew, to get the right access mode. */
404 return open_null_as(O_WRONLY, fileno);
409 case EXEC_OUTPUT_NULL:
410 return open_null_as(O_WRONLY, fileno);
412 case EXEC_OUTPUT_TTY:
413 if (is_terminal_input(i))
414 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
416 /* We don't reset the terminal if this is just about output */
417 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
419 case EXEC_OUTPUT_SYSLOG:
420 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421 case EXEC_OUTPUT_KMSG:
422 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423 case EXEC_OUTPUT_JOURNAL:
424 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425 r = connect_logger_as(context, o, ident, unit_id, fileno);
427 log_struct_unit(LOG_CRIT, unit_id,
428 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
429 fileno == STDOUT_FILENO ? "out" : "err",
430 unit_id, strerror(-r),
433 r = open_null_as(O_WRONLY, fileno);
437 case EXEC_OUTPUT_SOCKET:
438 assert(socket_fd >= 0);
439 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
442 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind fd to uid with mode TTY_MODE. The
 * fchown()/fchmod() results are deliberately ignored; the outcome is
 * checked afterwards by comparing the fstat() data with the wanted owner
 * and permission bits. */
446 static int chown_terminal(int fd, uid_t uid) {
451 /* This might fail. What matters are the results. */
452 (void) fchown(fd, uid, -1);
453 (void) fchmod(fd, TTY_MODE);
455 if (fstat(fd, &st) < 0)
458 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Prepares stdin/stdout for the confirmation prompt: saves duplicates of
 * the current stdin and stdout (at descriptor numbers >= 3), acquires a
 * terminal, adjusts its ownership for the current uid and installs it as
 * stdin and stdout. On success the saved descriptors are handed back via
 * the two out parameters; the trailing safe_close() calls presumably belong
 * to the failure path, which is only partly visible here. */
464 static int setup_confirm_stdio(int *_saved_stdin,
465 int *_saved_stdout) {
466 int fd = -1, saved_stdin, saved_stdout = -1, r;
468 assert(_saved_stdin);
469 assert(_saved_stdout);
471 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
475 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
476 if (saved_stdout < 0) {
481 fd = acquire_terminal(
486 DEFAULT_CONFIRM_USEC);
492 r = chown_terminal(fd, getuid());
496 if (dup2(fd, STDIN_FILENO) < 0) {
501 if (dup2(fd, STDOUT_FILENO) < 0) {
509 *_saved_stdin = saved_stdin;
510 *_saved_stdout = saved_stdout;
515 safe_close(saved_stdout);
516 safe_close(saved_stdin);
/* Writes a printf-style message to /dev/console. The console is opened
 * write-only with O_NOCTTY and O_CLOEXEC, and the descriptor is closed
 * automatically when the function returns (_cleanup_close_). */
522 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
523 _cleanup_close_ int fd = -1;
528 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
532 va_start(ap, format);
533 vdprintf(fd, format, ap);
/* Undoes setup_confirm_stdio(): duplicates the saved descriptors back onto
 * stdin and stdout (only those that are >= 0) and then closes the saved
 * copies. */
539 static int restore_confirm_stdio(int *saved_stdin,
545 assert(saved_stdout);
549 if (*saved_stdin >= 0)
550 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
553 if (*saved_stdout >= 0)
554 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
557 safe_close(*saved_stdin);
558 safe_close(*saved_stdout);
/* Asks on the terminal whether the command line built from argv should be
 * executed. The accepted answers are the characters y, n and s; the chosen
 * one is stored in *response. stdin/stdout are switched to the terminal for
 * the question and restored afterwards. */
563 static int ask_for_confirmation(char *response, char **argv) {
564 int saved_stdout = -1, saved_stdin = -1, r;
567 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
571 line = exec_command_line(argv);
575 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
578 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group identity for the process to be executed: resolves the
 * configured group (if any), initializes the supplementary groups of
 * username with initgroups(), sets real/effective/saved GID and finally
 * appends the manually configured supplementary groups to the list obtained
 * from getgroups() before installing it with setgroups(). */
583 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
584 bool keep_groups = false;
589 /* Lookup and set GID and supplementary group list. Here too
590 * we avoid NSS lookups for gid=0. */
592 if (context->group || username) {
594 if (context->group) {
595 const char *g = context->group;
597 if ((r = get_group_creds(&g, &gid)) < 0)
601 /* First step, initialize groups from /etc/group */
602 if (username && gid != 0) {
603 if (initgroups(username, gid) < 0)
609 /* Second step, set our gids */
610 if (setresgid(gid, gid, gid) < 0)
614 if (context->supplementary_groups) {
619 /* Final step, initialize any manually set supplementary groups */
620 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
622 if (!(gids = new(gid_t, ngroups_max)))
626 if ((k = getgroups(ngroups_max, gids)) < 0) {
633 STRV_FOREACH(i, context->supplementary_groups) {
636 if (k >= ngroups_max) {
642 r = get_group_creds(&g, gids+k);
651 if (setgroups(k, gids) < 0) {
/* Switches to the given uid. When the context carries a capability set,
 * SECURE_KEEP_CAPS is enabled first and a copy of that set, extended by
 * CAP_SETUID and CAP_SETPCAP, is installed so that the capabilities survive
 * the setresuid() call. */
662 static int enforce_user(const ExecContext *context, uid_t uid) {
665 /* Sets (but doesn't lookup) the uid and make sure we keep the
666 * capabilities while doing so. */
668 if (context->capabilities) {
669 _cleanup_cap_free_ cap_t d = NULL;
670 static const cap_value_t bits[] = {
671 CAP_SETUID, /* Necessary so that we can run setresuid() below */
672 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
675 /* First step: If we need to keep capabilities but
676 * drop privileges we need to make sure we keep our
677 * caps, while we drop privileges. */
679 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
681 if (prctl(PR_GET_SECUREBITS) != sb)
682 if (prctl(PR_SET_SECUREBITS, sb) < 0)
686 /* Second step: set the capabilities. This will reduce
687 * the capabilities to the minimum we need. */
689 d = cap_dup(context->capabilities);
693 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
694 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
697 if (cap_set_proc(d) < 0)
701 /* Third step: actually set the uids */
702 if (setresuid(uid, uid, uid) < 0)
705 /* At this point we should have all necessary capabilities but
706 are otherwise a normal user. However, the caps might have been
707 corrupted by the setresuid(), so we need to clean them up
708 later. This is done outside of this call. */
/* PAM conversation callback. As the comment in the body states,
 * conversations are not supported; the value returned to PAM is on a line
 * not shown here. */
715 static int null_conv(
717 const struct pam_message **msg,
718 struct pam_response **resp,
721 /* We don't support conversations */
/* Opens a PAM session (pam_start, PAM_TTY item, account management,
 * session open), collects the PAM environment list and then sets up a
 * helper process named (sd-pam). The helper closes the passed fds, tries
 * to drop to uid, requests SIGTERM on parent death and waits for it; once
 * the parent is gone it closes the PAM session and ends the handle. The
 * trailing part of the function is the failure path: it logs the PAM or
 * errno error, tears the session down and signals the helper. */
726 static int setup_pam(
732 int fds[], unsigned n_fds) {
734 static const struct pam_conv conv = {
739 pam_handle_t *handle = NULL;
741 int pam_code = PAM_SUCCESS;
744 bool close_session = false;
745 pid_t pam_pid = 0, parent_pid;
752 /* We set up PAM in the parent process, then fork. The child
753 * will then stay around until killed via PR_SET_PDEATHSIG or
754 * systemd via the cgroup logic. It will then remove the PAM
755 * session again. The parent process will exec() the actual
756 * daemon. We do things this way to ensure that the main PID
757 * of the daemon is the one we initially fork()ed. */
759 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
762 pam_code = pam_start(name, user, &conv, &handle);
763 if (pam_code != PAM_SUCCESS) {
769 pam_code = pam_set_item(handle, PAM_TTY, tty);
770 if (pam_code != PAM_SUCCESS)
774 pam_code = pam_acct_mgmt(handle, flags);
775 if (pam_code != PAM_SUCCESS)
778 pam_code = pam_open_session(handle, flags);
779 if (pam_code != PAM_SUCCESS)
782 close_session = true;
784 e = pam_getenvlist(handle);
786 pam_code = PAM_BUF_ERR;
790 /* Block SIGTERM, so that we know that it won't get lost in
792 if (sigemptyset(&ss) < 0 ||
793 sigaddset(&ss, SIGTERM) < 0 ||
794 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
797 parent_pid = getpid();
807 /* The child's job is to reset the PAM session on
810 /* This string must fit in 10 chars (i.e. the length
811 * of "/sbin/init"), to look pretty in /bin/ps */
812 rename_process("(sd-pam)");
814 /* Make sure we don't keep open the passed fds in this
815 child. We assume that otherwise only those fds are
816 open here that have been opened by PAM. */
817 close_many(fds, n_fds);
819 /* Drop privileges - we don't need any to pam_close_session
820 * and this will make PR_SET_PDEATHSIG work in most cases.
821 * If this fails, ignore the error - but expect sd-pam threads
822 * to fail to exit normally.
 * NOTE(review): setresuid() reports its failure through errno, yet the
 * message below formats strerror(-r); no visible line assigns r from this
 * call, so the logged reason is probably wrong - confirm and fix. */
823 if (setresuid(uid, uid, uid) < 0)
824 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
826 /* Wait until our parent died. This will only work if
827 * the above setresuid() succeeds, otherwise the kernel
828 * will not allow unprivileged parents to kill their privileged
829 * children this way. We rely on the control groups kill logic
830 * to do the rest for us. */
831 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
834 /* Check if our parent process might already have
836 if (getppid() == parent_pid) {
838 if (sigwait(&ss, &sig) < 0) {
845 assert(sig == SIGTERM);
850 /* If our parent died we'll end the session */
851 if (getppid() != parent_pid) {
852 pam_code = pam_close_session(handle, flags);
853 if (pam_code != PAM_SUCCESS)
860 pam_end(handle, pam_code | flags);
864 /* If the child was forked off successfully it will do all the
865 * cleanups, so forget about the handle here. */
868 /* Unblock SIGTERM again in the parent */
869 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
872 /* We close the log explicitly here, since the PAM modules
873 * might have opened it, but we don't want this fd around. */
882 if (pam_code != PAM_SUCCESS) {
883 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
884 err = -EPERM; /* PAM errors do not map to errno */
886 log_error("PAM failed: %m");
892 pam_code = pam_close_session(handle, flags);
894 pam_end(handle, pam_code | flags);
902 kill(pam_pid, SIGTERM);
903 kill(pam_pid, SIGCONT);
/* Renames the current process to a parenthesized name derived from path,
 * falling back to the literal (...) in one case. The name is assembled in
 * an 11 byte buffer as an opening parenthesis, l bytes starting at p, a
 * closing parenthesis and the terminating zero byte.
 * NOTE(review): this requires l <= 8; the computation of p and l is on
 * lines not shown here - confirm the cap. */
910 static void rename_process_from_path(const char *path) {
911 char process_name[11];
915 /* This resulting string must fit in 10 chars (i.e. the length
916 * of "/sbin/init") to look pretty in /bin/ps */
920 rename_process("(...)");
926 /* The end of the process name is usually more
927 * interesting, since the first bit might just be
933 process_name[0] = '(';
934 memcpy(process_name+1, p, l);
935 process_name[1+l] = ')';
936 process_name[1+l+1] = 0;
938 rename_process(process_name);
/* Builds and loads a seccomp system call filter from the context. The
 * negative action is SCMP_ACT_KILL when syscall_errno is 0 and
 * SCMP_ACT_ERRNO(syscall_errno) otherwise. In whitelist mode the filter
 * defaults to the negative action and the listed calls are allowed; in the
 * other mode the default is allow and the listed calls get the negative
 * action. Architectures come from syscall_archs or, as the visible code
 * suggests, from seccomp_add_secondary_archs() otherwise. The filter
 * attribute SCMP_FLTATR_CTL_NNP is set to 0 before loading, and the filter
 * context is released at the end. */
943 static int apply_seccomp(ExecContext *c) {
944 uint32_t negative_action, action;
945 scmp_filter_ctx *seccomp;
952 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
954 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
958 if (c->syscall_archs) {
960 SET_FOREACH(id, c->syscall_archs, i) {
961 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
969 r = seccomp_add_secondary_archs(seccomp);
974 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
975 SET_FOREACH(id, c->syscall_filter, i) {
976 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
981 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
985 r = seccomp_load(seccomp);
988 seccomp_release(seccomp);
/* Builds and loads a seccomp filter restricting address families. The
 * filter defaults to allow; every blocking rule uses
 * SCMP_ACT_ERRNO(EPROTONOSUPPORT) and compares the first system call
 * argument (SCMP_A0). In whitelist mode everything below the lowest and
 * above the highest listed family is blocked, plus every unlisted family in
 * the range 1..af_max()-1; in blacklist mode one equality rule is added per
 * listed family. The system call the rules attach to is on lines not shown
 * here. */
992 static int apply_address_families(ExecContext *c) {
993 scmp_filter_ctx *seccomp;
999 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1003 r = seccomp_add_secondary_archs(seccomp);
1007 if (c->address_families_whitelist) {
1008 int af, first = 0, last = 0;
1011 /* If this is a whitelist, we first block the address
1012 * families that are out of range and then everything
1013 * that is not in the set. First, we find the lowest
1014 * and highest address family in the set. */
1016 SET_FOREACH(afp, c->address_families, i) {
1017 af = PTR_TO_INT(afp);
1019 if (af <= 0 || af >= af_max())
1022 if (first == 0 || af < first)
1025 if (last == 0 || af > last)
1029 assert((first == 0) == (last == 0));
1033 /* No entries in the valid range, block everything */
1034 r = seccomp_rule_add(
1036 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1044 /* Block everything below the first entry */
1045 r = seccomp_rule_add(
1047 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1050 SCMP_A0(SCMP_CMP_LT, first));
1054 /* Block everything above the last entry */
1055 r = seccomp_rule_add(
1057 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1060 SCMP_A0(SCMP_CMP_GT, last));
1064 /* Block everything between the first and last
1066 for (af = 1; af < af_max(); af++) {
1068 if (set_contains(c->address_families, INT_TO_PTR(af)))
1071 r = seccomp_rule_add(
1073 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076 SCMP_A0(SCMP_CMP_EQ, af));
1085 /* If this is a blacklist, then generate one rule for
1086 * each address family that are then combined in OR
1089 SET_FOREACH(af, c->address_families, i) {
1091 r = seccomp_rule_add(
1093 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1096 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1102 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1106 r = seccomp_load(seccomp);
1109 seccomp_release(seccomp);
/* Waits on the idle pipe before continuing. Entries 1 and 2 are closed
 * right away. If entry 0 is valid the function waits up to
 * IDLE_TIMEOUT_USEC for POLLHUP on it; on timeout, and if entry 3 is valid,
 * it writes one byte to entry 3 and waits up to IDLE_TIMEOUT2_USEC more.
 * Entries 0 and 3 are closed afterwards. */
1115 static void do_idle_pipe_dance(int idle_pipe[4]) {
1119 safe_close(idle_pipe[1]);
1120 safe_close(idle_pipe[2]);
1122 if (idle_pipe[0] >= 0) {
1125 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1127 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1128 /* Signal systemd that we are bored and want to continue. The result of this write() is not checked. */
1129 write(idle_pipe[3], "x", 1);
1131 /* Wait for systemd to react to the signal above. */
1132 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1135 safe_close(idle_pipe[0]);
1139 safe_close(idle_pipe[3]);
/* Assembles the environment entries that are generated for the child:
 * LISTEN_PID and LISTEN_FDS, WATCHDOG_PID and WATCHDOG_USEC (when
 * watchdog_usec > 0), HOME, LOGNAME, USER, SHELL and a terminal type entry
 * derived from default_term_for_tty(). The array is allocated with 10
 * zeroed slots, which matches the at most 9 entries above plus the
 * terminating NULL.
 * NOTE(review): the assert on n_env runs only after all entries have been
 * stored, so it documents the limit rather than guarding the writes. */
1142 static int build_environment(
1145 usec_t watchdog_usec,
1147 const char *username,
1151 _cleanup_strv_free_ char **our_env = NULL;
1158 our_env = new0(char*, 10);
1163 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1165 our_env[n_env++] = x;
1167 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1169 our_env[n_env++] = x;
1172 if (watchdog_usec > 0) {
1173 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1175 our_env[n_env++] = x;
1177 if (asprintf(&x, "WATCHDOG_USEC=%llu", (unsigned long long) watchdog_usec) < 0)
1179 our_env[n_env++] = x;
1183 x = strappend("HOME=", home);
1186 our_env[n_env++] = x;
1190 x = strappend("LOGNAME=", username);
1193 our_env[n_env++] = x;
1195 x = strappend("USER=", username);
1198 our_env[n_env++] = x;
1202 x = strappend("SHELL=", shell);
1205 our_env[n_env++] = x;
1208 if (is_terminal_input(c->std_input) ||
1209 c->std_output == EXEC_OUTPUT_TTY ||
1210 c->std_error == EXEC_OUTPUT_TTY ||
1213 x = strdup(default_term_for_tty(tty_path(c)));
1216 our_env[n_env++] = x;
1219 our_env[n_env++] = NULL;
1220 assert(n_env <= 10);
/* Spawns the given command according to the execution context (the fork()
 * call itself is on a line not shown here). Visible child-side steps, in
 * order: reset signal dispositions and the signal mask, wait on the idle
 * pipe, close all fds except the ones to keep, optional tcpwrap check,
 * terminal reset, optional confirmation prompt, set up stdin/stdout/stderr,
 * join the cgroup, apply OOM score, nice level, CPU scheduling, affinity,
 * IO priority, timer slack and personality, write the utmp record, resolve
 * user credentials, create runtime directories, apply groups and umask,
 * PAM, network and mount namespaces, chroot/chdir, re-close and shift the
 * passed fds, resource limits, capability bounding set, user switch, secure
 * bits, capabilities, no-new-privileges, seccomp filters, SELinux/AppArmor
 * labels, then build the final environment and argv and call execve().
 * Failures set r to an EXIT_* step code which is logged together with err.
 * The parent side logs the fork, attaches the child to the cgroup as well
 * and records the start in command->exec_status. */
1228 int exec_spawn(ExecCommand *command,
1230 ExecContext *context,
1231 int fds[], unsigned n_fds,
1233 bool apply_permissions,
1235 bool apply_tty_stdin,
1237 CGroupControllerMask cgroup_supported,
1238 const char *cgroup_path,
1239 const char *runtime_prefix,
1240 const char *unit_id,
1241 usec_t watchdog_usec,
1243 ExecRuntime *runtime,
1246 _cleanup_strv_free_ char **files_env = NULL;
1255 assert(fds || n_fds <= 0);
1257 if (context->std_input == EXEC_INPUT_SOCKET ||
1258 context->std_output == EXEC_OUTPUT_SOCKET ||
1259 context->std_error == EXEC_OUTPUT_SOCKET) {
1271 r = exec_context_load_environment(context, &files_env);
1273 log_struct_unit(LOG_ERR,
1275 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1282 argv = command->argv;
1284 line = exec_command_line(argv);
1288 log_struct_unit(LOG_DEBUG,
1290 "EXECUTABLE=%s", command->path,
1291 "MESSAGE=About to execute: %s", line,
1300 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1301 const char *username = NULL, *home = NULL, *shell = NULL;
1302 unsigned n_dont_close = 0;
1303 int dont_close[n_fds + 3];
1304 uid_t uid = (uid_t) -1;
1305 gid_t gid = (gid_t) -1;
1311 rename_process_from_path(command->path);
1313 /* We reset exactly these signals, since they are the
1314 * only ones we set to SIG_IGN in the main daemon. All
1315 * others we leave untouched because we set them to
1316 * SIG_DFL or a valid handler initially, both of which
1317 * will be demoted to SIG_DFL. */
1318 default_signals(SIGNALS_CRASH_HANDLER,
1319 SIGNALS_IGNORE, -1);
1321 if (context->ignore_sigpipe)
1322 ignore_signals(SIGPIPE, -1);
1324 assert_se(sigemptyset(&ss) == 0);
1325 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1327 r = EXIT_SIGNAL_MASK;
1332 do_idle_pipe_dance(idle_pipe);
1334 /* Close sockets very early to make sure we don't
1335 * block init reexecution because it cannot bind its
1340 dont_close[n_dont_close++] = socket_fd;
1342 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1343 n_dont_close += n_fds;
1346 if (runtime->netns_storage_socket[0] >= 0)
1347 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1348 if (runtime->netns_storage_socket[1] >= 0)
1349 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1352 err = close_all_fds(dont_close, n_dont_close);
1358 if (!context->same_pgrp)
1365 if (context->tcpwrap_name) {
1367 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1373 for (i = 0; i < (int) n_fds; i++) {
1374 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1382 exec_context_tty_reset(context);
1384 if (confirm_spawn) {
1387 err = ask_for_confirmation(&response, argv);
1388 if (err == -ETIMEDOUT)
1389 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1391 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1392 else if (response == 's') {
1393 write_confirm_message("Skipping execution.\n");
1397 } else if (response == 'n') {
1398 write_confirm_message("Failing execution.\n");
1404 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1405 * must be sure to drop O_NONBLOCK */
1407 fd_nonblock(socket_fd, false);
1409 err = setup_input(context, socket_fd, apply_tty_stdin);
1415 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1421 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), unit_id, apply_tty_stdin);
1428 err = cg_attach_everywhere(cgroup_supported, cgroup_path, 0);
1435 if (context->oom_score_adjust_set) {
1438 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1441 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1443 r = EXIT_OOM_ADJUST;
1448 if (context->nice_set)
1449 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1455 if (context->cpu_sched_set) {
1456 struct sched_param param = {
1457 .sched_priority = context->cpu_sched_priority,
1460 r = sched_setscheduler(0,
1461 context->cpu_sched_policy |
1462 (context->cpu_sched_reset_on_fork ?
1463 SCHED_RESET_ON_FORK : 0),
1467 r = EXIT_SETSCHEDULER;
1472 if (context->cpuset)
1473 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1475 r = EXIT_CPUAFFINITY;
1479 if (context->ioprio_set)
1480 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1486 if (context->timer_slack_nsec != (nsec_t) -1)
1487 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1489 r = EXIT_TIMERSLACK;
1493 if (context->personality != 0xffffffffUL)
1494 if (personality(context->personality) < 0) {
1496 r = EXIT_PERSONALITY;
1500 if (context->utmp_id)
1501 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1503 if (context->user) {
1504 username = context->user;
1505 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1511 if (is_terminal_input(context->std_input)) {
1512 err = chown_terminal(STDIN_FILENO, uid);
1521 if (cgroup_path && context->user && context->pam_name) {
1522 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1529 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1537 if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
1540 STRV_FOREACH(rt, context->runtime_directory) {
1541 _cleanup_free_ char *p;
1543 p = strjoin(runtime_prefix, "/", *rt, NULL);
1545 r = EXIT_RUNTIME_DIRECTORY;
1550 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1552 r = EXIT_RUNTIME_DIRECTORY;
1558 if (apply_permissions) {
1559 err = enforce_groups(context, username, gid);
1566 umask(context->umask);
1569 if (apply_permissions && context->pam_name && username) {
1570 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1577 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1578 err = setup_netns(runtime->netns_storage_socket);
1585 if (!strv_isempty(context->read_write_dirs) ||
1586 !strv_isempty(context->read_only_dirs) ||
1587 !strv_isempty(context->inaccessible_dirs) ||
1588 context->mount_flags != 0 ||
1589 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1590 context->private_devices) {
1592 char *tmp = NULL, *var = NULL;
1594 /* The runtime struct only contains the parent
1595 * of the private /tmp, which is
1596 * non-accessible to world users. Inside of it
1597 * there's a /tmp that is sticky, and that's
1598 * the one we want to use here. */
1600 if (context->private_tmp && runtime) {
1601 if (runtime->tmp_dir)
1602 tmp = strappenda(runtime->tmp_dir, "/tmp");
1603 if (runtime->var_tmp_dir)
1604 var = strappenda(runtime->var_tmp_dir, "/tmp");
1607 err = setup_namespace(
1608 context->read_write_dirs,
1609 context->read_only_dirs,
1610 context->inaccessible_dirs,
1613 context->private_devices,
1614 context->mount_flags);
1623 if (context->root_directory)
1624 if (chroot(context->root_directory) < 0) {
1630 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1636 _cleanup_free_ char *d = NULL;
1638 if (asprintf(&d, "%s/%s",
1639 context->root_directory ? context->root_directory : "",
1640 context->working_directory ? context->working_directory : "") < 0) {
1653 /* We repeat the fd closing here, to make sure that
1654 * nothing is leaked from the PAM modules */
1655 err = close_all_fds(fds, n_fds);
1657 err = shift_fds(fds, n_fds);
1659 err = flags_fds(fds, n_fds, context->non_blocking);
1665 if (apply_permissions) {
1667 for (i = 0; i < _RLIMIT_MAX; i++) {
1668 if (!context->rlimit[i])
1671 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1678 if (context->capability_bounding_set_drop) {
1679 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1681 r = EXIT_CAPABILITIES;
1686 if (context->user) {
1687 err = enforce_user(context, uid);
1694 /* PR_GET_SECUREBITS is not privileged, while
1695 * PR_SET_SECUREBITS is. So to suppress
1696 * potential EPERMs we'll try not to call
1697 * PR_SET_SECUREBITS unless necessary. */
1698 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1699 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1701 r = EXIT_SECUREBITS;
1705 if (context->capabilities)
1706 if (cap_set_proc(context->capabilities) < 0) {
1708 r = EXIT_CAPABILITIES;
1712 if (context->no_new_privileges)
1713 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1715 r = EXIT_NO_NEW_PRIVILEGES;
1720 if (context->address_families_whitelist ||
1721 !set_isempty(context->address_families)) {
1722 err = apply_address_families(context);
1724 r = EXIT_ADDRESS_FAMILIES;
1729 if (context->syscall_whitelist ||
1730 !set_isempty(context->syscall_filter) ||
1731 !set_isempty(context->syscall_archs)) {
1732 err = apply_seccomp(context);
1741 if (context->selinux_context && use_selinux()) {
1742 err = setexeccon(context->selinux_context);
1743 if (err < 0 && !context->selinux_context_ignore) {
1744 r = EXIT_SELINUX_CONTEXT;
1750 #ifdef HAVE_APPARMOR
1751 if (context->apparmor_profile && use_apparmor()) {
1752 err = aa_change_onexec(context->apparmor_profile);
1753 if (err < 0 && !context->apparmor_profile_ignore) {
1754 r = EXIT_APPARMOR_PROFILE;
1761 err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
1767 final_env = strv_env_merge(5,
1770 context->environment,
1780 final_argv = replace_env_argv(argv, final_env);
1787 final_env = strv_env_clean(final_env);
1789 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1790 line = exec_command_line(final_argv);
1793 log_struct_unit(LOG_DEBUG,
1795 "EXECUTABLE=%s", command->path,
1796 "MESSAGE=Executing: %s", line,
1803 execve(command->path, final_argv, final_env);
1810 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1811 "EXECUTABLE=%s", command->path,
1812 "MESSAGE=Failed at step %s spawning %s: %s",
1813 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1814 command->path, strerror(-err),
1823 log_struct_unit(LOG_DEBUG,
1825 "MESSAGE=Forked %s as "PID_FMT,
1829 /* We add the new process to the cgroup both in the child (so
1830 * that we can be sure that no user code is ever executed
1831 * outside of the cgroup) and in the parent (so that we can be
1832 * sure that when we kill the cgroup the process will be
1835 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1837 exec_status_start(&command->exec_status, pid);
/* Fills an ExecContext with its defaults: best-effort IO priority class at
 * level 0, SCHED_OTHER, syslog priority LOG_DAEMON|LOG_INFO with level
 * prefix parsing on, SIGPIPE ignored, and runtime directory mode 0755.
 * timer_slack_nsec is set to (nsec_t) -1 and personality to 0xffffffffUL;
 * exec_spawn() skips the corresponding settings for exactly these values. */
1843 void exec_context_init(ExecContext *c) {
1847 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1848 c->cpu_sched_policy = SCHED_OTHER;
1849 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1850 c->syslog_level_prefix = true;
1851 c->ignore_sigpipe = true;
1852 c->timer_slack_nsec = (nsec_t) -1;
1853 c->personality = 0xffffffffUL;
1854 c->runtime_directory_mode = 0755;
/* Releases the dynamically allocated members of an ExecContext (string
 * vectors, strings, the capability set, the CPU set and the seccomp related
 * sets) and resets the freed pointers to NULL so that a repeated call does
 * not free them twice. The context structure itself is not freed here. */
1857 void exec_context_done(ExecContext *c) {
1862 strv_free(c->environment);
1863 c->environment = NULL;
1865 strv_free(c->environment_files);
1866 c->environment_files = NULL;
1868 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1870 c->rlimit[l] = NULL;
1873 free(c->working_directory);
1874 c->working_directory = NULL;
1875 free(c->root_directory);
1876 c->root_directory = NULL;
1881 free(c->tcpwrap_name);
1882 c->tcpwrap_name = NULL;
1884 free(c->syslog_identifier);
1885 c->syslog_identifier = NULL;
1893 strv_free(c->supplementary_groups);
1894 c->supplementary_groups = NULL;
1899 if (c->capabilities) {
1900 cap_free(c->capabilities);
1901 c->capabilities = NULL;
1904 strv_free(c->read_only_dirs);
1905 c->read_only_dirs = NULL;
1907 strv_free(c->read_write_dirs);
1908 c->read_write_dirs = NULL;
1910 strv_free(c->inaccessible_dirs);
1911 c->inaccessible_dirs = NULL;
1914 CPU_FREE(c->cpuset);
1919 free(c->selinux_context);
1920 c->selinux_context = NULL;
1922 free(c->apparmor_profile);
1923 c->apparmor_profile = NULL;
1925 set_free(c->syscall_filter);
1926 c->syscall_filter = NULL;
1928 set_free(c->syscall_archs);
1929 c->syscall_archs = NULL;
1931 set_free(c->address_families);
1932 c->address_families = NULL;
1934 strv_free(c->runtime_directory);
1935 c->runtime_directory = NULL;
/* Removes the runtime directories configured in c->runtime_directory.
 *
 * For every entry the path "<runtime_prefix>/<entry>" is built with
 * strjoin() and handed to rm_rf_dangerous(). A NULL runtime_prefix is
 * checked for up front, before any path is built. */
1938 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1943 if (!runtime_prefix)
1946 STRV_FOREACH(i, c->runtime_directory) {
/* p carries the _cleanup_free_ attribute but has no initializer; the
 * next statement assigns it. */
1947 _cleanup_free_ char *p;
1949 p = strjoin(runtime_prefix, "/", *i, NULL);
1953 /* We execute this synchronously, since we need to be
1954 * sure this is gone when we start the service
1956 rm_rf_dangerous(p, false, true, false);
/* Tears down a single ExecCommand.
 *
 * NOTE(review): presumably releases the members the command owns without
 * freeing the ExecCommand itself, since exec_command_done_array() applies
 * it to elements of a contiguous array -- confirm against the body. */
1962 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n ExecCommand objects stored
 * contiguously starting at c (c[0] .. c[n-1]). */
1972 void exec_command_done_array(ExecCommand *c, unsigned n) {
1975 for (i = 0; i < n; i++)
1976 exec_command_done(c+i);
/* Disposes of commands on the linked list headed by c.
 *
 * An entry i is unlinked from the list via its "command" list field and
 * then passed to exec_command_done().
 * NOTE(review): presumably this is repeated until the list is empty and
 * the entry itself is freed afterwards -- confirm. */
1979 void exec_command_free_list(ExecCommand *c) {
1983 LIST_REMOVE(command, c, i);
1984 exec_command_done(i);
/* Treats c as an array of n list heads and releases each list with
 * exec_command_free_list(). */
1989 void exec_command_free_array(ExecCommand **c, unsigned n) {
1992 for (i = 0; i < n; i++) {
1993 exec_command_free_list(c[i]);
/* Loads the variables from all files listed in c->environment_files.
 *
 * Each entry is checked for being an absolute path and is then expanded
 * with glob(), so one entry may name several files. Every matching file
 * is read with load_env_file(), cleaned with strv_env_clean_log() (which
 * is given the file name for its log messages), and merged into the
 * accumulated vector r with strv_env_merge().
 *
 * If glob() fails, the function returns -errno, or -EINVAL when errno is
 * zero.
 * NOTE(review): presumably the merged vector is handed back through *l
 * and the per-entry "ignore" flag suppresses errors for optional files --
 * confirm. */
1998 int exec_context_load_environment(const ExecContext *c, char ***l) {
1999 char **i, **r = NULL;
2004 STRV_FOREACH(i, c->environment_files) {
2007 bool ignore = false;
2009 _cleanup_globfree_ glob_t pglob = {};
2019 if (!path_is_absolute(fn)) {
2027 /* Filename supports globbing, take all matching files */
2029 if (glob(fn, 0, NULL, &pglob) != 0) {
/* glob() does not set errno for every failure, hence the -EINVAL
 * fallback. */
2034 return errno ? -errno : -EINVAL;
2036 count = pglob.gl_pathc;
2044 for (n = 0; n < count; n++) {
2045 k = load_env_file(pglob.gl_pathv[n], NULL, &p);
2053 /* Log invalid environment variables with filename */
2055 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
2062 m = strv_env_merge(2, r, p);
/* Decides whether the terminal named by tty could be the device behind
 * /dev/console.
 *
 * The name is tested for a "/dev/" prefix and for being literally
 * "console". Otherwise the console is resolved with
 * resolve_dev_console() and compared against tty; a console of "tty0"
 * is treated as matching any virtual console. */
2078 static bool tty_may_match_dev_console(const char *tty) {
2079 char *active = NULL, *console;
2082 if (startswith(tty, "/dev/"))
2085 /* trivial identity? */
2086 if (streq(tty, "console"))
2089 console = resolve_dev_console(&active);
2090 /* if we could not resolve, assume it may */
2094 /* "tty0" means the active VC, so it may be the same sometimes */
2095 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true if running with this context might affect /dev/console.
 *
 * Both of the following must hold: the context uses a terminal at all
 * (a TTY reset/vhangup/disallocate option is set, or stdin, stdout or
 * stderr is of a terminal kind), and the context's TTY path may resolve
 * to the console according to tty_may_match_dev_console(). */
2101 bool exec_context_may_touch_console(ExecContext *ec) {
2102 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2103 is_terminal_input(ec->std_input) ||
2104 is_terminal_output(ec->std_output) ||
2105 is_terminal_output(ec->std_error)) &&
2106 tty_may_match_dev_console(tty_path(ec));
/* Writes strings from the vector l to f, each one preceded by a single
 * space. No trailing newline is written by the visible code. */
2109 static void strv_fprintf(FILE *f, char **l) {
2115 fprintf(f, " %s", *g);
/* Writes a human-readable dump of an ExecContext to f.
 *
 * Output is a sequence of "Key: value" lines, each starting with prefix
 * (prefix is passed through strempty() first). Several settings are only
 * printed when configured: their *_set flag is true, their pointer is
 * non-NULL, or their value differs from the "unset" sentinel. */
2118 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2125 prefix = strempty(prefix);
2129 "%sWorkingDirectory: %s\n"
2130 "%sRootDirectory: %s\n"
2131 "%sNonBlocking: %s\n"
2132 "%sPrivateTmp: %s\n"
2133 "%sPrivateNetwork: %s\n"
2134 "%sPrivateDevices: %s\n"
2135 "%sIgnoreSIGPIPE: %s\n",
2137 prefix, c->working_directory ? c->working_directory : "/",
2138 prefix, c->root_directory ? c->root_directory : "/",
2139 prefix, yes_no(c->non_blocking),
2140 prefix, yes_no(c->private_tmp),
2141 prefix, yes_no(c->private_network),
2142 prefix, yes_no(c->private_devices),
2143 prefix, yes_no(c->ignore_sigpipe));
2145 STRV_FOREACH(e, c->environment)
2146 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2148 STRV_FOREACH(e, c->environment_files)
2149 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2151 if (c->tcpwrap_name)
2153 "%sTCPWrapName: %s\n",
2154 prefix, c->tcpwrap_name);
2161 if (c->oom_score_adjust_set)
2163 "%sOOMScoreAdjust: %i\n",
2164 prefix, c->oom_score_adjust);
/* Only the hard limit (rlim_max) of each resource limit is printed. */
2166 for (i = 0; i < RLIM_NLIMITS; i++)
2168 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
2170 if (c->ioprio_set) {
2171 _cleanup_free_ char *class_str = NULL;
2173 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2175 "%sIOSchedulingClass: %s\n"
2176 "%sIOPriority: %i\n",
2177 prefix, strna(class_str),
2178 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2181 if (c->cpu_sched_set) {
2182 _cleanup_free_ char *policy_str = NULL;
2184 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2186 "%sCPUSchedulingPolicy: %s\n"
2187 "%sCPUSchedulingPriority: %i\n"
2188 "%sCPUSchedulingResetOnFork: %s\n",
2189 prefix, strna(policy_str),
2190 prefix, c->cpu_sched_priority,
2191 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* List the index of every CPU that is set in the affinity mask. */
2195 fprintf(f, "%sCPUAffinity:", prefix);
2196 for (i = 0; i < c->cpuset_ncpus; i++)
2197 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2198 fprintf(f, " %u", i);
/* (nsec_t) -1 is the value exec_context_init() assigns, so the timer
 * slack is only printed once it has been changed from that default. */
2202 if (c->timer_slack_nsec != (nsec_t) -1)
2203 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2206 "%sStandardInput: %s\n"
2207 "%sStandardOutput: %s\n"
2208 "%sStandardError: %s\n",
2209 prefix, exec_input_to_string(c->std_input),
2210 prefix, exec_output_to_string(c->std_output),
2211 prefix, exec_output_to_string(c->std_error));
2217 "%sTTYVHangup: %s\n"
2218 "%sTTYVTDisallocate: %s\n",
2219 prefix, c->tty_path,
2220 prefix, yes_no(c->tty_reset),
2221 prefix, yes_no(c->tty_vhangup),
2222 prefix, yes_no(c->tty_vt_disallocate));
/* Syslog facility and level are only reported when stdout or stderr is
 * connected to syslog, kmsg or the journal (with or without console). */
2224 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2225 c->std_output == EXEC_OUTPUT_KMSG ||
2226 c->std_output == EXEC_OUTPUT_JOURNAL ||
2227 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2228 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2229 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2230 c->std_error == EXEC_OUTPUT_SYSLOG ||
2231 c->std_error == EXEC_OUTPUT_KMSG ||
2232 c->std_error == EXEC_OUTPUT_JOURNAL ||
2233 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2234 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2235 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2237 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
/* syslog_priority combines facility and level: the level is taken with
 * LOG_PRI(), the facility from the bits above it (>> 3). */
2239 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2240 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2243 "%sSyslogFacility: %s\n"
2244 "%sSyslogLevel: %s\n",
2245 prefix, strna(fac_str),
2246 prefix, strna(lvl_str));
2249 if (c->capabilities) {
2250 _cleanup_cap_free_charp_ char *t;
2252 t = cap_to_text(c->capabilities, NULL);
2254 fprintf(f, "%sCapabilities: %s\n", prefix, t);
2258 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2260 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2261 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2262 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2263 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2264 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
/* NOTE(review): unlike the five labels above, "noroot-locked" has no
 * leading space, so it is printed glued to whatever precedes it. */
2265 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
2267 if (c->capability_bounding_set_drop) {
2269 fprintf(f, "%sCapabilityBoundingSet:", prefix);
/* The field is a mask of capabilities to drop, so a capability is
 * selected here when its bit is NOT set in the mask. */
2271 for (l = 0; l <= cap_last_cap(); l++)
2272 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2273 _cleanup_cap_free_charp_ char *t;
2277 fprintf(f, " %s", t);
2284 fprintf(f, "%sUser: %s\n", prefix, c->user);
2286 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2288 if (strv_length(c->supplementary_groups) > 0) {
2289 fprintf(f, "%sSupplementaryGroups:", prefix);
2290 strv_fprintf(f, c->supplementary_groups);
2295 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2297 if (strv_length(c->read_write_dirs) > 0) {
2298 fprintf(f, "%sReadWriteDirs:", prefix);
2299 strv_fprintf(f, c->read_write_dirs);
2303 if (strv_length(c->read_only_dirs) > 0) {
2304 fprintf(f, "%sReadOnlyDirs:", prefix);
2305 strv_fprintf(f, c->read_only_dirs);
2309 if (strv_length(c->inaccessible_dirs) > 0) {
2310 fprintf(f, "%sInaccessibleDirs:", prefix);
2311 strv_fprintf(f, c->inaccessible_dirs);
2317 "%sUtmpIdentifier: %s\n",
2318 prefix, c->utmp_id);
/* A "-" is printed in front of the SELinux context when
 * selinux_context_ignore is set. */
2320 if (c->selinux_context)
2322 "%sSELinuxContext: %s%s\n",
2323 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
/* 0xffffffffUL is the value exec_context_init() assigns; the personality
 * is only printed once it differs from that. */
2325 if (c->personality != 0xffffffffUL)
2327 "%sPersonality: %s\n",
2328 prefix, strna(personality_to_string(c->personality)));
2330 if (c->syscall_filter) {
2338 "%sSystemCallFilter: ",
2341 if (!c->syscall_whitelist)
2345 SET_FOREACH(id, c->syscall_filter, j) {
2346 _cleanup_free_ char *name = NULL;
/* Set entries hold the syscall number plus one, hence the "- 1" before
 * resolving the name. */
2353 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2354 fputs(strna(name), f);
2361 if (c->syscall_archs) {
2368 "%sSystemCallArchitectures:",
/* As with the syscall filter, the stored architecture values are offset
 * by one. */
2372 SET_FOREACH(id, c->syscall_archs, j)
2373 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2378 if (c->syscall_errno != 0)
2380 "%sSystemCallErrorNumber: %s\n",
2381 prefix, strna(errno_to_name(c->syscall_errno)));
/* A "-" is printed in front of the profile name when
 * apparmor_profile_ignore is set. */
2383 if (c->apparmor_profile)
2385 "%sAppArmorProfile: %s%s\n",
2386 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* Marks the start of a process in an ExecStatus by storing the current
 * time in s->start_timestamp.
 * NOTE(review): presumably pid is recorded in the status as well --
 * confirm. */
2389 void exec_status_start(ExecStatus *s, pid_t pid) {
2394 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in an ExecStatus.
 *
 * Stores the current time in s->exit_timestamp. If the context has a
 * utmp identifier, a dead-process record is written with
 * utmp_put_dead_process(); afterwards the context's TTY is reset with
 * exec_context_tty_reset().
 *
 * A status that already holds a different, non-zero PID is detected by
 * the first check.
 * NOTE(review): context is dereferenced in the utmp check; whether a
 * NULL context is guarded against cannot be seen here -- confirm. */
2397 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2400 if (s->pid && s->pid != pid)
2404 dual_timestamp_get(&s->exit_timestamp);
2410 if (context->utmp_id)
2411 utmp_put_dead_process(context->utmp_id, pid, code, status);
2413 exec_context_tty_reset(context);
/* Writes a human-readable dump of an ExecStatus to f, every line
 * starting with prefix.
 *
 * The PID is printed first. The start timestamp is printed only if its
 * realtime value is non-zero; the exit timestamp, exit code and exit
 * status are printed only if the exit timestamp's realtime value is
 * non-zero. */
2417 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer that format_timestamp() renders into. */
2418 char buf[FORMAT_TIMESTAMP_MAX];
2430 "%sPID: "PID_FMT"\n",
2433 if (s->start_timestamp.realtime > 0)
2435 "%sStart Timestamp: %s\n",
2436 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2438 if (s->exit_timestamp.realtime > 0)
2440 "%sExit Timestamp: %s\n"
2442 "%sExit Status: %i\n",
2443 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2444 prefix, sigchld_code_to_string(s->code),
/* Builds a single command-line string from the argument vector argv.
 *
 * A first pass over argv precedes the allocation of a buffer of k
 * characters; a second pass walks argv again, giving arguments that
 * contain whitespace special treatment.
 *
 * NOTE(review): presumably the first pass computes k, the second pass
 * quotes whitespace-containing arguments, and NULL is returned on
 * allocation failure (exec_command_dump() prints strerror(ENOMEM) for a
 * NULL result) -- confirm. */
2448 char *exec_command_line(char **argv) {
2456 STRV_FOREACH(a, argv)
2459 if (!(n = new(char, k)))
2463 STRV_FOREACH(a, argv) {
2470 if (strpbrk(*a, WHITESPACE)) {
2481 /* FIXME: this doesn't really handle arguments that have
2482 * spaces and ticks in them */
/* Writes a human-readable dump of one ExecCommand to f.
 *
 * The command line is printed with prefix; the execution status is then
 * dumped through exec_status_dump() with a prefix that has an extra tab
 * appended. */
2487 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2489 const char *prefix2;
/* If building the tab-extended prefix fails, fall back to the plain
 * prefix instead of giving up. */
2498 p2 = strappend(prefix, "\t");
2499 prefix2 = p2 ? p2 : prefix;
2501 cmd = exec_command_line(c->argv);
/* A NULL command line is reported as the ENOMEM error text. */
2504 "%sCommand Line: %s\n",
2505 prefix, cmd ? cmd : strerror(ENOMEM));
2509 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command on the linked list starting at c by calling
 * exec_command_dump() on each entry with the same f and prefix. The
 * parameter c doubles as the iteration cursor. */
2514 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2520 LIST_FOREACH(command, c, c)
2521 exec_command_dump(c, f, prefix);
/* Appends the command e to the end of the list whose head is *l.
 *
 * The current tail is located with LIST_FIND_TAIL() and e is inserted
 * after it, so existing entries keep their relative order and e becomes
 * the last one. */
2524 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2531 /* It's kind of important, that we keep the order here */
2532 LIST_FIND_TAIL(command, *l, end);
2533 LIST_INSERT_AFTER(command, *l, end, e);
/* Sets up an ExecCommand from a path followed by a variadic argument
 * list.
 *
 * A string vector is built with strv_new_ap() from path and the
 * variadic arguments.
 * NOTE(review): presumably the argument list must be NULL-terminated and
 * the vector becomes the command's argv, with path stored separately --
 * confirm. */
2538 int exec_command_set(ExecCommand *c, const char *path, ...) {
2546 l = strv_new_ap(path, ap);
/* Allocates a zero-initialized ExecRuntime and stores it in *rt.
 *
 * Both ends of the network namespace storage socket pair are set to -1,
 * i.e. "no file descriptor".
 * NOTE(review): exec_runtime_deserialize_item() calls this once per key
 * on the same *rt, so presumably an already-allocated *rt is left
 * untouched -- confirm. */
2567 static int exec_runtime_allocate(ExecRuntime **rt) {
2572 *rt = new0(ExecRuntime, 1);
2577 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepares the runtime objects an ExecContext needs.
 *
 * The case where neither PrivateNetwork nor PrivateTmp is enabled is
 * checked first. Otherwise the ExecRuntime is allocated and:
 *  - with private_network, an AF_UNIX/SOCK_DGRAM socket pair is created
 *    for the network namespace, unless one already exists;
 *  - with private_tmp, the private tmp and var-tmp directories are set
 *    up for unit id via setup_tmp_dirs(), unless already present. */
2582 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2592 if (!c->private_network && !c->private_tmp)
2595 r = exec_runtime_allocate(rt);
2599 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2600 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2604 if (c->private_tmp && !(*rt)->tmp_dir) {
2605 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Takes a reference on an ExecRuntime.
 *
 * Asserts that the object is still referenced (n_ref > 0).
 * NOTE(review): presumably increments n_ref and returns r -- confirm. */
2613 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2615 assert(r->n_ref > 0);
/* Drops a reference on an ExecRuntime.
 *
 * Asserts that the object is still referenced (n_ref > 0). Once the
 * count is no longer positive, the object's resources are released: the
 * var-tmp directory path is freed and the network namespace socket pair
 * is closed.
 * NOTE(review): only the path strings are freed here; nothing visible
 * removes the directories on disk -- exec_runtime_destroy() does that. */
2621 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2626 assert(r->n_ref > 0);
2629 if (r->n_ref <= 0) {
2631 free(r->var_tmp_dir);
2632 close_pipe(r->netns_storage_socket);
/* Serializes an ExecRuntime for unit u into f.
 *
 * The items written are "tmp-dir", "var-tmp-dir", "netns-socket-0" and
 * "netns-socket-1". For each open socket a duplicate file descriptor is
 * added to fds with fdset_put_dup(), and the number of that duplicate
 * (not of the original descriptor) is what gets written. */
2639 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2648 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2650 if (rt->var_tmp_dir)
2651 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2653 if (rt->netns_storage_socket[0] >= 0) {
2656 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2660 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2663 if (rt->netns_storage_socket[1] >= 0) {
2666 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2670 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Restores one serialized key/value pair into an ExecRuntime.
 *
 * Recognized keys mirror exec_runtime_serialize(): "tmp-dir",
 * "var-tmp-dir", "netns-socket-0" and "netns-socket-1". For every
 * recognized key exec_runtime_allocate() is called on rt first.
 *
 * Directory values are duplicated with strdup() and replace any
 * previously stored path. Socket values are parsed as a file descriptor
 * number that must be contained in fds; a value that fails either test
 * is logged at debug level. The descriptor is taken out of fds with
 * fdset_remove() and stored, after closing the one previously held. */
2676 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2683 if (streq(key, "tmp-dir")) {
2686 r = exec_runtime_allocate(rt);
2690 copy = strdup(value);
/* Drop the old path before storing the new copy. */
2694 free((*rt)->tmp_dir);
2695 (*rt)->tmp_dir = copy;
2697 } else if (streq(key, "var-tmp-dir")) {
2700 r = exec_runtime_allocate(rt);
2704 copy = strdup(value);
2708 free((*rt)->var_tmp_dir);
2709 (*rt)->var_tmp_dir = copy;
2711 } else if (streq(key, "netns-socket-0")) {
2714 r = exec_runtime_allocate(rt);
/* The value must parse as an int and name a descriptor that was
 * actually passed along in fds. */
2718 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2719 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2721 safe_close((*rt)->netns_storage_socket[0]);
2722 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2724 } else if (streq(key, "netns-socket-1")) {
2727 r = exec_runtime_allocate(rt);
2731 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2732 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2734 safe_close((*rt)->netns_storage_socket[1]);
2735 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job routine that removes a temporary directory; it is passed to
 * asynchronous_job() by exec_runtime_destroy().
 *
 * p is the directory path, which is removed with rm_rf_dangerous().
 * NOTE(review): the _cleanup_free_ attribute presumably frees the path
 * when the function returns, i.e. this routine takes ownership of p --
 * confirm. */
2743 static void *remove_tmpdir_thread(void *p) {
2744 _cleanup_free_ char *path = p;
2746 rm_rf_dangerous(path, false, true, false);
/* Destroys the on-disk and kernel resources held by an ExecRuntime.
 *
 * The private tmp and var-tmp directories are each removed through
 * asynchronous_job() running remove_tmpdir_thread(); a warning is logged
 * when the job reports an error. Finally the network namespace socket
 * pair is closed.
 *
 * NOTE(review): ownership of each path string presumably passes to the
 * removal job on success, so the free() here should only be reached when
 * starting the job failed -- confirm. */
2750 void exec_runtime_destroy(ExecRuntime *rt) {
2756 /* If there are multiple users of this, let's leave the stuff around */
2761 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2763 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2765 log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2772 if (rt->var_tmp_dir) {
2773 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2775 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2777 log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2778 free(rt->var_tmp_dir);
/* Forget the pointer so that later cleanup cannot touch the path
 * again. */
2781 rt->var_tmp_dir = NULL;
2784 close_pipe(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by enum value.
 * DEFINE_STRING_TABLE_LOOKUP() is instantiated for this table with the
 * name stem "exec_input"; exec_context_dump() uses the resulting
 * exec_input_to_string(). */
2787 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2788 [EXEC_INPUT_NULL] = "null",
2789 [EXEC_INPUT_TTY] = "tty",
2790 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2791 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2792 [EXEC_INPUT_SOCKET] = "socket"
2795 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum value. The
 * "+console" variants name the *_AND_CONSOLE outputs.
 * DEFINE_STRING_TABLE_LOOKUP() is instantiated for this table with the
 * name stem "exec_output"; exec_context_dump() uses the resulting
 * exec_output_to_string(). */
2797 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2798 [EXEC_OUTPUT_INHERIT] = "inherit",
2799 [EXEC_OUTPUT_NULL] = "null",
2800 [EXEC_OUTPUT_TTY] = "tty",
2801 [EXEC_OUTPUT_SYSLOG] = "syslog",
2802 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2803 [EXEC_OUTPUT_KMSG] = "kmsg",
2804 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2805 [EXEC_OUTPUT_JOURNAL] = "journal",
2806 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2807 [EXEC_OUTPUT_SOCKET] = "socket"
2810 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);