1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
74 #include "utmp-wtmp.h"
76 #include "path-util.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
91 #include "seccomp-util.h"
/* Timeout of the first fd_wait_for_event() on the idle pipe in do_idle_pipe_dance(). */
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* Timeout of the second wait in do_idle_pipe_dance(), issued after "x" was written to the pipe. */
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97 /* This assumes there is a 'tty' group */
/* NOTE(review): the definition the remark above belongs to is not visible in
 * this listing; presumably it is TTY_MODE, which chown_terminal() uses. */
/* Send buffer size requested via fd_inc_sndbuf() for the journal stream socket in connect_logger_as(). */
100 #define SNDBUF_SIZE (8*1024*1024)
/* Rearranges the descriptors in fds[] so that entry i ends up at descriptor
 * number i+3. Each misplaced entry is duplicated with F_DUPFD, which yields
 * the lowest free descriptor >= i+3; when that is not exactly i+3 the index
 * is remembered in restart_from and the pass is repeated from there.
 * NOTE(review): only part of this function is visible in this listing, so the
 * error paths and the return value cannot be confirmed from here. */
102 static int shift_fds(int fds[], unsigned n_fds) {
103 int start, restart_from;
108 /* Modifies the fds array! (sorts it) */
118 for (i = start; i < (int) n_fds; i++) {
121 /* Already at right index? */
125 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
131 /* Hmm, the fd we wanted isn't free? Then
132 * let's remember that and try again from here*/
133 if (nfd != i+3 && restart_from < 0)
137 if (restart_from < 0)
140 start = restart_from;
/* Adjusts the flags of each of the n_fds descriptors in fds[]: O_NONBLOCK is
 * set or cleared according to 'nonblock' via fd_nonblock(), and FD_CLOEXEC is
 * always cleared via fd_cloexec(fd, false). */
146 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
155 /* Sets or drops O_NONBLOCK (as requested by 'nonblock') and always drops FD_CLOEXEC */
157 for (i = 0; i < n_fds; i++) {
159 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
162 /* We unconditionally drop FD_CLOEXEC from the fds,
163 * since after all we want to pass these fds to our
166 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, falling back to
 * "/dev/console" when none is set. */
173 _pure_ static const char *tty_path(const ExecContext *context) {
176 if (context->tty_path)
177 return context->tty_path;
179 return "/dev/console";
/* Applies the TTY clean-up steps enabled in the context. terminal_vhangup()
 * and reset_terminal() act on tty_path(context) and therefore fall back to
 * /dev/console, whereas vt_disallocate() only runs when an explicit
 * context->tty_path is configured. */
182 static void exec_context_tty_reset(const ExecContext *context) {
185 if (context->tty_vhangup)
186 terminal_vhangup(tty_path(context));
188 if (context->tty_reset)
189 reset_terminal(tty_path(context));
191 if (context->tty_vt_disallocate && context->tty_path)
192 vt_disallocate(context->tty_path);
/* Tests whether the output mode is EXEC_OUTPUT_TTY or one of the
 * *_AND_CONSOLE modes. */
195 static bool is_terminal_output(ExecOutput o) {
197 o == EXEC_OUTPUT_TTY ||
198 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given flags (plus O_NOCTTY) and duplicates it onto
 * descriptor nfd. r becomes nfd on success or -errno when dup2() fails.
 * NOTE(review): the handling of a failed open() and the final return are not
 * visible in this listing. */
203 static int open_null_as(int flags, int nfd) {
208 fd = open("/dev/null", flags|O_NOCTTY);
213 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Connects descriptor nfd to the journal's stream socket at
 * /run/systemd/journal/stdout. A SOCK_STREAM AF_UNIX socket is connected, its
 * read side is shut down, its send buffer is enlarged to SNDBUF_SIZE and the
 * socket is finally duplicated onto nfd (r is nfd on success, -errno when
 * dup2() fails).
 *
 * The argument lines below (identifier, priority, level-prefix flag and the
 * syslog/kmsg/console forwarding flags derived from 'output') presumably
 * belong to the call that writes the stream header; that call line is not
 * visible in this listing. */
221 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
223 union sockaddr_union sa = {
224 .un.sun_family = AF_UNIX,
225 .un.sun_path = "/run/systemd/journal/stdout",
229 assert(output < _EXEC_OUTPUT_MAX);
233 fd = socket(AF_UNIX, SOCK_STREAM, 0);
/* Address length covers the header plus the path, without a trailing NUL */
237 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
/* We only ever write to this socket */
243 if (shutdown(fd, SHUT_RD) < 0) {
/* Result is not checked here */
248 fd_inc_sndbuf(fd, SNDBUF_SIZE);
/* An identifier configured in the context takes precedence over 'ident' */
258 context->syslog_identifier ? context->syslog_identifier : ident,
260 context->syslog_priority,
261 !!context->syslog_level_prefix,
262 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
263 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
264 is_terminal_output(output));
267 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Opens the terminal at 'path' via open_terminal() with mode|O_NOCTTY and
 * duplicates it onto descriptor nfd. r becomes nfd on success or -errno when
 * dup2() fails. */
274 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
280 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
284 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Tests whether the input mode is one of the TTY modes (EXEC_INPUT_TTY,
 * EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL). */
292 static bool is_terminal_input(ExecInput i) {
294 i == EXEC_INPUT_TTY ||
295 i == EXEC_INPUT_TTY_FORCE ||
296 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the configured stdin mode to EXEC_INPUT_NULL when it cannot be
 * honoured: a TTY mode while apply_tty_stdin is false, or socket input
 * without a socket fd. NOTE(review): the fall-through return is not visible
 * in this listing; presumably std_input is returned unchanged. */
299 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
301 if (is_terminal_input(std_input) && !apply_tty_stdin)
302 return EXEC_INPUT_NULL;
304 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
305 return EXEC_INPUT_NULL;
/* Downgrades socket output to EXEC_OUTPUT_INHERIT when no socket fd is
 * available. NOTE(review): the fall-through return is not visible in this
 * listing; presumably std_output is returned unchanged. */
310 static int fixup_output(ExecOutput std_output, int socket_fd) {
312 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
313 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the (fixed-up) input mode of the context:
 * /dev/null for EXEC_INPUT_NULL, an acquired terminal for the TTY modes, or
 * the socket for EXEC_INPUT_SOCKET. The visible return expressions yield
 * STDIN_FILENO on success and a negative errno on failure. */
318 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
323 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
327 case EXEC_INPUT_NULL:
328 return open_null_as(O_RDONLY, STDIN_FILENO);
331 case EXEC_INPUT_TTY_FORCE:
332 case EXEC_INPUT_TTY_FAIL: {
/* The two booleans are derived from the exact TTY mode; see
 * acquire_terminal() for their meaning */
335 fd = acquire_terminal(tty_path(context),
336 i == EXEC_INPUT_TTY_FAIL,
337 i == EXEC_INPUT_TTY_FORCE,
/* Only move the fd if the terminal was not already opened as stdin */
343 if (fd != STDIN_FILENO) {
344 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
352 case EXEC_INPUT_SOCKET:
353 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
356 assert_not_reached("Unknown input type");
/* Sets up descriptor 'fileno' (STDOUT_FILENO or STDERR_FILENO) according to
 * the context's output configuration. The input mode is consulted as well,
 * because inherited output follows stdin. The visible return expressions
 * yield 'fileno' on success and a negative errno on failure. */
360 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
368 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
369 o = fixup_output(context->std_output, socket_fd);
371 if (fileno == STDERR_FILENO) {
373 e = fixup_output(context->std_error, socket_fd);
375 /* This expects that stdin and stdout are already set up */
377 /* Don't change the stderr file descriptor if we inherit all
378 * the way and are not on a tty */
379 if (e == EXEC_OUTPUT_INHERIT &&
380 o == EXEC_OUTPUT_INHERIT &&
381 i == EXEC_INPUT_NULL &&
382 !is_terminal_input(context->std_input) &&
386 /* Duplicate from stdout if possible */
387 if (e == o || e == EXEC_OUTPUT_INHERIT)
388 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
392 } else if (o == EXEC_OUTPUT_INHERIT) {
393 /* If input got downgraded, inherit the original value */
394 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
395 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
397 /* If the input is connected to anything that's not a /dev/null, inherit that... */
398 if (i != EXEC_INPUT_NULL)
399 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
405 /* We need to open /dev/null here anew, to get the right access mode. */
406 return open_null_as(O_WRONLY, fileno);
411 case EXEC_OUTPUT_NULL:
412 return open_null_as(O_WRONLY, fileno);
414 case EXEC_OUTPUT_TTY:
/* Share the terminal already opened for stdin, if there is one */
415 if (is_terminal_input(i))
416 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
418 /* We don't reset the terminal if this is just about output */
419 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
421 case EXEC_OUTPUT_SYSLOG:
422 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
423 case EXEC_OUTPUT_KMSG:
424 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
425 case EXEC_OUTPUT_JOURNAL:
426 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
427 r = connect_logger_as(context, o, ident, unit_id, fileno);
/* A failed journal connection is logged and /dev/null is used instead */
429 log_struct_unit(LOG_CRIT, unit_id,
430 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
431 fileno == STDOUT_FILENO ? "out" : "err",
432 unit_id, strerror(-r),
435 r = open_null_as(O_WRONLY, fileno);
439 case EXEC_OUTPUT_SOCKET:
440 assert(socket_fd >= 0);
441 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
/* NOTE(review): the message says "error type", but this point is reached
 * for an unhandled output type. */
444 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind 'fd' over to 'uid' with mode TTY_MODE.
 * The fchown()/fchmod() results are deliberately ignored; instead the
 * resulting owner and permission bits are read back with fstat() and compared
 * against the desired values. */
448 static int chown_terminal(int fd, uid_t uid) {
453 /* This might fail. What matters are the results. */
454 (void) fchown(fd, uid, -1);
455 (void) fchmod(fd, TTY_MODE);
457 if (fstat(fd, &st) < 0)
460 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily redirects stdin and stdout to a terminal for the confirmation
 * prompt. The current stdin/stdout are first duplicated to descriptors >= 3;
 * an acquired terminal is then chown'ed to the current uid and duplicated
 * onto STDIN_FILENO and STDOUT_FILENO. On success the saved descriptors are
 * handed back through _saved_stdin/_saved_stdout so that
 * restore_confirm_stdio() can undo the redirection.
 * NOTE(review): the arguments of acquire_terminal() other than
 * DEFAULT_CONFIRM_USEC, and the control flow leading to the safe_close()
 * calls at the end, are not visible in this listing. */
466 static int setup_confirm_stdio(int *_saved_stdin,
467 int *_saved_stdout) {
468 int fd = -1, saved_stdin, saved_stdout = -1, r;
470 assert(_saved_stdin);
471 assert(_saved_stdout);
473 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
477 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
478 if (saved_stdout < 0) {
483 fd = acquire_terminal(
488 DEFAULT_CONFIRM_USEC);
494 r = chown_terminal(fd, getuid());
498 if (dup2(fd, STDIN_FILENO) < 0) {
503 if (dup2(fd, STDOUT_FILENO) < 0) {
511 *_saved_stdin = saved_stdin;
512 *_saved_stdout = saved_stdout;
517 safe_close(saved_stdout);
518 safe_close(saved_stdin);
/* Writes a printf-style message directly to /dev/console. The console is
 * opened write-only for this one message and closed again automatically
 * through _cleanup_close_. */
524 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
525 _cleanup_close_ int fd = -1;
530 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
534 va_start(ap, format);
535 vdprintf(fd, format, ap);
/* Undoes setup_confirm_stdio(): each saved descriptor that is valid (>= 0) is
 * duplicated back onto STDIN_FILENO / STDOUT_FILENO, and both saved
 * descriptors are closed afterwards. */
541 static int restore_confirm_stdio(int *saved_stdin,
547 assert(saved_stdout);
551 if (*saved_stdin >= 0)
552 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
555 if (*saved_stdout >= 0)
556 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
559 safe_close(*saved_stdin);
560 safe_close(*saved_stdout);
/* Asks on the terminal whether the command in argv shall be executed. stdio
 * is redirected with setup_confirm_stdio(), the command line is rendered
 * with exec_command_line(), and ask_char() stores one of 'y', 'n' or 's' in
 * *response; stdio is restored afterwards. */
565 static int ask_for_confirmation(char *response, char **argv) {
566 int saved_stdout = -1, saved_stdin = -1, r;
567 _cleanup_free_ char *line = NULL;
569 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
573 line = exec_command_line(argv);
577 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
/* The result of restoring stdio is not checked here */
579 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Sets the process' group identity: resolves context->group if configured,
 * initializes the supplementary groups of 'username' via initgroups() (not
 * for gid 0), sets the real/effective/saved gid, and finally appends the
 * manually configured supplementary groups to the current group list.
 * NOTE(review): the role of keep_groups and the error returns are on lines
 * not visible in this listing. */
584 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
585 bool keep_groups = false;
590 /* Lookup and set GID and supplementary group list. Here too
591 * we avoid NSS lookups for gid=0. */
593 if (context->group || username) {
595 if (context->group) {
596 const char *g = context->group;
598 if ((r = get_group_creds(&g, &gid)) < 0)
602 /* First step, initialize groups from /etc/group */
603 if (username && gid != 0) {
604 if (initgroups(username, gid) < 0)
610 /* Second step, set our gids */
611 if (setresgid(gid, gid, gid) < 0)
615 if (context->supplementary_groups) {
620 /* Final step, initialize any manually set supplementary groups */
621 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
623 if (!(gids = new(gid_t, ngroups_max)))
/* Start from the groups currently set; k counts the used entries */
627 if ((k = getgroups(ngroups_max, gids)) < 0) {
634 STRV_FOREACH(i, context->supplementary_groups) {
/* Never write past the ngroups_max entries that were allocated */
637 if (k >= ngroups_max) {
643 r = get_group_creds(&g, gids+k);
652 if (setgroups(k, gids) < 0) {
/* Switches the real/effective/saved uid to 'uid'. When the context carries a
 * capability set, SECURE_KEEP_CAPS is turned on first and the capability set
 * is temporarily extended by CAP_SETUID and CAP_SETPCAP, so that the uid
 * change and a later PR_SET_SECUREBITS call remain possible. */
663 static int enforce_user(const ExecContext *context, uid_t uid) {
666 /* Sets (but doesn't lookup) the uid and make sure we keep the
667 * capabilities while doing so. */
669 if (context->capabilities) {
670 _cleanup_cap_free_ cap_t d = NULL;
671 static const cap_value_t bits[] = {
672 CAP_SETUID, /* Necessary so that we can run setresuid() below */
673 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
676 /* First step: If we need to keep capabilities but
677 * drop privileges we need to make sure we keep our
678 * caps, while we drop privileges. */
680 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
/* Only call PR_SET_SECUREBITS when the value actually differs */
682 if (prctl(PR_GET_SECUREBITS) != sb)
683 if (prctl(PR_SET_SECUREBITS, sb) < 0)
687 /* Second step: set the capabilities. This will reduce
688 * the capabilities to the minimum we need. */
/* Work on a copy, so that context->capabilities itself stays unmodified */
690 d = cap_dup(context->capabilities);
694 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
695 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
698 if (cap_set_proc(d) < 0)
702 /* Third step: actually set the uids */
703 if (setresuid(uid, uid, uid) < 0)
706 /* At this point we should have all necessary capabilities but
707 are otherwise a normal user. However, the caps might have been
708 corrupted by the setresuid(), so we need to clean them up
709 later. This is done outside of this call. */
/* Conversation callback with the PAM pam_conv signature, for use in setup_pam().
 * NOTE(review): its return statement is not visible in this listing. */
716 static int null_conv(
718 const struct pam_message **msg,
719 struct pam_response **resp,
722 /* We don't support conversations */
/* Opens a PAM session for 'user' and leaves behind a helper process that
 * closes the session again once the parent has died. The visible sequence
 * is: pam_start(), pam_set_item(PAM_TTY), pam_acct_mgmt(),
 * pam_open_session(), pam_getenvlist(), block SIGTERM, and then (in the
 * helper) rename to "(sd-pam)", close the passed fds, drop to 'uid', arm
 * PR_SET_PDEATHSIG and wait for SIGTERM before calling pam_close_session().
 * PAM failures are reported as -EPERM.
 * NOTE(review): the fork call, the parameter list between the first and the
 * last parameter, and most error jumps are on lines not visible in this
 * listing. */
727 static int setup_pam(
733 int fds[], unsigned n_fds) {
735 static const struct pam_conv conv = {
740 pam_handle_t *handle = NULL;
742 int pam_code = PAM_SUCCESS;
745 bool close_session = false;
746 pid_t pam_pid = 0, parent_pid;
753 /* We set up PAM in the parent process, then fork. The child
754 * will then stay around until killed via PR_SET_PDEATHSIG or
755 * systemd via the cgroup logic. It will then remove the PAM
756 * session again. The parent process will exec() the actual
757 * daemon. We do things this way to ensure that the main PID
758 * of the daemon is the one we initially fork()ed. */
760 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
763 pam_code = pam_start(name, user, &conv, &handle);
764 if (pam_code != PAM_SUCCESS) {
770 pam_code = pam_set_item(handle, PAM_TTY, tty);
771 if (pam_code != PAM_SUCCESS)
775 pam_code = pam_acct_mgmt(handle, flags);
776 if (pam_code != PAM_SUCCESS)
779 pam_code = pam_open_session(handle, flags);
780 if (pam_code != PAM_SUCCESS)
783 close_session = true;
785 e = pam_getenvlist(handle);
787 pam_code = PAM_BUF_ERR;
791 /* Block SIGTERM, so that we know that it won't get lost in
793 if (sigemptyset(&ss) < 0 ||
794 sigaddset(&ss, SIGTERM) < 0 ||
795 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
798 parent_pid = getpid();
808 /* The child's job is to reset the PAM session on
811 /* This string must fit in 10 chars (i.e. the length
812 * of "/sbin/init"), to look pretty in /bin/ps */
813 rename_process("(sd-pam)");
815 /* Make sure we don't keep open the passed fds in this
816 child. We assume that otherwise only those fds are
817 open here that have been opened by PAM. */
818 close_many(fds, n_fds);
820 /* Drop privileges - we don't need any to pam_close_session
821 * and this will make PR_SET_PDEATHSIG work in most cases.
822 * If this fails, ignore the error - but expect sd-pam threads
823 * to fail to exit normally */
/* NOTE(review): setresuid() reports its failure reason in errno. The
 * declaration and value of r are not visible here, so confirm that
 * strerror(-r) really describes this failure (errno may be what is
 * intended). */
824 if (setresuid(uid, uid, uid) < 0)
825 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
827 /* Wait until our parent died. This will only work if
828 * the above setresuid() succeeds, otherwise the kernel
829 * will not allow unprivileged parents kill their privileged
830 * children this way. We rely on the control groups kill logic
831 * to do the rest for us. */
832 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
835 /* Check if our parent process might already have
837 if (getppid() == parent_pid) {
839 if (sigwait(&ss, &sig) < 0) {
846 assert(sig == SIGTERM);
851 /* If our parent died we'll end the session */
852 if (getppid() != parent_pid) {
853 pam_code = pam_close_session(handle, flags);
854 if (pam_code != PAM_SUCCESS)
861 pam_end(handle, pam_code | flags);
865 /* If the child was forked off successfully it will do all the
866 * cleanups, so forget about the handle here. */
869 /* Unblock SIGTERM again in the parent */
870 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
873 /* We close the log explicitly here, since the PAM modules
874 * might have opened it, but we don't want this fd around. */
883 if (pam_code != PAM_SUCCESS) {
884 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
885 err = -EPERM; /* PAM errors do not map to errno */
887 log_error("PAM failed: %m");
893 pam_code = pam_close_session(handle, flags);
895 pam_end(handle, pam_code | flags);
903 kill(pam_pid, SIGTERM);
904 kill(pam_pid, SIGCONT);
/* Renames the current process to a short, parenthesized name derived from
 * 'path', or to "(...)" in a fallback case. The buffer of 11 bytes holds
 * '(' + the copied part + ')' + NUL, so the copied length l must not exceed
 * 8. NOTE(review): the lines computing p and l are not visible in this
 * listing, so that bound cannot be confirmed from here. */
911 static void rename_process_from_path(const char *path) {
912 char process_name[11];
916 /* This resulting string must fit in 10 chars (i.e. the length
917 * of "/sbin/init") to look pretty in /bin/ps */
921 rename_process("(...)");
927 /* The end of the process name is usually more
928 * interesting, since the first bit might just be
934 process_name[0] = '(';
935 memcpy(process_name+1, p, l);
936 process_name[1+l] = ')';
937 process_name[1+l+1] = 0;
939 rename_process(process_name);
/* Builds and loads a libseccomp system call filter from the context.
 *
 * In whitelist mode the default action is the "negative" action and each
 * listed system call gets SCMP_ACT_ALLOW; otherwise the default is
 * SCMP_ACT_ALLOW and each listed call gets the negative action. The negative
 * action is SCMP_ACT_KILL when no syscall_errno is configured, else
 * SCMP_ACT_ERRNO(syscall_errno). */
944 static int apply_seccomp(const ExecContext *c) {
945 uint32_t negative_action, action;
946 scmp_filter_ctx *seccomp;
953 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
955 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
959 if (c->syscall_archs) {
961 SET_FOREACH(id, c->syscall_archs, i) {
/* Set entries are stored offset by one; presumably so that the value 0
 * does not end up as a NULL pointer in the set - TODO confirm */
962 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
970 r = seccomp_add_secondary_archs(seccomp);
975 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
976 SET_FOREACH(id, c->syscall_filter, i) {
977 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
/* Presumably keeps seccomp_load() from enabling no_new_privs on its own;
 * see libseccomp's documentation of SCMP_FLTATR_CTL_NNP */
982 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
986 r = seccomp_load(seccomp);
989 seccomp_release(seccomp);
/* Builds and loads a libseccomp filter that restricts address families. The
 * filter allows everything by default and adds rules returning
 * EPROTONOSUPPORT. In whitelist mode the rules cover everything below the
 * lowest and above the highest listed family, plus every unlisted family in
 * between (or everything, when no valid family is listed). In blacklist mode
 * there is one rule per listed family.
 * NOTE(review): the system call the rules are attached to is on lines not
 * visible in this listing. */
993 static int apply_address_families(const ExecContext *c) {
994 scmp_filter_ctx *seccomp;
1000 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1004 r = seccomp_add_secondary_archs(seccomp);
1008 if (c->address_families_whitelist) {
1009 int af, first = 0, last = 0;
1012 /* If this is a whitelist, we first block the address
1013 * families that are out of range and then everything
1014 * that is not in the set. First, we find the lowest
1015 * and highest address family in the set. */
1017 SET_FOREACH(afp, c->address_families, i) {
1018 af = PTR_TO_INT(afp);
1020 if (af <= 0 || af >= af_max())
1023 if (first == 0 || af < first)
1026 if (last == 0 || af > last)
1030 assert((first == 0) == (last == 0));
1034 /* No entries in the valid range, block everything */
1035 r = seccomp_rule_add(
1037 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1045 /* Block everything below the first entry */
1046 r = seccomp_rule_add(
1048 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1051 SCMP_A0(SCMP_CMP_LT, first));
1055 /* Block everything above the last entry */
1056 r = seccomp_rule_add(
1058 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1061 SCMP_A0(SCMP_CMP_GT, last));
1065 /* Block everything between the first and last
1067 for (af = 1; af < af_max(); af++) {
1069 if (set_contains(c->address_families, INT_TO_PTR(af)))
1072 r = seccomp_rule_add(
1074 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1077 SCMP_A0(SCMP_CMP_EQ, af));
1086 /* If this is a blacklist, then generate one rule for
1087 * each address family that are then combined in OR
1090 SET_FOREACH(af, c->address_families, i) {
1092 r = seccomp_rule_add(
1094 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1097 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1103 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1107 r = seccomp_load(seccomp);
1110 seccomp_release(seccomp);
/* Waits on the idle pipe before the command is run. idle_pipe[1] and
 * idle_pipe[2] are closed right away. If idle_pipe[0] is valid, the function
 * waits up to IDLE_TIMEOUT_USEC for POLLHUP on it. On timeout it writes "x"
 * to idle_pipe[3] (if valid) and waits another IDLE_TIMEOUT2_USEC. All
 * remaining ends are closed before returning. */
1116 static void do_idle_pipe_dance(int idle_pipe[4]) {
1120 safe_close(idle_pipe[1]);
1121 safe_close(idle_pipe[2]);
1123 if (idle_pipe[0] >= 0) {
1126 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1128 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1129 /* Signal systemd that we are bored and want to continue. */
/* The write result is not checked here */
1130 write(idle_pipe[3], "x", 1);
1132 /* Wait for systemd to react to the signal above. */
1133 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1136 safe_close(idle_pipe[0]);
1140 safe_close(idle_pipe[3]);
/* Builds the environment block that the manager itself contributes to the
 * command: LISTEN_PID/LISTEN_FDS, WATCHDOG_PID/WATCHDOG_USEC (only when
 * watchdog_usec > 0), HOME, LOGNAME, USER, SHELL and a terminal default from
 * default_term_for_tty() when a TTY is involved. The array is allocated with
 * room for 10 pointers, i.e. up to nine variables plus the terminating NULL;
 * the assert at the end checks that this bound was respected.
 * NOTE(review): the conditions guarding most entries and the hand-over of the
 * result to the caller are on lines not visible in this listing. */
1143 static int build_environment(
1144 const ExecContext *c,
1146 usec_t watchdog_usec,
1148 const char *username,
1152 _cleanup_strv_free_ char **our_env = NULL;
1159 our_env = new0(char*, 10);
1164 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1166 our_env[n_env++] = x;
1168 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1170 our_env[n_env++] = x;
1173 if (watchdog_usec > 0) {
1174 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1176 our_env[n_env++] = x;
1178 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1180 our_env[n_env++] = x;
1184 x = strappend("HOME=", home);
1187 our_env[n_env++] = x;
1191 x = strappend("LOGNAME=", username);
1194 our_env[n_env++] = x;
1196 x = strappend("USER=", username);
1199 our_env[n_env++] = x;
1203 x = strappend("SHELL=", shell);
1206 our_env[n_env++] = x;
1209 if (is_terminal_input(c->std_input) ||
1210 c->std_output == EXEC_OUTPUT_TTY ||
1211 c->std_error == EXEC_OUTPUT_TTY ||
1214 x = strdup(default_term_for_tty(tty_path(c)));
1217 our_env[n_env++] = x;
1220 our_env[n_env++] = NULL;
1221 assert(n_env <= 10);
/* Prepares the current process for the command and finally calls
 * execve(command->path, ...). The visible steps are, in order: reset of
 * signal dispositions and signal mask, idle pipe wait, closing of all fds
 * except those collected in dont_close[], optional confirmation prompt,
 * stdin/stdout/stderr setup, cgroup attachment, OOM score, nice level,
 * scheduler, CPU affinity, I/O priority, timer slack, personality, utmp
 * record, user lookup, bus endpoint policy, cgroup delegation, runtime
 * directories, groups, umask, PAM, network and mount namespaces,
 * chroot/chdir, a second fd clean-up followed by shift_fds()/flags_fds(),
 * resource limits, capability bounding set, SMACK label, uid change, secure
 * bits, capabilities, no_new_privileges, seccomp filters, SELinux/AppArmor
 * labels and assembly of the final environment and argv.
 *
 * On the failure paths that are visible, *error is set to an EXIT_* code
 * naming the step that failed.
 *
 * NOTE(review): many lines of this function (most returns, several guards
 * and preprocessor conditionals) are not visible in this listing. In
 * particular, runtime->netns_storage_socket[] is dereferenced where the
 * dont_close[] list is built, while later code tests "runtime &&" first;
 * confirm that a matching NULL guard exists on a line not shown here. */
1229 static int exec_child(ExecCommand *command,
1230 const ExecContext *context,
1231 const ExecParameters *params,
1232 ExecRuntime *runtime,
1235 int *fds, unsigned n_fds,
1239 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1240 const char *username = NULL, *home = NULL, *shell = NULL;
1241 unsigned n_dont_close = 0;
/* Room for fds[] plus socket_fd, the bus endpoint fd and both netns sockets */
1242 int dont_close[n_fds + 4];
1243 uid_t uid = (uid_t) -1;
1244 gid_t gid = (gid_t) -1;
1252 rename_process_from_path(command->path);
1254 /* We reset exactly these signals, since they are the
1255 * only ones we set to SIG_IGN in the main daemon. All
1256 * others we leave untouched because we set them to
1257 * SIG_DFL or a valid handler initially, both of which
1258 * will be demoted to SIG_DFL. */
1259 default_signals(SIGNALS_CRASH_HANDLER,
1260 SIGNALS_IGNORE, -1);
1262 if (context->ignore_sigpipe)
1263 ignore_signals(SIGPIPE, -1);
1265 err = reset_signal_mask();
1267 *error = EXIT_SIGNAL_MASK;
1271 if (params->idle_pipe)
1272 do_idle_pipe_dance(params->idle_pipe);
1274 /* Close sockets very early to make sure we don't
1275 * block init reexecution because it cannot bind its
1280 dont_close[n_dont_close++] = socket_fd;
1282 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1283 n_dont_close += n_fds;
1285 if (params->bus_endpoint_fd >= 0)
1286 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1288 if (runtime->netns_storage_socket[0] >= 0)
1289 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1290 if (runtime->netns_storage_socket[1] >= 0)
1291 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1294 err = close_all_fds(dont_close, n_dont_close);
1300 if (!context->same_pgrp)
1302 *error = EXIT_SETSID;
1306 exec_context_tty_reset(context);
1308 if (params->confirm_spawn) {
1311 err = ask_for_confirmation(&response, argv);
1312 if (err == -ETIMEDOUT)
1313 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1315 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1316 else if (response == 's') {
1317 write_confirm_message("Skipping execution.\n");
1318 *error = EXIT_CONFIRM;
1320 } else if (response == 'n') {
1321 write_confirm_message("Failing execution.\n");
1327 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1328 * must be sure to drop O_NONBLOCK */
1330 fd_nonblock(socket_fd, false);
1332 err = setup_input(context, socket_fd, params->apply_tty_stdin);
1334 *error = EXIT_STDIN;
1338 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1340 *error = EXIT_STDOUT;
1344 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1346 *error = EXIT_STDERR;
1350 if (params->cgroup_path) {
1351 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1353 *error = EXIT_CGROUP;
1358 if (context->oom_score_adjust_set) {
1361 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1364 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1365 *error = EXIT_OOM_ADJUST;
1370 if (context->nice_set)
1371 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1376 if (context->cpu_sched_set) {
1377 struct sched_param param = {
1378 .sched_priority = context->cpu_sched_priority,
1381 err = sched_setscheduler(0,
1382 context->cpu_sched_policy |
1383 (context->cpu_sched_reset_on_fork ?
1384 SCHED_RESET_ON_FORK : 0),
1387 *error = EXIT_SETSCHEDULER;
1392 if (context->cpuset)
1393 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1394 *error = EXIT_CPUAFFINITY;
1398 if (context->ioprio_set)
1399 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1400 *error = EXIT_IOPRIO;
1404 if (context->timer_slack_nsec != NSEC_INFINITY)
1405 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1406 *error = EXIT_TIMERSLACK;
1410 if (context->personality != 0xffffffffUL)
1411 if (personality(context->personality) < 0) {
1412 *error = EXIT_PERSONALITY;
1416 if (context->utmp_id)
1417 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1419 if (context->user) {
1420 username = context->user;
1421 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1427 if (is_terminal_input(context->std_input)) {
1428 err = chown_terminal(STDIN_FILENO, uid);
1430 *error = EXIT_STDIN;
1437 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1438 uid_t ep_uid = (uid == (uid_t) -1) ? 0 : uid;
1440 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1442 *error = EXIT_BUS_ENDPOINT;
1448 /* If delegation is enabled we'll pass ownership of the cgroup
1449 * (but only in systemd's own controller hierarchy!) to the
1450 * user of the new process. */
1451 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1452 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1454 *error = EXIT_CGROUP;
1459 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1461 *error = EXIT_CGROUP;
1466 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1469 STRV_FOREACH(rt, context->runtime_directory) {
1470 _cleanup_free_ char *p;
1472 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1474 *error = EXIT_RUNTIME_DIRECTORY;
1478 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1480 *error = EXIT_RUNTIME_DIRECTORY;
1486 if (params->apply_permissions) {
1487 err = enforce_groups(context, username, gid);
1489 *error = EXIT_GROUP;
1494 umask(context->umask);
1497 if (params->apply_permissions && context->pam_name && username) {
1498 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1506 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1507 err = setup_netns(runtime->netns_storage_socket);
1509 *error = EXIT_NETWORK;
1514 if (!strv_isempty(context->read_write_dirs) ||
1515 !strv_isempty(context->read_only_dirs) ||
1516 !strv_isempty(context->inaccessible_dirs) ||
1517 context->mount_flags != 0 ||
1518 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1519 params->bus_endpoint_path ||
1520 context->private_devices ||
1521 context->protect_system != PROTECT_SYSTEM_NO ||
1522 context->protect_home != PROTECT_HOME_NO) {
1524 char *tmp = NULL, *var = NULL;
1526 /* The runtime struct only contains the parent
1527 * of the private /tmp, which is
1528 * non-accessible to world users. Inside of it
1529 * there's a /tmp that is sticky, and that's
1530 * the one we want to use here. */
1532 if (context->private_tmp && runtime) {
1533 if (runtime->tmp_dir)
1534 tmp = strappenda(runtime->tmp_dir, "/tmp");
1535 if (runtime->var_tmp_dir)
1536 var = strappenda(runtime->var_tmp_dir, "/tmp");
1539 err = setup_namespace(
1540 context->read_write_dirs,
1541 context->read_only_dirs,
1542 context->inaccessible_dirs,
1545 params->bus_endpoint_path,
1546 context->private_devices,
1547 context->protect_home,
1548 context->protect_system,
1549 context->mount_flags);
1552 log_warning_unit(params->unit_id, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %s", strerror(-err));
1554 *error = EXIT_NAMESPACE;
1559 if (params->apply_chroot) {
1560 if (context->root_directory)
1561 if (chroot(context->root_directory) < 0) {
1562 *error = EXIT_CHROOT;
1566 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1567 *error = EXIT_CHDIR;
1571 _cleanup_free_ char *d = NULL;
1573 if (asprintf(&d, "%s/%s",
1574 context->root_directory ? context->root_directory : "",
1575 context->working_directory ? context->working_directory : "") < 0) {
1576 *error = EXIT_MEMORY;
1581 *error = EXIT_CHDIR;
1586 /* We repeat the fd closing here, to make sure that
1587 * nothing is leaked from the PAM modules. Note that
1588 * we are more aggressive this time since socket_fd
1589 * and the netns fds we don't need anymore. The custom
1590 * endpoint fd was needed to upload the policy and can
1591 * now be closed as well. */
1592 err = close_all_fds(fds, n_fds);
1594 err = shift_fds(fds, n_fds);
1596 err = flags_fds(fds, n_fds, context->non_blocking);
1602 if (params->apply_permissions) {
1604 for (i = 0; i < _RLIMIT_MAX; i++) {
1605 if (!context->rlimit[i])
1608 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1609 *error = EXIT_LIMITS;
1614 if (context->capability_bounding_set_drop) {
1615 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1617 *error = EXIT_CAPABILITIES;
1623 if (context->smack_process_label) {
1624 err = mac_smack_apply_pid(0, context->smack_process_label);
1626 *error = EXIT_SMACK_PROCESS_LABEL;
1632 if (context->user) {
1633 err = enforce_user(context, uid);
1640 /* PR_GET_SECUREBITS is not privileged, while
1641 * PR_SET_SECUREBITS is. So to suppress
1642 * potential EPERMs we'll try not to call
1643 * PR_SET_SECUREBITS unless necessary. */
1644 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1645 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1646 *error = EXIT_SECUREBITS;
1650 if (context->capabilities)
1651 if (cap_set_proc(context->capabilities) < 0) {
1652 *error = EXIT_CAPABILITIES;
1656 if (context->no_new_privileges)
1657 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1658 *error = EXIT_NO_NEW_PRIVILEGES;
1663 if (context->address_families_whitelist ||
1664 !set_isempty(context->address_families)) {
1665 err = apply_address_families(context);
1667 *error = EXIT_ADDRESS_FAMILIES;
1672 if (context->syscall_whitelist ||
1673 !set_isempty(context->syscall_filter) ||
1674 !set_isempty(context->syscall_archs)) {
1675 err = apply_seccomp(context);
1677 *error = EXIT_SECCOMP;
1684 if (mac_selinux_use()) {
1685 if (context->selinux_context) {
1686 err = setexeccon(context->selinux_context);
1687 if (err < 0 && !context->selinux_context_ignore) {
1688 *error = EXIT_SELINUX_CONTEXT;
1693 if (params->selinux_context_net && socket_fd >= 0) {
1694 _cleanup_free_ char *label = NULL;
1696 err = mac_selinux_get_child_mls_label(socket_fd, command->path, &label);
1698 *error = EXIT_SELINUX_CONTEXT;
1702 err = setexeccon(label);
1704 *error = EXIT_SELINUX_CONTEXT;
1711 #ifdef HAVE_APPARMOR
1712 if (context->apparmor_profile && mac_apparmor_use()) {
1713 err = aa_change_onexec(context->apparmor_profile);
1714 if (err < 0 && !context->apparmor_profile_ignore) {
1715 *error = EXIT_APPARMOR_PROFILE;
1722 err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1724 *error = EXIT_MEMORY;
1728 final_env = strv_env_merge(5,
1729 params->environment,
1731 context->environment,
1736 *error = EXIT_MEMORY;
1740 final_argv = replace_env_argv(argv, final_env);
1742 *error = EXIT_MEMORY;
1746 final_env = strv_env_clean(final_env);
1748 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1749 _cleanup_free_ char *line;
1751 line = exec_command_line(final_argv);
1754 log_struct_unit(LOG_DEBUG,
1756 "EXECUTABLE=%s", command->path,
1757 "MESSAGE=Executing: %s", line,
1762 execve(command->path, final_argv, final_env);
/* Entry point for starting a command. Visible behaviour: when stdin, stdout
 * or stderr is configured for socket mode, params->n_fds is checked against 1
 * and params->fds[0] becomes socket_fd; otherwise the passed fds are handed
 * through. The environment files are then loaded, "About to execute" is
 * logged, and exec_child() is run, with a failure logged together with the
 * step and error. Afterwards the new PID is logged, attached to
 * params->cgroup_path (if set) and recorded via exec_status_start().
 * NOTE(review): the fork call that separates the exec_child() path from the
 * parent path, and all return statements, are on lines not visible in this
 * listing. */
1767 int exec_spawn(ExecCommand *command,
1768 const ExecContext *context,
1769 const ExecParameters *params,
1770 ExecRuntime *runtime,
1773 _cleanup_strv_free_ char **files_env = NULL;
1774 int *fds = NULL; unsigned n_fds = 0;
1784 assert(params->fds || params->n_fds <= 0);
1786 if (context->std_input == EXEC_INPUT_SOCKET ||
1787 context->std_output == EXEC_OUTPUT_SOCKET ||
1788 context->std_error == EXEC_OUTPUT_SOCKET) {
1790 if (params->n_fds != 1)
1793 socket_fd = params->fds[0];
1797 n_fds = params->n_fds;
1800 err = exec_context_load_environment(context, params->unit_id, &files_env);
1802 log_struct_unit(LOG_ERR,
1804 "MESSAGE=Failed to load environment files: %s", strerror(-err),
1810 argv = params->argv ?: command->argv;
1812 line = exec_command_line(argv);
1816 log_struct_unit(LOG_DEBUG,
1818 "EXECUTABLE=%s", command->path,
1819 "MESSAGE=About to execute: %s", line,
1830 err = exec_child(command,
1841 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1842 "EXECUTABLE=%s", command->path,
1843 "MESSAGE=Failed at step %s spawning %s: %s",
1844 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1845 command->path, strerror(-err),
1854 log_struct_unit(LOG_DEBUG,
1856 "MESSAGE=Forked %s as "PID_FMT,
1860 /* We add the new process to the cgroup both in the child (so
1861 * that we can be sure that no user code is ever executed
1862 * outside of the cgroup) and in the parent (so that we can be
1863 * sure that when we kill the cgroup the process will be
1865 if (params->cgroup_path)
1866 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1868 exec_status_start(&command->exec_status, pid);
/* Fills an ExecContext with its default settings: best-effort I/O
 * priority class with data 0, SCHED_OTHER CPU policy, syslog priority
 * LOG_DAEMON|LOG_INFO with level prefixes enabled, SIGPIPE ignored, and
 * mode 0755 for runtime directories.
 *
 * timer_slack_nsec == NSEC_INFINITY and personality == 0xffffffffUL act
 * as "not configured" markers: exec_context_dump() skips both fields
 * while they still hold these values. */
1874 void exec_context_init(ExecContext *c) {
1878 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1879 c->cpu_sched_policy = SCHED_OTHER;
1880 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1881 c->syslog_level_prefix = true;
1882 c->ignore_sigpipe = true;
1883 c->timer_slack_nsec = NSEC_INFINITY;
1884 c->personality = 0xffffffffUL;
1885 c->runtime_directory_mode = 0755;
/* Releases the members owned by an ExecContext and resets each pointer
 * to NULL afterwards, so the structure holds no dangling references.
 * The ExecContext itself is not freed by the code visible here.
 *
 * Each member is released with the function matching its type:
 * strv_free() for string vectors, free() for plain strings, cap_free()
 * for the capability set, CPU_FREE() for the CPU set, set_free() for
 * sets and bus_endpoint_free() for the bus endpoint. */
1888 void exec_context_done(ExecContext *c) {
1893 strv_free(c->environment);
1894 c->environment = NULL;
1896 strv_free(c->environment_files);
1897 c->environment_files = NULL;
/* Walk every resource-limit slot of the context. */
1899 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1901 c->rlimit[l] = NULL;
1904 free(c->working_directory);
1905 c->working_directory = NULL;
1906 free(c->root_directory);
1907 c->root_directory = NULL;
1912 free(c->syslog_identifier);
1913 c->syslog_identifier = NULL;
1921 strv_free(c->supplementary_groups);
1922 c->supplementary_groups = NULL;
1927 if (c->capabilities) {
1928 cap_free(c->capabilities);
1929 c->capabilities = NULL;
1932 strv_free(c->read_only_dirs);
1933 c->read_only_dirs = NULL;
1935 strv_free(c->read_write_dirs);
1936 c->read_write_dirs = NULL;
1938 strv_free(c->inaccessible_dirs);
1939 c->inaccessible_dirs = NULL;
1942 CPU_FREE(c->cpuset);
1947 free(c->selinux_context);
1948 c->selinux_context = NULL;
1950 free(c->apparmor_profile);
1951 c->apparmor_profile = NULL;
1953 set_free(c->syscall_filter);
1954 c->syscall_filter = NULL;
1956 set_free(c->syscall_archs);
1957 c->syscall_archs = NULL;
1959 set_free(c->address_families);
1960 c->address_families = NULL;
1962 strv_free(c->runtime_directory);
1963 c->runtime_directory = NULL;
1965 bus_endpoint_free(c->bus_endpoint);
1966 c->bus_endpoint = NULL;
/* Removes the runtime directories configured for a context.
 *
 * For every entry of c->runtime_directory the path
 * "<runtime_prefix>/<entry>" is built with strjoin() and passed to
 * rm_rf_dangerous(), whose result is not checked. A NULL
 * runtime_prefix is tested for before the loop.
 *
 * NOTE(review): the return values and the handling of a failed
 * strjoin() are not visible in this view. */
1969 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1974 if (!runtime_prefix)
1977 STRV_FOREACH(i, c->runtime_directory) {
1978 _cleanup_free_ char *p;
1980 p = strjoin(runtime_prefix, "/", *i, NULL);
1984 /* We execute this synchronously, since we need to be
1985 * sure this is gone when we start the service
1987 rm_rf_dangerous(p, false, true, false);
/* Finalizes a single ExecCommand in place. It is called for every
 * array element by exec_command_done_array() and for every list entry
 * by exec_command_free_list().
 * NOTE(review): the body is not visible in this view; presumably it
 * releases the members owned by c — confirm. */
1993 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n ExecCommand objects stored
 * contiguously starting at c. The array itself is left alone. */
2003 void exec_command_done_array(ExecCommand *c, unsigned n) {
2006 for (i = 0; i < n; i++)
2007 exec_command_done(c+i);
/* Tears down a linked list of commands headed by c: an entry i is
 * unlinked from the "command" list with LIST_REMOVE() and then handed
 * to exec_command_done().
 * NOTE(review): the loop construct and whatever follows
 * exec_command_done() for each entry are not visible in this view. */
2010 void exec_command_free_list(ExecCommand *c) {
2014 LIST_REMOVE(command, c, i);
2015 exec_command_done(i);
/* Treats c as an array of n list heads and releases each list with
 * exec_command_free_list(). */
2020 void exec_command_free_array(ExecCommand **c, unsigned n) {
2023 for (i = 0; i < n; i++) {
2024 exec_command_free_list(c[i]);
/* Loads all environment files configured in c->environment_files.
 *
 * Each configured name is checked with path_is_absolute() and then
 * expanded with glob(), so one entry may match several files. Every
 * match is read with load_env_file(), cleaned with
 * strv_env_clean_log() (which is given unit_id and the file name for
 * its log output), and merged into the accumulated vector r via
 * strv_env_merge().
 *
 * When glob() fails the function returns -errno, or -EINVAL if errno
 * is 0.
 *
 * NOTE(review): how the result reaches *l, the role of the "ignore"
 * flag and the remaining error paths are not visible in this view. */
2029 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2030 char **i, **r = NULL;
2035 STRV_FOREACH(i, c->environment_files) {
2038 bool ignore = false;
2040 _cleanup_globfree_ glob_t pglob = {};
2050 if (!path_is_absolute(fn)) {
2058 /* Filename supports globbing, take all matching files */
2060 if (glob(fn, 0, NULL, &pglob) != 0) {
2065 return errno ? -errno : -EINVAL;
2067 count = pglob.gl_pathc;
2075 for (n = 0; n < count; n++) {
2076 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2084 /* Log invalid environment variables with filename */
2086 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2093 m = strv_env_merge(2, r, p);
/* Decides whether the given tty could be the device behind
 * /dev/console.
 *
 * The name is tested for a "/dev/" prefix and for being literally
 * "console". Otherwise resolve_dev_console() supplies the current
 * console device, and the result is true when it equals tty, or when
 * the console is "tty0" and tty_is_vc(tty) holds.
 *
 * NOTE(review): what happens on a "/dev/" prefix match and the early
 * returns are not visible in this view. */
2109 static bool tty_may_match_dev_console(const char *tty) {
2110 _cleanup_free_ char *active = NULL;
2113 if (startswith(tty, "/dev/"))
2116 /* trivial identity? */
2117 if (streq(tty, "console"))
2120 console = resolve_dev_console(&active);
2121 /* if we could not resolve, assume it may */
2125 /* "tty0" means the active VC, so it may be the same sometimes */
2126 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when a process started from this context might interact
 * with /dev/console. Two conditions must both hold:
 *  - the context uses a terminal at all: one of tty_reset, tty_vhangup
 *    or tty_vt_disallocate is set, or stdin/stdout/stderr is a
 *    terminal kind according to is_terminal_input()/is_terminal_output();
 *  - the context's tty, as returned by tty_path(), may be the console
 *    device according to tty_may_match_dev_console(). */
2129 bool exec_context_may_touch_console(ExecContext *ec) {
2130 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2131 is_terminal_input(ec->std_input) ||
2132 is_terminal_output(ec->std_output) ||
2133 is_terminal_output(ec->std_error)) &&
2134 tty_may_match_dev_console(tty_path(ec));
/* Writes strings from l to f, each one preceded by a single space and
 * without a trailing newline. exec_context_dump() uses it to finish a
 * "Key:" line it has already started.
 * NOTE(review): the loop over l is not visible in this view. */
2137 static void strv_fprintf(FILE *f, char **l) {
2143 fprintf(f, " %s", *g);
/* Writes a human-readable description of an ExecContext to f.
 *
 * Every emitted line starts with prefix, which is first passed through
 * strempty(). Settings are printed as "Key: value" lines. Most
 * optional settings are guarded so they only appear when configured:
 * for example the OOM score needs oom_score_adjust_set, the I/O and
 * CPU scheduling groups need ioprio_set and cpu_sched_set, the timer
 * slack must differ from NSEC_INFINITY and the personality from
 * 0xffffffffUL.
 *
 * Multi-valued settings (secure bits, supplementary groups, directory
 * lists, syscall architectures) are printed on one line as a
 * space-separated list, each token carrying its own leading space.
 *
 * The output is informational only; no return value is produced. */
2146 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2153 prefix = strempty(prefix);
2157 "%sWorkingDirectory: %s\n"
2158 "%sRootDirectory: %s\n"
2159 "%sNonBlocking: %s\n"
2160 "%sPrivateTmp: %s\n"
2161 "%sPrivateNetwork: %s\n"
2162 "%sPrivateDevices: %s\n"
2163 "%sProtectHome: %s\n"
2164 "%sProtectSystem: %s\n"
2165 "%sIgnoreSIGPIPE: %s\n",
2167 prefix, c->working_directory ? c->working_directory : "/",
2168 prefix, c->root_directory ? c->root_directory : "/",
2169 prefix, yes_no(c->non_blocking),
2170 prefix, yes_no(c->private_tmp),
2171 prefix, yes_no(c->private_network),
2172 prefix, yes_no(c->private_devices),
2173 prefix, protect_home_to_string(c->protect_home),
2174 prefix, protect_system_to_string(c->protect_system),
2175 prefix, yes_no(c->ignore_sigpipe));
2177 STRV_FOREACH(e, c->environment)
2178 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2180 STRV_FOREACH(e, c->environment_files)
2181 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2188 if (c->oom_score_adjust_set)
2190 "%sOOMScoreAdjust: %i\n",
2191 prefix, c->oom_score_adjust);
2193 for (i = 0; i < RLIM_NLIMITS; i++)
2195 fprintf(f, "%s%s: "RLIM_FMT"\n",
2196 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2198 if (c->ioprio_set) {
2199 _cleanup_free_ char *class_str = NULL;
2201 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2203 "%sIOSchedulingClass: %s\n"
2204 "%sIOPriority: %i\n",
2205 prefix, strna(class_str),
2206 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2209 if (c->cpu_sched_set) {
2210 _cleanup_free_ char *policy_str = NULL;
2212 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2214 "%sCPUSchedulingPolicy: %s\n"
2215 "%sCPUSchedulingPriority: %i\n"
2216 "%sCPUSchedulingResetOnFork: %s\n",
2217 prefix, strna(policy_str),
2218 prefix, c->cpu_sched_priority,
2219 prefix, yes_no(c->cpu_sched_reset_on_fork));
2223 fprintf(f, "%sCPUAffinity:", prefix);
2224 for (i = 0; i < c->cpuset_ncpus; i++)
2225 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2226 fprintf(f, " %u", i);
2230 if (c->timer_slack_nsec != NSEC_INFINITY)
2231 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2234 "%sStandardInput: %s\n"
2235 "%sStandardOutput: %s\n"
2236 "%sStandardError: %s\n",
2237 prefix, exec_input_to_string(c->std_input),
2238 prefix, exec_output_to_string(c->std_output),
2239 prefix, exec_output_to_string(c->std_error));
2245 "%sTTYVHangup: %s\n"
2246 "%sTTYVTDisallocate: %s\n",
2247 prefix, c->tty_path,
2248 prefix, yes_no(c->tty_reset),
2249 prefix, yes_no(c->tty_vhangup),
2250 prefix, yes_no(c->tty_vt_disallocate));
2252 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2253 c->std_output == EXEC_OUTPUT_KMSG ||
2254 c->std_output == EXEC_OUTPUT_JOURNAL ||
2255 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2256 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2257 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2258 c->std_error == EXEC_OUTPUT_SYSLOG ||
2259 c->std_error == EXEC_OUTPUT_KMSG ||
2260 c->std_error == EXEC_OUTPUT_JOURNAL ||
2261 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2262 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2263 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2265 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2267 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2268 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2271 "%sSyslogFacility: %s\n"
2272 "%sSyslogLevel: %s\n",
2273 prefix, strna(fac_str),
2274 prefix, strna(lvl_str));
2277 if (c->capabilities) {
2278 _cleanup_cap_free_charp_ char *t;
2280 t = cap_to_text(c->capabilities, NULL);
2282 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Every secure-bit token supplies its own leading space because the
 * format string puts no separators between the %s conversions. */
2286 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2288 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2289 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2290 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2291 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2292 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2293 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
2295 if (c->capability_bounding_set_drop) {
2297 fprintf(f, "%sCapabilityBoundingSet:", prefix);
/* The field stores the capabilities to drop, so the ones printed are
 * those whose bit is NOT set. */
2299 for (l = 0; l <= cap_last_cap(); l++)
2300 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2301 _cleanup_cap_free_charp_ char *t;
2305 fprintf(f, " %s", t);
2312 fprintf(f, "%sUser: %s\n", prefix, c->user);
2314 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2316 if (strv_length(c->supplementary_groups) > 0) {
2317 fprintf(f, "%sSupplementaryGroups:", prefix);
2318 strv_fprintf(f, c->supplementary_groups);
2323 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2325 if (strv_length(c->read_write_dirs) > 0) {
2326 fprintf(f, "%sReadWriteDirs:", prefix);
2327 strv_fprintf(f, c->read_write_dirs);
2331 if (strv_length(c->read_only_dirs) > 0) {
2332 fprintf(f, "%sReadOnlyDirs:", prefix);
2333 strv_fprintf(f, c->read_only_dirs);
2337 if (strv_length(c->inaccessible_dirs) > 0) {
2338 fprintf(f, "%sInaccessibleDirs:", prefix);
2339 strv_fprintf(f, c->inaccessible_dirs);
2345 "%sUtmpIdentifier: %s\n",
2346 prefix, c->utmp_id);
2348 if (c->selinux_context)
2350 "%sSELinuxContext: %s%s\n",
2351 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2353 if (c->personality != 0xffffffffUL)
2355 "%sPersonality: %s\n",
2356 prefix, strna(personality_to_string(c->personality)));
2358 if (c->syscall_filter) {
2366 "%sSystemCallFilter: ",
2369 if (!c->syscall_whitelist)
2373 SET_FOREACH(id, c->syscall_filter, j) {
2374 _cleanup_free_ char *name = NULL;
/* Set entries hold the syscall number plus one, hence the "- 1". */
2381 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2382 fputs(strna(name), f);
2389 if (c->syscall_archs) {
2396 "%sSystemCallArchitectures:",
2400 SET_FOREACH(id, c->syscall_archs, j)
2401 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2406 if (c->syscall_errno != 0)
2408 "%sSystemCallErrorNumber: %s\n",
2409 prefix, strna(errno_to_name(c->syscall_errno)));
2411 if (c->apparmor_profile)
2413 "%sAppArmorProfile: %s%s\n",
2414 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* Tells whether processes started from this context keep the
 * privileges of the spawning process. A configured user of "root" or
 * "0" is recognized explicitly.
 * NOTE(review): the handling of an unset c->user and the return
 * statements are not visible in this view. */
2417 bool exec_context_maintains_privileges(ExecContext *c) {
2420 /* Returns true if the process forked off would run under
2421 * an unchanged UID or as root. */
2426 if (streq(c->user, "root") || streq(c->user, "0"))
/* Marks the start of a process in an ExecStatus by stamping
 * s->start_timestamp with the current time.
 * NOTE(review): how pid is stored is not visible in this view. */
2432 void exec_status_start(ExecStatus *s, pid_t pid) {
2437 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of a process in an ExecStatus.
 *
 * A status that already tracks a different, non-zero PID is detected
 * first. The exit time is then stamped. When the context carries a
 * utmp_id, a dead-process utmp record is written with the PID, code
 * and status; the context's TTY is reset via exec_context_tty_reset().
 *
 * NOTE(review): the action taken on a PID mismatch and the storing of
 * code/status are not visible in this view. */
2440 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2443 if (s->pid && s->pid != pid)
2447 dual_timestamp_get(&s->exit_timestamp);
2453 if (context->utmp_id)
2454 utmp_put_dead_process(context->utmp_id, pid, code, status);
2456 exec_context_tty_reset(context);
/* Prints an ExecStatus to f, one "Key: value" line per field, each
 * line starting with prefix (passed through strempty() first).
 *
 * The PID is printed, followed by the start timestamp when its
 * realtime value is non-zero. Exit timestamp, exit code and exit
 * status are printed together when the exit realtime value is
 * non-zero. Timestamps are rendered into the local buffer buf with
 * format_timestamp(). */
2460 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2461 char buf[FORMAT_TIMESTAMP_MAX];
2469 prefix = strempty(prefix);
2472 "%sPID: "PID_FMT"\n",
2475 if (s->start_timestamp.realtime > 0)
2477 "%sStart Timestamp: %s\n",
2478 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2480 if (s->exit_timestamp.realtime > 0)
2482 "%sExit Timestamp: %s\n"
2484 "%sExit Status: %i\n",
2485 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2486 prefix, sigchld_code_to_string(s->code),
/* Builds a single printable command line from an argument vector.
 *
 * The vector is walked twice: once before a buffer of k chars is
 * allocated with new(), and once to fill it. Arguments containing
 * whitespace get special treatment (detected with strpbrk()).
 *
 * The returned string is heap-allocated; callers in this file release
 * it, e.g. via _cleanup_free_. As the FIXME says, arguments containing
 * both spaces and ticks are not rendered unambiguously.
 *
 * NOTE(review): the exact quoting and the allocation-failure path are
 * not visible in this view. */
2490 char *exec_command_line(char **argv) {
2498 STRV_FOREACH(a, argv)
2501 if (!(n = new(char, k)))
2505 STRV_FOREACH(a, argv) {
2512 if (strpbrk(*a, WHITESPACE)) {
2523 /* FIXME: this doesn't really handle arguments that have
2524 * spaces and ticks in them */
/* Prints one ExecCommand to f: its command line first, then its
 * ExecStatus.
 *
 * The command line comes from exec_command_line(c->argv). If that
 * returns NULL, the text for ENOMEM is printed in its place. The
 * status lines are printed with prefix2, which is prefix plus a tab. */
2529 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2530 _cleanup_free_ char *cmd = NULL;
2531 const char *prefix2;
2536 prefix = strempty(prefix);
2537 prefix2 = strappenda(prefix, "\t");
2539 cmd = exec_command_line(c->argv);
2541 "%sCommand Line: %s\n",
2542 prefix, cmd ? cmd : strerror(ENOMEM));
2544 exec_status_dump(&c->exec_status, f, prefix2);
/* Prints every command of the list starting at c by calling
 * exec_command_dump() on each entry with the same prefix. The
 * parameter c doubles as the loop cursor. */
2547 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2550 prefix = strempty(prefix);
2552 LIST_FOREACH(command, c, c)
2553 exec_command_dump(c, f, prefix);
/* Appends command e to the list *l. The current tail is looked up
 * with LIST_FIND_TAIL() and e is inserted after it, so existing
 * entries keep their order and e becomes the last one.
 * NOTE(review): the handling of an empty *l is not visible in this
 * view. */
2556 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2563 /* It's kind of important, that we keep the order here */
2564 LIST_FIND_TAIL(command, *l, end);
2565 LIST_INSERT_AFTER(command, *l, end, e);
/* Builds a string vector from path and the variadic arguments that
 * follow it, using strv_new_ap().
 * NOTE(review): how the vector is stored in c, the expected argument
 * terminator and the return values are not visible in this view. */
2570 int exec_command_set(ExecCommand *c, const char *path, ...) {
2578 l = strv_new_ap(path, ap);
/* Extends the argument vector of c. A temporary vector l is built
 * from path and the variadic arguments with strv_new_ap(), then
 * appended to c->argv with strv_extend_strv(). l is released
 * automatically on return (_cleanup_strv_free_).
 * NOTE(review): the return values are not visible in this view. */
2599 int exec_command_append(ExecCommand *c, const char *path, ...) {
2600 _cleanup_strv_free_ char **l = NULL;
2608 l = strv_new_ap(path, ap);
2614 r = strv_extend_strv(&c->argv, l);
/* Allocates a zero-initialized ExecRuntime into *rt and sets both
 * ends of netns_storage_socket to -1, the "no fd" marker used by the
 * ">= 0" and "< 0" checks elsewhere in this file.
 * NOTE(review): the behavior for an already-set *rt, the initial
 * reference count and the return values are not visible here. */
2622 static int exec_runtime_allocate(ExecRuntime **rt) {
2627 *rt = new0(ExecRuntime, 1);
2632 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepares the ExecRuntime needed by context c.
 *
 * The case where neither private_network nor private_tmp is set is
 * tested for up front. Otherwise the runtime is obtained through
 * exec_runtime_allocate(), and then:
 *  - with private_network and no storage socket yet, an
 *    AF_UNIX/SOCK_DGRAM socket pair is created in netns_storage_socket;
 *  - with private_tmp and no tmp_dir yet, setup_tmp_dirs() fills
 *    tmp_dir and var_tmp_dir, using id.
 *
 * NOTE(review): the return values are not visible in this view. */
2637 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2647 if (!c->private_network && !c->private_tmp)
2650 r = exec_runtime_allocate(rt);
2654 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2655 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2659 if (c->private_tmp && !(*rt)->tmp_dir) {
2660 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Reference-counting helper for ExecRuntime. The object must already
 * have a positive n_ref, which is asserted.
 * NOTE(review): the increment and the returned pointer are not
 * visible in this view. */
2668 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2670 assert(r->n_ref > 0);
/* Counterpart of exec_runtime_ref(). n_ref is asserted to be positive
 * on entry. Once n_ref is no longer positive, var_tmp_dir is freed
 * and the netns storage socket pair is closed.
 * NOTE(review): the decrement, the freeing of the remaining members
 * and the return value are not visible in this view. */
2676 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2681 assert(r->n_ref > 0);
2684 if (r->n_ref <= 0) {
2686 free(r->var_tmp_dir);
2687 safe_close_pair(r->netns_storage_socket);
/* Writes the state of an ExecRuntime to the serialization stream f of
 * unit u.
 *
 * Keys written: "tmp-dir", "var-tmp-dir" (when set), and
 * "netns-socket-0"/"netns-socket-1" for each open end of the storage
 * socket pair. For a socket the fd is first duplicated into fds with
 * fdset_put_dup(), and the number of that duplicate is what gets
 * written. exec_runtime_deserialize_item() reads the same keys back.
 *
 * NOTE(review): the guard on tmp_dir and the error handling for
 * fdset_put_dup() are not visible in this view. */
2694 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2703 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2705 if (rt->var_tmp_dir)
2706 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2708 if (rt->netns_storage_socket[0] >= 0) {
2711 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2715 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2718 if (rt->netns_storage_socket[1] >= 0) {
2721 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2725 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Applies one serialized key/value pair to an ExecRuntime, the
 * reverse of exec_runtime_serialize().
 *
 * Recognized keys:
 *  - "tmp-dir" / "var-tmp-dir": value is duplicated and replaces the
 *    stored path, with the old string freed;
 *  - "netns-socket-0" / "netns-socket-1": value is parsed as an fd
 *    number that must be present in fds. A bad value is only logged
 *    at debug level. A good one replaces the stored socket: the old
 *    fd is closed and the new one is taken out of fds with
 *    fdset_remove().
 *
 * Every branch first calls exec_runtime_allocate(rt).
 *
 * NOTE(review): the handling of unknown keys and the return values
 * are not visible in this view. */
2731 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2738 if (streq(key, "tmp-dir")) {
2741 r = exec_runtime_allocate(rt);
2745 copy = strdup(value);
2749 free((*rt)->tmp_dir);
2750 (*rt)->tmp_dir = copy;
2752 } else if (streq(key, "var-tmp-dir")) {
2755 r = exec_runtime_allocate(rt);
2759 copy = strdup(value);
2763 free((*rt)->var_tmp_dir);
2764 (*rt)->var_tmp_dir = copy;
2766 } else if (streq(key, "netns-socket-0")) {
2769 r = exec_runtime_allocate(rt);
2773 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2774 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2776 safe_close((*rt)->netns_storage_socket[0]);
2777 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2779 } else if (streq(key, "netns-socket-1")) {
2782 r = exec_runtime_allocate(rt);
2786 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2787 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2789 safe_close((*rt)->netns_storage_socket[1]);
2790 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job callback passed to asynchronous_job() by exec_runtime_destroy().
 * p is a heap-allocated path. This function takes ownership of it
 * (it is freed on return through _cleanup_free_) and removes the path
 * with rm_rf_dangerous(). */
2798 static void *remove_tmpdir_thread(void *p) {
2799 _cleanup_free_ char *path = p;
2801 rm_rf_dangerous(path, false, true, false);
/* Removes the resources held by an ExecRuntime.
 *
 * For tmp_dir and var_tmp_dir, remove_tmpdir_thread() is started via
 * asynchronous_job() with the path string as argument, and that
 * callback frees the string. If the job cannot be started, a warning
 * is logged. The netns storage socket pair is closed.
 *
 * NOTE(review): the check for multiple users, the guard on tmp_dir
 * and the structure of the failure branches are not visible here. */
2805 void exec_runtime_destroy(ExecRuntime *rt) {
2811 /* If there are multiple users of this, let's leave the stuff around */
2816 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2818 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2820 log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2827 if (rt->var_tmp_dir) {
2828 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2830 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2832 log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2833 free(rt->var_tmp_dir);
2836 rt->var_tmp_dir = NULL;
2839 safe_close_pair(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by enum value. These are the
 * strings exec_context_dump() prints for "StandardInput:" through
 * exec_input_to_string().
 * NOTE(review): the lookup functions are presumably generated by
 * DEFINE_STRING_TABLE_LOOKUP below — confirm against the macro. */
2842 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2843 [EXEC_INPUT_NULL] = "null",
2844 [EXEC_INPUT_TTY] = "tty",
2845 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2846 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2847 [EXEC_INPUT_SOCKET] = "socket"
2850 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum value. These are
 * the strings exec_context_dump() prints for "StandardOutput:" and
 * "StandardError:" through exec_output_to_string(). The "+console"
 * names belong to the *_AND_CONSOLE values.
 * NOTE(review): the lookup functions are presumably generated by
 * DEFINE_STRING_TABLE_LOOKUP below — confirm against the macro. */
2852 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2853 [EXEC_OUTPUT_INHERIT] = "inherit",
2854 [EXEC_OUTPUT_NULL] = "null",
2855 [EXEC_OUTPUT_TTY] = "tty",
2856 [EXEC_OUTPUT_SYSLOG] = "syslog",
2857 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2858 [EXEC_OUTPUT_KMSG] = "kmsg",
2859 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2860 [EXEC_OUTPUT_JOURNAL] = "journal",
2861 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2862 [EXEC_OUTPUT_SOCKET] = "socket"
2865 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);