1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
74 #include "utmp-wtmp.h"
76 #include "path-util.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
85 #include "apparmor-util.h"
86 #include "smack-util.h"
87 #include "bus-kernel.h"
91 #include "seccomp-util.h"
/* Upper bounds for the first and second wait in do_idle_pipe_dance(). */
94 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
95 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
97 /* This assumes there is a 'tty' group */
/* Send buffer size requested via fd_inc_sndbuf() in connect_logger_as(). */
100 #define SNDBUF_SIZE (8*1024*1024)
/* Moves the passed descriptors so that fds[i] ends up on descriptor number
 * i+3. Each entry is duplicated with F_DUPFD; if the wanted number was not
 * free, the position is remembered and the pass is repeated from there. */
102 static int shift_fds(int fds[], unsigned n_fds) {
103 int start, restart_from;
108 /* Modifies the fds array! (sorts it) */
118 for (i = start; i < (int) n_fds; i++) {
121 /* Already at right index? */
125 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
131 /* Hmm, the fd we wanted isn't free? Then
132 * let's remember that and try again from here */
133 if (nfd != i+3 && restart_from < 0)
137 if (restart_from < 0)
140 start = restart_from;
/* Applies the requested O_NONBLOCK state (nonblock) to every descriptor in
 * fds[] and clears FD_CLOEXEC on each of them. */
146 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
155 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
157 for (i = 0; i < n_fds; i++) {
159 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
162 /* We unconditionally drop FD_CLOEXEC from the fds,
163 * since after all we want to pass these fds to our
166 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when
 * none is set. */
173 _pure_ static const char *tty_path(const ExecContext *context) {
176 if (context->tty_path)
177 return context->tty_path;
179 return "/dev/console";
/* Performs the TTY clean-up steps enabled in the context: vhangup and
 * terminal reset on tty_path(context), and VT disallocation (the latter only
 * when an explicit context->tty_path is configured). */
182 static void exec_context_tty_reset(const ExecContext *context) {
185 if (context->tty_vhangup)
186 terminal_vhangup(tty_path(context));
188 if (context->tty_reset)
189 reset_terminal(tty_path(context));
191 if (context->tty_vt_disallocate && context->tty_path)
192 vt_disallocate(context->tty_path);
/* Tests o against the output modes that involve the terminal/console: TTY
 * and the three *_AND_CONSOLE variants. */
195 static bool is_terminal_output(ExecOutput o) {
197 o == EXEC_OUTPUT_TTY ||
198 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
199 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
200 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given flags (plus O_NOCTTY) and dup2()s it onto
 * nfd; r becomes nfd on success or -errno if dup2() fails. */
203 static int open_null_as(int flags, int nfd) {
208 fd = open("/dev/null", flags|O_NOCTTY);
213 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Connects a stream socket to /run/systemd/journal/stdout, shuts down its
 * read side, requests a send buffer of SNDBUF_SIZE and finally dup2()s the
 * socket onto nfd. The identifier (context->syslog_identifier, else ident),
 * the syslog priority, the level-prefix flag and the syslog/kmsg/console flags
 * derived from output are the stream parameters. */
221 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
223 union sockaddr_union sa = {
224 .un.sun_family = AF_UNIX,
225 .un.sun_path = "/run/systemd/journal/stdout",
229 assert(output < _EXEC_OUTPUT_MAX);
233 fd = socket(AF_UNIX, SOCK_STREAM, 0);
237 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
243 if (shutdown(fd, SHUT_RD) < 0) {
248 fd_inc_sndbuf(fd, SNDBUF_SIZE);
258 context->syslog_identifier ? context->syslog_identifier : ident,
260 context->syslog_priority,
261 !!context->syslog_level_prefix,
262 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
263 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
264 is_terminal_output(output));
267 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Opens the terminal at path with mode|O_NOCTTY and dup2()s it onto nfd;
 * r becomes nfd on success or -errno if dup2() fails. */
274 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
280 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
284 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Tests i against the three TTY input modes (TTY, TTY_FORCE, TTY_FAIL). */
292 static bool is_terminal_input(ExecInput i) {
294 i == EXEC_INPUT_TTY ||
295 i == EXEC_INPUT_TTY_FORCE ||
296 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the configured input mode to EXEC_INPUT_NULL when a TTY input
 * is requested but apply_tty_stdin is off, or when socket input is requested
 * without a valid socket fd. */
299 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
301 if (is_terminal_input(std_input) && !apply_tty_stdin)
302 return EXEC_INPUT_NULL;
304 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
305 return EXEC_INPUT_NULL;
/* Downgrades EXEC_OUTPUT_SOCKET to EXEC_OUTPUT_INHERIT when no valid socket
 * fd is available. */
310 static int fixup_output(ExecOutput std_output, int socket_fd) {
312 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
313 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the fixed-up input mode: /dev/null for
 * EXEC_INPUT_NULL, an acquired terminal for the TTY modes, or a duplicate of
 * socket_fd for EXEC_INPUT_SOCKET. */
318 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
323 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
327 case EXEC_INPUT_NULL:
328 return open_null_as(O_RDONLY, STDIN_FILENO);
331 case EXEC_INPUT_TTY_FORCE:
332 case EXEC_INPUT_TTY_FAIL: {
335 fd = acquire_terminal(tty_path(context),
336 i == EXEC_INPUT_TTY_FAIL,
337 i == EXEC_INPUT_TTY_FORCE,
343 if (fd != STDIN_FILENO) {
344 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
352 case EXEC_INPUT_SOCKET:
353 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
356 assert_not_reached("Unknown input type");
/* Sets up stdout or stderr (selected by fileno) according to the fixed-up
 * input/output modes. Depending on the mode the descriptor is duplicated from
 * stdout or stdin, opened on the terminal or /dev/null, connected to the
 * journal via connect_logger_as(), or duplicated from socket_fd. The journal
 * branch logs a "Failed to connect" message and opens /dev/null instead. */
360 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
368 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
369 o = fixup_output(context->std_output, socket_fd);
371 if (fileno == STDERR_FILENO) {
373 e = fixup_output(context->std_error, socket_fd);
375 /* This expects the input and output are already set up */
377 /* Don't change the stderr file descriptor if we inherit all
378 * the way and are not on a tty */
379 if (e == EXEC_OUTPUT_INHERIT &&
380 o == EXEC_OUTPUT_INHERIT &&
381 i == EXEC_INPUT_NULL &&
382 !is_terminal_input(context->std_input) &&
386 /* Duplicate from stdout if possible */
387 if (e == o || e == EXEC_OUTPUT_INHERIT)
388 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
392 } else if (o == EXEC_OUTPUT_INHERIT) {
393 /* If input got downgraded, inherit the original value */
394 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
395 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
397 /* If the input is connected to anything that's not a /dev/null, inherit that... */
398 if (i != EXEC_INPUT_NULL)
399 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
405 /* We need to open /dev/null here anew, to get the right access mode. */
406 return open_null_as(O_WRONLY, fileno);
411 case EXEC_OUTPUT_NULL:
412 return open_null_as(O_WRONLY, fileno);
414 case EXEC_OUTPUT_TTY:
415 if (is_terminal_input(i))
416 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
418 /* We don't reset the terminal if this is just about output */
419 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
421 case EXEC_OUTPUT_SYSLOG:
422 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
423 case EXEC_OUTPUT_KMSG:
424 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
425 case EXEC_OUTPUT_JOURNAL:
426 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
427 r = connect_logger_as(context, o, ident, unit_id, fileno);
429 log_unit_struct(unit_id,
431 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
432 fileno == STDOUT_FILENO ? "stdout" : "stderr",
433 unit_id, strerror(-r)),
436 r = open_null_as(O_WRONLY, fileno);
440 case EXEC_OUTPUT_SOCKET:
441 assert(socket_fd >= 0);
442 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
/* NOTE(review): this is the output set-up path (setup_input() says "Unknown
 * input type"), yet the message below says "error type" -- confirm the
 * intended wording. */
445 assert_not_reached("Unknown error type");
/* Tries to hand the terminal fd to uid with mode TTY_MODE. The fchown() and
 * fchmod() results are deliberately ignored; the outcome is checked through
 * fstat() by comparing owner and permission bits instead. */
449 static int chown_terminal(int fd, uid_t uid) {
454 /* This might fail. What matters are the results. */
455 (void) fchown(fd, uid, -1);
456 (void) fchmod(fd, TTY_MODE);
458 if (fstat(fd, &st) < 0)
461 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Saves duplicates of stdin and stdout (at descriptor numbers >= 3), acquires
 * a terminal, chowns it to the current user and installs it as stdin and
 * stdout. The saved descriptors are handed back through _saved_stdin and
 * _saved_stdout; the safe_close() calls at the end release the saved copies
 * (presumably on the failure path -- confirm). */
467 static int setup_confirm_stdio(int *_saved_stdin,
468 int *_saved_stdout) {
469 int fd = -1, saved_stdin, saved_stdout = -1, r;
471 assert(_saved_stdin);
472 assert(_saved_stdout);
474 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
478 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
479 if (saved_stdout < 0) {
484 fd = acquire_terminal(
489 DEFAULT_CONFIRM_USEC);
495 r = chown_terminal(fd, getuid());
499 if (dup2(fd, STDIN_FILENO) < 0) {
504 if (dup2(fd, STDOUT_FILENO) < 0) {
512 *_saved_stdin = saved_stdin;
513 *_saved_stdout = saved_stdout;
518 safe_close(saved_stdout);
519 safe_close(saved_stdin);
/* printf-style helper that opens /dev/console write-only and writes the
 * formatted message to it; the descriptor is closed automatically via
 * _cleanup_close_. */
525 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
526 _cleanup_close_ int fd = -1;
531 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
535 va_start(ap, format);
536 vdprintf(fd, format, ap);
/* Puts the saved stdin/stdout descriptors back onto STDIN_FILENO and
 * STDOUT_FILENO (only those that are valid, i.e. >= 0) and then closes the
 * saved copies. */
542 static int restore_confirm_stdio(int *saved_stdin,
548 assert(saved_stdout);
552 if (*saved_stdin >= 0)
553 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
556 if (*saved_stdout >= 0)
557 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
560 safe_close(*saved_stdin);
561 safe_close(*saved_stdout);
/* Switches stdio to the confirmation terminal, asks whether the command line
 * built from argv should be executed (accepted answers: y, n, s; stored in
 * *response) and restores the previous stdio afterwards. */
566 static int ask_for_confirmation(char *response, char **argv) {
567 int saved_stdout = -1, saved_stdin = -1, r;
568 _cleanup_free_ char *line = NULL;
570 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
574 line = exec_command_line(argv);
578 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
580 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group identity of the process: resolves context->group if set,
 * runs initgroups() for username (skipped for gid 0), sets the GID with
 * setresgid(), and finally appends the configured supplementary groups to the
 * current group list via setgroups(). */
585 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
586 bool keep_groups = false;
591 /* Lookup and set GID and supplementary group list. Here too
592 * we avoid NSS lookups for gid=0. */
594 if (context->group || username) {
596 if (context->group) {
597 const char *g = context->group;
599 if ((r = get_group_creds(&g, &gid)) < 0)
603 /* First step, initialize groups from /etc/group */
604 if (username && gid != 0) {
605 if (initgroups(username, gid) < 0)
611 /* Second step, set our gids */
612 if (setresgid(gid, gid, gid) < 0)
616 if (context->supplementary_groups) {
621 /* Final step, initialize any manually set supplementary groups */
622 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
624 if (!(gids = new(gid_t, ngroups_max)))
628 if ((k = getgroups(ngroups_max, gids)) < 0) {
635 STRV_FOREACH(i, context->supplementary_groups) {
638 if (k >= ngroups_max) {
644 r = get_group_creds(&g, gids+k);
653 if (setgroups(k, gids) < 0) {
/* Switches to the given UID via setresuid(). When the context carries a
 * capability set, SECURE_KEEP_CAPS is enabled first and a copy of that set,
 * extended by CAP_SETUID and CAP_SETPCAP, is installed so that the UID change
 * and the later securebits update remain possible. */
664 static int enforce_user(const ExecContext *context, uid_t uid) {
667 /* Sets (but doesn't lookup) the uid and make sure we keep the
668 * capabilities while doing so. */
670 if (context->capabilities) {
671 _cleanup_cap_free_ cap_t d = NULL;
672 static const cap_value_t bits[] = {
673 CAP_SETUID, /* Necessary so that we can run setresuid() below */
674 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
677 /* First step: If we need to keep capabilities but
678 * drop privileges we need to make sure we keep our
679 * caps, while we drop privileges. */
681 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
683 if (prctl(PR_GET_SECUREBITS) != sb)
684 if (prctl(PR_SET_SECUREBITS, sb) < 0)
688 /* Second step: set the capabilities. This will reduce
689 * the capabilities to the minimum we need. */
691 d = cap_dup(context->capabilities);
695 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
696 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
699 if (cap_set_proc(d) < 0)
703 /* Third step: actually set the uids */
704 if (setresuid(uid, uid, uid) < 0)
707 /* At this point we should have all necessary capabilities but
708 are otherwise a normal user. However, the caps might have been
709 corrupted by the setresuid(), so we need to clean them up
710 later. This is done outside of this call. */
/* PAM conversation callback used by setup_pam(); conversations are not
 * supported. */
717 static int null_conv(
719 const struct pam_message **msg,
720 struct pam_response **resp,
723 /* We don't support conversations */
/* Opens a PAM session for the given service name and user (pam_start,
 * PAM_TTY item, account management, session open), fetches the PAM
 * environment list, and hands session teardown to a helper process renamed
 * "(sd-pam)". The helper drops to uid, arranges to receive SIGTERM when its
 * parent dies, waits for that signal and then closes the PAM session. On
 * failure the session is closed here and the helper, if any, is signalled. */
728 static int setup_pam(
734 int fds[], unsigned n_fds) {
736 static const struct pam_conv conv = {
741 pam_handle_t *handle = NULL;
743 int pam_code = PAM_SUCCESS;
746 bool close_session = false;
747 pid_t pam_pid = 0, parent_pid;
754 /* We set up PAM in the parent process, then fork. The child
755 * will then stay around until killed via PR_SET_PDEATHSIG or
756 * systemd via the cgroup logic. It will then remove the PAM
757 * session again. The parent process will exec() the actual
758 * daemon. We do things this way to ensure that the main PID
759 * of the daemon is the one we initially fork()ed. */
761 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
764 pam_code = pam_start(name, user, &conv, &handle);
765 if (pam_code != PAM_SUCCESS) {
771 pam_code = pam_set_item(handle, PAM_TTY, tty);
772 if (pam_code != PAM_SUCCESS)
776 pam_code = pam_acct_mgmt(handle, flags);
777 if (pam_code != PAM_SUCCESS)
780 pam_code = pam_open_session(handle, flags);
781 if (pam_code != PAM_SUCCESS)
784 close_session = true;
786 e = pam_getenvlist(handle);
788 pam_code = PAM_BUF_ERR;
792 /* Block SIGTERM, so that we know that it won't get lost in
794 if (sigemptyset(&ss) < 0 ||
795 sigaddset(&ss, SIGTERM) < 0 ||
796 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
799 parent_pid = getpid();
809 /* The child's job is to reset the PAM session on
812 /* This string must fit in 10 chars (i.e. the length
813 * of "/sbin/init"), to look pretty in /bin/ps */
814 rename_process("(sd-pam)");
816 /* Make sure we don't keep open the passed fds in this
817 child. We assume that otherwise only those fds are
818 open here that have been opened by PAM. */
819 close_many(fds, n_fds);
821 /* Drop privileges - we don't need any to pam_close_session
822 * and this will make PR_SET_PDEATHSIG work in most cases.
823 * If this fails, ignore the error - but expect sd-pam threads
824 * to fail to exit normally */
/* NOTE(review): no assignment to r from this setresuid() call is visible;
 * errno looks like the intended source for %m here -- confirm. */
825 if (setresuid(uid, uid, uid) < 0)
826 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
828 /* Wait until our parent died. This will only work if
829 * the above setresuid() succeeds, otherwise the kernel
830 * will not allow unprivileged parents to kill their privileged
831 * children this way. We rely on the control groups kill logic
832 * to do the rest for us. */
833 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
836 /* Check if our parent process might already have
838 if (getppid() == parent_pid) {
840 if (sigwait(&ss, &sig) < 0) {
847 assert(sig == SIGTERM);
852 /* If our parent died we'll end the session */
853 if (getppid() != parent_pid) {
854 pam_code = pam_close_session(handle, flags);
855 if (pam_code != PAM_SUCCESS)
862 pam_end(handle, pam_code | flags);
866 /* If the child was forked off successfully it will do all the
867 * cleanups, so forget about the handle here. */
870 /* Unblock SIGTERM again in the parent */
871 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
874 /* We close the log explicitly here, since the PAM modules
875 * might have opened it, but we don't want this fd around. */
884 if (pam_code != PAM_SUCCESS) {
885 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
886 err = -EPERM; /* PAM errors do not map to errno */
888 log_error_errno(errno, "PAM failed: %m");
894 pam_code = pam_close_session(handle, flags);
896 pam_end(handle, pam_code | flags);
904 kill(pam_pid, SIGTERM);
905 kill(pam_pid, SIGCONT);
/* Renames the process to a parenthesized name built from path, assembled in
 * an 11-byte buffer as '(' + l bytes starting at p + ')' + NUL. A literal
 * "(...)" is used in one branch. */
912 static void rename_process_from_path(const char *path) {
913 char process_name[11];
917 /* This resulting string must fit in 10 chars (i.e. the length
918 * of "/sbin/init") to look pretty in /bin/ps */
922 rename_process("(...)");
928 /* The end of the process name is usually more
929 * interesting, since the first bit might just be
935 process_name[0] = '(';
936 memcpy(process_name+1, p, l);
937 process_name[1+l] = ')';
938 process_name[1+l+1] = 0;
940 rename_process(process_name);
/* Builds and loads a seccomp filter from the context. The "negative" action
 * is SCMP_ACT_KILL, or SCMP_ACT_ERRNO(syscall_errno) when an errno is
 * configured. For a whitelist it is the filter default and listed syscalls
 * get SCMP_ACT_ALLOW; otherwise the default is SCMP_ACT_ALLOW and listed
 * syscalls get the negative action. Architectures come from syscall_archs or
 * seccomp_add_secondary_archs(); SCMP_FLTATR_CTL_NNP is set to 0 before the
 * filter is loaded, and the filter context is released afterwards. */
945 static int apply_seccomp(const ExecContext *c) {
946 uint32_t negative_action, action;
947 scmp_filter_ctx *seccomp;
954 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
956 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
960 if (c->syscall_archs) {
962 SET_FOREACH(id, c->syscall_archs, i) {
963 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
971 r = seccomp_add_secondary_archs(seccomp);
976 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
977 SET_FOREACH(id, c->syscall_filter, i) {
978 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
983 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
987 r = seccomp_load(seccomp);
990 seccomp_release(seccomp);
/* Builds and loads a seccomp filter (default action: allow) whose rules
 * answer disallowed address families with EPROTONOSUPPORT. In whitelist mode
 * everything below the lowest and above the highest listed family is blocked,
 * plus every unlisted family in between; in blacklist mode one rule is added
 * per listed family. */
994 static int apply_address_families(const ExecContext *c) {
995 scmp_filter_ctx *seccomp;
1001 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1005 r = seccomp_add_secondary_archs(seccomp);
1009 if (c->address_families_whitelist) {
1010 int af, first = 0, last = 0;
1013 /* If this is a whitelist, we first block the address
1014 * families that are out of range and then everything
1015 * that is not in the set. First, we find the lowest
1016 * and highest address family in the set. */
1018 SET_FOREACH(afp, c->address_families, i) {
1019 af = PTR_TO_INT(afp);
1021 if (af <= 0 || af >= af_max())
1024 if (first == 0 || af < first)
1027 if (last == 0 || af > last)
1031 assert((first == 0) == (last == 0));
1035 /* No entries in the valid range, block everything */
1036 r = seccomp_rule_add(
1038 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1046 /* Block everything below the first entry */
1047 r = seccomp_rule_add(
1049 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1052 SCMP_A0(SCMP_CMP_LT, first));
1056 /* Block everything above the last entry */
1057 r = seccomp_rule_add(
1059 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1062 SCMP_A0(SCMP_CMP_GT, last));
1066 /* Block everything between the first and last
1068 for (af = 1; af < af_max(); af++) {
1070 if (set_contains(c->address_families, INT_TO_PTR(af)))
1073 r = seccomp_rule_add(
1075 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1078 SCMP_A0(SCMP_CMP_EQ, af));
1087 /* If this is a blacklist, then generate one rule for
1088 * each address family that are then combined in OR
1091 SET_FOREACH(af, c->address_families, i) {
1093 r = seccomp_rule_add(
1095 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1098 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1104 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1108 r = seccomp_load(seccomp);
1111 seccomp_release(seccomp);
/* Waits on idle_pipe[0] for a hang-up for up to IDLE_TIMEOUT_USEC. On
 * timeout, and if idle_pipe[3] is valid, a single byte is written there and
 * a second wait of up to IDLE_TIMEOUT2_USEC follows. All four descriptors of
 * idle_pipe are closed by this function. */
1117 static void do_idle_pipe_dance(int idle_pipe[4]) {
1121 safe_close(idle_pipe[1]);
1122 safe_close(idle_pipe[2]);
1124 if (idle_pipe[0] >= 0) {
1127 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1129 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1130 /* Signal systemd that we are bored and want to continue. */
1131 write(idle_pipe[3], "x", 1);
1133 /* Wait for systemd to react to the signal above. */
1134 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1137 safe_close(idle_pipe[0]);
1141 safe_close(idle_pipe[3]);
/* Assembles the environment entries generated for the service itself:
 * LISTEN_PID and LISTEN_FDS, WATCHDOG_PID and WATCHDOG_USEC (when
 * watchdog_usec > 0), HOME, LOGNAME, USER, SHELL, and the string returned by
 * default_term_for_tty() when the context uses a TTY. The array holds 10
 * slots including the terminating NULL, which the final assert checks. */
1144 static int build_environment(
1145 const ExecContext *c,
1147 usec_t watchdog_usec,
1149 const char *username,
1153 _cleanup_strv_free_ char **our_env = NULL;
1160 our_env = new0(char*, 10);
1165 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1167 our_env[n_env++] = x;
1169 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1171 our_env[n_env++] = x;
1174 if (watchdog_usec > 0) {
1175 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1177 our_env[n_env++] = x;
1179 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1181 our_env[n_env++] = x;
1185 x = strappend("HOME=", home);
1188 our_env[n_env++] = x;
1192 x = strappend("LOGNAME=", username);
1195 our_env[n_env++] = x;
1197 x = strappend("USER=", username);
1200 our_env[n_env++] = x;
1204 x = strappend("SHELL=", shell);
1207 our_env[n_env++] = x;
1210 if (is_terminal_input(c->std_input) ||
1211 c->std_output == EXEC_OUTPUT_TTY ||
1212 c->std_error == EXEC_OUTPUT_TTY ||
1215 x = strdup(default_term_for_tty(tty_path(c)));
1218 our_env[n_env++] = x;
1221 our_env[n_env++] = NULL;
1222 assert(n_env <= 10);
/* Prepares the calling process according to context and params and finally
 * execve()s command->path. The visible steps, in order: process rename and
 * signal reset, idle pipe wait, closing of unrelated fds, optional
 * confirmation prompt, stdio set-up, cgroup attachment, OOM/nice/scheduler/
 * affinity/ioprio/timer-slack/personality settings, utmp record, user lookup,
 * bus endpoint policy, cgroup delegation, runtime directories, groups, PAM,
 * network and mount namespaces, chroot/chdir, fd shifting, resource limits,
 * capabilities, Smack label, user switch, securebits, no-new-privileges,
 * seccomp filters, SELinux/AppArmor labels, and environment/argv assembly.
 * When a step fails, *error is set to the matching EXIT_* code. */
1230 static int exec_child(ExecCommand *command,
1231 const ExecContext *context,
1232 const ExecParameters *params,
1233 ExecRuntime *runtime,
1236 int *fds, unsigned n_fds,
1240 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1241 const char *username = NULL, *home = NULL, *shell = NULL;
1242 unsigned n_dont_close = 0;
1243 int dont_close[n_fds + 4];
1244 uid_t uid = UID_INVALID;
1245 gid_t gid = GID_INVALID;
1253 rename_process_from_path(command->path);
1255 /* We reset exactly these signals, since they are the
1256 * only ones we set to SIG_IGN in the main daemon. All
1257 * others we leave untouched because we set them to
1258 * SIG_DFL or a valid handler initially, both of which
1259 * will be demoted to SIG_DFL. */
1260 default_signals(SIGNALS_CRASH_HANDLER,
1261 SIGNALS_IGNORE, -1);
1263 if (context->ignore_sigpipe)
1264 ignore_signals(SIGPIPE, -1);
1266 err = reset_signal_mask();
1268 *error = EXIT_SIGNAL_MASK;
1272 if (params->idle_pipe)
1273 do_idle_pipe_dance(params->idle_pipe);
1275 /* Close sockets very early to make sure we don't
1276 * block init reexecution because it cannot bind its
1281 dont_close[n_dont_close++] = socket_fd;
1283 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1284 n_dont_close += n_fds;
1286 if (params->bus_endpoint_fd >= 0)
1287 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1289 if (runtime->netns_storage_socket[0] >= 0)
1290 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1291 if (runtime->netns_storage_socket[1] >= 0)
1292 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1295 err = close_all_fds(dont_close, n_dont_close);
1301 if (!context->same_pgrp)
1303 *error = EXIT_SETSID;
1307 exec_context_tty_reset(context);
1309 if (params->confirm_spawn) {
1312 err = ask_for_confirmation(&response, argv);
1313 if (err == -ETIMEDOUT)
1314 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1316 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1317 else if (response == 's') {
1318 write_confirm_message("Skipping execution.\n");
1319 *error = EXIT_CONFIRM;
1321 } else if (response == 'n') {
1322 write_confirm_message("Failing execution.\n");
1328 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1329 * must be sure to drop O_NONBLOCK */
1331 fd_nonblock(socket_fd, false);
1333 err = setup_input(context, socket_fd, params->apply_tty_stdin);
1335 *error = EXIT_STDIN;
1339 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1341 *error = EXIT_STDOUT;
1345 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1347 *error = EXIT_STDERR;
1351 if (params->cgroup_path) {
1352 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1354 *error = EXIT_CGROUP;
1359 if (context->oom_score_adjust_set) {
1362 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1365 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1366 *error = EXIT_OOM_ADJUST;
1371 if (context->nice_set)
1372 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1377 if (context->cpu_sched_set) {
1378 struct sched_param param = {
1379 .sched_priority = context->cpu_sched_priority,
1382 err = sched_setscheduler(0,
1383 context->cpu_sched_policy |
1384 (context->cpu_sched_reset_on_fork ?
1385 SCHED_RESET_ON_FORK : 0),
1388 *error = EXIT_SETSCHEDULER;
1393 if (context->cpuset)
1394 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1395 *error = EXIT_CPUAFFINITY;
1399 if (context->ioprio_set)
1400 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1401 *error = EXIT_IOPRIO;
1405 if (context->timer_slack_nsec != NSEC_INFINITY)
1406 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1407 *error = EXIT_TIMERSLACK;
1411 if (context->personality != 0xffffffffUL)
1412 if (personality(context->personality) < 0) {
1413 *error = EXIT_PERSONALITY;
1417 if (context->utmp_id)
1418 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1420 if (context->user) {
1421 username = context->user;
1422 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1428 if (is_terminal_input(context->std_input)) {
1429 err = chown_terminal(STDIN_FILENO, uid);
1431 *error = EXIT_STDIN;
1438 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1439 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1441 err = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1443 *error = EXIT_BUS_ENDPOINT;
1449 /* If delegation is enabled we'll pass ownership of the cgroup
1450 * (but only in systemd's own controller hierarchy!) to the
1451 * user of the new process. */
1452 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1453 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1455 *error = EXIT_CGROUP;
1460 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1462 *error = EXIT_CGROUP;
1467 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1470 STRV_FOREACH(rt, context->runtime_directory) {
1471 _cleanup_free_ char *p;
1473 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1475 *error = EXIT_RUNTIME_DIRECTORY;
1479 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1481 *error = EXIT_RUNTIME_DIRECTORY;
1487 if (params->apply_permissions) {
1488 err = enforce_groups(context, username, gid);
1490 *error = EXIT_GROUP;
1495 umask(context->umask);
1498 if (params->apply_permissions && context->pam_name && username) {
1499 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1507 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1508 err = setup_netns(runtime->netns_storage_socket);
1510 *error = EXIT_NETWORK;
1515 if (!strv_isempty(context->read_write_dirs) ||
1516 !strv_isempty(context->read_only_dirs) ||
1517 !strv_isempty(context->inaccessible_dirs) ||
1518 context->mount_flags != 0 ||
1519 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1520 params->bus_endpoint_path ||
1521 context->private_devices ||
1522 context->protect_system != PROTECT_SYSTEM_NO ||
1523 context->protect_home != PROTECT_HOME_NO) {
1525 char *tmp = NULL, *var = NULL;
1527 /* The runtime struct only contains the parent
1528 * of the private /tmp, which is
1529 * non-accessible to world users. Inside of it
1530 * there's a /tmp that is sticky, and that's
1531 * the one we want to use here. */
1533 if (context->private_tmp && runtime) {
1534 if (runtime->tmp_dir)
1535 tmp = strappenda(runtime->tmp_dir, "/tmp");
1536 if (runtime->var_tmp_dir)
1537 var = strappenda(runtime->var_tmp_dir, "/tmp");
1540 err = setup_namespace(
1541 context->read_write_dirs,
1542 context->read_only_dirs,
1543 context->inaccessible_dirs,
1546 params->bus_endpoint_path,
1547 context->private_devices,
1548 context->protect_home,
1549 context->protect_system,
1550 context->mount_flags);
1553 log_unit_warning_errno(params->unit_id, err, "Failed to set up file system namespace due to lack of privileges. Execution sandbox will not be in effect: %m");
1555 *error = EXIT_NAMESPACE;
1560 if (params->apply_chroot) {
1561 if (context->root_directory)
1562 if (chroot(context->root_directory) < 0) {
1563 *error = EXIT_CHROOT;
1567 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1568 *error = EXIT_CHDIR;
1572 _cleanup_free_ char *d = NULL;
1574 if (asprintf(&d, "%s/%s",
1575 context->root_directory ? context->root_directory : "",
1576 context->working_directory ? context->working_directory : "") < 0) {
1577 *error = EXIT_MEMORY;
1582 *error = EXIT_CHDIR;
1587 /* We repeat the fd closing here, to make sure that
1588 * nothing is leaked from the PAM modules. Note that
1589 * we are more aggressive this time since socket_fd
1590 * and the netns fds we don't need anymore. The custom
1591 * endpoint fd was needed to upload the policy and can
1592 * now be closed as well. */
1593 err = close_all_fds(fds, n_fds);
1595 err = shift_fds(fds, n_fds);
1597 err = flags_fds(fds, n_fds, context->non_blocking);
1603 if (params->apply_permissions) {
1605 for (i = 0; i < _RLIMIT_MAX; i++) {
1606 if (!context->rlimit[i])
1609 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1610 *error = EXIT_LIMITS;
1615 if (context->capability_bounding_set_drop) {
1616 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1618 *error = EXIT_CAPABILITIES;
1624 if (context->smack_process_label) {
1625 err = mac_smack_apply_pid(0, context->smack_process_label);
1627 *error = EXIT_SMACK_PROCESS_LABEL;
1633 if (context->user) {
1634 err = enforce_user(context, uid);
1641 /* PR_GET_SECUREBITS is not privileged, while
1642 * PR_SET_SECUREBITS is. So to suppress
1643 * potential EPERMs we'll try not to call
1644 * PR_SET_SECUREBITS unless necessary. */
1645 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1646 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1647 *error = EXIT_SECUREBITS;
1651 if (context->capabilities)
1652 if (cap_set_proc(context->capabilities) < 0) {
1653 *error = EXIT_CAPABILITIES;
1657 if (context->no_new_privileges)
1658 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1659 *error = EXIT_NO_NEW_PRIVILEGES;
1664 if (context->address_families_whitelist ||
1665 !set_isempty(context->address_families)) {
1666 err = apply_address_families(context);
1668 *error = EXIT_ADDRESS_FAMILIES;
1673 if (context->syscall_whitelist ||
1674 !set_isempty(context->syscall_filter) ||
1675 !set_isempty(context->syscall_archs)) {
1676 err = apply_seccomp(context);
1678 *error = EXIT_SECCOMP;
1685 if (mac_selinux_use()) {
1686 if (context->selinux_context) {
1687 err = setexeccon(context->selinux_context);
1688 if (err < 0 && !context->selinux_context_ignore) {
1689 *error = EXIT_SELINUX_CONTEXT;
1694 if (params->selinux_context_net && socket_fd >= 0) {
1695 _cleanup_free_ char *label = NULL;
1697 err = mac_selinux_get_child_mls_label(socket_fd, command->path, &label);
1699 *error = EXIT_SELINUX_CONTEXT;
1703 err = setexeccon(label);
1705 *error = EXIT_SELINUX_CONTEXT;
1712 #ifdef HAVE_APPARMOR
1713 if (context->apparmor_profile && mac_apparmor_use()) {
1714 err = aa_change_onexec(context->apparmor_profile);
1715 if (err < 0 && !context->apparmor_profile_ignore) {
1716 *error = EXIT_APPARMOR_PROFILE;
1723 err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1725 *error = EXIT_MEMORY;
1729 final_env = strv_env_merge(5,
1730 params->environment,
1732 context->environment,
1737 *error = EXIT_MEMORY;
1741 final_argv = replace_env_argv(argv, final_env);
1743 *error = EXIT_MEMORY;
1747 final_env = strv_env_clean(final_env);
1749 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1750 _cleanup_free_ char *line;
1752 line = exec_command_line(final_argv);
1755 log_unit_struct(params->unit_id,
1757 "EXECUTABLE=%s", command->path,
1758 LOG_MESSAGE("Executing: %s", line),
1763 execve(command->path, final_argv, final_env);
/* Spawns command for a unit: validates the fd parameters (socket-based stdio
 * requires exactly one fd in params), loads the environment files, logs the
 * command line about to be executed and runs exec_child(); a failure of that
 * call is logged with the failing step. The "Forked ... as PID" path then
 * attaches the new PID to params->cgroup_path and records the start in
 * command->exec_status. */
1768 int exec_spawn(ExecCommand *command,
1769 const ExecContext *context,
1770 const ExecParameters *params,
1771 ExecRuntime *runtime,
1774 _cleanup_strv_free_ char **files_env = NULL;
1775 int *fds = NULL; unsigned n_fds = 0;
1785 assert(params->fds || params->n_fds <= 0);
1787 if (context->std_input == EXEC_INPUT_SOCKET ||
1788 context->std_output == EXEC_OUTPUT_SOCKET ||
1789 context->std_error == EXEC_OUTPUT_SOCKET) {
1791 if (params->n_fds != 1)
1794 socket_fd = params->fds[0];
1798 n_fds = params->n_fds;
1801 err = exec_context_load_environment(context, params->unit_id, &files_env);
1803 log_unit_struct(params->unit_id,
1805 LOG_MESSAGE("Failed to load environment files: %s", strerror(-err)),
1811 argv = params->argv ?: command->argv;
1813 line = exec_command_line(argv);
1817 log_unit_struct(params->unit_id,
1819 "EXECUTABLE=%s", command->path,
1820 LOG_MESSAGE("About to execute: %s", line),
1831 err = exec_child(command,
1843 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1844 "EXECUTABLE=%s", command->path,
1845 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1846 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1847 command->path, strerror(-err)),
1856 log_unit_struct(params->unit_id,
1858 LOG_MESSAGE("Forked %s as "PID_FMT,
1859 command->path, pid),
1862 /* We add the new process to the cgroup both in the child (so
1863 * that we can be sure that no user code is ever executed
1864 * outside of the cgroup) and in the parent (so that we can be
1865 * sure that when we kill the cgroup the process will be
1867 if (params->cgroup_path)
1868 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1870 exec_status_start(&command->exec_status, pid);
/* Assign the default values of an ExecContext: best-effort I/O
 * priority class with data 0, SCHED_OTHER, syslog priority
 * LOG_DAEMON|LOG_INFO with level prefixes enabled, SIGPIPE ignored and
 * runtime directory mode 0755.
 *
 * timer_slack_nsec and personality are set to NSEC_INFINITY and
 * 0xffffffffUL; exec_context_dump() skips both settings while they
 * still hold exactly these values. */
1876 void exec_context_init(ExecContext *c) {
1880 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1881 c->cpu_sched_policy = SCHED_OTHER;
1882 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1883 c->syslog_level_prefix = true;
1884 c->ignore_sigpipe = true;
1885 c->timer_slack_nsec = NSEC_INFINITY;
1886 c->personality = 0xffffffffUL;
1887 c->runtime_directory_mode = 0755;
/* Release the dynamically allocated members of *c.
 *
 * Each member is freed with the helper matching its type (strv_free()
 * for string vectors, free() for plain strings, cap_free() for the
 * capability set, CPU_FREE() for the CPU set, set_free() for the
 * syscall and address family sets, bus_endpoint_free() for the bus
 * endpoint) and the pointers shown below are reset to NULL afterwards.
 *
 * NOTE(review): several members handled by the full function are not
 * visible in this excerpt. */
1890 void exec_context_done(ExecContext *c) {
1895 strv_free(c->environment);
1896 c->environment = NULL;
1898 strv_free(c->environment_files);
1899 c->environment_files = NULL;
/* One slot per resource limit. */
1901 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1903 c->rlimit[l] = NULL;
1906 free(c->working_directory);
1907 c->working_directory = NULL;
1908 free(c->root_directory);
1909 c->root_directory = NULL;
1914 free(c->syslog_identifier);
1915 c->syslog_identifier = NULL;
1923 strv_free(c->supplementary_groups);
1924 c->supplementary_groups = NULL;
/* cap_free() is only called on a non-NULL capability set. */
1929 if (c->capabilities) {
1930 cap_free(c->capabilities);
1931 c->capabilities = NULL;
1934 strv_free(c->read_only_dirs);
1935 c->read_only_dirs = NULL;
1937 strv_free(c->read_write_dirs);
1938 c->read_write_dirs = NULL;
1940 strv_free(c->inaccessible_dirs);
1941 c->inaccessible_dirs = NULL;
1944 CPU_FREE(c->cpuset);
1949 free(c->selinux_context);
1950 c->selinux_context = NULL;
1952 free(c->apparmor_profile);
1953 c->apparmor_profile = NULL;
1955 set_free(c->syscall_filter);
1956 c->syscall_filter = NULL;
1958 set_free(c->syscall_archs);
1959 c->syscall_archs = NULL;
1961 set_free(c->address_families);
1962 c->address_families = NULL;
1964 strv_free(c->runtime_directory);
1965 c->runtime_directory = NULL;
1967 bus_endpoint_free(c->bus_endpoint);
1968 c->bus_endpoint = NULL;
/* Remove the runtime directories of a context.
 *
 * For every entry of c->runtime_directory the path
 * runtime_prefix "/" entry is built with strjoin() and handed to
 * rm_rf_dangerous(). The joined path is released automatically at the
 * end of each iteration through _cleanup_free_.
 *
 * NOTE(review): runtime_prefix is tested for NULL first; what happens
 * in that case, and the return values, are not visible in this
 * excerpt. */
1971 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1976 if (!runtime_prefix)
1979 STRV_FOREACH(i, c->runtime_directory) {
1980 _cleanup_free_ char *p;
1982 p = strjoin(runtime_prefix, "/", *i, NULL);
1986 /* We execute this synchronously, since we need to be
1987 * sure this is gone when we start the service
1989 rm_rf_dangerous(p, false, true, false);
/* Per-command teardown. exec_command_done_array() calls this on every
 * array element and exec_command_free_list() calls it on every entry
 * it unlinks.
 *
 * NOTE(review): the body is not visible in this excerpt; which members
 * of *c are released must be checked in the full file. */
1995 void exec_command_done(ExecCommand *c) {
/* Run exec_command_done() on each of the n ExecCommand objects stored
 * contiguously starting at c. */
2005 void exec_command_done_array(ExecCommand *c, unsigned n) {
2008 for (i = 0; i < n; i++)
2009 exec_command_done(c+i);
/* Tear down a linked list of commands headed by c: entries are
 * unlinked with LIST_REMOVE() and passed to exec_command_done().
 *
 * NOTE(review): the loop construct and the release of the entry's own
 * memory are not visible in this excerpt. */
2012 void exec_command_free_list(ExecCommand *c) {
2016 LIST_REMOVE(command, c, i);
2017 exec_command_done(i);
/* Run exec_command_free_list() on each of the n list heads stored in
 * the array c. */
2022 void exec_command_free_array(ExecCommand **c, unsigned n) {
2025 for (i = 0; i < n; i++) {
2026 exec_command_free_list(c[i]);
/* Load the environment files configured in c->environment_files.
 *
 * Every entry is expanded with glob(); each match is read with
 * load_env_file(), filtered through strv_env_clean_log() (which is
 * given unit_id and the file name for its log output) and merged into
 * the accumulated list r with strv_env_merge().
 *
 * On the visible glob() failure path the function returns -errno, or
 * -EINVAL when errno is zero.
 *
 * NOTE(review): the handling of the per-entry "ignore" flag, of
 * non-absolute paths and of the final hand-over through l is not
 * visible in this excerpt. */
2031 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2032 char **i, **r = NULL;
2037 STRV_FOREACH(i, c->environment_files) {
2040 bool ignore = false;
/* Released automatically at the end of each iteration. */
2042 _cleanup_globfree_ glob_t pglob = {};
2052 if (!path_is_absolute(fn)) {
2060 /* Filename supports globbing, take all matching files */
2062 if (glob(fn, 0, NULL, &pglob) != 0) {
2067 return errno ? -errno : -EINVAL;
2069 count = pglob.gl_pathc;
2077 for (n = 0; n < count; n++) {
2078 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2086 /* Log invalid environment variables with filename */
2088 p = strv_env_clean_log(p, unit_id, pglob.gl_pathv[n]);
2095 m = strv_env_merge(2, r, p);
/* Tell whether the given tty name may refer to the same device as the
 * system console.
 *
 * The name is compared against "console" and against the name
 * returned by resolve_dev_console(). A resolved console of "tty0"
 * additionally matches every tty for which tty_is_vc() is true.
 *
 * NOTE(review): a leading "/dev/" is tested for; how the name is
 * adjusted in that case is not visible in this excerpt. */
2111 static bool tty_may_match_dev_console(const char *tty) {
2112 _cleanup_free_ char *active = NULL;
2115 if (startswith(tty, "/dev/"))
2118 /* trivial identity? */
2119 if (streq(tty, "console"))
2122 console = resolve_dev_console(&active);
2123 /* if we could not resolve, assume it may */
2127 /* "tty0" means the active VC, so it may be the same sometimes */
2128 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when the context does any tty handling (tty_reset,
 * tty_vhangup, tty_vt_disallocate, or a terminal configured for
 * stdin, stdout or stderr) and tty_may_match_dev_console() reports
 * that its tty path may be the console. */
2131 bool exec_context_may_touch_console(ExecContext *ec) {
2132 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2133 is_terminal_input(ec->std_input) ||
2134 is_terminal_output(ec->std_output) ||
2135 is_terminal_output(ec->std_error)) &&
2136 tty_may_match_dev_console(tty_path(ec));
/* Print strings to f, each one preceded by a single space. Used by
 * exec_context_dump() to list string vectors after a "Key:" label.
 *
 * NOTE(review): the iteration over l is not visible in this
 * excerpt. */
2139 static void strv_fprintf(FILE *f, char **l) {
2145 fprintf(f, " %s", *g);
/* Write a human-readable description of the execution context c to f.
 *
 * Every output line has the form "<prefix>Key: value"; prefix is
 * normalised with strempty() first. The basic settings (working and
 * root directory, sandboxing switches, SIGPIPE handling) are printed
 * first, followed by the environment, scheduling, standard stream,
 * tty, syslog, capability, user/group, directory list, security
 * module, personality and system call filter settings. Most of the
 * later groups are guarded by a check on the corresponding field.
 *
 * NOTE(review): a number of guard conditions and fprintf() openers are
 * not visible in this excerpt. */
2148 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2155 prefix = strempty(prefix);
2159 "%sWorkingDirectory: %s\n"
2160 "%sRootDirectory: %s\n"
2161 "%sNonBlocking: %s\n"
2162 "%sPrivateTmp: %s\n"
2163 "%sPrivateNetwork: %s\n"
2164 "%sPrivateDevices: %s\n"
2165 "%sProtectHome: %s\n"
2166 "%sProtectSystem: %s\n"
2167 "%sIgnoreSIGPIPE: %s\n",
2169 prefix, c->working_directory ? c->working_directory : "/",
2170 prefix, c->root_directory ? c->root_directory : "/",
2171 prefix, yes_no(c->non_blocking),
2172 prefix, yes_no(c->private_tmp),
2173 prefix, yes_no(c->private_network),
2174 prefix, yes_no(c->private_devices),
2175 prefix, protect_home_to_string(c->protect_home),
2176 prefix, protect_system_to_string(c->protect_system),
2177 prefix, yes_no(c->ignore_sigpipe));
2179 STRV_FOREACH(e, c->environment)
2180 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2182 STRV_FOREACH(e, c->environment_files)
2183 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2190 if (c->oom_score_adjust_set)
2192 "%sOOMScoreAdjust: %i\n",
2193 prefix, c->oom_score_adjust);
2195 for (i = 0; i < RLIM_NLIMITS; i++)
2197 fprintf(f, "%s%s: "RLIM_FMT"\n",
2198 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2200 if (c->ioprio_set) {
2201 _cleanup_free_ char *class_str = NULL;
2203 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2205 "%sIOSchedulingClass: %s\n"
2206 "%sIOPriority: %i\n",
2207 prefix, strna(class_str),
2208 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2211 if (c->cpu_sched_set) {
2212 _cleanup_free_ char *policy_str = NULL;
2214 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2216 "%sCPUSchedulingPolicy: %s\n"
2217 "%sCPUSchedulingPriority: %i\n"
2218 "%sCPUSchedulingResetOnFork: %s\n",
2219 prefix, strna(policy_str),
2220 prefix, c->cpu_sched_priority,
2221 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* List the index of every CPU that is set in c->cpuset. */
2225 fprintf(f, "%sCPUAffinity:", prefix);
2226 for (i = 0; i < c->cpuset_ncpus; i++)
2227 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2228 fprintf(f, " %u", i);
2232 if (c->timer_slack_nsec != NSEC_INFINITY)
2233 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2236 "%sStandardInput: %s\n"
2237 "%sStandardOutput: %s\n"
2238 "%sStandardError: %s\n",
2239 prefix, exec_input_to_string(c->std_input),
2240 prefix, exec_output_to_string(c->std_output),
2241 prefix, exec_output_to_string(c->std_error));
2247 "%sTTYVHangup: %s\n"
2248 "%sTTYVTDisallocate: %s\n",
2249 prefix, c->tty_path,
2250 prefix, yes_no(c->tty_reset),
2251 prefix, yes_no(c->tty_vhangup),
2252 prefix, yes_no(c->tty_vt_disallocate));
/* The syslog facility and level are only shown when stdout or stderr
 * is connected to syslog, kmsg or the journal. */
2254 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2255 c->std_output == EXEC_OUTPUT_KMSG ||
2256 c->std_output == EXEC_OUTPUT_JOURNAL ||
2257 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2258 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2259 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2260 c->std_error == EXEC_OUTPUT_SYSLOG ||
2261 c->std_error == EXEC_OUTPUT_KMSG ||
2262 c->std_error == EXEC_OUTPUT_JOURNAL ||
2263 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2264 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2265 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2267 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2269 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2270 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2273 "%sSyslogFacility: %s\n"
2274 "%sSyslogLevel: %s\n",
2275 prefix, strna(fac_str),
2276 prefix, strna(lvl_str));
2279 if (c->capabilities) {
2280 _cleanup_cap_free_charp_ char *t;
2282 t = cap_to_text(c->capabilities, NULL);
2284 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Each label carries its own leading space, so that any combination
 * of set bits is printed as a space-separated list after the colon. */
2288 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2290 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2291 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2292 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2293 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2294 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2295 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
2297 if (c->capability_bounding_set_drop) {
2299 fprintf(f, "%sCapabilityBoundingSet:", prefix);
/* The field holds the capabilities to drop; print the ones whose bit
 * is NOT set in it. */
2301 for (l = 0; l <= cap_last_cap(); l++)
2302 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2303 _cleanup_cap_free_charp_ char *t;
2307 fprintf(f, " %s", t);
2314 fprintf(f, "%sUser: %s\n", prefix, c->user);
2316 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2318 if (strv_length(c->supplementary_groups) > 0) {
2319 fprintf(f, "%sSupplementaryGroups:", prefix);
2320 strv_fprintf(f, c->supplementary_groups);
2325 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2327 if (strv_length(c->read_write_dirs) > 0) {
2328 fprintf(f, "%sReadWriteDirs:", prefix);
2329 strv_fprintf(f, c->read_write_dirs);
2333 if (strv_length(c->read_only_dirs) > 0) {
2334 fprintf(f, "%sReadOnlyDirs:", prefix);
2335 strv_fprintf(f, c->read_only_dirs);
2339 if (strv_length(c->inaccessible_dirs) > 0) {
2340 fprintf(f, "%sInaccessibleDirs:", prefix);
2341 strv_fprintf(f, c->inaccessible_dirs);
2347 "%sUtmpIdentifier: %s\n",
2348 prefix, c->utmp_id);
/* A leading "-" marks a context whose failure to apply is ignored. */
2350 if (c->selinux_context)
2352 "%sSELinuxContext: %s%s\n",
2353 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2355 if (c->personality != 0xffffffffUL)
2357 "%sPersonality: %s\n",
2358 prefix, strna(personality_to_string(c->personality)));
2360 if (c->syscall_filter) {
2368 "%sSystemCallFilter: ",
2371 if (!c->syscall_whitelist)
2375 SET_FOREACH(id, c->syscall_filter, j) {
2376 _cleanup_free_ char *name = NULL;
/* Set entries are read back with an offset of one subtracted. */
2383 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2384 fputs(strna(name), f);
2391 if (c->syscall_archs) {
2398 "%sSystemCallArchitectures:",
2402 SET_FOREACH(id, c->syscall_archs, j)
2403 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2408 if (c->syscall_errno != 0)
2410 "%sSystemCallErrorNumber: %s\n",
2411 prefix, strna(errno_to_name(c->syscall_errno)));
2413 if (c->apparmor_profile)
2415 "%sAppArmorProfile: %s%s\n",
2416 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2419 bool exec_context_maintains_privileges(ExecContext *c) {
2422 /* Returns true if the process forked off would run under
2423 * an unchanged UID or as root. */
/* Root is recognised both by name and by its numeric spelling. */
2428 if (streq(c->user, "root") || streq(c->user, "0"))
/* Record the start of process pid in *s; the visible part stamps
 * s->start_timestamp with dual_timestamp_get().
 *
 * NOTE(review): how pid is stored in *s is not visible in this
 * excerpt. */
2434 void exec_status_start(ExecStatus *s, pid_t pid) {
2439 dual_timestamp_get(&s->start_timestamp);
/* Record the exit of process pid in *s.
 *
 * Stamps s->exit_timestamp, writes a dead-process utmp record with
 * utmp_put_dead_process() when the context has a utmp_id, and resets
 * the context's tty with exec_context_tty_reset().
 *
 * NOTE(review): a status that already refers to a different non-zero
 * pid is detected first; the action taken in that case and the storing
 * of code/status are not visible in this excerpt. */
2442 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2445 if (s->pid && s->pid != pid)
2449 dual_timestamp_get(&s->exit_timestamp);
2455 if (context->utmp_id)
2456 utmp_put_dead_process(context->utmp_id, pid, code, status);
2458 exec_context_tty_reset(context);
/* Print the contents of *s to f, one "<prefix>Key: value" line per
 * item; prefix is normalised with strempty().
 *
 * The PID is printed first. The start timestamp is printed when its
 * realtime value is greater than zero; the exit timestamp, exit code
 * and exit status are printed when the exit timestamp's realtime value
 * is greater than zero. Timestamps are formatted into the local
 * buffer buf. */
2462 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2463 char buf[FORMAT_TIMESTAMP_MAX];
2471 prefix = strempty(prefix);
2474 "%sPID: "PID_FMT"\n",
2477 if (s->start_timestamp.realtime > 0)
2479 "%sStart Timestamp: %s\n",
2480 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2482 if (s->exit_timestamp.realtime > 0)
2484 "%sExit Timestamp: %s\n"
2486 "%sExit Status: %i\n",
2487 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2488 prefix, sigchld_code_to_string(s->code),
/* Build one string out of the argument vector argv.
 *
 * The vector is walked twice: once before the result buffer is
 * allocated with new(char, k) and once afterwards. Arguments that
 * contain a character from WHITESPACE are detected with strpbrk() and
 * treated specially.
 *
 * Callers in this file release the result with free() (via
 * _cleanup_free_) and treat a NULL result as out-of-memory.
 *
 * NOTE(review): the size computation and the exact quoting are not
 * visible in this excerpt. */
2492 char *exec_command_line(char **argv) {
2500 STRV_FOREACH(a, argv)
2503 if (!(n = new(char, k)))
2507 STRV_FOREACH(a, argv) {
2514 if (strpbrk(*a, WHITESPACE)) {
2525 /* FIXME: this doesn't really handle arguments that have
2526 * spaces and ticks in them */
/* Print one command to f: its command line, followed by its exec
 * status as written by exec_status_dump().
 *
 * The status lines use prefix2, which is prefix with a tab appended.
 * When exec_command_line() yields NULL, the text of
 * strerror(ENOMEM) is printed in place of the command line. */
2531 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2532 _cleanup_free_ char *cmd = NULL;
2533 const char *prefix2;
2538 prefix = strempty(prefix);
2539 prefix2 = strappenda(prefix, "\t");
2541 cmd = exec_command_line(c->argv);
2543 "%sCommand Line: %s\n",
2544 prefix, cmd ? cmd : strerror(ENOMEM));
2546 exec_status_dump(&c->exec_status, f, prefix2);
/* Print every command of the list starting at c to f using
 * exec_command_dump(); prefix is normalised with strempty() and passed
 * through to each call. */
2549 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2552 prefix = strempty(prefix);
2554 LIST_FOREACH(command, c, c)
2555 exec_command_dump(c, f, prefix);
/* Append command e to the list *l. The current tail is looked up with
 * LIST_FIND_TAIL() and e is inserted after it, so existing entries
 * keep their relative order. */
2558 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2565 /* It's kind of important that we keep the order here */
2566 LIST_FIND_TAIL(command, *l, end);
2567 LIST_INSERT_AFTER(command, *l, end, e);
/* Variadic setter for a command: a string vector is built from path
 * and the following arguments with strv_new_ap().
 *
 * NOTE(review): how the vector and path are stored in *c, and the
 * return values, are not visible in this excerpt. */
2572 int exec_command_set(ExecCommand *c, const char *path, ...) {
2580 l = strv_new_ap(path, ap);
/* Append arguments to an existing command: a temporary string vector
 * is built from path and the following arguments with strv_new_ap()
 * and then added to c->argv with strv_extend_strv().
 *
 * The temporary vector is released automatically on return through
 * _cleanup_strv_free_.
 *
 * NOTE(review): error handling and return values are not visible in
 * this excerpt. */
2601 int exec_command_append(ExecCommand *c, const char *path, ...) {
2602 _cleanup_strv_free_ char **l = NULL;
2610 l = strv_new_ap(path, ap);
2616 r = strv_extend_strv(&c->argv, l);
/* Allocate a zero-initialised ExecRuntime into *rt and mark both ends
 * of the network namespace storage socket as not open (-1).
 *
 * NOTE(review): the handling of an already allocated *rt, of
 * allocation failure and of the initial reference count is not
 * visible in this excerpt. */
2624 static int exec_runtime_allocate(ExecRuntime **rt) {
2629 *rt = new0(ExecRuntime, 1);
2634 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepare the runtime object that backs PrivateNetwork and PrivateTmp
 * for context c.
 *
 * After exec_runtime_allocate(), a datagram socket pair is created for
 * network namespace storage when private_network is set and no socket
 * exists yet, and the temporary directories are created with
 * setup_tmp_dirs() (keyed by id) when private_tmp is set and no
 * tmp_dir exists yet.
 *
 * NOTE(review): the case where neither option is set is tested first;
 * its outcome and the return values are not visible in this
 * excerpt. */
2639 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2649 if (!c->private_network && !c->private_tmp)
2652 r = exec_runtime_allocate(rt);
2656 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2657 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2661 if (c->private_tmp && !(*rt)->tmp_dir) {
2662 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Reference-counting entry point for ExecRuntime; asserts that the
 * object still has a positive reference count.
 *
 * NOTE(review): the counter update and the return value are not
 * visible in this excerpt. */
2670 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2672 assert(r->n_ref > 0);
/* Release one reference on r. Once n_ref is no longer positive the
 * object is torn down: the visible part frees var_tmp_dir and closes
 * the network namespace storage socket pair.
 *
 * Note that this only releases memory and fds; removal of the
 * directories on disk is done by exec_runtime_destroy().
 *
 * NOTE(review): the counter update, the remaining frees and the
 * return value are not visible in this excerpt. */
2678 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2683 assert(r->n_ref > 0);
2686 if (r->n_ref <= 0) {
2688 free(r->var_tmp_dir);
2689 safe_close_pair(r->netns_storage_socket);
/* Serialize the runtime object rt of unit u to f.
 *
 * The directories are written as the items "tmp-dir" and
 * "var-tmp-dir". For each open end of the network namespace storage
 * socket a duplicate fd is added to fds with fdset_put_dup() and the
 * number of that duplicate is written as "netns-socket-0" or
 * "netns-socket-1". These keys are the ones understood by
 * exec_runtime_deserialize_item().
 *
 * NOTE(review): error handling for fdset_put_dup() and the return
 * values are not visible in this excerpt. */
2696 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2705 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2707 if (rt->var_tmp_dir)
2708 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2710 if (rt->netns_storage_socket[0] >= 0) {
2713 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2717 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2720 if (rt->netns_storage_socket[1] >= 0) {
2723 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2727 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Apply one serialized key/value pair to the runtime object *rt,
 * allocating it on demand with exec_runtime_allocate().
 *
 * Recognised keys:
 *   "tmp-dir", "var-tmp-dir"  - the value is duplicated and replaces
 *                               the stored path
 *   "netns-socket-0", "-1"    - the value is parsed as an fd number
 *                               that must be contained in fds; the
 *                               previously stored socket is closed
 *                               and the fd is taken out of fds
 *
 * A socket value that cannot be parsed or is not in fds is reported
 * with a debug log message.
 *
 * NOTE(review): handling of unknown keys, allocation failures and the
 * return values are not visible in this excerpt. */
2733 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2740 if (streq(key, "tmp-dir")) {
2743 r = exec_runtime_allocate(rt);
2747 copy = strdup(value);
2751 free((*rt)->tmp_dir);
2752 (*rt)->tmp_dir = copy;
2754 } else if (streq(key, "var-tmp-dir")) {
2757 r = exec_runtime_allocate(rt);
2761 copy = strdup(value);
2765 free((*rt)->var_tmp_dir);
2766 (*rt)->var_tmp_dir = copy;
2768 } else if (streq(key, "netns-socket-0")) {
2771 r = exec_runtime_allocate(rt);
2775 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2776 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2778 safe_close((*rt)->netns_storage_socket[0]);
2779 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2781 } else if (streq(key, "netns-socket-1")) {
2784 r = exec_runtime_allocate(rt);
2788 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2789 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2791 safe_close((*rt)->netns_storage_socket[1]);
2792 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job function handed to asynchronous_job() by exec_runtime_destroy().
 *
 * p is a heap-allocated path. This function takes ownership of it:
 * the path is removed with rm_rf_dangerous() and the string is freed
 * on return through _cleanup_free_. */
2800 static void *remove_tmpdir_thread(void *p) {
2801 _cleanup_free_ char *path = p;
2803 rm_rf_dangerous(path, false, true, false);
/* Remove the on-disk state of a runtime object.
 *
 * Each temporary directory is handed to remove_tmpdir_thread() via
 * asynchronous_job(). On success the job owns (and frees) the path
 * string; when the job cannot be started a warning is logged and the
 * string is freed here instead. The stored pointer is reset to NULL
 * afterwards and the network namespace storage socket pair is closed.
 *
 * NOTE(review): the early exit for a runtime object that still has
 * other users is not visible in this excerpt. */
2807 void exec_runtime_destroy(ExecRuntime *rt) {
2813 /* If there are multiple users of this, let's leave the stuff around */
2818 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2820 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2822 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2829 if (rt->var_tmp_dir) {
2830 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2832 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2834 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2835 free(rt->var_tmp_dir);
2838 rt->var_tmp_dir = NULL;
2841 safe_close_pair(rt->netns_storage_socket);
/* Textual names of the ExecInput values, indexed by enum value.
 * DEFINE_STRING_TABLE_LOOKUP() derives the lookup helpers for this
 * table; exec_input_to_string() is the one used by
 * exec_context_dump(). */
2844 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2845 [EXEC_INPUT_NULL] = "null",
2846 [EXEC_INPUT_TTY] = "tty",
2847 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2848 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2849 [EXEC_INPUT_SOCKET] = "socket"
2852 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Textual names of the ExecOutput values, indexed by enum value.
 * DEFINE_STRING_TABLE_LOOKUP() derives the lookup helpers for this
 * table; exec_output_to_string() is the one used by
 * exec_context_dump(). */
2854 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2855 [EXEC_OUTPUT_INHERIT] = "inherit",
2856 [EXEC_OUTPUT_NULL] = "null",
2857 [EXEC_OUTPUT_TTY] = "tty",
2858 [EXEC_OUTPUT_SYSLOG] = "syslog",
2859 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2860 [EXEC_OUTPUT_KMSG] = "kmsg",
2861 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2862 [EXEC_OUTPUT_JOURNAL] = "journal",
2863 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2864 [EXEC_OUTPUT_SOCKET] = "socket"
2867 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);