1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 /* This assumes there is a 'tty' group */
75 static int shift_fds(int fds[], unsigned n_fds) {
76 int start, restart_from;
81 /* Modifies the fds array! (sorts it) */
91 for (i = start; i < (int) n_fds; i++) {
94 /* Already at right index? */
98 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
101 close_nointr_nofail(fds[i]);
104 /* Hmm, the fd we wanted isn't free? Then
105 * let's remember that and try again from here*/
106 if (nfd != i+3 && restart_from < 0)
110 if (restart_from < 0)
113 start = restart_from;
119 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
128 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
130 for (i = 0; i < n_fds; i++) {
132 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
135 /* We unconditionally drop FD_CLOEXEC from the fds,
136 * since after all we want to pass these fds to our
139 if ((r = fd_cloexec(fds[i], false)) < 0)
146 static const char *tty_path(const ExecContext *context) {
149 if (context->tty_path)
150 return context->tty_path;
152 return "/dev/console";
155 void exec_context_tty_reset(const ExecContext *context) {
158 if (context->tty_vhangup)
159 terminal_vhangup(tty_path(context));
161 if (context->tty_reset)
162 reset_terminal(tty_path(context));
164 if (context->tty_vt_disallocate && context->tty_path)
165 vt_disallocate(context->tty_path);
168 static int open_null_as(int flags, int nfd) {
173 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
177 r = dup2(fd, nfd) < 0 ? -errno : nfd;
178 close_nointr_nofail(fd);
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
187 union sockaddr_union sa;
190 assert(output < _EXEC_OUTPUT_MAX);
194 fd = socket(AF_UNIX, SOCK_STREAM, 0);
199 sa.un.sun_family = AF_UNIX;
200 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
202 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
204 close_nointr_nofail(fd);
208 if (shutdown(fd, SHUT_RD) < 0) {
209 close_nointr_nofail(fd);
221 context->syslog_identifier ? context->syslog_identifier : ident,
223 context->syslog_priority,
224 !!context->syslog_level_prefix,
225 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231 close_nointr_nofail(fd);
237 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
243 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
247 r = dup2(fd, nfd) < 0 ? -errno : nfd;
248 close_nointr_nofail(fd);
255 static bool is_terminal_input(ExecInput i) {
257 i == EXEC_INPUT_TTY ||
258 i == EXEC_INPUT_TTY_FORCE ||
259 i == EXEC_INPUT_TTY_FAIL;
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
264 if (is_terminal_input(std_input) && !apply_tty_stdin)
265 return EXEC_INPUT_NULL;
267 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268 return EXEC_INPUT_NULL;
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
275 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276 return EXEC_OUTPUT_INHERIT;
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
286 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
290 case EXEC_INPUT_NULL:
291 return open_null_as(O_RDONLY, STDIN_FILENO);
294 case EXEC_INPUT_TTY_FORCE:
295 case EXEC_INPUT_TTY_FAIL: {
298 if ((fd = acquire_terminal(
300 i == EXEC_INPUT_TTY_FAIL,
301 i == EXEC_INPUT_TTY_FORCE,
306 if (fd != STDIN_FILENO) {
307 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308 close_nointr_nofail(fd);
315 case EXEC_INPUT_SOCKET:
316 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
319 assert_not_reached("Unknown input type");
323 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
331 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
332 o = fixup_output(context->std_output, socket_fd);
334 if (fileno == STDERR_FILENO) {
336 e = fixup_output(context->std_error, socket_fd);
338 /* This expects the input and output are already set up */
340 /* Don't change the stderr file descriptor if we inherit all
341 * the way and are not on a tty */
342 if (e == EXEC_OUTPUT_INHERIT &&
343 o == EXEC_OUTPUT_INHERIT &&
344 i == EXEC_INPUT_NULL &&
345 !is_terminal_input(context->std_input) &&
349 /* Duplicate from stdout if possible */
350 if (e == o || e == EXEC_OUTPUT_INHERIT)
351 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
355 } else if (o == EXEC_OUTPUT_INHERIT) {
356 /* If input got downgraded, inherit the original value */
357 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
358 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
360 /* If the input is connected to anything that's not a /dev/null, inherit that... */
361 if (i != EXEC_INPUT_NULL)
362 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
364 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
368 /* We need to open /dev/null here anew, to get the right access mode. */
369 return open_null_as(O_WRONLY, fileno);
374 case EXEC_OUTPUT_NULL:
375 return open_null_as(O_WRONLY, fileno);
377 case EXEC_OUTPUT_TTY:
378 if (is_terminal_input(i))
379 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
381 /* We don't reset the terminal if this is just about output */
382 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
384 case EXEC_OUTPUT_SYSLOG:
385 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
386 case EXEC_OUTPUT_KMSG:
387 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
388 case EXEC_OUTPUT_JOURNAL:
389 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
390 r = connect_logger_as(context, o, ident, unit_id, fileno);
392 log_struct_unit(LOG_CRIT, unit_id,
393 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
394 fileno == STDOUT_FILENO ? "out" : "err",
395 unit_id, strerror(-r),
398 r = open_null_as(O_WRONLY, fileno);
402 case EXEC_OUTPUT_SOCKET:
403 assert(socket_fd >= 0);
404 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
407 assert_not_reached("Unknown error type");
411 static int chown_terminal(int fd, uid_t uid) {
416 /* This might fail. What matters are the results. */
417 (void) fchown(fd, uid, -1);
418 (void) fchmod(fd, TTY_MODE);
420 if (fstat(fd, &st) < 0)
423 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
429 static int setup_confirm_stdio(int *_saved_stdin,
430 int *_saved_stdout) {
431 int fd = -1, saved_stdin, saved_stdout = -1, r;
433 assert(_saved_stdin);
434 assert(_saved_stdout);
436 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
440 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
441 if (saved_stdout < 0) {
446 fd = acquire_terminal(
451 DEFAULT_CONFIRM_USEC);
457 r = chown_terminal(fd, getuid());
461 if (dup2(fd, STDIN_FILENO) < 0) {
466 if (dup2(fd, STDOUT_FILENO) < 0) {
472 close_nointr_nofail(fd);
474 *_saved_stdin = saved_stdin;
475 *_saved_stdout = saved_stdout;
480 if (saved_stdout >= 0)
481 close_nointr_nofail(saved_stdout);
483 if (saved_stdin >= 0)
484 close_nointr_nofail(saved_stdin);
487 close_nointr_nofail(fd);
492 static int write_confirm_message(const char *format, ...) {
498 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
502 va_start(ap, format);
503 vdprintf(fd, format, ap);
506 close_nointr_nofail(fd);
511 static int restore_confirm_stdio(int *saved_stdin,
517 assert(saved_stdout);
521 if (*saved_stdin >= 0)
522 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
525 if (*saved_stdout >= 0)
526 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
529 if (*saved_stdin >= 0)
530 close_nointr_nofail(*saved_stdin);
532 if (*saved_stdout >= 0)
533 close_nointr_nofail(*saved_stdout);
538 static int ask_for_confirmation(char *response, char **argv) {
539 int saved_stdout = -1, saved_stdin = -1, r;
542 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
546 line = exec_command_line(argv);
550 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
553 restore_confirm_stdio(&saved_stdin, &saved_stdout);
558 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
559 bool keep_groups = false;
564 /* Lookup and set GID and supplementary group list. Here too
565 * we avoid NSS lookups for gid=0. */
567 if (context->group || username) {
569 if (context->group) {
570 const char *g = context->group;
572 if ((r = get_group_creds(&g, &gid)) < 0)
576 /* First step, initialize groups from /etc/group */
577 if (username && gid != 0) {
578 if (initgroups(username, gid) < 0)
584 /* Second step, set our gids */
585 if (setresgid(gid, gid, gid) < 0)
589 if (context->supplementary_groups) {
594 /* Final step, initialize any manually set supplementary groups */
595 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
597 if (!(gids = new(gid_t, ngroups_max)))
601 if ((k = getgroups(ngroups_max, gids)) < 0) {
608 STRV_FOREACH(i, context->supplementary_groups) {
611 if (k >= ngroups_max) {
617 r = get_group_creds(&g, gids+k);
626 if (setgroups(k, gids) < 0) {
637 static int enforce_user(const ExecContext *context, uid_t uid) {
641 /* Sets (but doesn't lookup) the uid and make sure we keep the
642 * capabilities while doing so. */
644 if (context->capabilities) {
646 static const cap_value_t bits[] = {
647 CAP_SETUID, /* Necessary so that we can run setresuid() below */
648 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
651 /* First step: If we need to keep capabilities but
652 * drop privileges we need to make sure we keep our
653 * caps, while we drop privileges. */
655 int sb = context->secure_bits|SECURE_KEEP_CAPS;
657 if (prctl(PR_GET_SECUREBITS) != sb)
658 if (prctl(PR_SET_SECUREBITS, sb) < 0)
662 /* Second step: set the capabilities. This will reduce
663 * the capabilities to the minimum we need. */
665 if (!(d = cap_dup(context->capabilities)))
668 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
669 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
675 if (cap_set_proc(d) < 0) {
684 /* Third step: actually set the uids */
685 if (setresuid(uid, uid, uid) < 0)
688 /* At this point we should have all necessary capabilities but
689 are otherwise a normal user. However, the caps might have got
690 corrupted due to the setresuid() so we need to clean them up
691 later. This is done outside of this call. */
698 static int null_conv(
700 const struct pam_message **msg,
701 struct pam_response **resp,
704 /* We don't support conversations */
709 static int setup_pam(
715 int fds[], unsigned n_fds) {
717 static const struct pam_conv conv = {
722 pam_handle_t *handle = NULL;
724 int pam_code = PAM_SUCCESS;
727 bool close_session = false;
728 pid_t pam_pid = 0, parent_pid;
734 /* We set up PAM in the parent process, then fork. The child
735 * will then stay around until killed via PR_SET_PDEATHSIG or
736 * systemd via the cgroup logic. It will then remove the PAM
737 * session again. The parent process will exec() the actual
738 * daemon. We do things this way to ensure that the main PID
739 * of the daemon is the one we initially fork()ed. */
741 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
747 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
750 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
753 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
756 close_session = true;
758 if ((!(e = pam_getenvlist(handle)))) {
759 pam_code = PAM_BUF_ERR;
763 /* Block SIGTERM, so that we know that it won't get lost in
765 if (sigemptyset(&ss) < 0 ||
766 sigaddset(&ss, SIGTERM) < 0 ||
767 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
770 parent_pid = getpid();
772 if ((pam_pid = fork()) < 0)
779 /* The child's job is to reset the PAM session on
782 /* This string must fit in 10 chars (i.e. the length
783 * of "/sbin/init"), to look pretty in /bin/ps */
784 rename_process("(sd-pam)");
786 /* Make sure we don't keep open the passed fds in this
787 child. We assume that otherwise only those fds are
788 open here that have been opened by PAM. */
789 close_many(fds, n_fds);
791 /* Drop privileges - we don't need any to pam_close_session
792 * and this will make PR_SET_PDEATHSIG work in most cases.
793 * If this fails, ignore the error - but expect sd-pam threads
794 * to fail to exit normally */
795 if (setresuid(uid, uid, uid) < 0)
796 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
798 /* Wait until our parent died. This will only work if
799 * the above setresuid() succeeds, otherwise the kernel
800 * will not allow unprivileged parents to kill their privileged
801 * children this way. We rely on the control groups kill logic
802 * to do the rest for us. */
803 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
806 /* Check if our parent process might already have
808 if (getppid() == parent_pid) {
810 if (sigwait(&ss, &sig) < 0) {
817 assert(sig == SIGTERM);
822 /* If our parent died we'll end the session */
823 if (getppid() != parent_pid)
824 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
830 pam_end(handle, pam_code | PAM_DATA_SILENT);
834 /* If the child was forked off successfully it will do all the
835 * cleanups, so forget about the handle here. */
838 /* Unblock SIGTERM again in the parent */
839 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
842 /* We close the log explicitly here, since the PAM modules
843 * might have opened it, but we don't want this fd around. */
852 if (pam_code != PAM_SUCCESS)
853 err = -EPERM; /* PAM errors do not map to errno */
859 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
861 pam_end(handle, pam_code | PAM_DATA_SILENT);
869 kill(pam_pid, SIGTERM);
870 kill(pam_pid, SIGCONT);
877 static void rename_process_from_path(const char *path) {
878 char process_name[11];
882 /* This resulting string must fit in 10 chars (i.e. the length
883 * of "/sbin/init") to look pretty in /bin/ps */
885 p = path_get_file_name(path);
887 rename_process("(...)");
893 /* The end of the process name is usually more
894 * interesting, since the first bit might just be
900 process_name[0] = '(';
901 memcpy(process_name+1, p, l);
902 process_name[1+l] = ')';
903 process_name[1+l+1] = 0;
905 rename_process(process_name);
908 static int apply_seccomp(uint32_t *syscall_filter) {
909 static const struct sock_filter header[] = {
910 VALIDATE_ARCHITECTURE,
913 static const struct sock_filter footer[] = {
919 struct sock_filter *f;
920 struct sock_fprog prog;
922 assert(syscall_filter);
924 /* First: count the syscalls to check for */
925 for (i = 0, n = 0; i < syscall_max(); i++)
926 if (syscall_filter[i >> 4] & (1 << (i & 31)))
929 /* Second: build the filter program from a header, the syscall
930 * matches and the footer */
931 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
932 memcpy(f, header, sizeof(header));
934 for (i = 0, n = 0; i < syscall_max(); i++)
935 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
936 struct sock_filter item[] = {
937 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
938 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
941 assert_cc(ELEMENTSOF(item) == 2);
943 f[ELEMENTSOF(header) + 2*n] = item[0];
944 f[ELEMENTSOF(header) + 2*n+1] = item[1];
949 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
951 /* Third: install the filter */
953 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
955 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
961 int exec_spawn(ExecCommand *command,
963 const ExecContext *context,
964 int fds[], unsigned n_fds,
966 bool apply_permissions,
968 bool apply_tty_stdin,
970 CGroupBonding *cgroup_bondings,
971 CGroupAttribute *cgroup_attributes,
972 const char *cgroup_suffix,
981 char _cleanup_strv_free_ **files_env = NULL;
986 assert(fds || n_fds <= 0);
988 if (context->std_input == EXEC_INPUT_SOCKET ||
989 context->std_output == EXEC_OUTPUT_SOCKET ||
990 context->std_error == EXEC_OUTPUT_SOCKET) {
1002 r = exec_context_load_environment(context, &files_env);
1004 log_struct_unit(LOG_ERR,
1006 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1013 argv = command->argv;
1015 line = exec_command_line(argv);
1019 log_struct_unit(LOG_DEBUG,
1021 "MESSAGE=About to execute %s", line,
1025 r = cgroup_bonding_realize_list(cgroup_bondings);
1029 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1038 const char *username = NULL, *home = NULL;
1039 uid_t uid = (uid_t) -1;
1040 gid_t gid = (gid_t) -1;
1041 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1042 **final_env = NULL, **final_argv = NULL;
1044 bool set_access = false;
1048 rename_process_from_path(command->path);
1050 /* We reset exactly these signals, since they are the
1051 * only ones we set to SIG_IGN in the main daemon. All
1052 * others we leave untouched because we set them to
1053 * SIG_DFL or a valid handler initially, both of which
1054 * will be demoted to SIG_DFL. */
1055 default_signals(SIGNALS_CRASH_HANDLER,
1056 SIGNALS_IGNORE, -1);
1058 if (context->ignore_sigpipe)
1059 ignore_signals(SIGPIPE, -1);
1061 assert_se(sigemptyset(&ss) == 0);
1062 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1064 r = EXIT_SIGNAL_MASK;
1069 if (idle_pipe[1] >= 0)
1070 close_nointr_nofail(idle_pipe[1]);
1071 if (idle_pipe[0] >= 0) {
1072 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1073 close_nointr_nofail(idle_pipe[0]);
1077 /* Close sockets very early to make sure we don't
1078 * block init reexecution because it cannot bind its
1081 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1082 socket_fd >= 0 ? 1 : n_fds);
1088 if (!context->same_pgrp)
1095 if (context->tcpwrap_name) {
1097 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1103 for (i = 0; i < (int) n_fds; i++) {
1104 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1112 exec_context_tty_reset(context);
1114 if (confirm_spawn) {
1117 err = ask_for_confirmation(&response, argv);
1118 if (err == -ETIMEDOUT)
1119 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1121 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1122 else if (response == 's') {
1123 write_confirm_message("Skipping execution.\n");
1127 } else if (response == 'n') {
1128 write_confirm_message("Failing execution.\n");
1134 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1135 * must make sure to drop O_NONBLOCK */
1137 fd_nonblock(socket_fd, false);
1139 err = setup_input(context, socket_fd, apply_tty_stdin);
1145 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1151 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1157 if (cgroup_bondings) {
1158 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1165 if (context->oom_score_adjust_set) {
1168 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1171 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1173 r = EXIT_OOM_ADJUST;
1178 if (context->nice_set)
1179 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1185 if (context->cpu_sched_set) {
1186 struct sched_param param;
1189 param.sched_priority = context->cpu_sched_priority;
1191 if (sched_setscheduler(0, context->cpu_sched_policy |
1192 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1194 r = EXIT_SETSCHEDULER;
1199 if (context->cpuset)
1200 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1202 r = EXIT_CPUAFFINITY;
1206 if (context->ioprio_set)
1207 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1213 if (context->timer_slack_nsec != (nsec_t) -1)
1214 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1216 r = EXIT_TIMERSLACK;
1220 if (context->utmp_id)
1221 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1223 if (context->user) {
1224 username = context->user;
1225 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1231 if (is_terminal_input(context->std_input)) {
1232 err = chown_terminal(STDIN_FILENO, uid);
1239 if (cgroup_bondings && context->control_group_modify) {
1240 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1242 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1252 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1253 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1260 if (apply_permissions) {
1261 err = enforce_groups(context, username, gid);
1268 umask(context->umask);
1271 if (apply_permissions && context->pam_name && username) {
1272 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1279 if (context->private_network) {
1280 if (unshare(CLONE_NEWNET) < 0) {
1289 if (strv_length(context->read_write_dirs) > 0 ||
1290 strv_length(context->read_only_dirs) > 0 ||
1291 strv_length(context->inaccessible_dirs) > 0 ||
1292 context->mount_flags != 0 ||
1293 context->private_tmp) {
1294 err = setup_namespace(context->read_write_dirs,
1295 context->read_only_dirs,
1296 context->inaccessible_dirs,
1297 context->private_tmp,
1298 context->mount_flags);
1306 if (context->root_directory)
1307 if (chroot(context->root_directory) < 0) {
1313 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1319 char _cleanup_free_ *d = NULL;
1321 if (asprintf(&d, "%s/%s",
1322 context->root_directory ? context->root_directory : "",
1323 context->working_directory ? context->working_directory : "") < 0) {
1336 /* We repeat the fd closing here, to make sure that
1337 * nothing is leaked from the PAM modules */
1338 err = close_all_fds(fds, n_fds);
1340 err = shift_fds(fds, n_fds);
1342 err = flags_fds(fds, n_fds, context->non_blocking);
1348 if (apply_permissions) {
1350 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1351 if (!context->rlimit[i])
1354 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1361 if (context->capability_bounding_set_drop) {
1362 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1364 r = EXIT_CAPABILITIES;
1369 if (context->user) {
1370 err = enforce_user(context, uid);
1377 /* PR_GET_SECUREBITS is not privileged, while
1378 * PR_SET_SECUREBITS is. So to suppress
1379 * potential EPERMs we'll try not to call
1380 * PR_SET_SECUREBITS unless necessary. */
1381 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1382 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1384 r = EXIT_SECUREBITS;
1388 if (context->capabilities)
1389 if (cap_set_proc(context->capabilities) < 0) {
1391 r = EXIT_CAPABILITIES;
1395 if (context->no_new_privileges)
1396 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1398 r = EXIT_NO_NEW_PRIVILEGES;
1402 if (context->syscall_filter) {
1403 err = apply_seccomp(context->syscall_filter);
1411 if (!(our_env = new0(char*, 7))) {
1418 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1419 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1426 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1433 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1434 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1440 if (is_terminal_input(context->std_input) ||
1441 context->std_output == EXEC_OUTPUT_TTY ||
1442 context->std_error == EXEC_OUTPUT_TTY)
1443 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1451 if (!(final_env = strv_env_merge(
1455 context->environment,
1464 if (!(final_argv = replace_env_argv(argv, final_env))) {
1470 final_env = strv_env_clean(final_env);
1472 execve(command->path, final_argv, final_env);
1479 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1480 "EXECUTABLE=%s", command->path,
1481 "MESSAGE=Failed at step %s spawning %s: %s",
1482 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1483 command->path, strerror(-err),
1492 log_struct_unit(LOG_DEBUG,
1494 "MESSAGE=Forked %s as %lu",
1495 command->path, (unsigned long) pid,
1498 /* We add the new process to the cgroup both in the child (so
1499 * that we can be sure that no user code is ever executed
1500 * outside of the cgroup) and in the parent (so that we can be
1501 * sure that when we kill the cgroup the process will be
1503 if (cgroup_bondings)
1504 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1506 exec_status_start(&command->exec_status, pid);
1512 void exec_context_init(ExecContext *c) {
1516 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1517 c->cpu_sched_policy = SCHED_OTHER;
1518 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1519 c->syslog_level_prefix = true;
1520 c->control_group_persistent = -1;
1521 c->ignore_sigpipe = true;
1522 c->timer_slack_nsec = (nsec_t) -1;
1525 void exec_context_done(ExecContext *c) {
1530 strv_free(c->environment);
1531 c->environment = NULL;
1533 strv_free(c->environment_files);
1534 c->environment_files = NULL;
1536 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1538 c->rlimit[l] = NULL;
1541 free(c->working_directory);
1542 c->working_directory = NULL;
1543 free(c->root_directory);
1544 c->root_directory = NULL;
1549 free(c->tcpwrap_name);
1550 c->tcpwrap_name = NULL;
1552 free(c->syslog_identifier);
1553 c->syslog_identifier = NULL;
1561 strv_free(c->supplementary_groups);
1562 c->supplementary_groups = NULL;
1567 if (c->capabilities) {
1568 cap_free(c->capabilities);
1569 c->capabilities = NULL;
1572 strv_free(c->read_only_dirs);
1573 c->read_only_dirs = NULL;
1575 strv_free(c->read_write_dirs);
1576 c->read_write_dirs = NULL;
1578 strv_free(c->inaccessible_dirs);
1579 c->inaccessible_dirs = NULL;
1582 CPU_FREE(c->cpuset);
1587 free(c->syscall_filter);
1588 c->syscall_filter = NULL;
1591 void exec_command_done(ExecCommand *c) {
1601 void exec_command_done_array(ExecCommand *c, unsigned n) {
1604 for (i = 0; i < n; i++)
1605 exec_command_done(c+i);
1608 void exec_command_free_list(ExecCommand *c) {
1612 LIST_REMOVE(ExecCommand, command, c, i);
1613 exec_command_done(i);
1618 void exec_command_free_array(ExecCommand **c, unsigned n) {
1621 for (i = 0; i < n; i++) {
1622 exec_command_free_list(c[i]);
1627 int exec_context_load_environment(const ExecContext *c, char ***l) {
1628 char **i, **r = NULL;
1633 STRV_FOREACH(i, c->environment_files) {
1636 bool ignore = false;
1648 if (!path_is_absolute(fn)) {
1657 /* Filename supports globbing, take all matching files */
1660 if (glob(fn, 0, NULL, &pglob) != 0) {
1666 return errno ? -errno : -EINVAL;
1668 count = pglob.gl_pathc;
1677 for (n = 0; n < count; n++) {
1678 k = load_env_file(pglob.gl_pathv[n], &p);
1693 m = strv_env_merge(2, r, p);
1713 static void strv_fprintf(FILE *f, char **l) {
1719 fprintf(f, " %s", *g);
1722 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1734 "%sWorkingDirectory: %s\n"
1735 "%sRootDirectory: %s\n"
1736 "%sNonBlocking: %s\n"
1737 "%sPrivateTmp: %s\n"
1738 "%sControlGroupModify: %s\n"
1739 "%sControlGroupPersistent: %s\n"
1740 "%sPrivateNetwork: %s\n"
1741 "%sIgnoreSIGPIPE: %s\n",
1743 prefix, c->working_directory ? c->working_directory : "/",
1744 prefix, c->root_directory ? c->root_directory : "/",
1745 prefix, yes_no(c->non_blocking),
1746 prefix, yes_no(c->private_tmp),
1747 prefix, yes_no(c->control_group_modify),
1748 prefix, yes_no(c->control_group_persistent),
1749 prefix, yes_no(c->private_network),
1750 prefix, yes_no(c->ignore_sigpipe));
1752 STRV_FOREACH(e, c->environment)
1753 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1755 STRV_FOREACH(e, c->environment_files)
1756 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1758 if (c->tcpwrap_name)
1760 "%sTCPWrapName: %s\n",
1761 prefix, c->tcpwrap_name);
1768 if (c->oom_score_adjust_set)
1770 "%sOOMScoreAdjust: %i\n",
1771 prefix, c->oom_score_adjust);
1773 for (i = 0; i < RLIM_NLIMITS; i++)
1775 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1777 if (c->ioprio_set) {
1781 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1785 "%sIOSchedulingClass: %s\n"
1786 "%sIOPriority: %i\n",
1787 prefix, strna(class_str),
1788 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1792 if (c->cpu_sched_set) {
1796 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1800 "%sCPUSchedulingPolicy: %s\n"
1801 "%sCPUSchedulingPriority: %i\n"
1802 "%sCPUSchedulingResetOnFork: %s\n",
1803 prefix, strna(policy_str),
1804 prefix, c->cpu_sched_priority,
1805 prefix, yes_no(c->cpu_sched_reset_on_fork));
1810 fprintf(f, "%sCPUAffinity:", prefix);
1811 for (i = 0; i < c->cpuset_ncpus; i++)
1812 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1813 fprintf(f, " %i", i);
1817 if (c->timer_slack_nsec != (nsec_t) -1)
1818 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1821 "%sStandardInput: %s\n"
1822 "%sStandardOutput: %s\n"
1823 "%sStandardError: %s\n",
1824 prefix, exec_input_to_string(c->std_input),
1825 prefix, exec_output_to_string(c->std_output),
1826 prefix, exec_output_to_string(c->std_error));
1832 "%sTTYVHangup: %s\n"
1833 "%sTTYVTDisallocate: %s\n",
1834 prefix, c->tty_path,
1835 prefix, yes_no(c->tty_reset),
1836 prefix, yes_no(c->tty_vhangup),
1837 prefix, yes_no(c->tty_vt_disallocate));
1839 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1840 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1841 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1842 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1843 char *fac_str, *lvl_str;
1846 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1850 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1855 "%sSyslogFacility: %s\n"
1856 "%sSyslogLevel: %s\n",
1857 prefix, strna(fac_str),
1858 prefix, strna(lvl_str));
1863 if (c->capabilities) {
1865 if ((t = cap_to_text(c->capabilities, NULL))) {
1866 fprintf(f, "%sCapabilities: %s\n",
1873 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1875 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1876 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1877 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1878 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1879 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1880 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1882 if (c->capability_bounding_set_drop) {
1884 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1886 for (l = 0; l <= cap_last_cap(); l++)
1887 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1890 if ((t = cap_to_name(l))) {
1891 fprintf(f, " %s", t);
1900 fprintf(f, "%sUser: %s\n", prefix, c->user);
1902 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1904 if (strv_length(c->supplementary_groups) > 0) {
1905 fprintf(f, "%sSupplementaryGroups:", prefix);
1906 strv_fprintf(f, c->supplementary_groups);
1911 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1913 if (strv_length(c->read_write_dirs) > 0) {
1914 fprintf(f, "%sReadWriteDirs:", prefix);
1915 strv_fprintf(f, c->read_write_dirs);
1919 if (strv_length(c->read_only_dirs) > 0) {
1920 fprintf(f, "%sReadOnlyDirs:", prefix);
1921 strv_fprintf(f, c->read_only_dirs);
1925 if (strv_length(c->inaccessible_dirs) > 0) {
1926 fprintf(f, "%sInaccessibleDirs:", prefix);
1927 strv_fprintf(f, c->inaccessible_dirs);
1933 "%sUtmpIdentifier: %s\n",
1934 prefix, c->utmp_id);
/* Marks the start of a process in the ExecStatus record s.
 *
 * The part visible here fills s->start_timestamp via dual_timestamp_get().
 * NOTE(review): the pid argument is presumably stored in the record too;
 * that statement is not visible in this excerpt -- confirm. */
1937 void exec_status_start(ExecStatus *s, pid_t pid) {
1942 dual_timestamp_get(&s->start_timestamp);
/* Records the exit of process pid in the ExecStatus record s.
 *
 * s       - status record to update; its exit_timestamp is filled via
 *           dual_timestamp_get()
 * context - execution context; its utmp_id is consulted and it is handed to
 *           exec_context_tty_reset()
 * pid     - PID of the process being reported
 * code    - forwarded to utmp_put_dead_process()
 * status  - forwarded to utmp_put_dead_process()
 *
 * NOTE(review): code/status look like the SIGCHLD code and exit status --
 * confirm against the callers. */
1945 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record already carries a different, non-zero PID. The statement guarded
 * by this check is not visible in this excerpt. */
1948 if (s->pid && s->pid != pid)
1952 dual_timestamp_get(&s->exit_timestamp);
/* Only report to utmp when the context has a utmp identifier set. */
1958 if (context->utmp_id)
1959 utmp_put_dead_process(context->utmp_id, pid, code, status);
1961 exec_context_tty_reset(context);
/* Writes a textual description of the ExecStatus record s to f, with each
 * emitted line starting with prefix.
 *
 * The PID is passed to the output cast to unsigned long. The start timestamp
 * is printed only when its realtime value is greater than zero. The exit
 * timestamp, the code (rendered via sigchld_code_to_string()) and the exit
 * status are printed only when the exit realtime value is greater than
 * zero. */
1965 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() for both timestamps. */
1966 char buf[FORMAT_TIMESTAMP_MAX];
1979 prefix, (unsigned long) s->pid);
/* Skip the start timestamp line if no start time is set. */
1981 if (s->start_timestamp.realtime > 0)
1983 "%sStart Timestamp: %s\n"
1984 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit details are only printed once an exit time is set. */
1986 if (s->exit_timestamp.realtime > 0)
1988 "%sExit Timestamp: %s\n"
1990 "%sExit Status: %i\n",
1991 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1992 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from the argument vector argv and returns it as a
 * char pointer.
 *
 * Visible here: one pass over argv, an allocation of k chars into n, and a
 * second pass that checks each argument for WHITESPACE characters.
 * NOTE(review): the first pass presumably computes the buffer size k, and
 * arguments containing whitespace are presumably quoted -- neither is visible
 * in this excerpt, confirm. Ownership of the returned buffer is presumably
 * the caller's -- confirm. */
1996 char *exec_command_line(char **argv) {
2004 STRV_FOREACH(a, argv)
/* Allocate the output buffer; the failure branch is not visible here. */
2007 if (!(n = new(char, k)))
2011 STRV_FOREACH(a, argv) {
/* Arguments containing whitespace take a separate path. */
2018 if (strpbrk(*a, WHITESPACE)) {
2029 /* FIXME: this doesn't really handle arguments that have
2030 * spaces and ticks in them */
/* Writes a description of the single command c to f: a "Command Line" entry
 * using prefix, followed by the command's exec_status dumped through
 * exec_status_dump() with a prefix extended by one tab. */
2035 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2037 const char *prefix2;
/* Build the deeper prefix; if strappend() yields NULL, fall back to the
 * unmodified prefix rather than failing. */
2046 p2 = strappend(prefix, "\t");
2047 prefix2 = p2 ? p2 : prefix;
/* Render argv as one string; when this yields NULL the "Command Line" entry
 * shows the ENOMEM error text instead. */
2049 cmd = exec_command_line(c->argv);
2052 "%sCommand Line: %s\n",
2053 prefix, cmd ? cmd : strerror(ENOMEM));
2057 exec_status_dump(&c->exec_status, f, prefix2);
/* Writes a description of every command in the list starting at c to f, by
 * calling exec_command_dump() on each entry with the same f and prefix. */
2062 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c doubles as the iteration cursor over the "command" list. */
2068 LIST_FOREACH(command, c, c)
2069 exec_command_dump(c, f, prefix);
/* Appends the command e to the command list whose head pointer is *l, by
 * locating the current tail and inserting e after it. */
2072 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2079 /* It's kind of important that we keep the order here */
2080 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2081 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for the command c, taking path followed by further
 * arguments.
 *
 * Visible here: a string vector l is built from path and the va_list ap via
 * strv_new_ap(), and path is duplicated into p with strdup().
 * NOTE(review): how l and p are stored into c, and which values are returned
 * on success and failure, is not visible in this excerpt -- confirm. */
2086 int exec_command_set(ExecCommand *c, const char *path, ...) {
2094 l = strv_new_ap(path, ap);
/* Failure branch of the path copy; its body is not visible here. */
2100 if (!(p = strdup(path))) {
/* String names for the ExecInput values, indexed by the enum constant. */
2114 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2115 [EXEC_INPUT_NULL] = "null",
2116 [EXEC_INPUT_TTY] = "tty",
2117 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2118 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2119 [EXEC_INPUT_SOCKET] = "socket"
/* Generates the string lookup helpers for ExecInput.
 * NOTE(review): presumably this is what defines exec_input_to_string(), and
 * it is backed by exec_input_table -- confirm against the macro definition. */
2122 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput values, indexed by the enum constant. The
 * "+console" names belong to the *_AND_CONSOLE variants. */
2124 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2125 [EXEC_OUTPUT_INHERIT] = "inherit",
2126 [EXEC_OUTPUT_NULL] = "null",
2127 [EXEC_OUTPUT_TTY] = "tty",
2128 [EXEC_OUTPUT_SYSLOG] = "syslog",
2129 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2130 [EXEC_OUTPUT_KMSG] = "kmsg",
2131 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2132 [EXEC_OUTPUT_JOURNAL] = "journal",
2133 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2134 [EXEC_OUTPUT_SOCKET] = "socket"
/* Generates the string lookup helpers for ExecOutput.
 * NOTE(review): presumably this is what defines exec_output_to_string(), and
 * it is backed by exec_output_table -- confirm against the macro definition. */
2137 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);