1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
44 #include <security/pam_appl.h>
50 #include "capability.h"
54 #include "securebits.h"
56 #include "namespace.h"
58 #include "exit-status.h"
60 #include "utmp-wtmp.h"
62 #include "loopback-setup.h"
63 #include "path-util.h"
64 #include "syscall-list.h"
66 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
68 /* This assumes there is a 'tty' group */
71 static int shift_fds(int fds[], unsigned n_fds) {
72 int start, restart_from;
77 /* Modifies the fds array! (sorts it) */
87 for (i = start; i < (int) n_fds; i++) {
90 /* Already at right index? */
94 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
97 close_nointr_nofail(fds[i]);
100 /* Hmm, the fd we wanted isn't free? Then
101 * let's remember that and try again from here*/
102 if (nfd != i+3 && restart_from < 0)
106 if (restart_from < 0)
109 start = restart_from;
115 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
124 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
126 for (i = 0; i < n_fds; i++) {
128 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
131 /* We unconditionally drop FD_CLOEXEC from the fds,
132 * since after all we want to pass these fds to our
135 if ((r = fd_cloexec(fds[i], false)) < 0)
142 static const char *tty_path(const ExecContext *context) {
145 if (context->tty_path)
146 return context->tty_path;
148 return "/dev/console";
151 void exec_context_tty_reset(const ExecContext *context) {
154 if (context->tty_vhangup)
155 terminal_vhangup(tty_path(context));
157 if (context->tty_reset)
158 reset_terminal(tty_path(context));
160 if (context->tty_vt_disallocate && context->tty_path)
161 vt_disallocate(context->tty_path);
164 static int open_null_as(int flags, int nfd) {
169 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
173 r = dup2(fd, nfd) < 0 ? -errno : nfd;
174 close_nointr_nofail(fd);
181 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
183 union sockaddr_union sa;
186 assert(output < _EXEC_OUTPUT_MAX);
190 fd = socket(AF_UNIX, SOCK_STREAM, 0);
195 sa.un.sun_family = AF_UNIX;
196 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
198 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
200 close_nointr_nofail(fd);
204 if (shutdown(fd, SHUT_RD) < 0) {
205 close_nointr_nofail(fd);
217 context->syslog_identifier ? context->syslog_identifier : ident,
219 context->syslog_priority,
220 !!context->syslog_level_prefix,
221 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
222 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
223 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
226 r = dup2(fd, nfd) < 0 ? -errno : nfd;
227 close_nointr_nofail(fd);
233 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
239 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
243 r = dup2(fd, nfd) < 0 ? -errno : nfd;
244 close_nointr_nofail(fd);
251 static bool is_terminal_input(ExecInput i) {
253 i == EXEC_INPUT_TTY ||
254 i == EXEC_INPUT_TTY_FORCE ||
255 i == EXEC_INPUT_TTY_FAIL;
258 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
260 if (is_terminal_input(std_input) && !apply_tty_stdin)
261 return EXEC_INPUT_NULL;
263 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
264 return EXEC_INPUT_NULL;
269 static int fixup_output(ExecOutput std_output, int socket_fd) {
271 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
272 return EXEC_OUTPUT_INHERIT;
277 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
282 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
286 case EXEC_INPUT_NULL:
287 return open_null_as(O_RDONLY, STDIN_FILENO);
290 case EXEC_INPUT_TTY_FORCE:
291 case EXEC_INPUT_TTY_FAIL: {
294 if ((fd = acquire_terminal(
296 i == EXEC_INPUT_TTY_FAIL,
297 i == EXEC_INPUT_TTY_FORCE,
302 if (fd != STDIN_FILENO) {
303 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
304 close_nointr_nofail(fd);
311 case EXEC_INPUT_SOCKET:
312 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
315 assert_not_reached("Unknown input type");
319 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
326 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
327 o = fixup_output(context->std_output, socket_fd);
329 /* This expects the input is already set up */
333 case EXEC_OUTPUT_INHERIT:
335 /* If input got downgraded, inherit the original value */
336 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
337 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
339 /* If the input is connected to anything that's not a /dev/null, inherit that... */
340 if (i != EXEC_INPUT_NULL)
341 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
343 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
345 return STDOUT_FILENO;
347 /* We need to open /dev/null here anew, to get the
348 * right access mode. So we fall through */
350 case EXEC_OUTPUT_NULL:
351 return open_null_as(O_WRONLY, STDOUT_FILENO);
353 case EXEC_OUTPUT_TTY:
354 if (is_terminal_input(i))
355 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
357 /* We don't reset the terminal if this is just about output */
358 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
360 case EXEC_OUTPUT_SYSLOG:
361 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
362 case EXEC_OUTPUT_KMSG:
363 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
364 case EXEC_OUTPUT_JOURNAL:
365 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
366 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
368 case EXEC_OUTPUT_SOCKET:
369 assert(socket_fd >= 0);
370 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
373 assert_not_reached("Unknown output type");
377 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
384 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
385 o = fixup_output(context->std_output, socket_fd);
386 e = fixup_output(context->std_error, socket_fd);
388 /* This expects the input and output are already set up */
390 /* Don't change the stderr file descriptor if we inherit all
391 * the way and are not on a tty */
392 if (e == EXEC_OUTPUT_INHERIT &&
393 o == EXEC_OUTPUT_INHERIT &&
394 i == EXEC_INPUT_NULL &&
395 !is_terminal_input(context->std_input) &&
397 return STDERR_FILENO;
399 /* Duplicate from stdout if possible */
400 if (e == o || e == EXEC_OUTPUT_INHERIT)
401 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
405 case EXEC_OUTPUT_NULL:
406 return open_null_as(O_WRONLY, STDERR_FILENO);
408 case EXEC_OUTPUT_TTY:
409 if (is_terminal_input(i))
410 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
412 /* We don't reset the terminal if this is just about output */
413 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
415 case EXEC_OUTPUT_SYSLOG:
416 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
417 case EXEC_OUTPUT_KMSG:
418 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
419 case EXEC_OUTPUT_JOURNAL:
420 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
421 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
423 case EXEC_OUTPUT_SOCKET:
424 assert(socket_fd >= 0);
425 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
428 assert_not_reached("Unknown error type");
432 static int chown_terminal(int fd, uid_t uid) {
437 /* This might fail. What matters are the results. */
438 (void) fchown(fd, uid, -1);
439 (void) fchmod(fd, TTY_MODE);
441 if (fstat(fd, &st) < 0)
444 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
450 static int setup_confirm_stdio(int *_saved_stdin,
451 int *_saved_stdout) {
452 int fd = -1, saved_stdin, saved_stdout = -1, r;
454 assert(_saved_stdin);
455 assert(_saved_stdout);
457 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
461 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
462 if (saved_stdout < 0) {
467 fd = acquire_terminal(
472 DEFAULT_CONFIRM_USEC);
478 r = chown_terminal(fd, getuid());
482 if (dup2(fd, STDIN_FILENO) < 0) {
487 if (dup2(fd, STDOUT_FILENO) < 0) {
493 close_nointr_nofail(fd);
495 *_saved_stdin = saved_stdin;
496 *_saved_stdout = saved_stdout;
501 if (saved_stdout >= 0)
502 close_nointr_nofail(saved_stdout);
504 if (saved_stdin >= 0)
505 close_nointr_nofail(saved_stdin);
508 close_nointr_nofail(fd);
513 static int write_confirm_message(const char *format, ...) {
519 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
523 va_start(ap, format);
524 vdprintf(fd, format, ap);
527 close_nointr_nofail(fd);
532 static int restore_confirm_stdio(int *saved_stdin,
538 assert(saved_stdout);
542 if (*saved_stdin >= 0)
543 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
546 if (*saved_stdout >= 0)
547 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
550 if (*saved_stdin >= 0)
551 close_nointr_nofail(*saved_stdin);
553 if (*saved_stdout >= 0)
554 close_nointr_nofail(*saved_stdout);
559 static int ask_for_confirmation(char *response, char **argv) {
560 int saved_stdout = -1, saved_stdin = -1, r;
563 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
567 line = exec_command_line(argv);
571 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
574 restore_confirm_stdio(&saved_stdin, &saved_stdout);
579 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
580 bool keep_groups = false;
585 /* Lookup and set GID and supplementary group list. Here too
586 * we avoid NSS lookups for gid=0. */
588 if (context->group || username) {
590 if (context->group) {
591 const char *g = context->group;
593 if ((r = get_group_creds(&g, &gid)) < 0)
597 /* First step, initialize groups from /etc/group */
598 if (username && gid != 0) {
599 if (initgroups(username, gid) < 0)
605 /* Second step, set our gids */
606 if (setresgid(gid, gid, gid) < 0)
610 if (context->supplementary_groups) {
615 /* Final step, initialize any manually set supplementary groups */
616 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
618 if (!(gids = new(gid_t, ngroups_max)))
622 if ((k = getgroups(ngroups_max, gids)) < 0) {
629 STRV_FOREACH(i, context->supplementary_groups) {
632 if (k >= ngroups_max) {
638 r = get_group_creds(&g, gids+k);
647 if (setgroups(k, gids) < 0) {
658 static int enforce_user(const ExecContext *context, uid_t uid) {
662 /* Sets (but doesn't look up) the uid and makes sure we keep
663 * capabilities while doing so. */
665 if (context->capabilities) {
667 static const cap_value_t bits[] = {
668 CAP_SETUID, /* Necessary so that we can run setresuid() below */
669 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
672 /* First step: If we need to keep capabilities but
673 * drop privileges we need to make sure we keep our
674 * caps, while we drop privileges. */
676 int sb = context->secure_bits|SECURE_KEEP_CAPS;
678 if (prctl(PR_GET_SECUREBITS) != sb)
679 if (prctl(PR_SET_SECUREBITS, sb) < 0)
683 /* Second step: set the capabilities. This will reduce
684 * the capabilities to the minimum we need. */
686 if (!(d = cap_dup(context->capabilities)))
689 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
690 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
696 if (cap_set_proc(d) < 0) {
705 /* Third step: actually set the uids */
706 if (setresuid(uid, uid, uid) < 0)
709 /* At this point we should have all necessary capabilities but
710 are otherwise a normal user. However, the caps might have been
711 corrupted due to the setresuid() so we need to clean them up
712 later. This is done outside of this call. */
719 static int null_conv(
721 const struct pam_message **msg,
722 struct pam_response **resp,
725 /* We don't support conversations */
730 static int setup_pam(
736 int fds[], unsigned n_fds) {
738 static const struct pam_conv conv = {
743 pam_handle_t *handle = NULL;
745 int pam_code = PAM_SUCCESS;
748 bool close_session = false;
749 pid_t pam_pid = 0, parent_pid;
755 /* We set up PAM in the parent process, then fork. The child
756 * will then stay around until killed via PR_SET_PDEATHSIG or
757 * systemd via the cgroup logic. It will then remove the PAM
758 * session again. The parent process will exec() the actual
759 * daemon. We do things this way to ensure that the main PID
760 * of the daemon is the one we initially fork()ed. */
762 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
768 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
771 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
774 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
777 close_session = true;
779 if ((!(e = pam_getenvlist(handle)))) {
780 pam_code = PAM_BUF_ERR;
784 /* Block SIGTERM, so that we know that it won't get lost in
786 if (sigemptyset(&ss) < 0 ||
787 sigaddset(&ss, SIGTERM) < 0 ||
788 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
791 parent_pid = getpid();
793 if ((pam_pid = fork()) < 0)
800 /* The child's job is to reset the PAM session on
803 /* This string must fit in 10 chars (i.e. the length
804 * of "/sbin/init"), to look pretty in /bin/ps */
805 rename_process("(sd-pam)");
807 /* Make sure we don't keep open the passed fds in this
808 child. We assume that otherwise only those fds are
809 open here that have been opened by PAM. */
810 close_many(fds, n_fds);
812 /* Drop privileges - we don't need any to call pam_close_session()
813 * and this will make PR_SET_PDEATHSIG work in most cases.
814 * If this fails, ignore the error - but expect sd-pam threads
815 * to fail to exit normally */
816 if (setresuid(uid, uid, uid) < 0)
817 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
819 /* Wait until our parent died. This will only work if
820 * the above setresuid() succeeds, otherwise the kernel
821 * will not allow unprivileged parents to kill their privileged
822 * children this way. We rely on the control groups kill logic
823 * to do the rest for us. */
824 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
827 /* Check if our parent process might already have
829 if (getppid() == parent_pid) {
831 if (sigwait(&ss, &sig) < 0) {
838 assert(sig == SIGTERM);
843 /* If our parent died we'll end the session */
844 if (getppid() != parent_pid)
845 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
851 pam_end(handle, pam_code | PAM_DATA_SILENT);
855 /* If the child was forked off successfully it will do all the
856 * cleanups, so forget about the handle here. */
859 /* Unblock SIGTERM again in the parent */
860 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
863 /* We close the log explicitly here, since the PAM modules
864 * might have opened it, but we don't want this fd around. */
873 if (pam_code != PAM_SUCCESS)
874 err = -EPERM; /* PAM errors do not map to errno */
880 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
882 pam_end(handle, pam_code | PAM_DATA_SILENT);
890 kill(pam_pid, SIGTERM);
891 kill(pam_pid, SIGCONT);
898 static void rename_process_from_path(const char *path) {
899 char process_name[11];
903 /* This resulting string must fit in 10 chars (i.e. the length
904 * of "/sbin/init") to look pretty in /bin/ps */
906 p = path_get_file_name(path);
908 rename_process("(...)");
914 /* The end of the process name is usually more
915 * interesting, since the first bit might just be
921 process_name[0] = '(';
922 memcpy(process_name+1, p, l);
923 process_name[1+l] = ')';
924 process_name[1+l+1] = 0;
926 rename_process(process_name);
929 static int apply_seccomp(uint32_t *syscall_filter) {
930 static const struct sock_filter header[] = {
931 VALIDATE_ARCHITECTURE,
934 static const struct sock_filter footer[] = {
940 struct sock_filter *f;
941 struct sock_fprog prog;
943 assert(syscall_filter);
945 /* First: count the syscalls to check for */
946 for (i = 0, n = 0; i < syscall_max(); i++)
947 if (syscall_filter[i >> 4] & (1 << (i & 31)))
950 /* Second: build the filter program from a header, the syscall
951 * matches and the footer */
952 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
953 memcpy(f, header, sizeof(header));
955 for (i = 0, n = 0; i < syscall_max(); i++)
956 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
957 struct sock_filter item[] = {
958 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
959 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
962 assert_cc(ELEMENTSOF(item) == 2);
964 f[ELEMENTSOF(header) + 2*n] = item[0];
965 f[ELEMENTSOF(header) + 2*n+1] = item[1];
970 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
972 /* Third: install the filter */
974 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
976 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
982 int exec_spawn(ExecCommand *command,
984 const ExecContext *context,
985 int fds[], unsigned n_fds,
987 bool apply_permissions,
989 bool apply_tty_stdin,
991 CGroupBonding *cgroup_bondings,
992 CGroupAttribute *cgroup_attributes,
993 const char *cgroup_suffix,
1002 char **files_env = NULL;
1007 assert(fds || n_fds <= 0);
1009 if (context->std_input == EXEC_INPUT_SOCKET ||
1010 context->std_output == EXEC_OUTPUT_SOCKET ||
1011 context->std_error == EXEC_OUTPUT_SOCKET) {
1023 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
1024 log_error("Failed to load environment files: %s", strerror(-r));
1029 argv = command->argv;
1031 line = exec_command_line(argv);
1037 log_debug("About to execute: %s", line);
1040 r = cgroup_bonding_realize_list(cgroup_bondings);
1044 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1046 if ((pid = fork()) < 0) {
1054 const char *username = NULL, *home = NULL;
1055 uid_t uid = (uid_t) -1;
1056 gid_t gid = (gid_t) -1;
1057 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1059 bool set_access = false;
1063 rename_process_from_path(command->path);
1065 /* We reset exactly these signals, since they are the
1066 * only ones we set to SIG_IGN in the main daemon. All
1067 * others we leave untouched because we set them to
1068 * SIG_DFL or a valid handler initially, both of which
1069 * will be demoted to SIG_DFL. */
1070 default_signals(SIGNALS_CRASH_HANDLER,
1071 SIGNALS_IGNORE, -1);
1073 if (context->ignore_sigpipe)
1074 ignore_signals(SIGPIPE, -1);
1076 assert_se(sigemptyset(&ss) == 0);
1077 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1079 r = EXIT_SIGNAL_MASK;
1084 if (idle_pipe[1] >= 0)
1085 close_nointr_nofail(idle_pipe[1]);
1086 if (idle_pipe[0] >= 0) {
1087 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1088 close_nointr_nofail(idle_pipe[0]);
1092 /* Close sockets very early to make sure we don't
1093 * block init reexecution because it cannot bind its
1096 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1097 socket_fd >= 0 ? 1 : n_fds);
1103 if (!context->same_pgrp)
1110 if (context->tcpwrap_name) {
1112 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1118 for (i = 0; i < (int) n_fds; i++) {
1119 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1127 exec_context_tty_reset(context);
1129 if (confirm_spawn) {
1132 err = ask_for_confirmation(&response, argv);
1133 if (err == -ETIMEDOUT)
1134 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1136 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1137 else if (response == 's') {
1138 write_confirm_message("Skipping execution.\n");
1142 } else if (response == 'n') {
1143 write_confirm_message("Failing execution.\n");
1149 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1150 * must make sure to drop O_NONBLOCK */
1152 fd_nonblock(socket_fd, false);
1154 err = setup_input(context, socket_fd, apply_tty_stdin);
1160 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1166 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1172 if (cgroup_bondings) {
1173 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1180 if (context->oom_score_adjust_set) {
1183 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1186 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1188 r = EXIT_OOM_ADJUST;
1193 if (context->nice_set)
1194 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1200 if (context->cpu_sched_set) {
1201 struct sched_param param;
1204 param.sched_priority = context->cpu_sched_priority;
1206 if (sched_setscheduler(0, context->cpu_sched_policy |
1207 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1209 r = EXIT_SETSCHEDULER;
1214 if (context->cpuset)
1215 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1217 r = EXIT_CPUAFFINITY;
1221 if (context->ioprio_set)
1222 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1228 if (context->timer_slack_nsec != (nsec_t) -1)
1229 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1231 r = EXIT_TIMERSLACK;
1235 if (context->utmp_id)
1236 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1238 if (context->user) {
1239 username = context->user;
1240 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1246 if (is_terminal_input(context->std_input)) {
1247 err = chown_terminal(STDIN_FILENO, uid);
1254 if (cgroup_bondings && context->control_group_modify) {
1255 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1257 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1267 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1268 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1275 if (apply_permissions) {
1276 err = enforce_groups(context, username, gid);
1283 umask(context->umask);
1286 if (apply_permissions && context->pam_name && username) {
1287 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1294 if (context->private_network) {
1295 if (unshare(CLONE_NEWNET) < 0) {
1304 if (strv_length(context->read_write_dirs) > 0 ||
1305 strv_length(context->read_only_dirs) > 0 ||
1306 strv_length(context->inaccessible_dirs) > 0 ||
1307 context->mount_flags != 0 ||
1308 context->private_tmp) {
1309 err = setup_namespace(context->read_write_dirs,
1310 context->read_only_dirs,
1311 context->inaccessible_dirs,
1312 context->private_tmp,
1313 context->mount_flags);
1321 if (context->root_directory)
1322 if (chroot(context->root_directory) < 0) {
1328 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1337 if (asprintf(&d, "%s/%s",
1338 context->root_directory ? context->root_directory : "",
1339 context->working_directory ? context->working_directory : "") < 0) {
1355 /* We repeat the fd closing here, to make sure that
1356 * nothing is leaked from the PAM modules */
1357 err = close_all_fds(fds, n_fds);
1359 err = shift_fds(fds, n_fds);
1361 err = flags_fds(fds, n_fds, context->non_blocking);
1367 if (apply_permissions) {
1369 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1370 if (!context->rlimit[i])
1373 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1380 if (context->capability_bounding_set_drop) {
1381 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1383 r = EXIT_CAPABILITIES;
1388 if (context->user) {
1389 err = enforce_user(context, uid);
1396 /* PR_GET_SECUREBITS is not privileged, while
1397 * PR_SET_SECUREBITS is. So to suppress
1398 * potential EPERMs we'll try not to call
1399 * PR_SET_SECUREBITS unless necessary. */
1400 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1401 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1403 r = EXIT_SECUREBITS;
1407 if (context->capabilities)
1408 if (cap_set_proc(context->capabilities) < 0) {
1410 r = EXIT_CAPABILITIES;
1414 if (context->no_new_privileges)
1415 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1417 r = EXIT_NO_NEW_PRIVILEGES;
1421 if (context->syscall_filter) {
1422 err = apply_seccomp(context->syscall_filter);
1430 if (!(our_env = new0(char*, 7))) {
1437 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1438 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1445 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1452 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1453 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1459 if (is_terminal_input(context->std_input) ||
1460 context->std_output == EXEC_OUTPUT_TTY ||
1461 context->std_error == EXEC_OUTPUT_TTY)
1462 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1470 if (!(final_env = strv_env_merge(
1474 context->environment,
1483 if (!(final_argv = replace_env_argv(argv, final_env))) {
1489 final_env = strv_env_clean(final_env);
1491 execve(command->path, final_argv, final_env);
1498 log_warning("Failed at step %s spawning %s: %s",
1499 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1500 command->path, strerror(-err));
1504 strv_free(final_env);
1506 strv_free(files_env);
1507 strv_free(final_argv);
1512 strv_free(files_env);
1514 /* We add the new process to the cgroup both in the child (so
1515 * that we can be sure that no user code is ever executed
1516 * outside of the cgroup) and in the parent (so that we can be
1517 * sure that when we kill the cgroup the process will be
1519 if (cgroup_bondings)
1520 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1522 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1524 exec_status_start(&command->exec_status, pid);
1530 strv_free(files_env);
1535 void exec_context_init(ExecContext *c) {
1539 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540 c->cpu_sched_policy = SCHED_OTHER;
1541 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542 c->syslog_level_prefix = true;
1543 c->control_group_persistent = -1;
1544 c->ignore_sigpipe = true;
1545 c->timer_slack_nsec = (nsec_t) -1;
1548 void exec_context_done(ExecContext *c) {
1553 strv_free(c->environment);
1554 c->environment = NULL;
1556 strv_free(c->environment_files);
1557 c->environment_files = NULL;
1559 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1561 c->rlimit[l] = NULL;
1564 free(c->working_directory);
1565 c->working_directory = NULL;
1566 free(c->root_directory);
1567 c->root_directory = NULL;
1572 free(c->tcpwrap_name);
1573 c->tcpwrap_name = NULL;
1575 free(c->syslog_identifier);
1576 c->syslog_identifier = NULL;
1584 strv_free(c->supplementary_groups);
1585 c->supplementary_groups = NULL;
1590 if (c->capabilities) {
1591 cap_free(c->capabilities);
1592 c->capabilities = NULL;
1595 strv_free(c->read_only_dirs);
1596 c->read_only_dirs = NULL;
1598 strv_free(c->read_write_dirs);
1599 c->read_write_dirs = NULL;
1601 strv_free(c->inaccessible_dirs);
1602 c->inaccessible_dirs = NULL;
1605 CPU_FREE(c->cpuset);
1610 free(c->syscall_filter);
1611 c->syscall_filter = NULL;
1614 void exec_command_done(ExecCommand *c) {
1624 void exec_command_done_array(ExecCommand *c, unsigned n) {
1627 for (i = 0; i < n; i++)
1628 exec_command_done(c+i);
1631 void exec_command_free_list(ExecCommand *c) {
1635 LIST_REMOVE(ExecCommand, command, c, i);
1636 exec_command_done(i);
1641 void exec_command_free_array(ExecCommand **c, unsigned n) {
1644 for (i = 0; i < n; i++) {
1645 exec_command_free_list(c[i]);
1650 int exec_context_load_environment(const ExecContext *c, char ***l) {
1651 char **i, **r = NULL;
1656 STRV_FOREACH(i, c->environment_files) {
1659 bool ignore = false;
1669 if (!path_is_absolute(fn)) {
1678 if ((k = load_env_file(fn, &p)) < 0) {
1692 m = strv_env_merge(2, r, p);
1708 static void strv_fprintf(FILE *f, char **l) {
1714 fprintf(f, " %s", *g);
1717 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1729 "%sWorkingDirectory: %s\n"
1730 "%sRootDirectory: %s\n"
1731 "%sNonBlocking: %s\n"
1732 "%sPrivateTmp: %s\n"
1733 "%sControlGroupModify: %s\n"
1734 "%sControlGroupPersistent: %s\n"
1735 "%sPrivateNetwork: %s\n"
1736 "%sIgnoreSIGPIPE: %s\n",
1738 prefix, c->working_directory ? c->working_directory : "/",
1739 prefix, c->root_directory ? c->root_directory : "/",
1740 prefix, yes_no(c->non_blocking),
1741 prefix, yes_no(c->private_tmp),
1742 prefix, yes_no(c->control_group_modify),
1743 prefix, yes_no(c->control_group_persistent),
1744 prefix, yes_no(c->private_network),
1745 prefix, yes_no(c->ignore_sigpipe));
1747 STRV_FOREACH(e, c->environment)
1748 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1750 STRV_FOREACH(e, c->environment_files)
1751 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1753 if (c->tcpwrap_name)
1755 "%sTCPWrapName: %s\n",
1756 prefix, c->tcpwrap_name);
1763 if (c->oom_score_adjust_set)
1765 "%sOOMScoreAdjust: %i\n",
1766 prefix, c->oom_score_adjust);
1768 for (i = 0; i < RLIM_NLIMITS; i++)
1770 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1774 "%sIOSchedulingClass: %s\n"
1775 "%sIOPriority: %i\n",
1776 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1777 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1779 if (c->cpu_sched_set)
1781 "%sCPUSchedulingPolicy: %s\n"
1782 "%sCPUSchedulingPriority: %i\n"
1783 "%sCPUSchedulingResetOnFork: %s\n",
1784 prefix, sched_policy_to_string(c->cpu_sched_policy),
1785 prefix, c->cpu_sched_priority,
1786 prefix, yes_no(c->cpu_sched_reset_on_fork));
1789 fprintf(f, "%sCPUAffinity:", prefix);
1790 for (i = 0; i < c->cpuset_ncpus; i++)
1791 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1792 fprintf(f, " %i", i);
1796 if (c->timer_slack_nsec != (nsec_t) -1)
1797 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1800 "%sStandardInput: %s\n"
1801 "%sStandardOutput: %s\n"
1802 "%sStandardError: %s\n",
1803 prefix, exec_input_to_string(c->std_input),
1804 prefix, exec_output_to_string(c->std_output),
1805 prefix, exec_output_to_string(c->std_error));
1811 "%sTTYVHangup: %s\n"
1812 "%sTTYVTDisallocate: %s\n",
1813 prefix, c->tty_path,
1814 prefix, yes_no(c->tty_reset),
1815 prefix, yes_no(c->tty_vhangup),
1816 prefix, yes_no(c->tty_vt_disallocate));
1818 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1819 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1820 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1821 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1823 "%sSyslogFacility: %s\n"
1824 "%sSyslogLevel: %s\n",
1825 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1826 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1828 if (c->capabilities) {
1830 if ((t = cap_to_text(c->capabilities, NULL))) {
1831 fprintf(f, "%sCapabilities: %s\n",
1838 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1840 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1841 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1842 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1843 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1844 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1845 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1847 if (c->capability_bounding_set_drop) {
1849 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1851 for (l = 0; l <= cap_last_cap(); l++)
1852 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1855 if ((t = cap_to_name(l))) {
1856 fprintf(f, " %s", t);
1865 fprintf(f, "%sUser: %s\n", prefix, c->user);
1867 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1869 if (strv_length(c->supplementary_groups) > 0) {
1870 fprintf(f, "%sSupplementaryGroups:", prefix);
1871 strv_fprintf(f, c->supplementary_groups);
1876 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1878 if (strv_length(c->read_write_dirs) > 0) {
1879 fprintf(f, "%sReadWriteDirs:", prefix);
1880 strv_fprintf(f, c->read_write_dirs);
1884 if (strv_length(c->read_only_dirs) > 0) {
1885 fprintf(f, "%sReadOnlyDirs:", prefix);
1886 strv_fprintf(f, c->read_only_dirs);
1890 if (strv_length(c->inaccessible_dirs) > 0) {
1891 fprintf(f, "%sInaccessibleDirs:", prefix);
1892 strv_fprintf(f, c->inaccessible_dirs);
1898 "%sUtmpIdentifier: %s\n",
1899 prefix, c->utmp_id);
/* Begins tracking a process in the status record *s.
 *
 * s:   status record to update
 * pid: process the record is for
 *
 * The visible statement fills s->start_timestamp via dual_timestamp_get().
 * NOTE(review): how pid is recorded in *s is not visible in this listing --
 * confirm against the full function body. */
1902 void exec_status_start(ExecStatus *s, pid_t pid) {
1907 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in the status record *s.
 *
 * s:       status record to update
 * context: execution context the process ran with; its utmp_id is consulted
 *          and it is passed to exec_context_tty_reset()
 * pid:     process that terminated
 * code:    passed through to utmp_put_dead_process()
 * status:  passed through to utmp_put_dead_process()
 *
 * NOTE(review): presumably code/status are the SIGCHLD code and exit status
 * and are also stored in *s -- that part is not visible here; confirm. */
1910 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* Special-cases a record that already holds a non-zero pid different from
 * the one reported now. */
1913 if (s->pid && s->pid != pid)
1917 dual_timestamp_get(&s->exit_timestamp);
/* Only contexts with a utmp identifier get a dead-process record. */
1923 if (context->utmp_id)
1924 utmp_put_dead_process(context->utmp_id, pid, code, status);
1926 exec_context_tty_reset(context);
/* Produces a human-readable dump of the status record *s.
 *
 * s:      status record to describe
 * f:      output stream (NOTE(review): the output calls themselves are not
 *         visible in this listing; presumably they write to f -- confirm)
 * prefix: string placed at the start of every emitted line (each format
 *         string begins with "%s" fed from prefix)
 *
 * The pid is printed as unsigned long. The start and exit sections are each
 * emitted only when the corresponding realtime timestamp is greater than
 * zero. */
1930 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp(); reused for both timestamps. */
1931 char buf[FORMAT_TIMESTAMP_MAX];
1944 prefix, (unsigned long) s->pid);
/* Start timestamp is reported only if it has been set (> 0). */
1946 if (s->start_timestamp.realtime > 0)
1948 "%sStart Timestamp: %s\n",
1949 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit details are reported only if an exit timestamp has been set (> 0). */
1951 if (s->exit_timestamp.realtime > 0)
1953 "%sExit Timestamp: %s\n"
1955 "%sExit Status: %i\n",
1956 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1957 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from the argument vector argv.
 *
 * argv: string vector of command-line arguments
 *
 * The visible code iterates argv twice with STRV_FOREACH, allocating a char
 * buffer n of k bytes via new() in between, and treats arguments that
 * contain a WHITESPACE character separately in the second pass.
 * NOTE(review): presumably the first pass computes the size k, the second
 * pass copies (quoting whitespace-containing arguments), and the result is a
 * heap string the caller must free, with NULL returned on allocation
 * failure -- none of that is visible in this listing; confirm. */
1961 char *exec_command_line(char **argv) {
1969 STRV_FOREACH(a, argv)
/* Allocation failure is detected here. */
1972 if (!(n = new(char, k)))
1976 STRV_FOREACH(a, argv) {
/* Arguments containing any WHITESPACE character take a separate path. */
1983 if (strpbrk(*a, WHITESPACE)) {
1994 /* FIXME: this doesn't really handle arguments that have
1995 * spaces and ticks in them */
/* Dumps one command: its command line, followed by its execution status.
 *
 * c:      command to describe; c->argv and c->exec_status are read
 * f:      output stream, forwarded to exec_status_dump()
 * prefix: string placed at the start of the "Command Line" line
 *
 * NOTE(review): release of p2 and cmd is not visible in this listing --
 * confirm they are freed. */
2000 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
/* Prefix used for the nested status dump. */
2002 const char *prefix2;
/* Nest the status lines one tab deeper; if strappend() yields NULL, fall
 * back to the caller's prefix rather than failing. */
2011 p2 = strappend(prefix, "\t");
2012 prefix2 = p2 ? p2 : prefix;
2014 cmd = exec_command_line(c->argv);
/* If the command line could not be built (cmd is NULL), the ENOMEM error
 * text is printed in its place. */
2017 "%sCommand Line: %s\n",
2018 prefix, cmd ? cmd : strerror(ENOMEM));
2022 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of a list by calling exec_command_dump() on each
 * entry with the same f and prefix.
 *
 * c:      first command of the list
 * f:      output stream, forwarded unchanged
 * prefix: line prefix, forwarded unchanged */
2027 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c doubles as the loop cursor; the entries appear to be
 * linked through their `command` list member. */
2033 LIST_FOREACH(command, c, c)
2034 exec_command_dump(c, f, prefix);
/* Appends command e at the end of the command list *l.
 *
 * l: address of the list head
 * e: command to add
 *
 * NOTE(review): handling of an empty list (*l == NULL) is not visible in
 * this listing -- confirm against the full function body. */
2037 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2044 /* It's kind of important that we keep the order here, so the new entry
 * goes after the current tail instead of being prepended. */
2045 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2046 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up command c from a path and a variadic list of further strings.
 *
 * c:    command to fill in
 * path: first string; it is passed to strv_new_ap() together with the
 *       variadic arguments, and separately duplicated with strdup()
 * ...:  remaining strings
 *
 * NOTE(review): presumably the list must be NULL-terminated, l and p end up
 * as c's argv and path, and the return value is 0 on success or a negative
 * errno on allocation failure -- none of this is visible in this listing;
 * confirm. */
2051 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* Build the string vector from path plus the variadic arguments. */
2059 l = strv_new_ap(path, ap);
/* A private copy of path is taken; failure of strdup() is handled in the
 * branch opened here. */
2065 if (!(p = strdup(path))) {
/* Names of the ExecInput values, indexed by enum constant and sized by
 * _EXEC_INPUT_MAX. */
2079 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2080 [EXEC_INPUT_NULL] = "null",
2081 [EXEC_INPUT_TTY] = "tty",
2082 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2083 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2084 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably expands to the lookup helpers built on the table
 * above (exec_input_to_string() is called elsewhere in this file) -- confirm
 * against the macro definition. */
2087 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum constant and sized by
 * _EXEC_OUTPUT_MAX. The "+console" names belong to the *_AND_CONSOLE
 * variants of the syslog, kmsg and journal outputs. */
2089 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2090 [EXEC_OUTPUT_INHERIT] = "inherit",
2091 [EXEC_OUTPUT_NULL] = "null",
2092 [EXEC_OUTPUT_TTY] = "tty",
2093 [EXEC_OUTPUT_SYSLOG] = "syslog",
2094 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2095 [EXEC_OUTPUT_KMSG] = "kmsg",
2096 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2097 [EXEC_OUTPUT_JOURNAL] = "journal",
2098 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2099 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably expands to the lookup helpers built on the table
 * above (exec_output_to_string() is called elsewhere in this file) --
 * confirm against the macro definition. */
2102 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);