1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
44 #include <security/pam_appl.h>
50 #include "capability.h"
53 #include "sd-messages.h"
55 #include "securebits.h"
57 #include "namespace.h"
59 #include "exit-status.h"
61 #include "utmp-wtmp.h"
63 #include "loopback-setup.h"
64 #include "path-util.h"
65 #include "syscall-list.h"
67 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
69 /* This assumes there is a 'tty' group */
72 static int shift_fds(int fds[], unsigned n_fds) {
73 int start, restart_from;
78 /* Modifies the fds array! (sorts it) */
88 for (i = start; i < (int) n_fds; i++) {
91 /* Already at right index? */
95 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
98 close_nointr_nofail(fds[i]);
101 /* Hmm, the fd we wanted isn't free? Then
102 * let's remember that and try again from here*/
103 if (nfd != i+3 && restart_from < 0)
107 if (restart_from < 0)
110 start = restart_from;
116 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
125 /* Sets or drops O_NONBLOCK as requested and always drops FD_CLOEXEC on each fd */
127 for (i = 0; i < n_fds; i++) {
129 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
132 /* We unconditionally drop FD_CLOEXEC from the fds,
133 * since after all we want to pass these fds to our
136 if ((r = fd_cloexec(fds[i], false)) < 0)
143 static const char *tty_path(const ExecContext *context) {
146 if (context->tty_path)
147 return context->tty_path;
149 return "/dev/console";
152 void exec_context_tty_reset(const ExecContext *context) {
155 if (context->tty_vhangup)
156 terminal_vhangup(tty_path(context));
158 if (context->tty_reset)
159 reset_terminal(tty_path(context));
161 if (context->tty_vt_disallocate && context->tty_path)
162 vt_disallocate(context->tty_path);
165 static int open_null_as(int flags, int nfd) {
170 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
174 r = dup2(fd, nfd) < 0 ? -errno : nfd;
175 close_nointr_nofail(fd);
182 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
184 union sockaddr_union sa;
187 assert(output < _EXEC_OUTPUT_MAX);
191 fd = socket(AF_UNIX, SOCK_STREAM, 0);
196 sa.un.sun_family = AF_UNIX;
197 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
199 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
201 close_nointr_nofail(fd);
205 if (shutdown(fd, SHUT_RD) < 0) {
206 close_nointr_nofail(fd);
218 context->syslog_identifier ? context->syslog_identifier : ident,
220 context->syslog_priority,
221 !!context->syslog_level_prefix,
222 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
223 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
224 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
227 r = dup2(fd, nfd) < 0 ? -errno : nfd;
228 close_nointr_nofail(fd);
234 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
240 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
244 r = dup2(fd, nfd) < 0 ? -errno : nfd;
245 close_nointr_nofail(fd);
252 static bool is_terminal_input(ExecInput i) {
254 i == EXEC_INPUT_TTY ||
255 i == EXEC_INPUT_TTY_FORCE ||
256 i == EXEC_INPUT_TTY_FAIL;
259 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
261 if (is_terminal_input(std_input) && !apply_tty_stdin)
262 return EXEC_INPUT_NULL;
264 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
265 return EXEC_INPUT_NULL;
270 static int fixup_output(ExecOutput std_output, int socket_fd) {
272 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
273 return EXEC_OUTPUT_INHERIT;
278 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
283 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
287 case EXEC_INPUT_NULL:
288 return open_null_as(O_RDONLY, STDIN_FILENO);
291 case EXEC_INPUT_TTY_FORCE:
292 case EXEC_INPUT_TTY_FAIL: {
295 if ((fd = acquire_terminal(
297 i == EXEC_INPUT_TTY_FAIL,
298 i == EXEC_INPUT_TTY_FORCE,
303 if (fd != STDIN_FILENO) {
304 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
305 close_nointr_nofail(fd);
312 case EXEC_INPUT_SOCKET:
313 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
316 assert_not_reached("Unknown input type");
320 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
327 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
328 o = fixup_output(context->std_output, socket_fd);
330 /* This expects the input is already set up */
334 case EXEC_OUTPUT_INHERIT:
336 /* If input got downgraded, inherit the original value */
337 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
338 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
340 /* If the input is connected to anything that's not a /dev/null, inherit that... */
341 if (i != EXEC_INPUT_NULL)
342 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
344 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
346 return STDOUT_FILENO;
348 /* We need to open /dev/null here anew, to get the
349 * right access mode. So we fall through */
351 case EXEC_OUTPUT_NULL:
352 return open_null_as(O_WRONLY, STDOUT_FILENO);
354 case EXEC_OUTPUT_TTY:
355 if (is_terminal_input(i))
356 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
358 /* We don't reset the terminal if this is just about output */
359 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
361 case EXEC_OUTPUT_SYSLOG:
362 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
363 case EXEC_OUTPUT_KMSG:
364 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
365 case EXEC_OUTPUT_JOURNAL:
366 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
367 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
369 case EXEC_OUTPUT_SOCKET:
370 assert(socket_fd >= 0);
371 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
374 assert_not_reached("Unknown output type");
378 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
385 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
386 o = fixup_output(context->std_output, socket_fd);
387 e = fixup_output(context->std_error, socket_fd);
389 /* This expects the input and output are already set up */
391 /* Don't change the stderr file descriptor if we inherit all
392 * the way and are not on a tty */
393 if (e == EXEC_OUTPUT_INHERIT &&
394 o == EXEC_OUTPUT_INHERIT &&
395 i == EXEC_INPUT_NULL &&
396 !is_terminal_input(context->std_input) &&
398 return STDERR_FILENO;
400 /* Duplicate from stdout if possible */
401 if (e == o || e == EXEC_OUTPUT_INHERIT)
402 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
406 case EXEC_OUTPUT_NULL:
407 return open_null_as(O_WRONLY, STDERR_FILENO);
409 case EXEC_OUTPUT_TTY:
410 if (is_terminal_input(i))
411 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
413 /* We don't reset the terminal if this is just about output */
414 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
416 case EXEC_OUTPUT_SYSLOG:
417 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
418 case EXEC_OUTPUT_KMSG:
419 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
420 case EXEC_OUTPUT_JOURNAL:
421 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
422 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
424 case EXEC_OUTPUT_SOCKET:
425 assert(socket_fd >= 0);
426 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
429 assert_not_reached("Unknown error type");
433 static int chown_terminal(int fd, uid_t uid) {
438 /* This might fail. What matters are the results. */
439 (void) fchown(fd, uid, -1);
440 (void) fchmod(fd, TTY_MODE);
442 if (fstat(fd, &st) < 0)
445 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
451 static int setup_confirm_stdio(int *_saved_stdin,
452 int *_saved_stdout) {
453 int fd = -1, saved_stdin, saved_stdout = -1, r;
455 assert(_saved_stdin);
456 assert(_saved_stdout);
458 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
462 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
463 if (saved_stdout < 0) {
468 fd = acquire_terminal(
473 DEFAULT_CONFIRM_USEC);
479 r = chown_terminal(fd, getuid());
483 if (dup2(fd, STDIN_FILENO) < 0) {
488 if (dup2(fd, STDOUT_FILENO) < 0) {
494 close_nointr_nofail(fd);
496 *_saved_stdin = saved_stdin;
497 *_saved_stdout = saved_stdout;
502 if (saved_stdout >= 0)
503 close_nointr_nofail(saved_stdout);
505 if (saved_stdin >= 0)
506 close_nointr_nofail(saved_stdin);
509 close_nointr_nofail(fd);
514 static int write_confirm_message(const char *format, ...) {
520 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
524 va_start(ap, format);
525 vdprintf(fd, format, ap);
528 close_nointr_nofail(fd);
533 static int restore_confirm_stdio(int *saved_stdin,
539 assert(saved_stdout);
543 if (*saved_stdin >= 0)
544 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
547 if (*saved_stdout >= 0)
548 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
551 if (*saved_stdin >= 0)
552 close_nointr_nofail(*saved_stdin);
554 if (*saved_stdout >= 0)
555 close_nointr_nofail(*saved_stdout);
560 static int ask_for_confirmation(char *response, char **argv) {
561 int saved_stdout = -1, saved_stdin = -1, r;
564 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
568 line = exec_command_line(argv);
572 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
575 restore_confirm_stdio(&saved_stdin, &saved_stdout);
580 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
581 bool keep_groups = false;
586 /* Look up and set GID and supplementary group list. Here too
587 * we avoid NSS lookups for gid=0. */
589 if (context->group || username) {
591 if (context->group) {
592 const char *g = context->group;
594 if ((r = get_group_creds(&g, &gid)) < 0)
598 /* First step, initialize groups from /etc/group */
599 if (username && gid != 0) {
600 if (initgroups(username, gid) < 0)
606 /* Second step, set our gids */
607 if (setresgid(gid, gid, gid) < 0)
611 if (context->supplementary_groups) {
616 /* Final step, initialize any manually set supplementary groups */
617 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
619 if (!(gids = new(gid_t, ngroups_max)))
623 if ((k = getgroups(ngroups_max, gids)) < 0) {
630 STRV_FOREACH(i, context->supplementary_groups) {
633 if (k >= ngroups_max) {
639 r = get_group_creds(&g, gids+k);
648 if (setgroups(k, gids) < 0) {
659 static int enforce_user(const ExecContext *context, uid_t uid) {
663 /* Sets (but doesn't look up) the uid and makes sure we keep the
664 * capabilities while doing so. */
666 if (context->capabilities) {
668 static const cap_value_t bits[] = {
669 CAP_SETUID, /* Necessary so that we can run setresuid() below */
670 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
673 /* First step: If we need to keep capabilities but
674 * drop privileges we need to make sure we keep our
675 * caps, while we drop privileges. */
677 int sb = context->secure_bits|SECURE_KEEP_CAPS;
679 if (prctl(PR_GET_SECUREBITS) != sb)
680 if (prctl(PR_SET_SECUREBITS, sb) < 0)
684 /* Second step: set the capabilities. This will reduce
685 * the capabilities to the minimum we need. */
687 if (!(d = cap_dup(context->capabilities)))
690 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
691 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
697 if (cap_set_proc(d) < 0) {
706 /* Third step: actually set the uids */
707 if (setresuid(uid, uid, uid) < 0)
710 /* At this point we should have all necessary capabilities but
711 are otherwise a normal user. However, the caps might have gotten
712 corrupted due to the setresuid() so we need to clean them up
713 later. This is done outside of this call. */
720 static int null_conv(
722 const struct pam_message **msg,
723 struct pam_response **resp,
726 /* We don't support conversations */
731 static int setup_pam(
737 int fds[], unsigned n_fds) {
739 static const struct pam_conv conv = {
744 pam_handle_t *handle = NULL;
746 int pam_code = PAM_SUCCESS;
749 bool close_session = false;
750 pid_t pam_pid = 0, parent_pid;
756 /* We set up PAM in the parent process, then fork. The child
757 * will then stay around until killed via PR_SET_PDEATHSIG or
758 * systemd via the cgroup logic. It will then remove the PAM
759 * session again. The parent process will exec() the actual
760 * daemon. We do things this way to ensure that the main PID
761 * of the daemon is the one we initially fork()ed. */
763 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
769 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
772 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
775 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
778 close_session = true;
780 if ((!(e = pam_getenvlist(handle)))) {
781 pam_code = PAM_BUF_ERR;
785 /* Block SIGTERM, so that we know that it won't get lost in
787 if (sigemptyset(&ss) < 0 ||
788 sigaddset(&ss, SIGTERM) < 0 ||
789 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
792 parent_pid = getpid();
794 if ((pam_pid = fork()) < 0)
801 /* The child's job is to reset the PAM session on
804 /* This string must fit in 10 chars (i.e. the length
805 * of "/sbin/init"), to look pretty in /bin/ps */
806 rename_process("(sd-pam)");
808 /* Make sure we don't keep open the passed fds in this
809 child. We assume that otherwise only those fds are
810 open here that have been opened by PAM. */
811 close_many(fds, n_fds);
813 /* Drop privileges - we don't need any to pam_close_session
814 * and this will make PR_SET_PDEATHSIG work in most cases.
815 * If this fails, ignore the error - but expect sd-pam threads
816 * to fail to exit normally */
817 if (setresuid(uid, uid, uid) < 0)
818 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
820 /* Wait until our parent died. This will only work if
821 * the above setresuid() succeeds, otherwise the kernel
822 * will not allow unprivileged parents to kill their privileged
823 * children this way. We rely on the control groups kill logic
824 * to do the rest for us. */
825 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
828 /* Check if our parent process might already have
830 if (getppid() == parent_pid) {
832 if (sigwait(&ss, &sig) < 0) {
839 assert(sig == SIGTERM);
844 /* If our parent died we'll end the session */
845 if (getppid() != parent_pid)
846 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
852 pam_end(handle, pam_code | PAM_DATA_SILENT);
856 /* If the child was forked off successfully it will do all the
857 * cleanups, so forget about the handle here. */
860 /* Unblock SIGTERM again in the parent */
861 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
864 /* We close the log explicitly here, since the PAM modules
865 * might have opened it, but we don't want this fd around. */
874 if (pam_code != PAM_SUCCESS)
875 err = -EPERM; /* PAM errors do not map to errno */
881 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
883 pam_end(handle, pam_code | PAM_DATA_SILENT);
891 kill(pam_pid, SIGTERM);
892 kill(pam_pid, SIGCONT);
899 static void rename_process_from_path(const char *path) {
900 char process_name[11];
904 /* This resulting string must fit in 10 chars (i.e. the length
905 * of "/sbin/init") to look pretty in /bin/ps */
907 p = path_get_file_name(path);
909 rename_process("(...)");
915 /* The end of the process name is usually more
916 * interesting, since the first bit might just be
922 process_name[0] = '(';
923 memcpy(process_name+1, p, l);
924 process_name[1+l] = ')';
925 process_name[1+l+1] = 0;
927 rename_process(process_name);
930 static int apply_seccomp(uint32_t *syscall_filter) {
931 static const struct sock_filter header[] = {
932 VALIDATE_ARCHITECTURE,
935 static const struct sock_filter footer[] = {
941 struct sock_filter *f;
942 struct sock_fprog prog;
944 assert(syscall_filter);
946 /* First: count the syscalls to check for */
947 for (i = 0, n = 0; i < syscall_max(); i++)
948 if (syscall_filter[i >> 4] & (1 << (i & 31)))
951 /* Second: build the filter program from a header, the syscall
952 * matches, and the footer */
953 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
954 memcpy(f, header, sizeof(header));
956 for (i = 0, n = 0; i < syscall_max(); i++)
957 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
958 struct sock_filter item[] = {
959 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
960 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
963 assert_cc(ELEMENTSOF(item) == 2);
965 f[ELEMENTSOF(header) + 2*n] = item[0];
966 f[ELEMENTSOF(header) + 2*n+1] = item[1];
971 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
973 /* Third: install the filter */
975 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
977 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
983 int exec_spawn(ExecCommand *command,
985 const ExecContext *context,
986 int fds[], unsigned n_fds,
988 bool apply_permissions,
990 bool apply_tty_stdin,
992 CGroupBonding *cgroup_bondings,
993 CGroupAttribute *cgroup_attributes,
994 const char *cgroup_suffix,
1003 char _cleanup_strv_free_ **files_env = NULL;
1008 assert(fds || n_fds <= 0);
1010 if (context->std_input == EXEC_INPUT_SOCKET ||
1011 context->std_output == EXEC_OUTPUT_SOCKET ||
1012 context->std_error == EXEC_OUTPUT_SOCKET) {
1024 r = exec_context_load_environment(context, &files_env);
1028 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1035 argv = command->argv;
1037 line = exec_command_line(argv);
1041 log_struct(LOG_DEBUG,
1043 "MESSAGE=About to execute %s", line,
1047 r = cgroup_bonding_realize_list(cgroup_bondings);
1051 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1060 const char *username = NULL, *home = NULL;
1061 uid_t uid = (uid_t) -1;
1062 gid_t gid = (gid_t) -1;
1063 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1064 **final_env = NULL, **final_argv = NULL;
1066 bool set_access = false;
1070 rename_process_from_path(command->path);
1072 /* We reset exactly these signals, since they are the
1073 * only ones we set to SIG_IGN in the main daemon. All
1074 * others we leave untouched because we set them to
1075 * SIG_DFL or a valid handler initially, both of which
1076 * will be demoted to SIG_DFL. */
1077 default_signals(SIGNALS_CRASH_HANDLER,
1078 SIGNALS_IGNORE, -1);
1080 if (context->ignore_sigpipe)
1081 ignore_signals(SIGPIPE, -1);
1083 assert_se(sigemptyset(&ss) == 0);
1084 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1086 r = EXIT_SIGNAL_MASK;
1091 if (idle_pipe[1] >= 0)
1092 close_nointr_nofail(idle_pipe[1]);
1093 if (idle_pipe[0] >= 0) {
1094 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1095 close_nointr_nofail(idle_pipe[0]);
1099 /* Close sockets very early to make sure we don't
1100 * block init reexecution because it cannot bind its
1103 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1104 socket_fd >= 0 ? 1 : n_fds);
1110 if (!context->same_pgrp)
1117 if (context->tcpwrap_name) {
1119 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1125 for (i = 0; i < (int) n_fds; i++) {
1126 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1134 exec_context_tty_reset(context);
1136 if (confirm_spawn) {
1139 err = ask_for_confirmation(&response, argv);
1140 if (err == -ETIMEDOUT)
1141 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1143 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1144 else if (response == 's') {
1145 write_confirm_message("Skipping execution.\n");
1149 } else if (response == 'n') {
1150 write_confirm_message("Failing execution.\n");
1156 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1157 * must make sure to drop O_NONBLOCK */
1159 fd_nonblock(socket_fd, false);
1161 err = setup_input(context, socket_fd, apply_tty_stdin);
1167 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1173 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1179 if (cgroup_bondings) {
1180 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1187 if (context->oom_score_adjust_set) {
1190 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1193 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1195 r = EXIT_OOM_ADJUST;
1200 if (context->nice_set)
1201 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1207 if (context->cpu_sched_set) {
1208 struct sched_param param;
1211 param.sched_priority = context->cpu_sched_priority;
1213 if (sched_setscheduler(0, context->cpu_sched_policy |
1214 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1216 r = EXIT_SETSCHEDULER;
1221 if (context->cpuset)
1222 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1224 r = EXIT_CPUAFFINITY;
1228 if (context->ioprio_set)
1229 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1235 if (context->timer_slack_nsec != (nsec_t) -1)
1236 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1238 r = EXIT_TIMERSLACK;
1242 if (context->utmp_id)
1243 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1245 if (context->user) {
1246 username = context->user;
1247 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1253 if (is_terminal_input(context->std_input)) {
1254 err = chown_terminal(STDIN_FILENO, uid);
1261 if (cgroup_bondings && context->control_group_modify) {
1262 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1264 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1274 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1275 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1282 if (apply_permissions) {
1283 err = enforce_groups(context, username, gid);
1290 umask(context->umask);
1293 if (apply_permissions && context->pam_name && username) {
1294 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1301 if (context->private_network) {
1302 if (unshare(CLONE_NEWNET) < 0) {
1311 if (strv_length(context->read_write_dirs) > 0 ||
1312 strv_length(context->read_only_dirs) > 0 ||
1313 strv_length(context->inaccessible_dirs) > 0 ||
1314 context->mount_flags != 0 ||
1315 context->private_tmp) {
1316 err = setup_namespace(context->read_write_dirs,
1317 context->read_only_dirs,
1318 context->inaccessible_dirs,
1319 context->private_tmp,
1320 context->mount_flags);
1328 if (context->root_directory)
1329 if (chroot(context->root_directory) < 0) {
1335 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1341 char _cleanup_free_ *d = NULL;
1343 if (asprintf(&d, "%s/%s",
1344 context->root_directory ? context->root_directory : "",
1345 context->working_directory ? context->working_directory : "") < 0) {
1358 /* We repeat the fd closing here, to make sure that
1359 * nothing is leaked from the PAM modules */
1360 err = close_all_fds(fds, n_fds);
1362 err = shift_fds(fds, n_fds);
1364 err = flags_fds(fds, n_fds, context->non_blocking);
1370 if (apply_permissions) {
1372 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1373 if (!context->rlimit[i])
1376 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1383 if (context->capability_bounding_set_drop) {
1384 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1386 r = EXIT_CAPABILITIES;
1391 if (context->user) {
1392 err = enforce_user(context, uid);
1399 /* PR_GET_SECUREBITS is not privileged, while
1400 * PR_SET_SECUREBITS is. So to suppress
1401 * potential EPERMs we'll try not to call
1402 * PR_SET_SECUREBITS unless necessary. */
1403 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1404 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1406 r = EXIT_SECUREBITS;
1410 if (context->capabilities)
1411 if (cap_set_proc(context->capabilities) < 0) {
1413 r = EXIT_CAPABILITIES;
1417 if (context->no_new_privileges)
1418 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1420 r = EXIT_NO_NEW_PRIVILEGES;
1424 if (context->syscall_filter) {
1425 err = apply_seccomp(context->syscall_filter);
1433 if (!(our_env = new0(char*, 7))) {
1440 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1441 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1448 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1455 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1456 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1462 if (is_terminal_input(context->std_input) ||
1463 context->std_output == EXEC_OUTPUT_TTY ||
1464 context->std_error == EXEC_OUTPUT_TTY)
1465 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1473 if (!(final_env = strv_env_merge(
1477 context->environment,
1486 if (!(final_argv = replace_env_argv(argv, final_env))) {
1492 final_env = strv_env_clean(final_env);
1494 execve(command->path, final_argv, final_env);
1501 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1502 "EXECUTABLE=%s", command->path,
1503 "MESSAGE=Failed at step %s spawning %s: %s",
1504 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1505 command->path, strerror(-err),
1514 log_struct(LOG_DEBUG,
1516 "MESSAGE=Forked %s as %lu",
1517 command->path, (unsigned long) pid,
1520 /* We add the new process to the cgroup both in the child (so
1521 * that we can be sure that no user code is ever executed
1522 * outside of the cgroup) and in the parent (so that we can be
1523 * sure that when we kill the cgroup the process will be
1525 if (cgroup_bondings)
1526 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1528 exec_status_start(&command->exec_status, pid);
1534 void exec_context_init(ExecContext *c) {
1538 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1539 c->cpu_sched_policy = SCHED_OTHER;
1540 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1541 c->syslog_level_prefix = true;
1542 c->control_group_persistent = -1;
1543 c->ignore_sigpipe = true;
1544 c->timer_slack_nsec = (nsec_t) -1;
1547 void exec_context_done(ExecContext *c) {
1552 strv_free(c->environment);
1553 c->environment = NULL;
1555 strv_free(c->environment_files);
1556 c->environment_files = NULL;
1558 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1560 c->rlimit[l] = NULL;
1563 free(c->working_directory);
1564 c->working_directory = NULL;
1565 free(c->root_directory);
1566 c->root_directory = NULL;
1571 free(c->tcpwrap_name);
1572 c->tcpwrap_name = NULL;
1574 free(c->syslog_identifier);
1575 c->syslog_identifier = NULL;
1583 strv_free(c->supplementary_groups);
1584 c->supplementary_groups = NULL;
1589 if (c->capabilities) {
1590 cap_free(c->capabilities);
1591 c->capabilities = NULL;
1594 strv_free(c->read_only_dirs);
1595 c->read_only_dirs = NULL;
1597 strv_free(c->read_write_dirs);
1598 c->read_write_dirs = NULL;
1600 strv_free(c->inaccessible_dirs);
1601 c->inaccessible_dirs = NULL;
1604 CPU_FREE(c->cpuset);
1609 free(c->syscall_filter);
1610 c->syscall_filter = NULL;
1613 void exec_command_done(ExecCommand *c) {
1623 void exec_command_done_array(ExecCommand *c, unsigned n) {
1626 for (i = 0; i < n; i++)
1627 exec_command_done(c+i);
1630 void exec_command_free_list(ExecCommand *c) {
1634 LIST_REMOVE(ExecCommand, command, c, i);
1635 exec_command_done(i);
1640 void exec_command_free_array(ExecCommand **c, unsigned n) {
1643 for (i = 0; i < n; i++) {
1644 exec_command_free_list(c[i]);
1649 int exec_context_load_environment(const ExecContext *c, char ***l) {
1650 char **i, **r = NULL;
1655 STRV_FOREACH(i, c->environment_files) {
1658 bool ignore = false;
1668 if (!path_is_absolute(fn)) {
1677 if ((k = load_env_file(fn, &p)) < 0) {
1691 m = strv_env_merge(2, r, p);
1707 static void strv_fprintf(FILE *f, char **l) {
1713 fprintf(f, " %s", *g);
1716 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1728 "%sWorkingDirectory: %s\n"
1729 "%sRootDirectory: %s\n"
1730 "%sNonBlocking: %s\n"
1731 "%sPrivateTmp: %s\n"
1732 "%sControlGroupModify: %s\n"
1733 "%sControlGroupPersistent: %s\n"
1734 "%sPrivateNetwork: %s\n"
1735 "%sIgnoreSIGPIPE: %s\n",
1737 prefix, c->working_directory ? c->working_directory : "/",
1738 prefix, c->root_directory ? c->root_directory : "/",
1739 prefix, yes_no(c->non_blocking),
1740 prefix, yes_no(c->private_tmp),
1741 prefix, yes_no(c->control_group_modify),
1742 prefix, yes_no(c->control_group_persistent),
1743 prefix, yes_no(c->private_network),
1744 prefix, yes_no(c->ignore_sigpipe));
1746 STRV_FOREACH(e, c->environment)
1747 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1749 STRV_FOREACH(e, c->environment_files)
1750 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1752 if (c->tcpwrap_name)
1754 "%sTCPWrapName: %s\n",
1755 prefix, c->tcpwrap_name);
1762 if (c->oom_score_adjust_set)
1764 "%sOOMScoreAdjust: %i\n",
1765 prefix, c->oom_score_adjust);
1767 for (i = 0; i < RLIM_NLIMITS; i++)
1769 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1771 if (c->ioprio_set) {
1775 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1779 "%sIOSchedulingClass: %s\n"
1780 "%sIOPriority: %i\n",
1781 prefix, strna(class_str),
1782 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1786 if (c->cpu_sched_set) {
1790 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1794 "%sCPUSchedulingPolicy: %s\n"
1795 "%sCPUSchedulingPriority: %i\n"
1796 "%sCPUSchedulingResetOnFork: %s\n",
1797 prefix, strna(policy_str),
1798 prefix, c->cpu_sched_priority,
1799 prefix, yes_no(c->cpu_sched_reset_on_fork));
1804 fprintf(f, "%sCPUAffinity:", prefix);
1805 for (i = 0; i < c->cpuset_ncpus; i++)
1806 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1807 fprintf(f, " %i", i);
1811 if (c->timer_slack_nsec != (nsec_t) -1)
1812 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1815 "%sStandardInput: %s\n"
1816 "%sStandardOutput: %s\n"
1817 "%sStandardError: %s\n",
1818 prefix, exec_input_to_string(c->std_input),
1819 prefix, exec_output_to_string(c->std_output),
1820 prefix, exec_output_to_string(c->std_error));
1826 "%sTTYVHangup: %s\n"
1827 "%sTTYVTDisallocate: %s\n",
1828 prefix, c->tty_path,
1829 prefix, yes_no(c->tty_reset),
1830 prefix, yes_no(c->tty_vhangup),
1831 prefix, yes_no(c->tty_vt_disallocate));
1833 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1834 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1835 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1836 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1837 char *fac_str, *lvl_str;
1840 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1844 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1849 "%sSyslogFacility: %s\n"
1850 "%sSyslogLevel: %s\n",
1851 prefix, strna(fac_str),
1852 prefix, strna(lvl_str));
1857 if (c->capabilities) {
1859 if ((t = cap_to_text(c->capabilities, NULL))) {
1860 fprintf(f, "%sCapabilities: %s\n",
1867 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1869 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1870 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1871 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1872 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1873 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1874 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1876 if (c->capability_bounding_set_drop) {
1878 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1880 for (l = 0; l <= cap_last_cap(); l++)
1881 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1884 if ((t = cap_to_name(l))) {
1885 fprintf(f, " %s", t);
1894 fprintf(f, "%sUser: %s\n", prefix, c->user);
1896 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1898 if (strv_length(c->supplementary_groups) > 0) {
1899 fprintf(f, "%sSupplementaryGroups:", prefix);
1900 strv_fprintf(f, c->supplementary_groups);
1905 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1907 if (strv_length(c->read_write_dirs) > 0) {
1908 fprintf(f, "%sReadWriteDirs:", prefix);
1909 strv_fprintf(f, c->read_write_dirs);
1913 if (strv_length(c->read_only_dirs) > 0) {
1914 fprintf(f, "%sReadOnlyDirs:", prefix);
1915 strv_fprintf(f, c->read_only_dirs);
1919 if (strv_length(c->inaccessible_dirs) > 0) {
1920 fprintf(f, "%sInaccessibleDirs:", prefix);
1921 strv_fprintf(f, c->inaccessible_dirs);
1927 "%sUtmpIdentifier: %s\n",
1928 prefix, c->utmp_id);
/* Records the start of a process in the ExecStatus structure *s.
 *
 * The visible code stamps s->start_timestamp with the current time via
 * dual_timestamp_get().
 *
 * NOTE(review): how the pid argument is used is not visible in this excerpt;
 * presumably it is stored in *s -- confirm against the full function. */
1931 void exec_status_start(ExecStatus *s, pid_t pid) {
/* Remember when the process was started. */
1936 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in the ExecStatus structure *s.
 *
 * s:       status record to update
 * context: execution context; its utmp_id is consulted and it is passed to
 *          exec_context_tty_reset()
 * pid:     the process that exited
 * code:    exit code, forwarded to utmp_put_dead_process()
 * status:  exit status, forwarded to utmp_put_dead_process()
 *
 * NOTE(review): statements between the visible lines are missing from this
 * excerpt (e.g. the body of the pid-mismatch check and whatever stores
 * pid/code/status in *s) -- confirm against the full function. */
1939 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* Special-case a record that already carries a different, non-zero pid.
 * The action taken is not visible in this excerpt. */
1942 if (s->pid && s->pid != pid)
/* Remember when the process exited. */
1946 dual_timestamp_get(&s->exit_timestamp);
/* Only if the context has a utmp identifier, report the dead process via
 * utmp_put_dead_process(). */
1952 if (context->utmp_id)
1953 utmp_put_dead_process(context->utmp_id, pid, code, status);
/* Hand the context to exec_context_tty_reset(). */
1955 exec_context_tty_reset(context);
/* Writes a human-readable description of the ExecStatus *s.
 *
 * s:      status record to print
 * f:      output stream (NOTE(review): the calls that write to f are not
 *         visible in this excerpt; only their arguments are -- confirm)
 * prefix: string placed at the start of every output line (each format
 *         string begins with "%s" fed from prefix)
 *
 * The visible arguments print the pid, the start timestamp when its realtime
 * value is non-zero, and, when the exit timestamp's realtime value is
 * non-zero, the exit timestamp, the code rendered by
 * sigchld_code_to_string(), and the exit status. */
1959 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() for both timestamps. */
1960 char buf[FORMAT_TIMESTAMP_MAX];
1973 prefix, (unsigned long) s->pid);
/* Only print the start time if one was recorded. */
1975 if (s->start_timestamp.realtime > 0)
1977 "%sStart Timestamp: %s\n",
1978 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Only print the exit information if an exit time was recorded. */
1980 if (s->exit_timestamp.realtime > 0)
1982 "%sExit Timestamp: %s\n"
1984 "%sExit Status: %i\n",
1985 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1986 prefix, sigchld_code_to_string(s->code),
/* Builds a single string out of the string vector argv and returns it as a
 * char pointer.
 *
 * The visible code walks argv twice with STRV_FOREACH, allocates a buffer of
 * k chars in between, and in the second pass treats arguments containing
 * any WHITESPACE character specially.
 *
 * NOTE(review): presumably the first pass computes the required size k and
 * the special handling is quoting; the return statements are not visible
 * either, though the caller in exec_command_dump() treats a NULL result as
 * out-of-memory -- confirm against the full function. */
1990 char *exec_command_line(char **argv) {
/* First pass over all arguments. */
1998 STRV_FOREACH(a, argv)
/* Allocate the result buffer: k chars; this branch handles allocation failure. */
2001 if (!(n = new(char, k)))
/* Second pass over all arguments. */
2005 STRV_FOREACH(a, argv) {
/* Arguments that contain whitespace take a separate path. */
2012 if (strpbrk(*a, WHITESPACE)) {
2023 /* FIXME: this doesn't really handle arguments that have
2024 * spaces and ticks in them */
/* Writes a human-readable description of the command *c.
 *
 * c:      command to describe; its argv and exec_status are read
 * f:      output stream, passed on to exec_status_dump()
 * prefix: string placed at the start of the "Command Line" output line
 *
 * The command's status is dumped through exec_status_dump() with a prefix
 * extended by a tab.
 *
 * NOTE(review): the release of p2 and cmd is not visible in this excerpt --
 * confirm against the full function. */
2029 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2031 const char *prefix2;
/* Build the prefix for the status lines: prefix plus a tab. If p2 is NULL,
 * reuse the plain prefix instead. */
2040 p2 = strappend(prefix, "\t");
2041 prefix2 = p2 ? p2 : prefix;
/* Render argv as one string; a NULL result is printed as the ENOMEM error
 * text below. */
2043 cmd = exec_command_line(c->argv);
2046 "%sCommand Line: %s\n",
2047 prefix, cmd ? cmd : strerror(ENOMEM));
/* Dump the execution status of this command using the extended prefix. */
2051 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of a list by calling exec_command_dump() on each
 * entry.
 *
 * c:      first command to dump; also reused as the iteration cursor of
 *         LIST_FOREACH over the "command" list field
 * f:      output stream, passed through to exec_command_dump()
 * prefix: line prefix, passed through to exec_command_dump() */
2056 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c doubles as the loop variable here. */
2062 LIST_FOREACH(command, c, c)
2063 exec_command_dump(c, f, prefix);
/* Appends the command e to the end of the command list *l.
 *
 * l: pointer to the list head; the head itself is handed to the list macros
 * e: the command to add
 *
 * The visible code looks up the current tail of *l and inserts e after it,
 * so existing entries keep their relative order.
 *
 * NOTE(review): any special handling (e.g. for an empty list) is not
 * visible in this excerpt -- confirm against the full function. */
2066 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2073 /* It's kind of important, that we keep the order here */
/* Locate the last element of the list ("end"), then link e right behind it. */
2074 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2075 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for the command *c: takes a path followed by a variable
 * argument list and returns an int.
 *
 * The visible code builds a string vector from path plus the variadic
 * arguments via strv_new_ap() and makes a separate copy of path with
 * strdup().
 *
 * NOTE(review): how l and p are stored in *c, the error return values, and
 * the cleanup on failure are not visible in this excerpt -- confirm against
 * the full function. */
2080 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* String vector made of path followed by the variadic arguments. */
2088 l = strv_new_ap(path, ap);
/* Private copy of the path; this branch handles a failed strdup(). */
2094 if (!(p = strdup(path))) {
/* Name table for the ExecInput enumeration: maps each listed EXEC_INPUT_*
 * value (used as array index) to its textual name. The array is sized by
 * _EXEC_INPUT_MAX. */
2108 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2109 [EXEC_INPUT_NULL] = "null",
2110 [EXEC_INPUT_TTY] = "tty",
2111 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2112 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2113 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably this macro generates the string lookup helpers
 * for ExecInput (such as exec_input_to_string()) on top of the table above;
 * the macro definition is not visible here -- confirm. */
2116 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Name table for the ExecOutput enumeration: maps each listed EXEC_OUTPUT_*
 * value (used as array index) to its textual name. The "+console" names
 * belong to the *_AND_CONSOLE variants. The array is sized by
 * _EXEC_OUTPUT_MAX. */
2118 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2119 [EXEC_OUTPUT_INHERIT] = "inherit",
2120 [EXEC_OUTPUT_NULL] = "null",
2121 [EXEC_OUTPUT_TTY] = "tty",
2122 [EXEC_OUTPUT_SYSLOG] = "syslog",
2123 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2124 [EXEC_OUTPUT_KMSG] = "kmsg",
2125 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2126 [EXEC_OUTPUT_JOURNAL] = "journal",
2127 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2128 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably this macro generates the string lookup helpers
 * for ExecOutput (such as exec_output_to_string()) on top of the table
 * above; the macro definition is not visible here -- confirm. */
2131 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);