1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 /* This assumes there is a 'tty' group */
/* Moves the passed descriptors so that fds[i] ends up at descriptor number
 * i+3. Each entry that is not yet in place is duplicated with F_DUPFD (lowest
 * free descriptor >= i+3) and the old descriptor is closed. If the wanted
 * slot was occupied, the index is remembered in restart_from and another
 * pass is started from there.
 * NOTE(review): several lines are missing from this excerpt (how fds[] is
 * updated, the loop exits and the return value) - confirm against the full
 * source. */
75 static int shift_fds(int fds[], unsigned n_fds) {
76 int start, restart_from;
81 /* Modifies the fds array! (sorts it) */
91 for (i = start; i < (int) n_fds; i++) {
94 /* Already at right index? */
98 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
101 close_nointr_nofail(fds[i]);
104 /* Hmm, the fd we wanted isn't free? Then
105 * let's remember that and try again from here */
106 if (nfd != i+3 && restart_from < 0)
110 if (restart_from < 0)
113 start = restart_from;
/* For every descriptor in fds[]: applies fd_nonblock(fd, nonblock) and then
 * clears the close-on-exec flag with fd_cloexec(fd, false). A negative
 * result of either helper is checked; what happens on failure and the final
 * return value are not visible in this excerpt. */
119 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
128 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
130 for (i = 0; i < n_fds; i++) {
132 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
135 /* We unconditionally drop FD_CLOEXEC from the fds,
136 * since after all we want to pass these fds to our
139 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when
 * context->tty_path is not set. The returned string is not a copy. */
146 static const char *tty_path(const ExecContext *context) {
149 if (context->tty_path)
150 return context->tty_path;
152 return "/dev/console";
/* Runs the TTY clean-up steps that are enabled in the context:
 *  - tty_vhangup: terminal_vhangup() on tty_path(context)
 *  - tty_reset: reset_terminal() on tty_path(context)
 *  - tty_vt_disallocate: vt_disallocate(), only when an explicit
 *    context->tty_path is configured (no /dev/console fallback here). */
155 void exec_context_tty_reset(const ExecContext *context) {
158 if (context->tty_vhangup)
159 terminal_vhangup(tty_path(context));
161 if (context->tty_reset)
162 reset_terminal(tty_path(context));
164 if (context->tty_vt_disallocate && context->tty_path)
165 vt_disallocate(context->tty_path);
/* Opens /dev/null with the given open(2) flags (O_NOCTTY is always added)
 * and duplicates it onto descriptor nfd with dup2(); the temporary
 * descriptor is closed afterwards. The dup2() line evaluates to nfd on
 * success and to -errno on failure.
 * NOTE(review): the handling of a failed open() and of fd == nfd is not
 * visible in this excerpt. */
168 static int open_null_as(int flags, int nfd) {
173 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
177 r = dup2(fd, nfd) < 0 ? -errno : nfd;
178 close_nointr_nofail(fd);
/* Connects a SOCK_STREAM AF_UNIX socket to /run/systemd/journal/stdout,
 * shuts down its read side and finally installs it as descriptor nfd via
 * dup2() (nfd on success, -errno on failure). The socket is closed when
 * connect() or shutdown() fails and after the dup2().
 *
 * The argument list in the middle (identifier - syslog_identifier if set,
 * otherwise ident -, syslog priority, level-prefix flag and three booleans
 * derived from `output`) belongs to a call whose first line is not visible
 * in this excerpt; presumably it writes the stream header expected by the
 * journal - TODO confirm. */
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
187 union sockaddr_union sa;
190 assert(output < _EXEC_OUTPUT_MAX);
194 fd = socket(AF_UNIX, SOCK_STREAM, 0);
199 sa.un.sun_family = AF_UNIX;
200 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
202 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
204 close_nointr_nofail(fd);
208 if (shutdown(fd, SHUT_RD) < 0) {
209 close_nointr_nofail(fd);
221 context->syslog_identifier ? context->syslog_identifier : ident,
223 context->syslog_priority,
224 !!context->syslog_level_prefix,
225 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231 close_nointr_nofail(fd);
/* Opens the terminal at `path` with open_terminal() (mode plus O_NOCTTY)
 * and duplicates it onto descriptor nfd with dup2(); the temporary
 * descriptor is closed afterwards. The dup2() line evaluates to nfd on
 * success and to -errno on failure.
 * NOTE(review): the handling of a failed open_terminal() and of fd == nfd
 * is not visible in this excerpt. */
237 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
243 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
247 r = dup2(fd, nfd) < 0 ? -errno : nfd;
248 close_nointr_nofail(fd);
/* Tests whether i is one of the three terminal input modes:
 * EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL. */
255 static bool is_terminal_input(ExecInput i) {
257 i == EXEC_INPUT_TTY ||
258 i == EXEC_INPUT_TTY_FORCE ||
259 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the requested stdin mode to EXEC_INPUT_NULL when it cannot be
 * honoured: terminal input while apply_tty_stdin is false, or socket input
 * without a valid socket_fd.
 * NOTE(review): the fall-through return is not visible in this excerpt;
 * presumably std_input is returned unchanged - confirm. */
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
264 if (is_terminal_input(std_input) && !apply_tty_stdin)
265 return EXEC_INPUT_NULL;
267 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268 return EXEC_INPUT_NULL;
/* Downgrades socket output to EXEC_OUTPUT_INHERIT when no valid socket_fd
 * is available.
 * NOTE(review): the fall-through return is not visible in this excerpt;
 * presumably std_output is returned unchanged - confirm. */
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
275 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the fixed-up std_input mode:
 *  - EXEC_INPUT_NULL: /dev/null opened read-only
 *  - TTY modes: a terminal obtained with acquire_terminal() (the FAIL and
 *    FORCE variants are passed on as flags) and dup2()'ed onto stdin if it
 *    is not already descriptor 0
 *  - EXEC_INPUT_SOCKET: socket_fd dup2()'ed onto stdin
 * The visible return expressions yield STDIN_FILENO on success and -errno
 * on failure. Some case labels and arguments are missing from this
 * excerpt. */
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
286 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
290 case EXEC_INPUT_NULL:
291 return open_null_as(O_RDONLY, STDIN_FILENO);
294 case EXEC_INPUT_TTY_FORCE:
295 case EXEC_INPUT_TTY_FAIL: {
298 if ((fd = acquire_terminal(
300 i == EXEC_INPUT_TTY_FAIL,
301 i == EXEC_INPUT_TTY_FORCE,
306 if (fd != STDIN_FILENO) {
307 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308 close_nointr_nofail(fd);
315 case EXEC_INPUT_SOCKET:
316 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
319 assert_not_reached("Unknown input type");
/* Sets up STDOUT_FILENO according to the fixed-up std_output mode; stdin
 * must already be in place.
 *  - INHERIT: re-open the TTY if terminal input was downgraded to null,
 *    otherwise duplicate stdin when it is not /dev/null; further branches
 *    (one of them leaving stdout untouched) have conditions that are not
 *    visible in this excerpt
 *  - NULL: /dev/null opened write-only
 *  - TTY: duplicate stdin when it is a terminal, else open tty_path()
 *  - SYSLOG/KMSG/JOURNAL (with or without console): connect_logger_as();
 *    the visible lines log an error and fall back to /dev/null -
 *    presumably only when the connection failed, the condition is not
 *    visible
 *  - SOCKET: socket_fd duplicated onto stdout
 * The visible return expressions yield STDOUT_FILENO or -errno. */
323 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
331 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
332 o = fixup_output(context->std_output, socket_fd);
334 /* This expects the input is already set up */
338 case EXEC_OUTPUT_INHERIT:
340 /* If input got downgraded, inherit the original value */
341 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
342 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
344 /* If the input is connected to anything that's not a /dev/null, inherit that... */
345 if (i != EXEC_INPUT_NULL)
346 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
348 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
350 return STDOUT_FILENO;
352 /* We need to open /dev/null here anew, to get the
353 * right access mode. So we fall through */
355 case EXEC_OUTPUT_NULL:
356 return open_null_as(O_WRONLY, STDOUT_FILENO);
358 case EXEC_OUTPUT_TTY:
359 if (is_terminal_input(i))
360 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
362 /* We don't reset the terminal if this is just about output */
363 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
365 case EXEC_OUTPUT_SYSLOG:
366 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
367 case EXEC_OUTPUT_KMSG:
368 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
369 case EXEC_OUTPUT_JOURNAL:
370 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
371 r = connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
373 log_error("Failed to connect stdout of %s to the journal socket: %s", unit_id, strerror(-r));
374 r = open_null_as(O_WRONLY, STDOUT_FILENO);
378 case EXEC_OUTPUT_SOCKET:
379 assert(socket_fd >= 0);
380 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
383 assert_not_reached("Unknown output type");
/* Sets up STDERR_FILENO according to the fixed-up std_error mode; stdin and
 * stdout must already be in place.
 *  - stderr is left untouched when input, output and error all inherit and
 *    no terminal is involved (one more term of that condition is not
 *    visible in this excerpt)
 *  - when the error mode equals the output mode, or is INHERIT, stdout is
 *    duplicated onto stderr
 *  - otherwise the mode is handled like in setup_output(): /dev/null, TTY,
 *    logger connection (the visible lines log an error and fall back to
 *    /dev/null - presumably only on failure) or socket
 * The visible return expressions yield STDERR_FILENO or -errno. */
387 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
395 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
396 o = fixup_output(context->std_output, socket_fd);
397 e = fixup_output(context->std_error, socket_fd);
399 /* This expects the input and output are already set up */
401 /* Don't change the stderr file descriptor if we inherit all
402 * the way and are not on a tty */
403 if (e == EXEC_OUTPUT_INHERIT &&
404 o == EXEC_OUTPUT_INHERIT &&
405 i == EXEC_INPUT_NULL &&
406 !is_terminal_input(context->std_input) &&
408 return STDERR_FILENO;
410 /* Duplicate from stdout if possible */
411 if (e == o || e == EXEC_OUTPUT_INHERIT)
412 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
416 case EXEC_OUTPUT_NULL:
417 return open_null_as(O_WRONLY, STDERR_FILENO);
419 case EXEC_OUTPUT_TTY:
420 if (is_terminal_input(i))
421 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
423 /* We don't reset the terminal if this is just about output */
424 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
426 case EXEC_OUTPUT_SYSLOG:
427 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
428 case EXEC_OUTPUT_KMSG:
429 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
430 case EXEC_OUTPUT_JOURNAL:
431 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
432 r = connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
434 log_error("Failed to connect stderr of %s to the journal socket: %s", unit_id, strerror(-r));
435 r = open_null_as(O_WRONLY, STDERR_FILENO);
439 case EXEC_OUTPUT_SOCKET:
440 assert(socket_fd >= 0);
441 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
444 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind fd over to uid: fchown() to uid (group
 * unchanged) and fchmod() to TTY_MODE, both best-effort with their results
 * deliberately ignored. Afterwards fstat() is used to verify that the owner
 * and the permission bits (mode & 0777) really match.
 * NOTE(review): the values returned on fstat() failure, on mismatch and on
 * success are not visible in this excerpt. */
448 static int chown_terminal(int fd, uid_t uid) {
453 /* This might fail. What matters are the results. */
454 (void) fchown(fd, uid, -1);
455 (void) fchmod(fd, TTY_MODE);
457 if (fstat(fd, &st) < 0)
460 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily redirects stdin and stdout to a terminal for the confirmation
 * prompt. Copies of the current stdin/stdout are made with F_DUPFD
 * (descriptor >= 3), a terminal is acquired with acquire_terminal() (last
 * visible argument DEFAULT_CONFIRM_USEC), chowned to getuid() and
 * dup2()'ed onto stdin and stdout. The saved descriptors are handed back
 * through _saved_stdin/_saved_stdout so the caller can restore them.
 * The closes at the end release the saved copies and the terminal;
 * presumably this is the failure path - the label and the return values
 * are not visible in this excerpt. */
466 static int setup_confirm_stdio(int *_saved_stdin,
467 int *_saved_stdout) {
468 int fd = -1, saved_stdin, saved_stdout = -1, r;
470 assert(_saved_stdin);
471 assert(_saved_stdout);
473 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
477 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
478 if (saved_stdout < 0) {
483 fd = acquire_terminal(
488 DEFAULT_CONFIRM_USEC);
494 r = chown_terminal(fd, getuid());
498 if (dup2(fd, STDIN_FILENO) < 0) {
503 if (dup2(fd, STDOUT_FILENO) < 0) {
509 close_nointr_nofail(fd);
511 *_saved_stdin = saved_stdin;
512 *_saved_stdout = saved_stdout;
517 if (saved_stdout >= 0)
518 close_nointr_nofail(saved_stdout);
520 if (saved_stdin >= 0)
521 close_nointr_nofail(saved_stdin);
524 close_nointr_nofail(fd);
/* Prints a printf-style message to /dev/console: the console is opened
 * write-only (O_NOCTTY, O_CLOEXEC) with open_terminal(), the message is
 * written with vdprintf() and the descriptor is closed again.
 * NOTE(review): the handling of a failed open and the return value are not
 * visible in this excerpt. */
529 static int write_confirm_message(const char *format, ...) {
535 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
539 va_start(ap, format);
540 vdprintf(fd, format, ap);
543 close_nointr_nofail(fd);
/* Undoes setup_confirm_stdio(): every saved descriptor that is valid (>= 0)
 * is dup2()'ed back onto STDIN_FILENO / STDOUT_FILENO and then closed.
 * NOTE(review): the second parameter, the reaction to a failing dup2() and
 * the return value are not visible in this excerpt. */
548 static int restore_confirm_stdio(int *saved_stdin,
554 assert(saved_stdout);
558 if (*saved_stdin >= 0)
559 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
562 if (*saved_stdout >= 0)
563 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
566 if (*saved_stdin >= 0)
567 close_nointr_nofail(*saved_stdin);
569 if (*saved_stdout >= 0)
570 close_nointr_nofail(*saved_stdout);
/* Asks on the terminal whether the command in argv shall be executed.
 * stdio is switched with setup_confirm_stdio(), the command line is
 * rendered with exec_command_line() and ask() is called with the accepted
 * answer characters "yns" (Yes, No, Skip), storing the answer in *response;
 * finally stdio is restored with restore_confirm_stdio().
 * NOTE(review): error handling between these steps and the return value
 * are not visible in this excerpt. */
575 static int ask_for_confirmation(char *response, char **argv) {
576 int saved_stdout = -1, saved_stdin = -1, r;
579 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
583 line = exec_command_line(argv);
587 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
590 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group identity configured in the context to the calling
 * process:
 *  1. if context->group is set, it is resolved with get_group_creds() and
 *     replaces the passed gid
 *  2. for a known username and a non-zero gid the supplementary groups are
 *     initialised with initgroups()
 *  3. real, effective and saved gid are set with setresgid()
 *  4. if context->supplementary_groups is set, the current group list is
 *     fetched with getgroups(), each configured group is resolved and
 *     appended (bounded by _SC_NGROUPS_MAX) and the list is installed with
 *     setgroups()
 * NOTE(review): the failure branches, the use of keep_groups and the return
 * value are not visible in this excerpt. */
595 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
596 bool keep_groups = false;
601 /* Lookup and set GID and supplementary group list. Here too
602 * we avoid NSS lookups for gid=0. */
604 if (context->group || username) {
606 if (context->group) {
607 const char *g = context->group;
609 if ((r = get_group_creds(&g, &gid)) < 0)
613 /* First step, initialize groups from /etc/group */
614 if (username && gid != 0) {
615 if (initgroups(username, gid) < 0)
621 /* Second step, set our gids */
622 if (setresgid(gid, gid, gid) < 0)
626 if (context->supplementary_groups) {
631 /* Final step, initialize any manually set supplementary groups */
632 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
634 if (!(gids = new(gid_t, ngroups_max)))
638 if ((k = getgroups(ngroups_max, gids)) < 0) {
645 STRV_FOREACH(i, context->supplementary_groups) {
648 if (k >= ngroups_max) {
654 r = get_group_creds(&g, gids+k);
663 if (setgroups(k, gids) < 0) {
/* Switches the calling process to the given uid with setresuid(). When the
 * context carries a capability set, the switch is prepared so that the
 * capabilities survive it: SECURE_KEEP_CAPS is added to the secure bits
 * (only set if they differ from the current ones), and a copy of the
 * configured capabilities extended by CAP_SETUID and CAP_SETPCAP (effective
 * and permitted) is installed with cap_set_proc() beforehand.
 * NOTE(review): the failure branches, the release of the duplicated
 * capability set and the return value are not visible in this excerpt. */
674 static int enforce_user(const ExecContext *context, uid_t uid) {
678 /* Sets (but doesn't lookup) the uid and make sure we keep the
679 * capabilities while doing so. */
681 if (context->capabilities) {
683 static const cap_value_t bits[] = {
684 CAP_SETUID, /* Necessary so that we can run setresuid() below */
685 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
688 /* First step: If we need to keep capabilities but
689 * drop privileges we need to make sure we keep our
690 * caps, while we drop privileges. */
692 int sb = context->secure_bits|SECURE_KEEP_CAPS;
694 if (prctl(PR_GET_SECUREBITS) != sb)
695 if (prctl(PR_SET_SECUREBITS, sb) < 0)
699 /* Second step: set the capabilities. This will reduce
700 * the capabilities to the minimum we need. */
702 if (!(d = cap_dup(context->capabilities)))
705 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
706 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
712 if (cap_set_proc(d) < 0) {
721 /* Third step: actually set the uids */
722 if (setresuid(uid, uid, uid) < 0)
725 /* At this point we should have all necessary capabilities but
726 are otherwise a normal user. However, the caps might have got
727 corrupted due to the setresuid() so we need to clean them up
728 later. This is done outside of this call. */
/* PAM conversation callback handed to pam_start(). Conversations are not
 * supported.
 * NOTE(review): the remaining parameters and the returned PAM code are not
 * visible in this excerpt. */
735 static int null_conv(
737 const struct pam_message **msg,
738 struct pam_response **resp,
741 /* We don't support conversations */
/* Opens a PAM session for `user` under the service `name` and forks a
 * helper process that closes the session again once the caller goes away.
 *
 * Visible steps: pam_start(), PAM_TTY item, pam_acct_mgmt(),
 * pam_open_session(), pam_getenvlist(); SIGTERM is blocked, then fork().
 * The child renames itself to "(sd-pam)", closes the passed fds, drops to
 * `uid`, requests SIGTERM on parent death, waits for it with sigwait() and
 * closes the PAM session if the parent is gone. The parent restores its
 * signal mask. On the failure path the session is closed, the handle is
 * ended and an already forked helper is signalled with SIGTERM + SIGCONT;
 * PAM failures are reported as -EPERM.
 *
 * NOTE(review): many lines are missing from this excerpt (remaining
 * parameters, labels, most failure branches, the return statements), so
 * this comment only covers what is visible. */
746 static int setup_pam(
752 int fds[], unsigned n_fds) {
754 static const struct pam_conv conv = {
759 pam_handle_t *handle = NULL;
761 int pam_code = PAM_SUCCESS;
764 bool close_session = false;
765 pid_t pam_pid = 0, parent_pid;
771 /* We set up PAM in the parent process, then fork. The child
772 * will then stay around until killed via PR_GET_PDEATHSIG or
773 * systemd via the cgroup logic. It will then remove the PAM
774 * session again. The parent process will exec() the actual
775 * daemon. We do things this way to ensure that the main PID
776 * of the daemon is the one we initially fork()ed. */
778 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
784 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
787 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
790 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
793 close_session = true;
795 if ((!(e = pam_getenvlist(handle)))) {
796 pam_code = PAM_BUF_ERR;
800 /* Block SIGTERM, so that we know that it won't get lost in
802 if (sigemptyset(&ss) < 0 ||
803 sigaddset(&ss, SIGTERM) < 0 ||
804 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
807 parent_pid = getpid();
809 if ((pam_pid = fork()) < 0)
816 /* The child's job is to reset the PAM session on
819 /* This string must fit in 10 chars (i.e. the length
820 * of "/sbin/init"), to look pretty in /bin/ps */
821 rename_process("(sd-pam)");
823 /* Make sure we don't keep open the passed fds in this
824 child. We assume that otherwise only those fds are
825 open here that have been opened by PAM. */
826 close_many(fds, n_fds);
828 /* Drop privileges - we don't need any to pam_close_session
829 * and this will make PR_SET_PDEATHSIG work in most cases.
830 * If this fails, ignore the error - but expect sd-pam threads
831 * to fail to exit normally */
/* setresuid() reports its failure through errno; r is not assigned by
 * this call, so it must not be used for the message. */
832 if (setresuid(uid, uid, uid) < 0)
833 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(errno));
835 /* Wait until our parent died. This will only work if
836 * the above setresuid() succeeds, otherwise the kernel
837 * will not allow unprivileged parents kill their privileged
838 * children this way. We rely on the control groups kill logic
839 * to do the rest for us. */
840 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
843 /* Check if our parent process might already have
845 if (getppid() == parent_pid) {
847 if (sigwait(&ss, &sig) < 0) {
854 assert(sig == SIGTERM);
859 /* If our parent died we'll end the session */
860 if (getppid() != parent_pid)
861 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
867 pam_end(handle, pam_code | PAM_DATA_SILENT);
871 /* If the child was forked off successfully it will do all the
872 * cleanups, so forget about the handle here. */
875 /* Unblock SIGTERM again in the parent */
876 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
879 /* We close the log explicitly here, since the PAM modules
880 * might have opened it, but we don't want this fd around. */
889 if (pam_code != PAM_SUCCESS)
890 err = -EPERM; /* PAM errors do not map to errno */
896 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
898 pam_end(handle, pam_code | PAM_DATA_SILENT);
906 kill(pam_pid, SIGTERM);
907 kill(pam_pid, SIGCONT);
/* Renames the calling process after the file name component of `path`,
 * wrapped in parentheses: '(' + name + ')'. The buffer holds 11 bytes, so
 * at most 8 characters of the name fit between the parentheses plus the
 * terminating NUL. In one branch the fixed name "(...)" is used instead.
 * NOTE(review): the condition for the "(...)" branch and the computation
 * of p and l before the memcpy() are not visible in this excerpt. */
914 static void rename_process_from_path(const char *path) {
915 char process_name[11];
919 /* This resulting string must fit in 10 chars (i.e. the length
920 * of "/sbin/init") to look pretty in /bin/ps */
922 p = path_get_file_name(path);
924 rename_process("(...)");
930 /* The end of the process name is usually more
931 * interesting, since the first bit might just be
937 process_name[0] = '(';
938 memcpy(process_name+1, p, l);
939 process_name[1+l] = ')';
940 process_name[1+l+1] = 0;
942 rename_process(process_name);
/* Builds a seccomp BPF program from the syscall bitmap and installs it with
 * prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER).
 *
 * The program consists of a fixed header, two instructions per selected
 * syscall (compare the syscall number, return SECCOMP_RET_ALLOW on match)
 * and a fixed footer. The bitmap is walked twice: once to count the
 * selected syscalls so the program can be sized, once to emit the
 * instructions. The program buffer comes from alloca().
 *
 * The bit test uses an unsigned constant: with a plain int, shifting 1 by
 * 31 positions would overflow into the sign bit.
 *
 * NOTE(review): the word index is i >> 4 although the mask selects one of
 * 32 bits. The mapping is still unique per syscall, but only half of every
 * word is used; it has to match whatever code fills the bitmap, which is
 * not visible here - confirm before changing it.
 * NOTE(review): the remaining header/footer entries, the counter
 * increments, the initialisation of prog and the return statements are not
 * visible in this excerpt. */
945 static int apply_seccomp(uint32_t *syscall_filter) {
946 static const struct sock_filter header[] = {
947 VALIDATE_ARCHITECTURE,
950 static const struct sock_filter footer[] = {
956 struct sock_filter *f;
957 struct sock_fprog prog;
959 assert(syscall_filter);
961 /* First: count the syscalls to check for */
962 for (i = 0, n = 0; i < syscall_max(); i++)
963 if (syscall_filter[i >> 4] & (1U << (i & 31)))
966 /* Second: build the filter program from a header, the syscall
967 * matches and the footer */
968 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
969 memcpy(f, header, sizeof(header));
971 for (i = 0, n = 0; i < syscall_max(); i++)
972 if (syscall_filter[i >> 4] & (1U << (i & 31))) {
973 struct sock_filter item[] = {
974 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
975 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
978 assert_cc(ELEMENTSOF(item) == 2);
980 f[ELEMENTSOF(header) + 2*n] = item[0];
981 f[ELEMENTSOF(header) + 2*n+1] = item[1];
986 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
988 /* Third: install the filter */
990 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
992 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
/* Spawns one ExecCommand with the settings of the given ExecContext.
 *
 * Before forking: the environment files are loaded, the command line is
 * logged and the cgroups are realized and their attributes applied.
 *
 * Visible steps in the child, in order: process rename, signal
 * dispositions and mask reset, wait on the idle pipe, closing of all
 * descriptors except the ones to pass on, optional tcpwrap check, TTY
 * reset, optional confirmation prompt, stdin/stdout/stderr setup, cgroup
 * installation, OOM score, nice level, CPU scheduler and affinity, I/O
 * priority, timer slack, utmp record, user lookup plus terminal and cgroup
 * ownership, groups, umask, PAM session, network and mount namespaces,
 * chroot/chdir, descriptor shifting and flags, resource limits, capability
 * bounding set, user switch, secure bits, capabilities, no-new-privileges,
 * seccomp filter, environment assembly and finally execve(). When a step
 * fails, the step (r) and the error (err) are logged.
 *
 * Visible steps in the parent: the new PID is logged, the child is added to
 * the cgroups once more and the start is recorded in command->exec_status.
 *
 * NOTE(review): this excerpt omits many lines (several parameters, the
 * fork itself, labels, most failure branches and the return statements);
 * the description above covers only what is visible. */
998 int exec_spawn(ExecCommand *command,
1000 const ExecContext *context,
1001 int fds[], unsigned n_fds,
1003 bool apply_permissions,
1005 bool apply_tty_stdin,
1007 CGroupBonding *cgroup_bondings,
1008 CGroupAttribute *cgroup_attributes,
1009 const char *cgroup_suffix,
1010 const char *unit_id,
1018 char _cleanup_strv_free_ **files_env = NULL;
1023 assert(fds || n_fds <= 0);
1025 if (context->std_input == EXEC_INPUT_SOCKET ||
1026 context->std_output == EXEC_OUTPUT_SOCKET ||
1027 context->std_error == EXEC_OUTPUT_SOCKET) {
1039 r = exec_context_load_environment(context, &files_env);
1041 log_struct_unit(LOG_ERR,
1043 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1050 argv = command->argv;
1052 line = exec_command_line(argv);
1056 log_struct_unit(LOG_DEBUG,
1058 "MESSAGE=About to execute %s", line,
1062 r = cgroup_bonding_realize_list(cgroup_bondings);
1066 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1075 const char *username = NULL, *home = NULL;
1076 uid_t uid = (uid_t) -1;
1077 gid_t gid = (gid_t) -1;
1078 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1079 **final_env = NULL, **final_argv = NULL;
1081 bool set_access = false;
1085 rename_process_from_path(command->path);
1087 /* We reset exactly these signals, since they are the
1088 * only ones we set to SIG_IGN in the main daemon. All
1089 * others we leave untouched because we set them to
1090 * SIG_DFL or a valid handler initially, both of which
1091 * will be demoted to SIG_DFL. */
1092 default_signals(SIGNALS_CRASH_HANDLER,
1093 SIGNALS_IGNORE, -1);
1095 if (context->ignore_sigpipe)
1096 ignore_signals(SIGPIPE, -1);
1098 assert_se(sigemptyset(&ss) == 0);
1099 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1101 r = EXIT_SIGNAL_MASK;
1106 if (idle_pipe[1] >= 0)
1107 close_nointr_nofail(idle_pipe[1]);
1108 if (idle_pipe[0] >= 0) {
1109 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1110 close_nointr_nofail(idle_pipe[0]);
1114 /* Close sockets very early to make sure we don't
1115 * block init reexecution because it cannot bind its
1118 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1119 socket_fd >= 0 ? 1 : n_fds);
1125 if (!context->same_pgrp)
1132 if (context->tcpwrap_name) {
1134 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1140 for (i = 0; i < (int) n_fds; i++) {
1141 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1149 exec_context_tty_reset(context);
1151 if (confirm_spawn) {
1154 err = ask_for_confirmation(&response, argv);
1155 if (err == -ETIMEDOUT)
1156 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1158 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1159 else if (response == 's') {
1160 write_confirm_message("Skipping execution.\n");
1164 } else if (response == 'n') {
1165 write_confirm_message("Failing execution.\n");
1171 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1172 * must make sure to drop O_NONBLOCK */
1174 fd_nonblock(socket_fd, false);
1176 err = setup_input(context, socket_fd, apply_tty_stdin);
1182 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1188 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1194 if (cgroup_bondings) {
1195 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1202 if (context->oom_score_adjust_set) {
1205 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1208 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1210 r = EXIT_OOM_ADJUST;
1215 if (context->nice_set)
1216 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1222 if (context->cpu_sched_set) {
1223 struct sched_param param;
1226 param.sched_priority = context->cpu_sched_priority;
1228 if (sched_setscheduler(0, context->cpu_sched_policy |
1229 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1231 r = EXIT_SETSCHEDULER;
1236 if (context->cpuset)
1237 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1239 r = EXIT_CPUAFFINITY;
1243 if (context->ioprio_set)
1244 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1250 if (context->timer_slack_nsec != (nsec_t) -1)
1251 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1253 r = EXIT_TIMERSLACK;
1257 if (context->utmp_id)
1258 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1260 if (context->user) {
1261 username = context->user;
1262 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1268 if (is_terminal_input(context->std_input)) {
1269 err = chown_terminal(STDIN_FILENO, uid);
1276 if (cgroup_bondings && context->control_group_modify) {
1277 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1279 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1289 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1290 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (gid_t) -1, context->control_group_persistent);
1297 if (apply_permissions) {
1298 err = enforce_groups(context, username, gid);
1305 umask(context->umask);
1308 if (apply_permissions && context->pam_name && username) {
1309 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1316 if (context->private_network) {
1317 if (unshare(CLONE_NEWNET) < 0) {
1326 if (strv_length(context->read_write_dirs) > 0 ||
1327 strv_length(context->read_only_dirs) > 0 ||
1328 strv_length(context->inaccessible_dirs) > 0 ||
1329 context->mount_flags != 0 ||
1330 context->private_tmp) {
1331 err = setup_namespace(context->read_write_dirs,
1332 context->read_only_dirs,
1333 context->inaccessible_dirs,
1334 context->private_tmp,
1335 context->mount_flags);
1343 if (context->root_directory)
1344 if (chroot(context->root_directory) < 0) {
1350 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1356 char _cleanup_free_ *d = NULL;
1358 if (asprintf(&d, "%s/%s",
1359 context->root_directory ? context->root_directory : "",
1360 context->working_directory ? context->working_directory : "") < 0) {
1373 /* We repeat the fd closing here, to make sure that
1374 * nothing is leaked from the PAM modules */
1375 err = close_all_fds(fds, n_fds);
1377 err = shift_fds(fds, n_fds);
1379 err = flags_fds(fds, n_fds, context->non_blocking);
1385 if (apply_permissions) {
1387 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1388 if (!context->rlimit[i])
1391 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1398 if (context->capability_bounding_set_drop) {
1399 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1401 r = EXIT_CAPABILITIES;
1406 if (context->user) {
1407 err = enforce_user(context, uid);
1414 /* PR_GET_SECUREBITS is not privileged, while
1415 * PR_SET_SECUREBITS is. So to suppress
1416 * potential EPERMs we'll try not to call
1417 * PR_SET_SECUREBITS unless necessary. */
1418 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1419 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1421 r = EXIT_SECUREBITS;
1425 if (context->capabilities)
1426 if (cap_set_proc(context->capabilities) < 0) {
1428 r = EXIT_CAPABILITIES;
1432 if (context->no_new_privileges)
1433 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1435 r = EXIT_NO_NEW_PRIVILEGES;
1439 if (context->syscall_filter) {
1440 err = apply_seccomp(context->syscall_filter);
1448 if (!(our_env = new0(char*, 7))) {
1455 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1456 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1463 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1470 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1471 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1477 if (is_terminal_input(context->std_input) ||
1478 context->std_output == EXEC_OUTPUT_TTY ||
1479 context->std_error == EXEC_OUTPUT_TTY)
1480 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1488 if (!(final_env = strv_env_merge(
1492 context->environment,
1501 if (!(final_argv = replace_env_argv(argv, final_env))) {
1507 final_env = strv_env_clean(final_env);
1509 execve(command->path, final_argv, final_env);
1516 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1517 "EXECUTABLE=%s", command->path,
1518 "MESSAGE=Failed at step %s spawning %s: %s",
1519 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1520 command->path, strerror(-err),
1529 log_struct_unit(LOG_DEBUG,
1531 "MESSAGE=Forked %s as %lu",
1532 command->path, (unsigned long) pid,
1535 /* We add the new process to the cgroup both in the child (so
1536 * that we can be sure that no user code is ever executed
1537 * outside of the cgroup) and in the parent (so that we can be
1538 * sure that when we kill the cgroup the process will be
1540 if (cgroup_bondings)
1541 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1543 exec_status_start(&command->exec_status, pid);
/* Fills an ExecContext with its defaults. Visible assignments: best-effort
 * I/O priority class with data 0, SCHED_OTHER CPU policy, syslog priority
 * LOG_DAEMON|LOG_INFO with level prefix enabled, control_group_persistent
 * set to -1 (exec_spawn() only acts on values >= 0), SIGPIPE ignored and
 * timer slack set to (nsec_t) -1, the value exec_spawn() treats as
 * "not configured". Further lines are not visible in this excerpt. */
1549 void exec_context_init(ExecContext *c) {
1553 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1554 c->cpu_sched_policy = SCHED_OTHER;
1555 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1556 c->syslog_level_prefix = true;
1557 c->control_group_persistent = -1;
1558 c->ignore_sigpipe = true;
1559 c->timer_slack_nsec = (nsec_t) -1;
/* Releases the resources owned by an ExecContext and resets the freed
 * members to NULL so that the structure can safely be passed here again.
 * Visible members: environment, environment_files, the rlimit array
 * entries, working_directory, root_directory, tcpwrap_name,
 * syslog_identifier, supplementary_groups, capabilities (cap_free),
 * read_only_dirs, read_write_dirs, inaccessible_dirs, cpuset (CPU_FREE)
 * and syscall_filter.
 * NOTE(review): several lines (further members, the body of the rlimit
 * loop) are not visible in this excerpt. */
1562 void exec_context_done(ExecContext *c) {
1567 strv_free(c->environment);
1568 c->environment = NULL;
1570 strv_free(c->environment_files);
1571 c->environment_files = NULL;
1573 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1575 c->rlimit[l] = NULL;
1578 free(c->working_directory);
1579 c->working_directory = NULL;
1580 free(c->root_directory);
1581 c->root_directory = NULL;
1586 free(c->tcpwrap_name);
1587 c->tcpwrap_name = NULL;
1589 free(c->syslog_identifier);
1590 c->syslog_identifier = NULL;
1598 strv_free(c->supplementary_groups);
1599 c->supplementary_groups = NULL;
1604 if (c->capabilities) {
1605 cap_free(c->capabilities);
1606 c->capabilities = NULL;
1609 strv_free(c->read_only_dirs);
1610 c->read_only_dirs = NULL;
1612 strv_free(c->read_write_dirs);
1613 c->read_write_dirs = NULL;
1615 strv_free(c->inaccessible_dirs);
1616 c->inaccessible_dirs = NULL;
1619 CPU_FREE(c->cpuset);
1624 free(c->syscall_filter);
1625 c->syscall_filter = NULL;
/* Per-command clean-up hook, called for every element by
 * exec_command_done_array() and exec_command_free_list().
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * releases the members of the command - confirm against the full source. */
1628 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n commands stored contiguously
 * starting at c. */
1638 void exec_command_done_array(ExecCommand *c, unsigned n) {
1641 for (i = 0; i < n; i++)
1642 exec_command_done(c+i);
/* Tears down a linked list of commands: an element i is unlinked from the
 * list headed by c with LIST_REMOVE and passed to exec_command_done().
 * NOTE(review): the loop around these two statements and any freeing of
 * the element itself are not visible in this excerpt. */
1645 void exec_command_free_list(ExecCommand *c) {
1649 LIST_REMOVE(ExecCommand, command, c, i);
1650 exec_command_done(i);
/* Calls exec_command_free_list() on each of the n list heads in the array
 * c. The rest of the loop body is not visible in this excerpt. */
1655 void exec_command_free_array(ExecCommand **c, unsigned n) {
1658 for (i = 0; i < n; i++) {
1659 exec_command_free_list(c[i]);
/* Loads all environment files configured in c->environment_files. Each
 * entry is checked for being an absolute path, expanded with glob(), and
 * every matching file is read with load_env_file(); the results are
 * accumulated in r with strv_env_merge(). A failing glob() is reported as
 * -errno, or -EINVAL when errno is not set.
 * NOTE(review): the handling of the `ignore` flag, the other failure
 * branches, the hand-over of the result through *l and the return value
 * are not visible in this excerpt. */
1664 int exec_context_load_environment(const ExecContext *c, char ***l) {
1665 char **i, **r = NULL;
1670 STRV_FOREACH(i, c->environment_files) {
1673 bool ignore = false;
1685 if (!path_is_absolute(fn)) {
1694 /* Filename supports globbing, take all matching files */
1697 if (glob(fn, 0, NULL, &pglob) != 0) {
1703 return errno ? -errno : -EINVAL;
1705 count = pglob.gl_pathc;
1714 for (n = 0; n < count; n++) {
1715 k = load_env_file(pglob.gl_pathv[n], &p);
1730 m = strv_env_merge(2, r, p);
1750 static void strv_fprintf(FILE *f, char **l) {
1756 fprintf(f, " %s", *g);
1750 static void strv_fprintf(FILE *f, char **l) {
1756 fprintf(f, " %s", *g);
1759 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1771 "%sWorkingDirectory: %s\n"
1772 "%sRootDirectory: %s\n"
1773 "%sNonBlocking: %s\n"
1774 "%sPrivateTmp: %s\n"
1775 "%sControlGroupModify: %s\n"
1776 "%sControlGroupPersistent: %s\n"
1777 "%sPrivateNetwork: %s\n"
1778 "%sIgnoreSIGPIPE: %s\n",
1780 prefix, c->working_directory ? c->working_directory : "/",
1781 prefix, c->root_directory ? c->root_directory : "/",
1782 prefix, yes_no(c->non_blocking),
1783 prefix, yes_no(c->private_tmp),
1784 prefix, yes_no(c->control_group_modify),
1785 prefix, yes_no(c->control_group_persistent),
1786 prefix, yes_no(c->private_network),
1787 prefix, yes_no(c->ignore_sigpipe));
1789 STRV_FOREACH(e, c->environment)
1790 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1792 STRV_FOREACH(e, c->environment_files)
1793 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1795 if (c->tcpwrap_name)
1797 "%sTCPWrapName: %s\n",
1798 prefix, c->tcpwrap_name);
1805 if (c->oom_score_adjust_set)
1807 "%sOOMScoreAdjust: %i\n",
1808 prefix, c->oom_score_adjust);
1810 for (i = 0; i < RLIM_NLIMITS; i++)
1812 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1814 if (c->ioprio_set) {
1818 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1822 "%sIOSchedulingClass: %s\n"
1823 "%sIOPriority: %i\n",
1824 prefix, strna(class_str),
1825 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1829 if (c->cpu_sched_set) {
1833 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1837 "%sCPUSchedulingPolicy: %s\n"
1838 "%sCPUSchedulingPriority: %i\n"
1839 "%sCPUSchedulingResetOnFork: %s\n",
1840 prefix, strna(policy_str),
1841 prefix, c->cpu_sched_priority,
1842 prefix, yes_no(c->cpu_sched_reset_on_fork));
1847 fprintf(f, "%sCPUAffinity:", prefix);
1848 for (i = 0; i < c->cpuset_ncpus; i++)
1849 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1850 fprintf(f, " %i", i);
1854 if (c->timer_slack_nsec != (nsec_t) -1)
1855 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1858 "%sStandardInput: %s\n"
1859 "%sStandardOutput: %s\n"
1860 "%sStandardError: %s\n",
1861 prefix, exec_input_to_string(c->std_input),
1862 prefix, exec_output_to_string(c->std_output),
1863 prefix, exec_output_to_string(c->std_error));
1869 "%sTTYVHangup: %s\n"
1870 "%sTTYVTDisallocate: %s\n",
1871 prefix, c->tty_path,
1872 prefix, yes_no(c->tty_reset),
1873 prefix, yes_no(c->tty_vhangup),
1874 prefix, yes_no(c->tty_vt_disallocate));
1876 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1877 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1878 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1879 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1880 char *fac_str, *lvl_str;
1883 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1887 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1892 "%sSyslogFacility: %s\n"
1893 "%sSyslogLevel: %s\n",
1894 prefix, strna(fac_str),
1895 prefix, strna(lvl_str));
1900 if (c->capabilities) {
1902 if ((t = cap_to_text(c->capabilities, NULL))) {
1903 fprintf(f, "%sCapabilities: %s\n",
1910 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1912 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1913 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1914 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1915 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1916 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1917 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1919 if (c->capability_bounding_set_drop) {
1921 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1923 for (l = 0; l <= cap_last_cap(); l++)
1924 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1927 if ((t = cap_to_name(l))) {
1928 fprintf(f, " %s", t);
1937 fprintf(f, "%sUser: %s\n", prefix, c->user);
1939 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1941 if (strv_length(c->supplementary_groups) > 0) {
1942 fprintf(f, "%sSupplementaryGroups:", prefix);
1943 strv_fprintf(f, c->supplementary_groups);
1948 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1950 if (strv_length(c->read_write_dirs) > 0) {
1951 fprintf(f, "%sReadWriteDirs:", prefix);
1952 strv_fprintf(f, c->read_write_dirs);
1956 if (strv_length(c->read_only_dirs) > 0) {
1957 fprintf(f, "%sReadOnlyDirs:", prefix);
1958 strv_fprintf(f, c->read_only_dirs);
1962 if (strv_length(c->inaccessible_dirs) > 0) {
1963 fprintf(f, "%sInaccessibleDirs:", prefix);
1964 strv_fprintf(f, c->inaccessible_dirs);
1970 "%sUtmpIdentifier: %s\n",
1971 prefix, c->utmp_id);
/* Marks the start of a process run in *s.
 *
 * s:   status record to update; its start_timestamp is filled in via
 *      dual_timestamp_get().
 * pid: PID of the started process (its use is on lines not visible in
 *      this excerpt -- presumably stored in s->pid; confirm).
 *
 * NOTE(review): only part of this function is visible here. */
1974 void exec_status_start(ExecStatus *s, pid_t pid) {
1979 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of a process in *s and performs per-context
 * cleanup.
 *
 * s:       status record; its exit_timestamp is filled in via
 *          dual_timestamp_get().
 * context: execution context; if it has a utmp_id, a dead-process utmp
 *          entry is written, and the context's TTY is reset.
 * pid:     PID of the process that exited.
 * code:    passed through to utmp_put_dead_process(); presumably the
 *          SIGCHLD si_code -- confirm against callers.
 * status:  passed through to utmp_put_dead_process(); presumably the
 *          exit status or signal number -- confirm against callers.
 *
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (including the statement guarded by the PID check below and
 * any NULL check on context). */
1982 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* True when s already refers to a non-zero PID different from the one
 * that exited; the guarded statement is not visible in this excerpt. */
1985 if (s->pid && s->pid != pid)
1989 dual_timestamp_get(&s->exit_timestamp);
/* Only touch utmp when the context names a utmp identifier. */
1995 if (context->utmp_id)
1996 utmp_put_dead_process(context->utmp_id, pid, code, status);
1998 exec_context_tty_reset(context);
/* Writes a human-readable description of *s to f, each line starting
 * with prefix.
 *
 * s:      status record to print (pid, start/exit timestamps, code).
 * f:      output stream.
 * prefix: string emitted at the start of every line.
 *
 * The start timestamp is printed only if its realtime value is > 0; the
 * exit timestamp and exit status lines are printed only if the exit
 * timestamp's realtime value is > 0.
 *
 * NOTE(review): some lines (e.g. the fprintf() openers and any argument
 * checks) are not visible in this excerpt. */
2002 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() for both timestamps. */
2003 char buf[FORMAT_TIMESTAMP_MAX];
/* pid is cast to unsigned long for printing. */
2016 prefix, (unsigned long) s->pid);
2018 if (s->start_timestamp.realtime > 0)
2020 "%sStart Timestamp: %s\n",
2021 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2023 if (s->exit_timestamp.realtime > 0)
2025 "%sExit Timestamp: %s\n"
2027 "%sExit Status: %i\n",
2028 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2029 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from the argument vector argv.
 *
 * The visible code makes two passes over argv with STRV_FOREACH: one
 * before the allocation (presumably to compute the buffer size k --
 * confirm) and one after it that fills the buffer. Arguments containing
 * any WHITESPACE character are treated specially (presumably quoted --
 * the branch body is not visible here).
 *
 * Returns a buffer allocated with new(char, k); the failure path of that
 * allocation is not visible in this excerpt. The caller presumably owns
 * and frees the result -- confirm.
 *
 * NOTE(review): most of this function's body is not visible here. */
2033 char *exec_command_line(char **argv) {
2041 STRV_FOREACH(a, argv)
2044 if (!(n = new(char, k)))
2048 STRV_FOREACH(a, argv) {
/* Does this argument contain any whitespace character? */
2055 if (strpbrk(*a, WHITESPACE)) {
2066 /* FIXME: this doesn't really handle arguments that have
2067 * spaces and ticks in them */
/* Writes a description of one command to f: its command line, followed
 * by its execution status printed with a tab-extended prefix.
 *
 * c:      command to print (argv and exec_status are read).
 * f:      output stream.
 * prefix: string emitted at the start of the "Command Line" line.
 *
 * NOTE(review): some lines (declarations, cleanup of p2/cmd) are not
 * visible in this excerpt. */
2072 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2074 const char *prefix2;
/* Nested prefix is prefix + "\t"; if strappend() yields NULL, fall back
 * to the plain prefix rather than failing. */
2083 p2 = strappend(prefix, "\t");
2084 prefix2 = p2 ? p2 : prefix;
2086 cmd = exec_command_line(c->argv);
/* If no command line string could be built, print the ENOMEM error text
 * in its place. */
2089 "%sCommand Line: %s\n",
2090 prefix, cmd ? cmd : strerror(ENOMEM));
2094 exec_status_dump(&c->exec_status, f, prefix2);
/* Calls exec_command_dump() on every command in the list starting at c,
 * writing to f with the given prefix.
 *
 * The parameter c itself is reused as the LIST_FOREACH cursor over the
 * "command" list links.
 *
 * NOTE(review): some lines of this function are not visible in this
 * excerpt. */
2099 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2105 LIST_FOREACH(command, c, c)
2106 exec_command_dump(c, f, prefix);
/* Appends command e to the end of the list whose head pointer is *l.
 *
 * The visible code locates the current tail with LIST_FIND_TAIL and then
 * inserts e after it with LIST_INSERT_AFTER, so existing entries keep
 * their order and e becomes the last one.
 *
 * NOTE(review): some lines (declarations, any handling of an empty list)
 * are not visible in this excerpt. */
2109 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2116 /* It's kind of important, that we keep the order here */
2117 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2118 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up command c from a path and a variadic list of further strings.
 *
 * c:    command to fill in (the assignments are not visible here).
 * path: first string; it is passed to strv_new_ap() together with the
 *       variadic arguments to build a string vector, and is also
 *       duplicated with strdup().
 * ...:  additional strings consumed through the va_list ap; presumably
 *       NULL-terminated -- confirm against strv_new_ap().
 *
 * Returns an int; the concrete return values and the error paths for a
 * failed strv_new_ap()/strdup() are not visible in this excerpt.
 *
 * NOTE(review): most of this function's body is not visible here. */
2123 int exec_command_set(ExecCommand *c, const char *path, ...) {
2131 l = strv_new_ap(path, ap);
2137 if (!(p = strdup(path))) {
/* String names for the ExecInput enum values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX. */
2151 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2152 [EXEC_INPUT_NULL] = "null",
2153 [EXEC_INPUT_TTY] = "tty",
2154 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2155 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2156 [EXEC_INPUT_SOCKET] = "socket"
/* Presumably generates the exec_input string<->enum lookup helpers (such
 * as exec_input_to_string(), which is used elsewhere in this file) from
 * the table above -- confirm against the macro definition. */
2159 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum values, indexed by enum value and
 * sized by _EXEC_OUTPUT_MAX. The "+console" entries name the
 * *_AND_CONSOLE variants of the syslog, kmsg and journal outputs. */
2161 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2162 [EXEC_OUTPUT_INHERIT] = "inherit",
2163 [EXEC_OUTPUT_NULL] = "null",
2164 [EXEC_OUTPUT_TTY] = "tty",
2165 [EXEC_OUTPUT_SYSLOG] = "syslog",
2166 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2167 [EXEC_OUTPUT_KMSG] = "kmsg",
2168 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2169 [EXEC_OUTPUT_JOURNAL] = "journal",
2170 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2171 [EXEC_OUTPUT_SOCKET] = "socket"
/* Presumably generates the exec_output string<->enum lookup helpers
 * (such as exec_output_to_string(), which is used elsewhere in this
 * file) from the table above -- confirm against the macro definition. */
2174 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);