1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 /* This assumes there is a 'tty' group */
75 static int shift_fds(int fds[], unsigned n_fds) {
76 int start, restart_from;
81 /* Modifies the fds array! (sorts it) */
91 for (i = start; i < (int) n_fds; i++) {
94 /* Already at right index? */
98 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
101 close_nointr_nofail(fds[i]);
104 /* Hmm, the fd we wanted isn't free? Then
105 * let's remember that and try again from here*/
106 if (nfd != i+3 && restart_from < 0)
110 if (restart_from < 0)
113 start = restart_from;
119 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
128 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
130 for (i = 0; i < n_fds; i++) {
132 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
135 /* We unconditionally drop FD_CLOEXEC from the fds,
136 * since after all we want to pass these fds to our
139 if ((r = fd_cloexec(fds[i], false)) < 0)
146 static const char *tty_path(const ExecContext *context) {
149 if (context->tty_path)
150 return context->tty_path;
152 return "/dev/console";
155 void exec_context_tty_reset(const ExecContext *context) {
158 if (context->tty_vhangup)
159 terminal_vhangup(tty_path(context));
161 if (context->tty_reset)
162 reset_terminal(tty_path(context));
164 if (context->tty_vt_disallocate && context->tty_path)
165 vt_disallocate(context->tty_path);
168 static bool is_terminal_output(ExecOutput o) {
170 o == EXEC_OUTPUT_TTY ||
171 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
172 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
173 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
176 static int open_null_as(int flags, int nfd) {
181 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
185 r = dup2(fd, nfd) < 0 ? -errno : nfd;
186 close_nointr_nofail(fd);
193 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
195 union sockaddr_union sa;
198 assert(output < _EXEC_OUTPUT_MAX);
202 fd = socket(AF_UNIX, SOCK_STREAM, 0);
207 sa.un.sun_family = AF_UNIX;
208 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
210 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
212 close_nointr_nofail(fd);
216 if (shutdown(fd, SHUT_RD) < 0) {
217 close_nointr_nofail(fd);
229 context->syslog_identifier ? context->syslog_identifier : ident,
231 context->syslog_priority,
232 !!context->syslog_level_prefix,
233 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
234 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
235 is_terminal_output(output));
238 r = dup2(fd, nfd) < 0 ? -errno : nfd;
239 close_nointr_nofail(fd);
245 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
251 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
255 r = dup2(fd, nfd) < 0 ? -errno : nfd;
256 close_nointr_nofail(fd);
263 static bool is_terminal_input(ExecInput i) {
265 i == EXEC_INPUT_TTY ||
266 i == EXEC_INPUT_TTY_FORCE ||
267 i == EXEC_INPUT_TTY_FAIL;
270 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
272 if (is_terminal_input(std_input) && !apply_tty_stdin)
273 return EXEC_INPUT_NULL;
275 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
276 return EXEC_INPUT_NULL;
281 static int fixup_output(ExecOutput std_output, int socket_fd) {
283 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
284 return EXEC_OUTPUT_INHERIT;
289 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
294 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
298 case EXEC_INPUT_NULL:
299 return open_null_as(O_RDONLY, STDIN_FILENO);
302 case EXEC_INPUT_TTY_FORCE:
303 case EXEC_INPUT_TTY_FAIL: {
306 if ((fd = acquire_terminal(
308 i == EXEC_INPUT_TTY_FAIL,
309 i == EXEC_INPUT_TTY_FORCE,
314 if (fd != STDIN_FILENO) {
315 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
316 close_nointr_nofail(fd);
323 case EXEC_INPUT_SOCKET:
324 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
327 assert_not_reached("Unknown input type");
331 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
339 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
340 o = fixup_output(context->std_output, socket_fd);
342 if (fileno == STDERR_FILENO) {
344 e = fixup_output(context->std_error, socket_fd);
346 /* This expects the input and output are already set up */
348 /* Don't change the stderr file descriptor if we inherit all
349 * the way and are not on a tty */
350 if (e == EXEC_OUTPUT_INHERIT &&
351 o == EXEC_OUTPUT_INHERIT &&
352 i == EXEC_INPUT_NULL &&
353 !is_terminal_input(context->std_input) &&
357 /* Duplicate from stdout if possible */
358 if (e == o || e == EXEC_OUTPUT_INHERIT)
359 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
363 } else if (o == EXEC_OUTPUT_INHERIT) {
364 /* If input got downgraded, inherit the original value */
365 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
366 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
368 /* If the input is connected to anything that's not a /dev/null, inherit that... */
369 if (i != EXEC_INPUT_NULL)
370 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
372 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
376 /* We need to open /dev/null here anew, to get the right access mode. */
377 return open_null_as(O_WRONLY, fileno);
382 case EXEC_OUTPUT_NULL:
383 return open_null_as(O_WRONLY, fileno);
385 case EXEC_OUTPUT_TTY:
386 if (is_terminal_input(i))
387 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
389 /* We don't reset the terminal if this is just about output */
390 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
392 case EXEC_OUTPUT_SYSLOG:
393 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
394 case EXEC_OUTPUT_KMSG:
395 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
396 case EXEC_OUTPUT_JOURNAL:
397 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
398 r = connect_logger_as(context, o, ident, unit_id, fileno);
400 log_struct_unit(LOG_CRIT, unit_id,
401 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
402 fileno == STDOUT_FILENO ? "out" : "err",
403 unit_id, strerror(-r),
406 r = open_null_as(O_WRONLY, fileno);
410 case EXEC_OUTPUT_SOCKET:
411 assert(socket_fd >= 0);
412 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
415 assert_not_reached("Unknown error type");
419 static int chown_terminal(int fd, uid_t uid) {
424 /* This might fail. What matters are the results. */
425 (void) fchown(fd, uid, -1);
426 (void) fchmod(fd, TTY_MODE);
428 if (fstat(fd, &st) < 0)
431 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
437 static int setup_confirm_stdio(int *_saved_stdin,
438 int *_saved_stdout) {
439 int fd = -1, saved_stdin, saved_stdout = -1, r;
441 assert(_saved_stdin);
442 assert(_saved_stdout);
444 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
448 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
449 if (saved_stdout < 0) {
454 fd = acquire_terminal(
459 DEFAULT_CONFIRM_USEC);
465 r = chown_terminal(fd, getuid());
469 if (dup2(fd, STDIN_FILENO) < 0) {
474 if (dup2(fd, STDOUT_FILENO) < 0) {
480 close_nointr_nofail(fd);
482 *_saved_stdin = saved_stdin;
483 *_saved_stdout = saved_stdout;
488 if (saved_stdout >= 0)
489 close_nointr_nofail(saved_stdout);
491 if (saved_stdin >= 0)
492 close_nointr_nofail(saved_stdin);
495 close_nointr_nofail(fd);
500 static int write_confirm_message(const char *format, ...) {
506 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
510 va_start(ap, format);
511 vdprintf(fd, format, ap);
514 close_nointr_nofail(fd);
519 static int restore_confirm_stdio(int *saved_stdin,
525 assert(saved_stdout);
529 if (*saved_stdin >= 0)
530 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
533 if (*saved_stdout >= 0)
534 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
537 if (*saved_stdin >= 0)
538 close_nointr_nofail(*saved_stdin);
540 if (*saved_stdout >= 0)
541 close_nointr_nofail(*saved_stdout);
546 static int ask_for_confirmation(char *response, char **argv) {
547 int saved_stdout = -1, saved_stdin = -1, r;
550 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
554 line = exec_command_line(argv);
558 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
561 restore_confirm_stdio(&saved_stdin, &saved_stdout);
566 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
567 bool keep_groups = false;
572 /* Lookup and set GID and supplementary group list. Here too
573 * we avoid NSS lookups for gid=0. */
575 if (context->group || username) {
577 if (context->group) {
578 const char *g = context->group;
580 if ((r = get_group_creds(&g, &gid)) < 0)
584 /* First step, initialize groups from /etc/group */
585 if (username && gid != 0) {
586 if (initgroups(username, gid) < 0)
592 /* Second step, set our gids */
593 if (setresgid(gid, gid, gid) < 0)
597 if (context->supplementary_groups) {
602 /* Final step, initialize any manually set supplementary groups */
603 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
605 if (!(gids = new(gid_t, ngroups_max)))
609 if ((k = getgroups(ngroups_max, gids)) < 0) {
616 STRV_FOREACH(i, context->supplementary_groups) {
619 if (k >= ngroups_max) {
625 r = get_group_creds(&g, gids+k);
634 if (setgroups(k, gids) < 0) {
645 static int enforce_user(const ExecContext *context, uid_t uid) {
649 /* Sets (but doesn't lookup) the uid and make sure we keep the
650 * capabilities while doing so. */
652 if (context->capabilities) {
654 static const cap_value_t bits[] = {
655 CAP_SETUID, /* Necessary so that we can run setresuid() below */
656 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
659 /* First step: If we need to keep capabilities but
660 * drop privileges we need to make sure we keep our
661 * caps, while we drop privileges. */
663 int sb = context->secure_bits|SECURE_KEEP_CAPS;
665 if (prctl(PR_GET_SECUREBITS) != sb)
666 if (prctl(PR_SET_SECUREBITS, sb) < 0)
670 /* Second step: set the capabilities. This will reduce
671 * the capabilities to the minimum we need. */
673 if (!(d = cap_dup(context->capabilities)))
676 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
677 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
683 if (cap_set_proc(d) < 0) {
692 /* Third step: actually set the uids */
693 if (setresuid(uid, uid, uid) < 0)
696 /* At this point we should have all necessary capabilities but
697 are otherwise a normal user. However, the caps might have gotten
698 corrupted due to the setresuid() so we need to clean them up
699 later. This is done outside of this call. */
706 static int null_conv(
708 const struct pam_message **msg,
709 struct pam_response **resp,
712 /* We don't support conversations */
717 static int setup_pam(
723 int fds[], unsigned n_fds) {
725 static const struct pam_conv conv = {
730 pam_handle_t *handle = NULL;
732 int pam_code = PAM_SUCCESS;
735 bool close_session = false;
736 pid_t pam_pid = 0, parent_pid;
742 /* We set up PAM in the parent process, then fork. The child
743 * will then stay around until killed via PR_SET_PDEATHSIG or
744 * systemd via the cgroup logic. It will then remove the PAM
745 * session again. The parent process will exec() the actual
746 * daemon. We do things this way to ensure that the main PID
747 * of the daemon is the one we initially fork()ed. */
749 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
755 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
758 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
761 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
764 close_session = true;
766 if ((!(e = pam_getenvlist(handle)))) {
767 pam_code = PAM_BUF_ERR;
771 /* Block SIGTERM, so that we know that it won't get lost in
773 if (sigemptyset(&ss) < 0 ||
774 sigaddset(&ss, SIGTERM) < 0 ||
775 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
778 parent_pid = getpid();
780 if ((pam_pid = fork()) < 0)
787 /* The child's job is to reset the PAM session on
790 /* This string must fit in 10 chars (i.e. the length
791 * of "/sbin/init"), to look pretty in /bin/ps */
792 rename_process("(sd-pam)");
794 /* Make sure we don't keep open the passed fds in this
795 child. We assume that otherwise only those fds are
796 open here that have been opened by PAM. */
797 close_many(fds, n_fds);
799 /* Drop privileges - we don't need any to pam_close_session
800 * and this will make PR_SET_PDEATHSIG work in most cases.
801 * If this fails, ignore the error - but expect sd-pam threads
802 * to fail to exit normally */
803 if (setresuid(uid, uid, uid) < 0)
804 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
806 /* Wait until our parent died. This will only work if
807 * the above setresuid() succeeds, otherwise the kernel
808 * will not allow unprivileged parents to kill their privileged
809 * children this way. We rely on the control groups kill logic
810 * to do the rest for us. */
811 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
814 /* Check if our parent process might already have
816 if (getppid() == parent_pid) {
818 if (sigwait(&ss, &sig) < 0) {
825 assert(sig == SIGTERM);
830 /* If our parent died we'll end the session */
831 if (getppid() != parent_pid)
832 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
838 pam_end(handle, pam_code | PAM_DATA_SILENT);
842 /* If the child was forked off successfully it will do all the
843 * cleanups, so forget about the handle here. */
846 /* Unblock SIGTERM again in the parent */
847 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
850 /* We close the log explicitly here, since the PAM modules
851 * might have opened it, but we don't want this fd around. */
860 if (pam_code != PAM_SUCCESS)
861 err = -EPERM; /* PAM errors do not map to errno */
867 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
869 pam_end(handle, pam_code | PAM_DATA_SILENT);
877 kill(pam_pid, SIGTERM);
878 kill(pam_pid, SIGCONT);
885 static void rename_process_from_path(const char *path) {
886 char process_name[11];
890 /* This resulting string must fit in 10 chars (i.e. the length
891 * of "/sbin/init") to look pretty in /bin/ps */
893 p = path_get_file_name(path);
895 rename_process("(...)");
901 /* The end of the process name is usually more
902 * interesting, since the first bit might just be
908 process_name[0] = '(';
909 memcpy(process_name+1, p, l);
910 process_name[1+l] = ')';
911 process_name[1+l+1] = 0;
913 rename_process(process_name);
916 static int apply_seccomp(uint32_t *syscall_filter) {
917 static const struct sock_filter header[] = {
918 VALIDATE_ARCHITECTURE,
921 static const struct sock_filter footer[] = {
927 struct sock_filter *f;
928 struct sock_fprog prog;
930 assert(syscall_filter);
932 /* First: count the syscalls to check for */
933 for (i = 0, n = 0; i < syscall_max(); i++)
934 if (syscall_filter[i >> 4] & (1 << (i & 31)))
937 /* Second: build the filter program from a header, the syscall
938 * matches and the footer */
939 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
940 memcpy(f, header, sizeof(header));
942 for (i = 0, n = 0; i < syscall_max(); i++)
943 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
944 struct sock_filter item[] = {
945 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
946 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
949 assert_cc(ELEMENTSOF(item) == 2);
951 f[ELEMENTSOF(header) + 2*n] = item[0];
952 f[ELEMENTSOF(header) + 2*n+1] = item[1];
957 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
959 /* Third: install the filter */
961 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
963 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
969 int exec_spawn(ExecCommand *command,
971 const ExecContext *context,
972 int fds[], unsigned n_fds,
974 bool apply_permissions,
976 bool apply_tty_stdin,
978 CGroupBonding *cgroup_bondings,
979 CGroupAttribute *cgroup_attributes,
980 const char *cgroup_suffix,
989 char _cleanup_strv_free_ **files_env = NULL;
994 assert(fds || n_fds <= 0);
996 if (context->std_input == EXEC_INPUT_SOCKET ||
997 context->std_output == EXEC_OUTPUT_SOCKET ||
998 context->std_error == EXEC_OUTPUT_SOCKET) {
1010 r = exec_context_load_environment(context, &files_env);
1012 log_struct_unit(LOG_ERR,
1014 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1021 argv = command->argv;
1023 line = exec_command_line(argv);
1027 log_struct_unit(LOG_DEBUG,
1029 "MESSAGE=About to execute %s", line,
1033 r = cgroup_bonding_realize_list(cgroup_bondings);
1037 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1046 const char *username = NULL, *home = NULL;
1047 uid_t uid = (uid_t) -1;
1048 gid_t gid = (gid_t) -1;
1049 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1050 **final_env = NULL, **final_argv = NULL;
1052 bool set_access = false;
1056 rename_process_from_path(command->path);
1058 /* We reset exactly these signals, since they are the
1059 * only ones we set to SIG_IGN in the main daemon. All
1060 * others we leave untouched because we set them to
1061 * SIG_DFL or a valid handler initially, both of which
1062 * will be demoted to SIG_DFL. */
1063 default_signals(SIGNALS_CRASH_HANDLER,
1064 SIGNALS_IGNORE, -1);
1066 if (context->ignore_sigpipe)
1067 ignore_signals(SIGPIPE, -1);
1069 assert_se(sigemptyset(&ss) == 0);
1070 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1072 r = EXIT_SIGNAL_MASK;
1077 if (idle_pipe[1] >= 0)
1078 close_nointr_nofail(idle_pipe[1]);
1079 if (idle_pipe[0] >= 0) {
1080 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1081 close_nointr_nofail(idle_pipe[0]);
1085 /* Close sockets very early to make sure we don't
1086 * block init reexecution because it cannot bind its
1089 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1090 socket_fd >= 0 ? 1 : n_fds);
1096 if (!context->same_pgrp)
1103 if (context->tcpwrap_name) {
1105 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1111 for (i = 0; i < (int) n_fds; i++) {
1112 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1120 exec_context_tty_reset(context);
1122 if (confirm_spawn) {
1125 err = ask_for_confirmation(&response, argv);
1126 if (err == -ETIMEDOUT)
1127 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1129 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1130 else if (response == 's') {
1131 write_confirm_message("Skipping execution.\n");
1135 } else if (response == 'n') {
1136 write_confirm_message("Failing execution.\n");
1142 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1143 * must be sure to drop O_NONBLOCK */
1145 fd_nonblock(socket_fd, false);
1147 err = setup_input(context, socket_fd, apply_tty_stdin);
1153 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1159 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1165 if (cgroup_bondings) {
1166 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1173 if (context->oom_score_adjust_set) {
1176 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1179 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1181 r = EXIT_OOM_ADJUST;
1186 if (context->nice_set)
1187 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1193 if (context->cpu_sched_set) {
1194 struct sched_param param;
1197 param.sched_priority = context->cpu_sched_priority;
1199 if (sched_setscheduler(0, context->cpu_sched_policy |
1200 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1202 r = EXIT_SETSCHEDULER;
1207 if (context->cpuset)
1208 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1210 r = EXIT_CPUAFFINITY;
1214 if (context->ioprio_set)
1215 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1221 if (context->timer_slack_nsec != (nsec_t) -1)
1222 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1224 r = EXIT_TIMERSLACK;
1228 if (context->utmp_id)
1229 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1231 if (context->user) {
1232 username = context->user;
1233 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1239 if (is_terminal_input(context->std_input)) {
1240 err = chown_terminal(STDIN_FILENO, uid);
1247 if (cgroup_bondings && context->control_group_modify) {
1248 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1250 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1260 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1261 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1268 if (apply_permissions) {
1269 err = enforce_groups(context, username, gid);
1276 umask(context->umask);
1279 if (apply_permissions && context->pam_name && username) {
1280 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1287 if (context->private_network) {
1288 if (unshare(CLONE_NEWNET) < 0) {
1297 if (strv_length(context->read_write_dirs) > 0 ||
1298 strv_length(context->read_only_dirs) > 0 ||
1299 strv_length(context->inaccessible_dirs) > 0 ||
1300 context->mount_flags != 0 ||
1301 context->private_tmp) {
1302 err = setup_namespace(context->read_write_dirs,
1303 context->read_only_dirs,
1304 context->inaccessible_dirs,
1305 context->private_tmp,
1306 context->mount_flags);
1314 if (context->root_directory)
1315 if (chroot(context->root_directory) < 0) {
1321 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1327 char _cleanup_free_ *d = NULL;
1329 if (asprintf(&d, "%s/%s",
1330 context->root_directory ? context->root_directory : "",
1331 context->working_directory ? context->working_directory : "") < 0) {
1344 /* We repeat the fd closing here, to make sure that
1345 * nothing is leaked from the PAM modules */
1346 err = close_all_fds(fds, n_fds);
1348 err = shift_fds(fds, n_fds);
1350 err = flags_fds(fds, n_fds, context->non_blocking);
1356 if (apply_permissions) {
1358 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1359 if (!context->rlimit[i])
1362 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1369 if (context->capability_bounding_set_drop) {
1370 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1372 r = EXIT_CAPABILITIES;
1377 if (context->user) {
1378 err = enforce_user(context, uid);
1385 /* PR_GET_SECUREBITS is not privileged, while
1386 * PR_SET_SECUREBITS is. So to suppress
1387 * potential EPERMs we'll try not to call
1388 * PR_SET_SECUREBITS unless necessary. */
1389 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1390 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1392 r = EXIT_SECUREBITS;
1396 if (context->capabilities)
1397 if (cap_set_proc(context->capabilities) < 0) {
1399 r = EXIT_CAPABILITIES;
1403 if (context->no_new_privileges)
1404 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1406 r = EXIT_NO_NEW_PRIVILEGES;
1410 if (context->syscall_filter) {
1411 err = apply_seccomp(context->syscall_filter);
1419 if (!(our_env = new0(char*, 7))) {
1426 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1427 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1434 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1441 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1442 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1448 if (is_terminal_input(context->std_input) ||
1449 context->std_output == EXEC_OUTPUT_TTY ||
1450 context->std_error == EXEC_OUTPUT_TTY)
1451 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1459 if (!(final_env = strv_env_merge(
1463 context->environment,
1472 if (!(final_argv = replace_env_argv(argv, final_env))) {
1478 final_env = strv_env_clean(final_env);
1480 execve(command->path, final_argv, final_env);
1487 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1488 "EXECUTABLE=%s", command->path,
1489 "MESSAGE=Failed at step %s spawning %s: %s",
1490 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1491 command->path, strerror(-err),
1500 log_struct_unit(LOG_DEBUG,
1502 "MESSAGE=Forked %s as %lu",
1503 command->path, (unsigned long) pid,
1506 /* We add the new process to the cgroup both in the child (so
1507 * that we can be sure that no user code is ever executed
1508 * outside of the cgroup) and in the parent (so that we can be
1509 * sure that when we kill the cgroup the process will be
1511 if (cgroup_bondings)
1512 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1514 exec_status_start(&command->exec_status, pid);
1520 void exec_context_init(ExecContext *c) {
1524 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1525 c->cpu_sched_policy = SCHED_OTHER;
1526 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1527 c->syslog_level_prefix = true;
1528 c->control_group_persistent = -1;
1529 c->ignore_sigpipe = true;
1530 c->timer_slack_nsec = (nsec_t) -1;
1533 void exec_context_done(ExecContext *c) {
1538 strv_free(c->environment);
1539 c->environment = NULL;
1541 strv_free(c->environment_files);
1542 c->environment_files = NULL;
1544 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1546 c->rlimit[l] = NULL;
1549 free(c->working_directory);
1550 c->working_directory = NULL;
1551 free(c->root_directory);
1552 c->root_directory = NULL;
1557 free(c->tcpwrap_name);
1558 c->tcpwrap_name = NULL;
1560 free(c->syslog_identifier);
1561 c->syslog_identifier = NULL;
1569 strv_free(c->supplementary_groups);
1570 c->supplementary_groups = NULL;
1575 if (c->capabilities) {
1576 cap_free(c->capabilities);
1577 c->capabilities = NULL;
1580 strv_free(c->read_only_dirs);
1581 c->read_only_dirs = NULL;
1583 strv_free(c->read_write_dirs);
1584 c->read_write_dirs = NULL;
1586 strv_free(c->inaccessible_dirs);
1587 c->inaccessible_dirs = NULL;
1590 CPU_FREE(c->cpuset);
1595 free(c->syscall_filter);
1596 c->syscall_filter = NULL;
1599 void exec_command_done(ExecCommand *c) {
1609 void exec_command_done_array(ExecCommand *c, unsigned n) {
1612 for (i = 0; i < n; i++)
1613 exec_command_done(c+i);
1616 void exec_command_free_list(ExecCommand *c) {
1620 LIST_REMOVE(ExecCommand, command, c, i);
1621 exec_command_done(i);
1626 void exec_command_free_array(ExecCommand **c, unsigned n) {
1629 for (i = 0; i < n; i++) {
1630 exec_command_free_list(c[i]);
1635 int exec_context_load_environment(const ExecContext *c, char ***l) {
1636 char **i, **r = NULL;
1641 STRV_FOREACH(i, c->environment_files) {
1644 bool ignore = false;
1656 if (!path_is_absolute(fn)) {
1665 /* Filename supports globbing, take all matching files */
1668 if (glob(fn, 0, NULL, &pglob) != 0) {
1674 return errno ? -errno : -EINVAL;
1676 count = pglob.gl_pathc;
1685 for (n = 0; n < count; n++) {
1686 k = load_env_file(pglob.gl_pathv[n], &p);
1701 m = strv_env_merge(2, r, p);
1721 static bool tty_may_match_dev_console(const char *tty) {
1722 char *active = NULL, *console;
1725 if (startswith(tty, "/dev/"))
1728 /* trivial identity? */
1729 if (streq(tty, "console"))
1732 console = resolve_dev_console(&active);
1733 /* if we could not resolve, assume it may */
1737 /* "tty0" means the active VC, so it may be the same sometimes */
1738 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
1744 bool exec_context_may_touch_console(ExecContext *ec) {
1745 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1746 is_terminal_input(ec->std_input) ||
1747 is_terminal_output(ec->std_output) ||
1748 is_terminal_output(ec->std_error)) &&
1749 tty_may_match_dev_console(tty_path(ec));
1752 static void strv_fprintf(FILE *f, char **l) {
1758 fprintf(f, " %s", *g);
1761 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1773 "%sWorkingDirectory: %s\n"
1774 "%sRootDirectory: %s\n"
1775 "%sNonBlocking: %s\n"
1776 "%sPrivateTmp: %s\n"
1777 "%sControlGroupModify: %s\n"
1778 "%sControlGroupPersistent: %s\n"
1779 "%sPrivateNetwork: %s\n"
1780 "%sIgnoreSIGPIPE: %s\n",
1782 prefix, c->working_directory ? c->working_directory : "/",
1783 prefix, c->root_directory ? c->root_directory : "/",
1784 prefix, yes_no(c->non_blocking),
1785 prefix, yes_no(c->private_tmp),
1786 prefix, yes_no(c->control_group_modify),
1787 prefix, yes_no(c->control_group_persistent),
1788 prefix, yes_no(c->private_network),
1789 prefix, yes_no(c->ignore_sigpipe));
1791 STRV_FOREACH(e, c->environment)
1792 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1794 STRV_FOREACH(e, c->environment_files)
1795 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1797 if (c->tcpwrap_name)
1799 "%sTCPWrapName: %s\n",
1800 prefix, c->tcpwrap_name);
1807 if (c->oom_score_adjust_set)
1809 "%sOOMScoreAdjust: %i\n",
1810 prefix, c->oom_score_adjust);
1812 for (i = 0; i < RLIM_NLIMITS; i++)
1814 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1816 if (c->ioprio_set) {
1820 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1824 "%sIOSchedulingClass: %s\n"
1825 "%sIOPriority: %i\n",
1826 prefix, strna(class_str),
1827 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1831 if (c->cpu_sched_set) {
1835 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1839 "%sCPUSchedulingPolicy: %s\n"
1840 "%sCPUSchedulingPriority: %i\n"
1841 "%sCPUSchedulingResetOnFork: %s\n",
1842 prefix, strna(policy_str),
1843 prefix, c->cpu_sched_priority,
1844 prefix, yes_no(c->cpu_sched_reset_on_fork));
1849 fprintf(f, "%sCPUAffinity:", prefix);
1850 for (i = 0; i < c->cpuset_ncpus; i++)
1851 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1852 fprintf(f, " %i", i);
1856 if (c->timer_slack_nsec != (nsec_t) -1)
1857 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1860 "%sStandardInput: %s\n"
1861 "%sStandardOutput: %s\n"
1862 "%sStandardError: %s\n",
1863 prefix, exec_input_to_string(c->std_input),
1864 prefix, exec_output_to_string(c->std_output),
1865 prefix, exec_output_to_string(c->std_error));
1871 "%sTTYVHangup: %s\n"
1872 "%sTTYVTDisallocate: %s\n",
1873 prefix, c->tty_path,
1874 prefix, yes_no(c->tty_reset),
1875 prefix, yes_no(c->tty_vhangup),
1876 prefix, yes_no(c->tty_vt_disallocate));
1878 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1879 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1880 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1881 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1882 char *fac_str, *lvl_str;
1885 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1889 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1894 "%sSyslogFacility: %s\n"
1895 "%sSyslogLevel: %s\n",
1896 prefix, strna(fac_str),
1897 prefix, strna(lvl_str));
1902 if (c->capabilities) {
1904 if ((t = cap_to_text(c->capabilities, NULL))) {
1905 fprintf(f, "%sCapabilities: %s\n",
1912 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1914 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1915 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1916 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1917 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1918 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1919 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1921 if (c->capability_bounding_set_drop) {
1923 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1925 for (l = 0; l <= cap_last_cap(); l++)
1926 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1929 if ((t = cap_to_name(l))) {
1930 fprintf(f, " %s", t);
1939 fprintf(f, "%sUser: %s\n", prefix, c->user);
1941 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1943 if (strv_length(c->supplementary_groups) > 0) {
1944 fprintf(f, "%sSupplementaryGroups:", prefix);
1945 strv_fprintf(f, c->supplementary_groups);
1950 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1952 if (strv_length(c->read_write_dirs) > 0) {
1953 fprintf(f, "%sReadWriteDirs:", prefix);
1954 strv_fprintf(f, c->read_write_dirs);
1958 if (strv_length(c->read_only_dirs) > 0) {
1959 fprintf(f, "%sReadOnlyDirs:", prefix);
1960 strv_fprintf(f, c->read_only_dirs);
1964 if (strv_length(c->inaccessible_dirs) > 0) {
1965 fprintf(f, "%sInaccessibleDirs:", prefix);
1966 strv_fprintf(f, c->inaccessible_dirs);
1972 "%sUtmpIdentifier: %s\n",
1973 prefix, c->utmp_id);
/* exec_status_start: begin tracking a process in the ExecStatus record *s.
 *
 * s   - status record to update
 * pid - process the record is for
 *
 * The only statement visible in this listing stamps s->start_timestamp via
 * dual_timestamp_get().
 * NOTE(review): listing lines 1977-1980 are not shown; presumably they
 * validate s, clear the record and store pid -- confirm against the full
 * file. */
1976 void exec_status_start(ExecStatus *s, pid_t pid) {
1981 dual_timestamp_get(&s->start_timestamp);
/* exec_status_exit: record in *s that process pid has terminated.
 *
 * s       - status record to update
 * context - execution context; its utmp_id is read and it is handed to
 *           exec_context_tty_reset()
 * pid     - process that exited
 * code    - forwarded to utmp_put_dead_process() together with status
 * status  - see code
 *
 * NOTE(review): several lines of this function are elided in the listing
 * (1985-1986, 1988-1990, 1992-1996, 1999); the notes below cover only what
 * is visible. */
1984 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* Detects a record that already carries a non-zero pid different from the
 * one that exited. The action taken is on an elided line -- presumably the
 * stale record is reset; confirm. */
1987 if (s->pid && s->pid != pid)
1991 dual_timestamp_get(&s->exit_timestamp);
/* A dead-process utmp entry is written only when the context has a utmp
 * identifier. */
1997 if (context->utmp_id)
1998 utmp_put_dead_process(context->utmp_id, pid, code, status);
/* NOTE(review): a guard on context may exist on an elided line above;
 * verify before assuming this call is unconditional. */
2000 exec_context_tty_reset(context);
/* exec_status_dump: write a textual description of *s to the stream f.
 *
 * s      - status record to print
 * f      - output stream
 * prefix - string passed for the leading %s of every format line
 *
 * Visible output: s->pid (passed cast to unsigned long; its format string is
 * on an elided line), "Start Timestamp", "Exit Timestamp" and "Exit Status".
 * NOTE(review): the fprintf() openers, listing line 2028 (one more format
 * line) and the final argument after sigchld_code_to_string() are not shown
 * here. */
2004 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() for both timestamps. */
2005 char buf[FORMAT_TIMESTAMP_MAX];
2018 prefix, (unsigned long) s->pid);
/* The start timestamp is printed only when its realtime value is above 0. */
2020 if (s->start_timestamp.realtime > 0)
2022 "%sStart Timestamp: %s\n",
2023 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise, the exit information is printed only when an exit timestamp with
 * a realtime value above 0 is present. */
2025 if (s->exit_timestamp.realtime > 0)
2027 "%sExit Timestamp: %s\n"
2029 "%sExit Status: %i\n",
2030 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2031 prefix, sigchld_code_to_string(s->code),
/* exec_command_line: build a single string from the argument vector argv.
 *
 * The visible code iterates argv twice with STRV_FOREACH, allocates a char
 * buffer n of k elements with new() in between, and in the second pass
 * checks each argument for WHITESPACE characters with strpbrk().
 * NOTE(review): the computation of k, the copy statements and the return
 * are elided in this listing. Presumably the first pass sizes the buffer,
 * arguments containing whitespace are quoted, and NULL is returned when the
 * allocation fails -- confirm against the full file. */
2035 char *exec_command_line(char **argv) {
2043 STRV_FOREACH(a, argv)
2046 if (!(n = new(char, k)))
2050 STRV_FOREACH(a, argv) {
2057 if (strpbrk(*a, WHITESPACE)) {
2068 /* FIXME: this doesn't really handle arguments that have
2069 * spaces and ticks in them */
/* exec_command_dump: print one ExecCommand to the stream f.
 *
 * c      - command to describe; c->argv and c->exec_status are read
 * f      - output stream
 * prefix - string passed for the leading %s of the "Command Line" line
 *
 * Prints a "Command Line:" line built from c->argv, then hands
 * c->exec_status to exec_status_dump() with prefix2 as its prefix.
 * NOTE(review): declarations of p2/cmd, the fprintf() opener and any cleanup
 * of p2/cmd are on elided lines. */
2074 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2076 const char *prefix2;
/* prefix2 is prefix with a tab appended; when strappend() yields NULL the
 * plain prefix is used instead. */
2085 p2 = strappend(prefix, "\t");
2086 prefix2 = p2 ? p2 : prefix;
2088 cmd = exec_command_line(c->argv);
/* If no command line string was produced, the ENOMEM error text is printed
 * in its place. */
2091 "%sCommand Line: %s\n",
2092 prefix, cmd ? cmd : strerror(ENOMEM));
2096 exec_status_dump(&c->exec_status, f, prefix2);
/* exec_command_dump_list: call exec_command_dump() for each entry of the
 * command list that starts at c, passing f and prefix through unchanged.
 *
 * NOTE(review): listing lines 2102-2106 are not shown; they may validate
 * or adjust the arguments -- confirm. */
2101 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c is passed to LIST_FOREACH twice (its second and third
 * arguments), so it also serves as the iteration variable. */
2107 LIST_FOREACH(command, c, c)
2108 exec_command_dump(c, f, prefix);
/* exec_command_append_list: add the command e at the end of the list *l.
 *
 * l - address of the list head
 * e - entry to append
 *
 * NOTE(review): listing lines 2112-2117 and 2121 onwards are not shown, so
 * any handling of an empty list is not visible here. */
2111 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2118 /* Order matters here: locate the current tail, then insert e after it */
2119 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2120 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* exec_command_set: variadic setter for an ExecCommand.
 *
 * c    - command to modify
 * path - first fixed argument; it is passed to strv_new_ap() together with
 *        the variadic arguments and is also duplicated with strdup()
 * ...  - further strings consumed through the va_list ap
 *
 * NOTE(review): the va_start/va_end calls, the error paths and the
 * statements that store l and p into c are elided in this listing.
 * Presumably a negative errno-style value is returned on allocation failure
 * and 0 on success -- confirm against the full file. */
2125 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* Build the string vector from path plus the variadic arguments. */
2133 l = strv_new_ap(path, ap);
/* Separate copy of path; a NULL result from strdup() enters the
 * (elided) failure branch. */
2139 if (!(p = strdup(path))) {
/* String names for the ExecInput enum values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX. */
2153 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2154 [EXEC_INPUT_NULL] = "null",
2155 [EXEC_INPUT_TTY] = "tty",
2156 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2157 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2158 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably this macro expands to the lookup helpers for the
 * table, such as the exec_input_to_string() used by the dump code in this
 * file -- confirm against the macro definition. */
2161 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum values, indexed by enum value and
 * sized by _EXEC_OUTPUT_MAX. The "+console" names belong to the
 * *_AND_CONSOLE variants of syslog, kmsg and journal. */
2163 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2164 [EXEC_OUTPUT_INHERIT] = "inherit",
2165 [EXEC_OUTPUT_NULL] = "null",
2166 [EXEC_OUTPUT_TTY] = "tty",
2167 [EXEC_OUTPUT_SYSLOG] = "syslog",
2168 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2169 [EXEC_OUTPUT_KMSG] = "kmsg",
2170 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2171 [EXEC_OUTPUT_JOURNAL] = "journal",
2172 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2173 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably this macro expands to the lookup helpers for the
 * table, such as the exec_output_to_string() used by the dump code in this
 * file -- confirm against the macro definition. */
2176 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);