1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
46 #include <security/pam_appl.h>
52 #include "capability.h"
55 #include "sd-messages.h"
57 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
71 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
74 /* This assumes there is a 'tty' group */
/* shift_fds: moves the passed descriptors with fcntl(F_DUPFD), requesting
 * slot i+3 for array index i, and closes the descriptor that was duplicated.
 * If the kernel returns a different number than i+3, the first such index is
 * remembered in restart_from so the pass can be repeated starting there.
 * The fds array is modified in place. */
77 static int shift_fds(int fds[], unsigned n_fds) {
78 int start, restart_from;
83 /* Modifies the fds array! (sorts it) */
93 for (i = start; i < (int) n_fds; i++) {
96 /* Already at right index? */
100 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
103 close_nointr_nofail(fds[i]);
106 /* Hmm, the fd we wanted isn't free? Then
107 * let's remember that and try again from here*/
108 if (nfd != i+3 && restart_from < 0)
112 if (restart_from < 0)
115 start = restart_from;
/* flags_fds: for each of the n_fds descriptors, applies the requested
 * O_NONBLOCK state through fd_nonblock() and clears FD_CLOEXEC through
 * fd_cloexec(fd, false). Both helper results are stored in r and tested
 * for a negative value. */
121 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
130 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
132 for (i = 0; i < n_fds; i++) {
134 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
137 /* We unconditionally drop FD_CLOEXEC from the fds,
138 * since after all we want to pass these fds to our
141 if ((r = fd_cloexec(fds[i], false)) < 0)
/* tty_path: returns the TTY path configured in the context, or the
 * literal "/dev/console" when context->tty_path is unset. */
148 _pure_ static const char *tty_path(const ExecContext *context) {
151 if (context->tty_path)
152 return context->tty_path;
154 return "/dev/console";
/* exec_context_tty_reset: applies the context's TTY housekeeping flags.
 * tty_vhangup and tty_reset act on tty_path(context), which falls back to
 * /dev/console; tty_vt_disallocate only acts when an explicit
 * context->tty_path is configured. */
157 void exec_context_tty_reset(const ExecContext *context) {
160 if (context->tty_vhangup)
161 terminal_vhangup(tty_path(context));
163 if (context->tty_reset)
164 reset_terminal(tty_path(context));
166 if (context->tty_vt_disallocate && context->tty_path)
167 vt_disallocate(context->tty_path);
/* is_terminal_output: compares o against the output modes that involve a
 * terminal: EXEC_OUTPUT_TTY and the SYSLOG, KMSG and JOURNAL variants that
 * also write to the console. */
170 static bool is_terminal_output(ExecOutput o) {
172 o == EXEC_OUTPUT_TTY ||
173 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
174 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
175 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* exec_context_serialize: writes the context's temporary directory paths
 * to f via unit_serialize_item(), under the keys "tmp-dir" and
 * "var-tmp-dir". Each item is emitted only when the path is set. */
178 void exec_context_serialize(const ExecContext *context, Unit *u, FILE *f) {
183 if (context->tmp_dir)
184 unit_serialize_item(u, f, "tmp-dir", context->tmp_dir);
186 if (context->var_tmp_dir)
187 unit_serialize_item(u, f, "var-tmp-dir", context->var_tmp_dir);
/* open_null_as: opens /dev/null with the given flags plus O_NOCTTY and
 * duplicates it onto descriptor nfd. r becomes nfd on success or -errno
 * when dup2() fails; the temporary descriptor is closed afterwards. */
190 static int open_null_as(int flags, int nfd) {
195 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
199 r = dup2(fd, nfd) < 0 ? -errno : nfd;
200 close_nointr_nofail(fd);
/* connect_logger_as: connects a stream socket to
 * /run/systemd/journal/stdout, shuts down its read side, sends a header
 * built from the context (syslog identifier falling back to ident,
 * priority, level-prefix flag, and booleans derived from the output mode),
 * and finally duplicates the socket onto nfd. r becomes nfd or -errno from
 * dup2(). The socket is closed on the visible failure paths and after the
 * dup2(). */
207 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
209 union sockaddr_union sa = {
210 .un.sun_family = AF_UNIX,
211 .un.sun_path = "/run/systemd/journal/stdout",
215 assert(output < _EXEC_OUTPUT_MAX);
219 fd = socket(AF_UNIX, SOCK_STREAM, 0);
223 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
225 close_nointr_nofail(fd);
229 if (shutdown(fd, SHUT_RD) < 0) {
230 close_nointr_nofail(fd);
242 context->syslog_identifier ? context->syslog_identifier : ident,
244 context->syslog_priority,
245 !!context->syslog_level_prefix,
246 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
247 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
248 is_terminal_output(output));
251 r = dup2(fd, nfd) < 0 ? -errno : nfd;
252 close_nointr_nofail(fd);
/* open_terminal_as: opens the terminal at path via open_terminal() with
 * mode plus O_NOCTTY and duplicates it onto nfd. r becomes nfd or -errno
 * from dup2(); the temporary descriptor is closed afterwards. */
258 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
264 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
268 r = dup2(fd, nfd) < 0 ? -errno : nfd;
269 close_nointr_nofail(fd);
/* is_terminal_input: compares i against the three TTY input modes
 * (EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE, EXEC_INPUT_TTY_FAIL). */
276 static bool is_terminal_input(ExecInput i) {
278 i == EXEC_INPUT_TTY ||
279 i == EXEC_INPUT_TTY_FORCE ||
280 i == EXEC_INPUT_TTY_FAIL;
/* fixup_input: downgrades the configured input mode to EXEC_INPUT_NULL
 * when it cannot be honoured: a TTY mode while apply_tty_stdin is false,
 * or EXEC_INPUT_SOCKET without a valid socket_fd. */
283 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
285 if (is_terminal_input(std_input) && !apply_tty_stdin)
286 return EXEC_INPUT_NULL;
288 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
289 return EXEC_INPUT_NULL;
/* fixup_output: replaces EXEC_OUTPUT_SOCKET with EXEC_OUTPUT_INHERIT when
 * no valid socket_fd is available. */
294 static int fixup_output(ExecOutput std_output, int socket_fd) {
296 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
297 return EXEC_OUTPUT_INHERIT;
/* setup_input: installs standard input according to the (fixed-up) input
 * mode. NULL opens /dev/null read-only on STDIN_FILENO; the TTY modes
 * acquire the terminal and dup2() it onto STDIN_FILENO if it did not
 * already land there; SOCKET duplicates socket_fd onto STDIN_FILENO.
 * The visible return expressions yield STDIN_FILENO or -errno. */
302 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
307 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
311 case EXEC_INPUT_NULL:
312 return open_null_as(O_RDONLY, STDIN_FILENO);
315 case EXEC_INPUT_TTY_FORCE:
316 case EXEC_INPUT_TTY_FAIL: {
319 if ((fd = acquire_terminal(
321 i == EXEC_INPUT_TTY_FAIL,
322 i == EXEC_INPUT_TTY_FORCE,
327 if (fd != STDIN_FILENO) {
328 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
329 close_nointr_nofail(fd);
336 case EXEC_INPUT_SOCKET:
337 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
340 assert_not_reached("Unknown input type");
/* setup_output: installs descriptor fileno (stdout or stderr) according to
 * the fixed-up input and output modes.
 *
 * For STDERR_FILENO the std_error mode is consulted as well: when it equals
 * the stdout mode or is INHERIT, stderr is duplicated from STDOUT_FILENO.
 * For an inherited stdout, the descriptor is taken from the original TTY
 * (if the input was downgraded from a terminal mode), from stdin (if stdin
 * is not /dev/null), or from a freshly opened write-only /dev/null.
 *
 * The explicit modes are handled by the switch: NULL, TTY, the
 * syslog/kmsg/journal family (via connect_logger_as(), with a critical log
 * message and a /dev/null fallback visible on the failure path), and SOCKET.
 * The visible return expressions yield fileno or -errno. */
344 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
352 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
353 o = fixup_output(context->std_output, socket_fd);
355 if (fileno == STDERR_FILENO) {
357 e = fixup_output(context->std_error, socket_fd);
359 /* This expects the input and output are already set up */
361 /* Don't change the stderr file descriptor if we inherit all
362 * the way and are not on a tty */
363 if (e == EXEC_OUTPUT_INHERIT &&
364 o == EXEC_OUTPUT_INHERIT &&
365 i == EXEC_INPUT_NULL &&
366 !is_terminal_input(context->std_input) &&
370 /* Duplicate from stdout if possible */
371 if (e == o || e == EXEC_OUTPUT_INHERIT)
372 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
376 } else if (o == EXEC_OUTPUT_INHERIT) {
377 /* If input got downgraded, inherit the original value */
378 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
379 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
381 /* If the input is connected to anything that's not a /dev/null, inherit that... */
382 if (i != EXEC_INPUT_NULL)
383 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
385 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
389 /* We need to open /dev/null here anew, to get the right access mode. */
390 return open_null_as(O_WRONLY, fileno);
395 case EXEC_OUTPUT_NULL:
396 return open_null_as(O_WRONLY, fileno);
398 case EXEC_OUTPUT_TTY:
399 if (is_terminal_input(i))
400 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
402 /* We don't reset the terminal if this is just about output */
403 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
405 case EXEC_OUTPUT_SYSLOG:
406 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
407 case EXEC_OUTPUT_KMSG:
408 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
409 case EXEC_OUTPUT_JOURNAL:
410 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
411 r = connect_logger_as(context, o, ident, unit_id, fileno);
413 log_struct_unit(LOG_CRIT, unit_id,
414 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
415 fileno == STDOUT_FILENO ? "out" : "err",
416 unit_id, strerror(-r),
419 r = open_null_as(O_WRONLY, fileno);
423 case EXEC_OUTPUT_SOCKET:
424 assert(socket_fd >= 0);
425 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
428 assert_not_reached("Unknown error type");
/* chown_terminal: tries to hand the terminal behind fd to uid with mode
 * TTY_MODE. The fchown()/fchmod() results are deliberately ignored; the
 * outcome is judged by a following fstat() that checks whether the owner
 * and the permission bits (masked with 0777) match what was requested. */
432 static int chown_terminal(int fd, uid_t uid) {
437 /* This might fail. What matters are the results. */
438 (void) fchown(fd, uid, -1);
439 (void) fchmod(fd, TTY_MODE);
441 if (fstat(fd, &st) < 0)
444 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* setup_confirm_stdio: temporarily redirects stdin and stdout to an
 * acquired terminal so that a confirmation prompt can be shown.
 * The current stdin/stdout are first duplicated to descriptors >= 3 with
 * fcntl(F_DUPFD); on success those copies are handed back through
 * _saved_stdin and _saved_stdout for a later restore. The cleanup code at
 * the bottom closes whichever saved copies and terminal descriptor exist. */
450 static int setup_confirm_stdio(int *_saved_stdin,
451 int *_saved_stdout) {
452 int fd = -1, saved_stdin, saved_stdout = -1, r;
454 assert(_saved_stdin);
455 assert(_saved_stdout);
457 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
461 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
462 if (saved_stdout < 0) {
467 fd = acquire_terminal(
472 DEFAULT_CONFIRM_USEC);
478 r = chown_terminal(fd, getuid());
482 if (dup2(fd, STDIN_FILENO) < 0) {
487 if (dup2(fd, STDOUT_FILENO) < 0) {
493 close_nointr_nofail(fd);
495 *_saved_stdin = saved_stdin;
496 *_saved_stdout = saved_stdout;
501 if (saved_stdout >= 0)
502 close_nointr_nofail(saved_stdout);
504 if (saved_stdin >= 0)
505 close_nointr_nofail(saved_stdin);
508 close_nointr_nofail(fd);
/* write_confirm_message: printf-style helper that opens /dev/console
 * write-only (O_NOCTTY, O_CLOEXEC), prints the formatted message to it with
 * vdprintf() and closes the descriptor again. */
513 _printf_attr_(1, 2) static int write_confirm_message(const char *format, ...) {
519 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
523 va_start(ap, format);
524 vdprintf(fd, format, ap);
527 close_nointr_nofail(fd);
/* restore_confirm_stdio: undoes setup_confirm_stdio(). Each saved
 * descriptor that is valid (>= 0) is duplicated back onto STDIN_FILENO or
 * STDOUT_FILENO respectively, and the saved copies are then closed. */
532 static int restore_confirm_stdio(int *saved_stdin,
538 assert(saved_stdout);
542 if (*saved_stdin >= 0)
543 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
546 if (*saved_stdout >= 0)
547 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
550 if (*saved_stdin >= 0)
551 close_nointr_nofail(*saved_stdin);
553 if (*saved_stdout >= 0)
554 close_nointr_nofail(*saved_stdout);
/* ask_for_confirmation: redirects stdio to the terminal, asks
 * "Execute <command line>? [Yes, No, Skip]" accepting one of the
 * characters y, n or s into *response, and restores stdio afterwards. */
559 static int ask_for_confirmation(char *response, char **argv) {
560 int saved_stdout = -1, saved_stdin = -1, r;
563 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
567 line = exec_command_line(argv);
571 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
574 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* enforce_groups: establishes the process's group identity.
 * When a group or user name is configured, the group (if named) is resolved
 * with get_group_creds(), the supplementary list is initialised with
 * initgroups() for a non-root gid, and setresgid() switches real,
 * effective and saved gid. Configured supplementary groups are then
 * resolved one by one, appended to the list obtained from getgroups()
 * (bounded by _SC_NGROUPS_MAX) and installed with setgroups(). */
579 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
580 bool keep_groups = false;
585 /* Lookup and set GID and supplementary group list. Here too
586 * we avoid NSS lookups for gid=0. */
588 if (context->group || username) {
590 if (context->group) {
591 const char *g = context->group;
593 if ((r = get_group_creds(&g, &gid)) < 0)
597 /* First step, initialize groups from /etc/group */
598 if (username && gid != 0) {
599 if (initgroups(username, gid) < 0)
605 /* Second step, set our gids */
606 if (setresgid(gid, gid, gid) < 0)
610 if (context->supplementary_groups) {
615 /* Final step, initialize any manually set supplementary groups */
616 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
618 if (!(gids = new(gid_t, ngroups_max)))
622 if ((k = getgroups(ngroups_max, gids)) < 0) {
629 STRV_FOREACH(i, context->supplementary_groups) {
632 if (k >= ngroups_max) {
638 r = get_group_creds(&g, gids+k);
647 if (setgroups(k, gids) < 0) {
/* enforce_user: switches real, effective and saved uid to the given uid
 * with setresuid(). When the context carries a capability set, it first
 * sets the keep-caps secure bit (only calling PR_SET_SECUREBITS when the
 * current value differs) and installs a copy of the configured
 * capabilities extended by CAP_SETUID and CAP_SETPCAP, so that the uid
 * change and a later securebits update remain possible. */
658 static int enforce_user(const ExecContext *context, uid_t uid) {
662 /* Sets (but doesn't lookup) the uid and make sure we keep the
663 * capabilities while doing so. */
665 if (context->capabilities) {
667 static const cap_value_t bits[] = {
668 CAP_SETUID, /* Necessary so that we can run setresuid() below */
669 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
672 /* First step: If we need to keep capabilities but
673 * drop privileges we need to make sure we keep our
674 * caps, while we drop privileges. */
676 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
678 if (prctl(PR_GET_SECUREBITS) != sb)
679 if (prctl(PR_SET_SECUREBITS, sb) < 0)
683 /* Second step: set the capabilities. This will reduce
684 * the capabilities to the minimum we need. */
686 if (!(d = cap_dup(context->capabilities)))
689 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
690 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
696 if (cap_set_proc(d) < 0) {
705 /* Third step: actually set the uids */
706 if (setresuid(uid, uid, uid) < 0)
709 /* At this point we should have all necessary capabilities but
710 are otherwise a normal user. However, the caps might have been
711 corrupted due to the setresuid() so we need clean them up
712 later. This is done outside of this call. */
/* null_conv: PAM conversation callback used by setup_pam(); as the comment
 * below states, conversations are not supported. */
719 static int null_conv(
721 const struct pam_message **msg,
722 struct pam_response **resp,
725 /* We don't support conversations */
/* setup_pam: opens a PAM session for the given service name and user and
 * forks a helper process that closes the session again once the caller's
 * process has gone away.
 *
 * Visible sequence: pam_start(), pam_set_item(PAM_TTY), pam_acct_mgmt(),
 * pam_open_session(), pam_getenvlist(). SIGTERM is blocked before fork().
 * The child renames itself "(sd-pam)", closes the passed fds, drops to uid,
 * arms PR_SET_PDEATHSIG with SIGTERM, waits for that signal with sigwait()
 * while the parent is still alive, then calls pam_close_session() and
 * pam_end(). The parent restores its signal mask. On failure any PAM error
 * is reported as -EPERM, the session is closed if it had been opened, and a
 * forked helper is sent SIGTERM followed by SIGCONT. */
730 static int setup_pam(
736 int fds[], unsigned n_fds) {
738 static const struct pam_conv conv = {
743 pam_handle_t *handle = NULL;
745 int pam_code = PAM_SUCCESS;
748 bool close_session = false;
749 pid_t pam_pid = 0, parent_pid;
755 /* We set up PAM in the parent process, then fork. The child
756 * will then stay around until killed via PR_SET_PDEATHSIG or
757 * systemd via the cgroup logic. It will then remove the PAM
758 * session again. The parent process will exec() the actual
759 * daemon. We do things this way to ensure that the main PID
760 * of the daemon is the one we initially fork()ed. */
762 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
768 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
771 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
774 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
777 close_session = true;
779 if ((!(e = pam_getenvlist(handle)))) {
780 pam_code = PAM_BUF_ERR;
784 /* Block SIGTERM, so that we know that it won't get lost in
786 if (sigemptyset(&ss) < 0 ||
787 sigaddset(&ss, SIGTERM) < 0 ||
788 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
791 parent_pid = getpid();
793 if ((pam_pid = fork()) < 0)
800 /* The child's job is to reset the PAM session on
803 /* This string must fit in 10 chars (i.e. the length
804 * of "/sbin/init"), to look pretty in /bin/ps */
805 rename_process("(sd-pam)");
807 /* Make sure we don't keep open the passed fds in this
808 child. We assume that otherwise only those fds are
809 open here that have been opened by PAM. */
810 close_many(fds, n_fds);
812 /* Drop privileges - we don't need any to pam_close_session
813 * and this will make PR_SET_PDEATHSIG work in most cases.
814 * If this fails, ignore the error - but expect sd-pam threads
815 * to fail to exit normally */
/* NOTE(review): setresuid() reports its failure reason in errno, but the
 * message below formats strerror(-r), and r is not assigned from this
 * call. The logged text is therefore likely unrelated to the real error;
 * strerror(errno) looks like what was intended - confirm. */
816 if (setresuid(uid, uid, uid) < 0)
817 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
819 /* Wait until our parent died. This will only work if
820 * the above setresuid() succeeds, otherwise the kernel
821 * will not allow unprivileged parents kill their privileged
822 * children this way. We rely on the control groups kill logic
823 * to do the rest for us. */
824 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
827 /* Check if our parent process might already have
829 if (getppid() == parent_pid) {
831 if (sigwait(&ss, &sig) < 0) {
838 assert(sig == SIGTERM);
843 /* If our parent died we'll end the session */
844 if (getppid() != parent_pid)
845 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
851 pam_end(handle, pam_code | PAM_DATA_SILENT);
855 /* If the child was forked off successfully it will do all the
856 * cleanups, so forget about the handle here. */
859 /* Unblock SIGTERM again in the parent */
860 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
863 /* We close the log explicitly here, since the PAM modules
864 * might have opened it, but we don't want this fd around. */
873 if (pam_code != PAM_SUCCESS)
874 err = -EPERM; /* PAM errors do not map to errno */
880 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
882 pam_end(handle, pam_code | PAM_DATA_SILENT);
890 kill(pam_pid, SIGTERM);
891 kill(pam_pid, SIGCONT);
/* rename_process_from_path: renames the current process to the file name
 * component of path wrapped in parentheses, e.g. "(name)". The result is
 * assembled in an 11-byte buffer; "(...)" is used as a fallback name.
 * l bytes of the name are copied between the parentheses. */
898 static void rename_process_from_path(const char *path) {
899 char process_name[11];
903 /* This resulting string must fit in 10 chars (i.e. the length
904 * of "/sbin/init") to look pretty in /bin/ps */
906 p = path_get_file_name(path);
908 rename_process("(...)");
914 /* The end of the process name is usually more
915 * interesting, since the first bit might just be
921 process_name[0] = '(';
922 memcpy(process_name+1, p, l);
923 process_name[1+l] = ')';
924 process_name[1+l+1] = 0;
926 rename_process(process_name);
/* apply_seccomp: builds and installs a seccomp BPF program from the
 * syscall_filter bitmap. The program consists of a fixed header, two
 * instructions per selected syscall (compare against the syscall number,
 * then return SECCOMP_RET_ALLOW) and a fixed footer; it is assembled in an
 * alloca() buffer and installed with prctl(PR_SET_SECCOMP,
 * SECCOMP_MODE_FILTER).
 *
 * NOTE(review): the bitmap is addressed as word i >> 4, bit i & 31, i.e.
 * each 32-bit word only ever carries 16 syscalls. This must match the code
 * that fills syscall_filter - confirm before changing either side. */
929 static int apply_seccomp(uint32_t *syscall_filter) {
930 static const struct sock_filter header[] = {
931 VALIDATE_ARCHITECTURE,
934 static const struct sock_filter footer[] = {
940 struct sock_filter *f;
941 struct sock_fprog prog = {};
943 assert(syscall_filter);
945 /* First: count the syscalls to check for */
946 for (i = 0, n = 0; i < syscall_max(); i++)
947 if (syscall_filter[i >> 4] & (1 << (i & 31)))
950 /* Second: build the filter program from a header the syscall
951 * matches and the footer */
952 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
953 memcpy(f, header, sizeof(header));
955 for (i = 0, n = 0; i < syscall_max(); i++)
956 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
957 struct sock_filter item[] = {
958 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
959 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
962 assert_cc(ELEMENTSOF(item) == 2);
964 f[ELEMENTSOF(header) + 2*n] = item[0];
965 f[ELEMENTSOF(header) + 2*n+1] = item[1];
970 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
972 /* Third: install the filter */
973 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
975 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
/* do_idle_pipe_dance: handles the four idle-pipe descriptors.
 * Ends 1 and 2 are closed right away. If end 0 is valid, the function
 * waits up to IDLE_TIMEOUT_USEC for POLLHUP on it; on timeout (and if end 3
 * is valid) it writes a single byte to end 3 and waits up to
 * IDLE_TIMEOUT2_USEC more. Ends 0 and 3 are closed afterwards. */
981 static void do_idle_pipe_dance(int idle_pipe[4]) {
984 if (idle_pipe[1] >= 0)
985 close_nointr_nofail(idle_pipe[1]);
986 if (idle_pipe[2] >= 0)
987 close_nointr_nofail(idle_pipe[2]);
989 if (idle_pipe[0] >= 0) {
992 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
994 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
995 /* Signal systemd that we are bored and want to continue. */
996 write(idle_pipe[3], "x", 1);
998 /* Wait for systemd to react to the signal above. */
999 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1002 close_nointr_nofail(idle_pipe[0]);
1006 if (idle_pipe[3] >= 0)
1007 close_nointr_nofail(idle_pipe[3]);
/* exec_spawn: prepares the execution environment described by context and
 * runs command->path with execve().
 *
 * Before the process is set up: environment files are loaded, the command
 * line is logged at debug level, and private temporary directories are
 * created if private_tmp is set and none exist yet.
 *
 * Set-up sequence (in the order it appears below): process rename, signal
 * dispositions and mask, idle-pipe handshake, closing of all descriptors
 * except the ones to pass on, TCP wrapper check, TTY reset, optional
 * interactive confirmation, stdin/stdout/stderr wiring, cgroup attachment,
 * OOM score, nice level, CPU scheduler, CPU affinity, I/O priority, timer
 * slack, utmp record, user lookup, terminal ownership, cgroup access
 * rights, group identity, umask, PAM session, network/mount namespaces,
 * chroot and working directory, descriptor shifting and flags, resource
 * limits, capability bounding set, uid switch, secure bits, capabilities,
 * no-new-privileges, seccomp filter, environment assembly, argv
 * substitution, execve(). Each failing step records an EXIT_* code in r,
 * which the final error log translates with exit_status_to_string().
 *
 * After the set-up path: the new pid is logged, attached to the cgroup
 * once more and recorded with exec_status_start().
 *
 * NOTE(review): the steps from rename_process_from_path() through execve()
 * presumably run in a forked child and the trailing steps in the parent -
 * confirm against the fork() call. */
1010 int exec_spawn(ExecCommand *command,
1012 ExecContext *context,
1013 int fds[], unsigned n_fds,
1015 bool apply_permissions,
1017 bool apply_tty_stdin,
1019 CGroupControllerMask cgroup_mask,
1020 const char *cgroup_path,
1021 const char *unit_id,
1025 _cleanup_strv_free_ char **files_env = NULL;
1034 assert(fds || n_fds <= 0);
1036 if (context->std_input == EXEC_INPUT_SOCKET ||
1037 context->std_output == EXEC_OUTPUT_SOCKET ||
1038 context->std_error == EXEC_OUTPUT_SOCKET) {
1050 r = exec_context_load_environment(context, &files_env);
1052 log_struct_unit(LOG_ERR,
1054 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1061 argv = command->argv;
1063 line = exec_command_line(argv);
1067 log_struct_unit(LOG_DEBUG,
1069 "EXECUTABLE=%s", command->path,
1070 "MESSAGE=About to execute: %s", line,
1074 if (context->private_tmp && !context->tmp_dir && !context->var_tmp_dir) {
1075 r = setup_tmpdirs(&context->tmp_dir, &context->var_tmp_dir);
1087 const char *username = NULL, *home = NULL;
1088 uid_t uid = (uid_t) -1;
1089 gid_t gid = (gid_t) -1;
1090 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL,
1091 **final_env = NULL, **final_argv = NULL;
1096 rename_process_from_path(command->path);
1098 /* We reset exactly these signals, since they are the
1099 * only ones we set to SIG_IGN in the main daemon. All
1100 * others we leave untouched because we set them to
1101 * SIG_DFL or a valid handler initially, both of which
1102 * will be demoted to SIG_DFL. */
1103 default_signals(SIGNALS_CRASH_HANDLER,
1104 SIGNALS_IGNORE, -1);
1106 if (context->ignore_sigpipe)
1107 ignore_signals(SIGPIPE, -1);
1109 assert_se(sigemptyset(&ss) == 0);
1110 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1112 r = EXIT_SIGNAL_MASK;
1117 do_idle_pipe_dance(idle_pipe);
1119 /* Close sockets very early to make sure we don't
1120 * block init reexecution because it cannot bind its
1123 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1124 socket_fd >= 0 ? 1 : n_fds);
1130 if (!context->same_pgrp)
1137 if (context->tcpwrap_name) {
1139 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1145 for (i = 0; i < (int) n_fds; i++) {
1146 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1154 exec_context_tty_reset(context);
1156 if (confirm_spawn) {
1159 err = ask_for_confirmation(&response, argv);
1160 if (err == -ETIMEDOUT)
1161 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1163 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1164 else if (response == 's') {
1165 write_confirm_message("Skipping execution.\n");
1169 } else if (response == 'n') {
1170 write_confirm_message("Failing execution.\n");
1176 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1177 * must be sure to drop O_NONBLOCK */
1179 fd_nonblock(socket_fd, false);
1181 err = setup_input(context, socket_fd, apply_tty_stdin);
1187 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1193 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1200 err = cg_attach_with_mask(cgroup_mask, cgroup_path, 0);
1207 if (context->oom_score_adjust_set) {
1210 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1213 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1215 r = EXIT_OOM_ADJUST;
1220 if (context->nice_set)
1221 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1227 if (context->cpu_sched_set) {
1228 struct sched_param param = {
1229 .sched_priority = context->cpu_sched_priority,
1232 r = sched_setscheduler(0,
1233 context->cpu_sched_policy |
1234 (context->cpu_sched_reset_on_fork ?
1235 SCHED_RESET_ON_FORK : 0),
1239 r = EXIT_SETSCHEDULER;
1244 if (context->cpuset)
1245 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1247 r = EXIT_CPUAFFINITY;
1251 if (context->ioprio_set)
1252 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1258 if (context->timer_slack_nsec != (nsec_t) -1)
1259 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1261 r = EXIT_TIMERSLACK;
1265 if (context->utmp_id)
1266 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1268 if (context->user) {
1269 username = context->user;
1270 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1276 if (is_terminal_input(context->std_input)) {
1277 err = chown_terminal(STDIN_FILENO, uid);
1286 if (cgroup_path && context->user && context->pam_name) {
1287 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0644, uid, gid);
1294 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, 0755, uid, gid);
1302 if (apply_permissions) {
1303 err = enforce_groups(context, username, gid);
1310 umask(context->umask);
1313 if (apply_permissions && context->pam_name && username) {
1314 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1321 if (context->private_network) {
1322 if (unshare(CLONE_NEWNET) < 0) {
1331 if (strv_length(context->read_write_dirs) > 0 ||
1332 strv_length(context->read_only_dirs) > 0 ||
1333 strv_length(context->inaccessible_dirs) > 0 ||
1334 context->mount_flags != 0 ||
1335 context->private_tmp) {
1336 err = setup_namespace(context->read_write_dirs,
1337 context->read_only_dirs,
1338 context->inaccessible_dirs,
1340 context->var_tmp_dir,
1341 context->private_tmp,
1342 context->mount_flags);
1350 if (context->root_directory)
1351 if (chroot(context->root_directory) < 0) {
1357 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1363 _cleanup_free_ char *d = NULL;
1365 if (asprintf(&d, "%s/%s",
1366 context->root_directory ? context->root_directory : "",
1367 context->working_directory ? context->working_directory : "") < 0) {
1380 /* We repeat the fd closing here, to make sure that
1381 * nothing is leaked from the PAM modules */
1382 err = close_all_fds(fds, n_fds);
1384 err = shift_fds(fds, n_fds);
1386 err = flags_fds(fds, n_fds, context->non_blocking);
1392 if (apply_permissions) {
1394 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1395 if (!context->rlimit[i])
1398 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1405 if (context->capability_bounding_set_drop) {
1406 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1408 r = EXIT_CAPABILITIES;
1413 if (context->user) {
1414 err = enforce_user(context, uid);
1421 /* PR_GET_SECUREBITS is not privileged, while
1422 * PR_SET_SECUREBITS is. So to suppress
1423 * potential EPERMs we'll try not to call
1424 * PR_SET_SECUREBITS unless necessary. */
1425 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1426 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1428 r = EXIT_SECUREBITS;
1432 if (context->capabilities)
1433 if (cap_set_proc(context->capabilities) < 0) {
1435 r = EXIT_CAPABILITIES;
1439 if (context->no_new_privileges)
1440 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1442 r = EXIT_NO_NEW_PRIVILEGES;
1446 if (context->syscall_filter) {
1447 err = apply_seccomp(context->syscall_filter);
1455 our_env = new0(char*, 7);
1463 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1464 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1471 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1478 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1479 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1485 if (is_terminal_input(context->std_input) ||
1486 context->std_output == EXEC_OUTPUT_TTY ||
1487 context->std_error == EXEC_OUTPUT_TTY)
1488 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1496 final_env = strv_env_merge(5,
1499 context->environment,
1509 final_argv = replace_env_argv(argv, final_env);
1516 final_env = strv_env_clean(final_env);
1518 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1519 line = exec_command_line(final_argv);
1522 log_struct_unit(LOG_DEBUG,
1524 "EXECUTABLE=%s", command->path,
1525 "MESSAGE=Executing: %s", line,
1532 execve(command->path, final_argv, final_env);
1539 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1540 "EXECUTABLE=%s", command->path,
1541 "MESSAGE=Failed at step %s spawning %s: %s",
1542 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1543 command->path, strerror(-err),
1552 log_struct_unit(LOG_DEBUG,
1554 "MESSAGE=Forked %s as %lu",
1555 command->path, (unsigned long) pid,
1558 /* We add the new process to the cgroup both in the child (so
1559 * that we can be sure that no user code is ever executed
1560 * outside of the cgroup) and in the parent (so that we can be
1561 * sure that when we kill the cgroup the process will be
1564 cg_attach(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, pid);
1566 exec_status_start(&command->exec_status, pid);
/* exec_context_init: assigns the defaults visible below to a context:
 * best-effort I/O class with priority 0, SCHED_OTHER, syslog priority
 * LOG_DAEMON|LOG_INFO with level prefixes enabled, SIGPIPE ignored, and a
 * timer slack of (nsec_t) -1, the value exec_spawn() and
 * exec_context_dump() test for before using the field. */
1572 void exec_context_init(ExecContext *c) {
1576 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1577 c->cpu_sched_policy = SCHED_OTHER;
1578 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1579 c->syslog_level_prefix = true;
1580 c->ignore_sigpipe = true;
1581 c->timer_slack_nsec = (nsec_t) -1;
/* exec_context_tmp_dirs_done: removes the context's private temporary
 * directories. The two paths are packed into a NULL-terminated array (so a
 * missing tmp_dir does not hide var_tmp_dir), each is emptied with
 * rm_rf_dangerous() and its parent is obtained with dirname(); failures
 * are reported with log_warning(). Both fields are reset to NULL at the
 * end. */
1584 void exec_context_tmp_dirs_done(ExecContext *c) {
1585 char* dirs[] = {c->tmp_dir ? c->tmp_dir : c->var_tmp_dir,
1586 c->tmp_dir ? c->var_tmp_dir : NULL,
1590 for(dirp = dirs; *dirp; dirp++) {
1594 r = rm_rf_dangerous(*dirp, false, true, false);
1595 dir = dirname(*dirp);
1597 log_warning("Failed to remove content of temporary directory %s: %s",
1602 log_warning("Failed to remove temporary directory %s: %s",
1609 c->tmp_dir = c->var_tmp_dir = NULL;
/* exec_context_done: releases the resources owned by a context and resets
 * the corresponding fields to NULL: string vectors (environment,
 * environment files, supplementary groups, the three directory lists),
 * plain strings (working and root directory, tcpwrap name, syslog
 * identifier), the rlimit slots, the capability set (cap_free), the CPU
 * set (CPU_FREE) and the syscall filter. The private temporary directories
 * are removed only when reloading_or_reexecuting is false. */
1612 void exec_context_done(ExecContext *c, bool reloading_or_reexecuting) {
1617 strv_free(c->environment);
1618 c->environment = NULL;
1620 strv_free(c->environment_files);
1621 c->environment_files = NULL;
1623 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1625 c->rlimit[l] = NULL;
1628 free(c->working_directory);
1629 c->working_directory = NULL;
1630 free(c->root_directory);
1631 c->root_directory = NULL;
1636 free(c->tcpwrap_name);
1637 c->tcpwrap_name = NULL;
1639 free(c->syslog_identifier);
1640 c->syslog_identifier = NULL;
1648 strv_free(c->supplementary_groups);
1649 c->supplementary_groups = NULL;
1654 if (c->capabilities) {
1655 cap_free(c->capabilities);
1656 c->capabilities = NULL;
1659 strv_free(c->read_only_dirs);
1660 c->read_only_dirs = NULL;
1662 strv_free(c->read_write_dirs);
1663 c->read_write_dirs = NULL;
1665 strv_free(c->inaccessible_dirs);
1666 c->inaccessible_dirs = NULL;
1669 CPU_FREE(c->cpuset);
1674 free(c->syscall_filter);
1675 c->syscall_filter = NULL;
1677 if (!reloading_or_reexecuting)
1678 exec_context_tmp_dirs_done(c);
/* exec_command_done: per-command cleanup hook; invoked for every element
 * by exec_command_done_array() and for every list entry by
 * exec_command_free_list().
 * NOTE(review): presumably releases the members owned by c - confirm. */
1681 void exec_command_done(ExecCommand *c) {
/* exec_command_done_array: calls exec_command_done() on each of the n
 * commands stored contiguously starting at c. */
1691 void exec_command_done_array(ExecCommand *c, unsigned n) {
1694 for (i = 0; i < n; i++)
1695 exec_command_done(c+i);
/* exec_command_free_list: unlinks entries from the command list headed by
 * c with LIST_REMOVE() and passes each unlinked entry i to
 * exec_command_done(). */
1698 void exec_command_free_list(ExecCommand *c) {
1702 LIST_REMOVE(ExecCommand, command, c, i);
1703 exec_command_done(i);
/* exec_command_free_array: calls exec_command_free_list() on each of the n
 * list heads stored in the array c. */
1708 void exec_command_free_array(ExecCommand **c, unsigned n) {
1711 for (i = 0; i < n; i++) {
1712 exec_command_free_list(c[i]);
/* exec_context_load_environment: reads the context's environment files.
 * Each configured entry is checked with path_is_absolute() and expanded
 * with glob(); every match is read with load_env_file(), filtered through
 * strv_env_clean_log() (which is given the file name for its messages) and
 * merged into the accumulated vector r with strv_env_merge(). A glob()
 * failure is reported as -errno, or -EINVAL when errno is zero. */
1717 int exec_context_load_environment(const ExecContext *c, char ***l) {
1718 char **i, **r = NULL;
1723 STRV_FOREACH(i, c->environment_files) {
1726 bool ignore = false;
1728 _cleanup_globfree_ glob_t pglob = {};
1738 if (!path_is_absolute(fn)) {
1746 /* Filename supports globbing, take all matching files */
1748 if (glob(fn, 0, NULL, &pglob) != 0) {
1753 return errno ? -errno : -EINVAL;
1755 count = pglob.gl_pathc;
1763 for (n = 0; n < count; n++) {
1764 k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1772 /* Log invalid environment variables with filename */
1774 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1781 m = strv_env_merge(2, r, p);
/* tty_may_match_dev_console: decides whether the given tty name could be
 * the device behind /dev/console. A leading "/dev/" is recognised via
 * startswith(); the name "console" matches trivially; otherwise the
 * console is resolved with resolve_dev_console() and compared by name,
 * with "tty0" treated as possibly matching any virtual console. */
1797 static bool tty_may_match_dev_console(const char *tty) {
1798 char *active = NULL, *console;
1801 if (startswith(tty, "/dev/"))
1804 /* trivial identity? */
1805 if (streq(tty, "console"))
1808 console = resolve_dev_console(&active);
1809 /* if we could not resolve, assume it may */
1813 /* "tty0" means the active VC, so it may be the same sometimes */
1814 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* exec_context_may_touch_console: true when the context uses a terminal in
 * any way (one of the TTY housekeeping flags, a terminal input mode, or a
 * terminal output mode on stdout or stderr) and its TTY path may refer to
 * the same device as /dev/console. */
1820 bool exec_context_may_touch_console(ExecContext *ec) {
1821 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1822 is_terminal_input(ec->std_input) ||
1823 is_terminal_output(ec->std_output) ||
1824 is_terminal_output(ec->std_error)) &&
1825 tty_may_match_dev_console(tty_path(ec));
/* strv_fprintf: writes entries of the string vector l to f, each preceded
 * by a single space. */
1828 static void strv_fprintf(FILE *f, char **l) {
1834 fprintf(f, " %s", *g);
/* Writes a human-readable, line-oriented dump of the execution context 'c'
 * to the stream 'f'. Every emitted line starts with 'prefix', which is first
 * passed through strempty() (presumably mapping NULL to "" -- confirm).
 * Optional settings are only printed when the corresponding "*_set" flag or
 * pointer is set.
 * NOTE(review): a number of statements of this function (declarations,
 * several "fprintf(f," openers, closing braces, frees) are not visible in
 * this excerpt; only the visible lines are documented here. */
1837 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1844 prefix = strempty(prefix);
1848 "%sWorkingDirectory: %s\n"
1849 "%sRootDirectory: %s\n"
1850 "%sNonBlocking: %s\n"
1851 "%sPrivateTmp: %s\n"
1852 "%sPrivateNetwork: %s\n"
1853 "%sIgnoreSIGPIPE: %s\n",
/* unset working/root directories are reported as "/" */
1855 prefix, c->working_directory ? c->working_directory : "/",
1856 prefix, c->root_directory ? c->root_directory : "/",
1857 prefix, yes_no(c->non_blocking),
1858 prefix, yes_no(c->private_tmp),
1859 prefix, yes_no(c->private_network),
1860 prefix, yes_no(c->ignore_sigpipe));
/* one output line per environment entry / environment file */
1862 STRV_FOREACH(e, c->environment)
1863 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1865 STRV_FOREACH(e, c->environment_files)
1866 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1868 if (c->tcpwrap_name)
1870 "%sTCPWrapName: %s\n",
1871 prefix, c->tcpwrap_name);
1878 if (c->oom_score_adjust_set)
1880 "%sOOMScoreAdjust: %i\n",
1881 prefix, c->oom_score_adjust);
/* resource limits: the printed value is rlim_max of each entry */
1883 for (i = 0; i < RLIM_NLIMITS; i++)
1885 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1887 if (c->ioprio_set) {
1891 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1895 "%sIOSchedulingClass: %s\n"
1896 "%sIOPriority: %i\n",
1897 prefix, strna(class_str),
1898 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1902 if (c->cpu_sched_set) {
1906 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1910 "%sCPUSchedulingPolicy: %s\n"
1911 "%sCPUSchedulingPriority: %i\n"
1912 "%sCPUSchedulingResetOnFork: %s\n",
1913 prefix, strna(policy_str),
1914 prefix, c->cpu_sched_priority,
1915 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* CPU affinity: list the index of every CPU set in the mask */
1920 fprintf(f, "%sCPUAffinity:", prefix);
1921 for (i = 0; i < c->cpuset_ncpus; i++)
1922 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1923 fprintf(f, " %i", i);
/* (nsec_t) -1 is treated as "timer slack not configured" */
1927 if (c->timer_slack_nsec != (nsec_t) -1)
1928 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1931 "%sStandardInput: %s\n"
1932 "%sStandardOutput: %s\n"
1933 "%sStandardError: %s\n",
1934 prefix, exec_input_to_string(c->std_input),
1935 prefix, exec_output_to_string(c->std_output),
1936 prefix, exec_output_to_string(c->std_error));
1942 "%sTTYVHangup: %s\n"
1943 "%sTTYVTDisallocate: %s\n",
1944 prefix, c->tty_path,
1945 prefix, yes_no(c->tty_reset),
1946 prefix, yes_no(c->tty_vhangup),
1947 prefix, yes_no(c->tty_vt_disallocate));
/* syslog facility/level are only shown when stdout or stderr goes to
 * syslog, kmsg or the journal (with or without the console) */
1949 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1950 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1951 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1952 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1953 char *fac_str, *lvl_str;
/* the facility is stored in the bits above the low three of
 * syslog_priority, the level is extracted with LOG_PRI() */
1956 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1960 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1965 "%sSyslogFacility: %s\n"
1966 "%sSyslogLevel: %s\n",
1967 prefix, strna(fac_str),
1968 prefix, strna(lvl_str));
1973 if (c->capabilities) {
1975 if ((t = cap_to_text(c->capabilities, NULL))) {
1976 fprintf(f, "%sCapabilities: %s\n",
/* The labels are concatenated directly after "Secure Bits:", so every
 * label must carry its own leading space to keep them separated. */
1983 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1985 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
1986 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1987 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1988 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1989 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
1990 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
1992 if (c->capability_bounding_set_drop) {
1994 fprintf(f, "%sCapabilityBoundingSet:", prefix);
/* the field stores the capabilities to DROP, so the bounding set that is
 * printed consists of every capability whose bit is NOT set */
1996 for (l = 0; l <= cap_last_cap(); l++)
1997 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2000 if ((t = cap_to_name(l))) {
2001 fprintf(f, " %s", t);
2010 fprintf(f, "%sUser: %s\n", prefix, c->user);
2012 fprintf(f, "%sGroup: %s\n", prefix, c->group);
/* list-valued settings are printed on a single line, and only when the
 * list is non-empty */
2014 if (strv_length(c->supplementary_groups) > 0) {
2015 fprintf(f, "%sSupplementaryGroups:", prefix);
2016 strv_fprintf(f, c->supplementary_groups);
2021 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2023 if (strv_length(c->read_write_dirs) > 0) {
2024 fprintf(f, "%sReadWriteDirs:", prefix);
2025 strv_fprintf(f, c->read_write_dirs);
2029 if (strv_length(c->read_only_dirs) > 0) {
2030 fprintf(f, "%sReadOnlyDirs:", prefix);
2031 strv_fprintf(f, c->read_only_dirs);
2035 if (strv_length(c->inaccessible_dirs) > 0) {
2036 fprintf(f, "%sInaccessibleDirs:", prefix);
2037 strv_fprintf(f, c->inaccessible_dirs);
2043 "%sUtmpIdentifier: %s\n",
2044 prefix, c->utmp_id);
/* Marks the start of a process in the status record 's': the current time
 * is stored in s->start_timestamp.
 * NOTE(review): 'pid' is not used by the visible lines; presumably it is
 * stored in 's' by statements not shown here -- confirm. */
2047 void exec_status_start(ExecStatus *s, pid_t pid) {
2052 dual_timestamp_get(&s->start_timestamp);
/* Marks the exit of process 'pid' in the status record 's'. The visible
 * steps are: detect that 's' currently tracks a different, non-zero pid,
 * stamp s->exit_timestamp with the current time, report the dead process to
 * utmp when the context has a utmp identifier, and reset the context's tty.
 * NOTE(review): the statement guarded by the pid check, and the handling
 * of 'code' / 'status' within 's', are not visible in this excerpt. */
2055 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2058 if (s->pid && s->pid != pid)
2062 dual_timestamp_get(&s->exit_timestamp);
/* NOTE(review): 'context' is dereferenced here; whether a NULL guard
 * precedes this line cannot be seen from this excerpt */
2068 if (context->utmp_id)
2069 utmp_put_dead_process(context->utmp_id, pid, code, status);
2071 exec_context_tty_reset(context);
/* Prints the contents of the status record 's' to 'f', each line starting
 * with 'prefix': the pid, and -- only when the respective realtime
 * timestamp is greater than zero -- the start timestamp and the exit
 * timestamp together with exit code and exit status.
 * NOTE(review): some format-string lines and arguments of the calls below
 * are not visible in this excerpt. */
2075 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* scratch buffer that format_timestamp() renders into */
2076 char buf[FORMAT_TIMESTAMP_MAX];
2089 prefix, (unsigned long) s->pid);
2091 if (s->start_timestamp.realtime > 0)
2093 "%sStart Timestamp: %s\n",
2094 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2096 if (s->exit_timestamp.realtime > 0)
2098 "%sExit Timestamp: %s\n"
2100 "%sExit Status: %i\n",
2101 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2102 prefix, sigchld_code_to_string(s->code),
/* Builds a single string out of the argument vector 'argv'. The visible
 * code walks argv twice: once before allocating a buffer of 'k' chars and
 * once afterwards; arguments containing a WHITESPACE character take a
 * separate branch in the second pass.
 * NOTE(review): how 'k' is computed, what the whitespace branch emits and
 * what is returned on allocation failure are not visible in this excerpt;
 * presumably the caller owns (and frees) the returned string -- confirm. */
2106 char *exec_command_line(char **argv) {
2114 STRV_FOREACH(a, argv)
2117 if (!(n = new(char, k)))
2121 STRV_FOREACH(a, argv) {
2128 if (strpbrk(*a, WHITESPACE)) {
2139 /* FIXME: this doesn't really handle arguments that have
2140 * spaces and ticks in them */
/* Prints a description of the command 'c' to 'f': its command line
 * (prefixed with 'prefix') followed by its exec status, which is dumped
 * with a prefix extended by one tab.
 * NOTE(review): declarations, cleanup of 'p2' / 'cmd' and any other output
 * lines are not visible in this excerpt. */
2145 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2147 const char *prefix2;
/* if building the extended prefix fails (p2 is NULL), fall back to the
 * plain prefix */
2156 p2 = strappend(prefix, "\t");
2157 prefix2 = p2 ? p2 : prefix;
2159 cmd = exec_command_line(c->argv);
/* when no command line string could be built, the ENOMEM error text is
 * printed in its place */
2162 "%sCommand Line: %s\n",
2163 prefix, cmd ? cmd : strerror(ENOMEM));
2167 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at 'c' to 'f' by calling
 * exec_command_dump() on each element; the list is walked through its
 * 'command' link, reusing the parameter 'c' as the iterator. */
2172 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2178 LIST_FOREACH(command, c, c)
2179 exec_command_dump(c, f, prefix);
/* Appends the command 'e' to the list '*l': the current tail is looked up
 * and 'e' is inserted right after it, so existing order is preserved.
 * NOTE(review): the declaration of 'end' and any special-casing of an
 * empty list are not visible in this excerpt. */
2182 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2189 /* It's kind of important, that we keep the order here */
2190 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2191 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Fills in the command 'c' from 'path' and the variadic arguments that
 * follow it. The visible code builds a string vector from 'path' plus the
 * va_list via strv_new_ap() and takes a private copy of 'path' via strdup().
 * NOTE(review): va_start/va_end, error returns, and the assignment of the
 * results to 'c' are not visible in this excerpt. */
2196 int exec_command_set(ExecCommand *c, const char *path, ...) {
2204 l = strv_new_ap(path, ap);
2210 if (!(p = strdup(path))) {
/* Textual names of the ExecInput enum values, indexed by enum value. */
2224 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2225 [EXEC_INPUT_NULL] = "null",
2226 [EXEC_INPUT_TTY] = "tty",
2227 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2228 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2229 [EXEC_INPUT_SOCKET] = "socket"
/* presumably generates the exec_input_to_string() lookup (used by the dump
 * code in this file) and its reverse from the table above -- confirm
 * against the macro definition */
2232 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Textual names of the ExecOutput enum values, indexed by enum value. The
 * "+console" variants name the *_AND_CONSOLE outputs. */
2234 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2235 [EXEC_OUTPUT_INHERIT] = "inherit",
2236 [EXEC_OUTPUT_NULL] = "null",
2237 [EXEC_OUTPUT_TTY] = "tty",
2238 [EXEC_OUTPUT_SYSLOG] = "syslog",
2239 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2240 [EXEC_OUTPUT_KMSG] = "kmsg",
2241 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2242 [EXEC_OUTPUT_JOURNAL] = "journal",
2243 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2244 [EXEC_OUTPUT_SOCKET] = "socket"
/* presumably generates the exec_output_to_string() lookup (used by the
 * dump code in this file) and its reverse from the table above -- confirm
 * against the macro definition */
2247 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);