1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 /* This assumes there is a 'tty' group */
75 static int shift_fds(int fds[], unsigned n_fds) {
76 int start, restart_from;
81 /* Modifies the fds array! (sorts it) */
91 for (i = start; i < (int) n_fds; i++) {
94 /* Already at right index? */
98 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
101 close_nointr_nofail(fds[i]);
104 /* Hmm, the fd we wanted isn't free? Then
105 * let's remember that and try again from here */
106 if (nfd != i+3 && restart_from < 0)
110 if (restart_from < 0)
113 start = restart_from;
119 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
128 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
130 for (i = 0; i < n_fds; i++) {
132 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
135 /* We unconditionally drop FD_CLOEXEC from the fds,
136 * since after all we want to pass these fds to our
139 if ((r = fd_cloexec(fds[i], false)) < 0)
146 static const char *tty_path(const ExecContext *context) {
149 if (context->tty_path)
150 return context->tty_path;
152 return "/dev/console";
155 void exec_context_tty_reset(const ExecContext *context) {
158 if (context->tty_vhangup)
159 terminal_vhangup(tty_path(context));
161 if (context->tty_reset)
162 reset_terminal(tty_path(context));
164 if (context->tty_vt_disallocate && context->tty_path)
165 vt_disallocate(context->tty_path);
168 static bool is_terminal_output(ExecOutput o) {
170 o == EXEC_OUTPUT_TTY ||
171 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
172 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
173 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
176 void exec_context_serialize(const ExecContext *context, Unit *u, FILE *f) {
181 if (context->tmp_dir)
182 unit_serialize_item(u, f, "tmp-dir", context->tmp_dir);
184 if (context->var_tmp_dir)
185 unit_serialize_item(u, f, "var-tmp-dir", context->var_tmp_dir);
188 static int open_null_as(int flags, int nfd) {
193 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
197 r = dup2(fd, nfd) < 0 ? -errno : nfd;
198 close_nointr_nofail(fd);
205 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
207 union sockaddr_union sa;
210 assert(output < _EXEC_OUTPUT_MAX);
214 fd = socket(AF_UNIX, SOCK_STREAM, 0);
219 sa.un.sun_family = AF_UNIX;
220 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
222 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
224 close_nointr_nofail(fd);
228 if (shutdown(fd, SHUT_RD) < 0) {
229 close_nointr_nofail(fd);
241 context->syslog_identifier ? context->syslog_identifier : ident,
243 context->syslog_priority,
244 !!context->syslog_level_prefix,
245 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
246 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
247 is_terminal_output(output));
250 r = dup2(fd, nfd) < 0 ? -errno : nfd;
251 close_nointr_nofail(fd);
257 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
263 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
267 r = dup2(fd, nfd) < 0 ? -errno : nfd;
268 close_nointr_nofail(fd);
275 static bool is_terminal_input(ExecInput i) {
277 i == EXEC_INPUT_TTY ||
278 i == EXEC_INPUT_TTY_FORCE ||
279 i == EXEC_INPUT_TTY_FAIL;
282 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
284 if (is_terminal_input(std_input) && !apply_tty_stdin)
285 return EXEC_INPUT_NULL;
287 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
288 return EXEC_INPUT_NULL;
293 static int fixup_output(ExecOutput std_output, int socket_fd) {
295 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
296 return EXEC_OUTPUT_INHERIT;
301 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
306 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
310 case EXEC_INPUT_NULL:
311 return open_null_as(O_RDONLY, STDIN_FILENO);
314 case EXEC_INPUT_TTY_FORCE:
315 case EXEC_INPUT_TTY_FAIL: {
318 if ((fd = acquire_terminal(
320 i == EXEC_INPUT_TTY_FAIL,
321 i == EXEC_INPUT_TTY_FORCE,
326 if (fd != STDIN_FILENO) {
327 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
328 close_nointr_nofail(fd);
335 case EXEC_INPUT_SOCKET:
336 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
339 assert_not_reached("Unknown input type");
343 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
351 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
352 o = fixup_output(context->std_output, socket_fd);
354 if (fileno == STDERR_FILENO) {
356 e = fixup_output(context->std_error, socket_fd);
358 /* This expects that the input and output are already set up */
360 /* Don't change the stderr file descriptor if we inherit all
361 * the way and are not on a tty */
362 if (e == EXEC_OUTPUT_INHERIT &&
363 o == EXEC_OUTPUT_INHERIT &&
364 i == EXEC_INPUT_NULL &&
365 !is_terminal_input(context->std_input) &&
369 /* Duplicate from stdout if possible */
370 if (e == o || e == EXEC_OUTPUT_INHERIT)
371 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
375 } else if (o == EXEC_OUTPUT_INHERIT) {
376 /* If input got downgraded, inherit the original value */
377 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
378 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
380 /* If the input is connected to anything that's not a /dev/null, inherit that... */
381 if (i != EXEC_INPUT_NULL)
382 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
384 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
388 /* We need to open /dev/null here anew, to get the right access mode. */
389 return open_null_as(O_WRONLY, fileno);
394 case EXEC_OUTPUT_NULL:
395 return open_null_as(O_WRONLY, fileno);
397 case EXEC_OUTPUT_TTY:
398 if (is_terminal_input(i))
399 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401 /* We don't reset the terminal if this is just about output */
402 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
404 case EXEC_OUTPUT_SYSLOG:
405 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
406 case EXEC_OUTPUT_KMSG:
407 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
408 case EXEC_OUTPUT_JOURNAL:
409 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
410 r = connect_logger_as(context, o, ident, unit_id, fileno);
412 log_struct_unit(LOG_CRIT, unit_id,
413 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
414 fileno == STDOUT_FILENO ? "out" : "err",
415 unit_id, strerror(-r),
418 r = open_null_as(O_WRONLY, fileno);
422 case EXEC_OUTPUT_SOCKET:
423 assert(socket_fd >= 0);
424 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
427 assert_not_reached("Unknown error type");
431 static int chown_terminal(int fd, uid_t uid) {
436 /* This might fail. What matters are the results. */
437 (void) fchown(fd, uid, -1);
438 (void) fchmod(fd, TTY_MODE);
440 if (fstat(fd, &st) < 0)
443 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
449 static int setup_confirm_stdio(int *_saved_stdin,
450 int *_saved_stdout) {
451 int fd = -1, saved_stdin, saved_stdout = -1, r;
453 assert(_saved_stdin);
454 assert(_saved_stdout);
456 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
460 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
461 if (saved_stdout < 0) {
466 fd = acquire_terminal(
471 DEFAULT_CONFIRM_USEC);
477 r = chown_terminal(fd, getuid());
481 if (dup2(fd, STDIN_FILENO) < 0) {
486 if (dup2(fd, STDOUT_FILENO) < 0) {
492 close_nointr_nofail(fd);
494 *_saved_stdin = saved_stdin;
495 *_saved_stdout = saved_stdout;
500 if (saved_stdout >= 0)
501 close_nointr_nofail(saved_stdout);
503 if (saved_stdin >= 0)
504 close_nointr_nofail(saved_stdin);
507 close_nointr_nofail(fd);
512 static int write_confirm_message(const char *format, ...) {
518 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
522 va_start(ap, format);
523 vdprintf(fd, format, ap);
526 close_nointr_nofail(fd);
531 static int restore_confirm_stdio(int *saved_stdin,
537 assert(saved_stdout);
541 if (*saved_stdin >= 0)
542 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
545 if (*saved_stdout >= 0)
546 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
549 if (*saved_stdin >= 0)
550 close_nointr_nofail(*saved_stdin);
552 if (*saved_stdout >= 0)
553 close_nointr_nofail(*saved_stdout);
558 static int ask_for_confirmation(char *response, char **argv) {
559 int saved_stdout = -1, saved_stdin = -1, r;
562 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
566 line = exec_command_line(argv);
570 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
573 restore_confirm_stdio(&saved_stdin, &saved_stdout);
578 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
579 bool keep_groups = false;
584 /* Lookup and set GID and supplementary group list. Here too
585 * we avoid NSS lookups for gid=0. */
587 if (context->group || username) {
589 if (context->group) {
590 const char *g = context->group;
592 if ((r = get_group_creds(&g, &gid)) < 0)
596 /* First step, initialize groups from /etc/group */
597 if (username && gid != 0) {
598 if (initgroups(username, gid) < 0)
604 /* Second step, set our gids */
605 if (setresgid(gid, gid, gid) < 0)
609 if (context->supplementary_groups) {
614 /* Final step, initialize any manually set supplementary groups */
615 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
617 if (!(gids = new(gid_t, ngroups_max)))
621 if ((k = getgroups(ngroups_max, gids)) < 0) {
628 STRV_FOREACH(i, context->supplementary_groups) {
631 if (k >= ngroups_max) {
637 r = get_group_creds(&g, gids+k);
646 if (setgroups(k, gids) < 0) {
657 static int enforce_user(const ExecContext *context, uid_t uid) {
661 /* Sets (but doesn't lookup) the uid and make sure we keep the
662 * capabilities while doing so. */
664 if (context->capabilities) {
666 static const cap_value_t bits[] = {
667 CAP_SETUID, /* Necessary so that we can run setresuid() below */
668 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
671 /* First step: If we need to keep capabilities but
672 * drop privileges we need to make sure we keep our
673 * caps, while we drop privileges. */
675 int sb = context->secure_bits|SECURE_KEEP_CAPS;
677 if (prctl(PR_GET_SECUREBITS) != sb)
678 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682 /* Second step: set the capabilities. This will reduce
683 * the capabilities to the minimum we need. */
685 if (!(d = cap_dup(context->capabilities)))
688 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
689 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
695 if (cap_set_proc(d) < 0) {
704 /* Third step: actually set the uids */
705 if (setresuid(uid, uid, uid) < 0)
708 /* At this point we should have all necessary capabilities but
709 are otherwise a normal user. However, the caps might have been
710 corrupted by the setresuid(), so we need to clean them up
711 later. This is done outside of this call. */
718 static int null_conv(
720 const struct pam_message **msg,
721 struct pam_response **resp,
724 /* We don't support conversations */
729 static int setup_pam(
735 int fds[], unsigned n_fds) {
737 static const struct pam_conv conv = {
742 pam_handle_t *handle = NULL;
744 int pam_code = PAM_SUCCESS;
747 bool close_session = false;
748 pid_t pam_pid = 0, parent_pid;
754 /* We set up PAM in the parent process, then fork. The child
755 * will then stay around until killed via PR_SET_PDEATHSIG or
756 * systemd via the cgroup logic. It will then remove the PAM
757 * session again. The parent process will exec() the actual
758 * daemon. We do things this way to ensure that the main PID
759 * of the daemon is the one we initially fork()ed. */
761 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
767 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
770 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
773 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
776 close_session = true;
778 if ((!(e = pam_getenvlist(handle)))) {
779 pam_code = PAM_BUF_ERR;
783 /* Block SIGTERM, so that we know that it won't get lost in
785 if (sigemptyset(&ss) < 0 ||
786 sigaddset(&ss, SIGTERM) < 0 ||
787 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
790 parent_pid = getpid();
792 if ((pam_pid = fork()) < 0)
799 /* The child's job is to reset the PAM session on
802 /* This string must fit in 10 chars (i.e. the length
803 * of "/sbin/init"), to look pretty in /bin/ps */
804 rename_process("(sd-pam)");
806 /* Make sure we don't keep open the passed fds in this
807 child. We assume that otherwise only those fds are
808 open here that have been opened by PAM. */
809 close_many(fds, n_fds);
811 /* Drop privileges - we don't need any to call pam_close_session()
812 * and this will make PR_SET_PDEATHSIG work in most cases.
813 * If this fails, ignore the error - but expect sd-pam threads
814 * to fail to exit normally */
815 if (setresuid(uid, uid, uid) < 0)
816 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
818 /* Wait until our parent died. This will only work if
819 * the above setresuid() succeeds, otherwise the kernel
820 * will not allow unprivileged parents to kill their privileged
821 * children this way. We rely on the control groups kill logic
822 * to do the rest for us. */
823 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
826 /* Check if our parent process might already have
828 if (getppid() == parent_pid) {
830 if (sigwait(&ss, &sig) < 0) {
837 assert(sig == SIGTERM);
842 /* If our parent died we'll end the session */
843 if (getppid() != parent_pid)
844 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
850 pam_end(handle, pam_code | PAM_DATA_SILENT);
854 /* If the child was forked off successfully it will do all the
855 * cleanups, so forget about the handle here. */
858 /* Unblock SIGTERM again in the parent */
859 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
862 /* We close the log explicitly here, since the PAM modules
863 * might have opened it, but we don't want this fd around. */
872 if (pam_code != PAM_SUCCESS)
873 err = -EPERM; /* PAM errors do not map to errno */
879 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
881 pam_end(handle, pam_code | PAM_DATA_SILENT);
889 kill(pam_pid, SIGTERM);
890 kill(pam_pid, SIGCONT);
897 static void rename_process_from_path(const char *path) {
898 char process_name[11];
902 /* This resulting string must fit in 10 chars (i.e. the length
903 * of "/sbin/init") to look pretty in /bin/ps */
905 p = path_get_file_name(path);
907 rename_process("(...)");
913 /* The end of the process name is usually more
914 * interesting, since the first bit might just be
920 process_name[0] = '(';
921 memcpy(process_name+1, p, l);
922 process_name[1+l] = ')';
923 process_name[1+l+1] = 0;
925 rename_process(process_name);
928 static int apply_seccomp(uint32_t *syscall_filter) {
929 static const struct sock_filter header[] = {
930 VALIDATE_ARCHITECTURE,
933 static const struct sock_filter footer[] = {
939 struct sock_filter *f;
940 struct sock_fprog prog;
942 assert(syscall_filter);
944 /* First: count the syscalls to check for */
945 for (i = 0, n = 0; i < syscall_max(); i++)
946 if (syscall_filter[i >> 4] & (1 << (i & 31)))
949 /* Second: build the filter program from a header, the syscall
950 * matches, and the footer */
951 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
952 memcpy(f, header, sizeof(header));
954 for (i = 0, n = 0; i < syscall_max(); i++)
955 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
956 struct sock_filter item[] = {
957 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
958 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
961 assert_cc(ELEMENTSOF(item) == 2);
963 f[ELEMENTSOF(header) + 2*n] = item[0];
964 f[ELEMENTSOF(header) + 2*n+1] = item[1];
969 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
971 /* Third: install the filter */
973 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
975 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
981 int exec_spawn(ExecCommand *command,
983 ExecContext *context,
984 int fds[], unsigned n_fds,
986 bool apply_permissions,
988 bool apply_tty_stdin,
990 CGroupBonding *cgroup_bondings,
991 CGroupAttribute *cgroup_attributes,
992 const char *cgroup_suffix,
1001 char _cleanup_strv_free_ **files_env = NULL;
1006 assert(fds || n_fds <= 0);
1008 if (context->std_input == EXEC_INPUT_SOCKET ||
1009 context->std_output == EXEC_OUTPUT_SOCKET ||
1010 context->std_error == EXEC_OUTPUT_SOCKET) {
1022 r = exec_context_load_environment(context, &files_env);
1024 log_struct_unit(LOG_ERR,
1026 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1033 argv = command->argv;
1035 line = exec_command_line(argv);
1039 log_struct_unit(LOG_DEBUG,
1041 "MESSAGE=About to execute %s", line,
1045 r = cgroup_bonding_realize_list(cgroup_bondings);
1049 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1051 if (context->private_tmp && !context->tmp_dir && !context->var_tmp_dir) {
1052 r = setup_tmpdirs(&context->tmp_dir, &context->var_tmp_dir);
1064 const char *username = NULL, *home = NULL;
1065 uid_t uid = (uid_t) -1;
1066 gid_t gid = (gid_t) -1;
1067 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1068 **final_env = NULL, **final_argv = NULL;
1070 bool set_access = false;
1074 rename_process_from_path(command->path);
1076 /* We reset exactly these signals, since they are the
1077 * only ones we set to SIG_IGN in the main daemon. All
1078 * others we leave untouched because we set them to
1079 * SIG_DFL or a valid handler initially, both of which
1080 * will be demoted to SIG_DFL. */
1081 default_signals(SIGNALS_CRASH_HANDLER,
1082 SIGNALS_IGNORE, -1);
1084 if (context->ignore_sigpipe)
1085 ignore_signals(SIGPIPE, -1);
1087 assert_se(sigemptyset(&ss) == 0);
1088 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1090 r = EXIT_SIGNAL_MASK;
1095 if (idle_pipe[1] >= 0)
1096 close_nointr_nofail(idle_pipe[1]);
1097 if (idle_pipe[0] >= 0) {
1098 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1099 close_nointr_nofail(idle_pipe[0]);
1103 /* Close sockets very early to make sure we don't
1104 * block init reexecution because it cannot bind its
1107 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1108 socket_fd >= 0 ? 1 : n_fds);
1114 if (!context->same_pgrp)
1121 if (context->tcpwrap_name) {
1123 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1129 for (i = 0; i < (int) n_fds; i++) {
1130 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1138 exec_context_tty_reset(context);
1140 if (confirm_spawn) {
1143 err = ask_for_confirmation(&response, argv);
1144 if (err == -ETIMEDOUT)
1145 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1147 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1148 else if (response == 's') {
1149 write_confirm_message("Skipping execution.\n");
1153 } else if (response == 'n') {
1154 write_confirm_message("Failing execution.\n");
1160 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1161 * must be sure to drop O_NONBLOCK */
1163 fd_nonblock(socket_fd, false);
1165 err = setup_input(context, socket_fd, apply_tty_stdin);
1171 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1177 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1183 if (cgroup_bondings) {
1184 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1191 if (context->oom_score_adjust_set) {
1194 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1197 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1199 r = EXIT_OOM_ADJUST;
1204 if (context->nice_set)
1205 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1211 if (context->cpu_sched_set) {
1212 struct sched_param param;
1215 param.sched_priority = context->cpu_sched_priority;
1217 if (sched_setscheduler(0, context->cpu_sched_policy |
1218 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1220 r = EXIT_SETSCHEDULER;
1225 if (context->cpuset)
1226 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1228 r = EXIT_CPUAFFINITY;
1232 if (context->ioprio_set)
1233 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1239 if (context->timer_slack_nsec != (nsec_t) -1)
1240 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1242 r = EXIT_TIMERSLACK;
1246 if (context->utmp_id)
1247 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1249 if (context->user) {
1250 username = context->user;
1251 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1257 if (is_terminal_input(context->std_input)) {
1258 err = chown_terminal(STDIN_FILENO, uid);
1265 if (cgroup_bondings && context->control_group_modify) {
1266 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1268 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1278 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1279 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1286 if (apply_permissions) {
1287 err = enforce_groups(context, username, gid);
1294 umask(context->umask);
1297 if (apply_permissions && context->pam_name && username) {
1298 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1305 if (context->private_network) {
1306 if (unshare(CLONE_NEWNET) < 0) {
1315 if (strv_length(context->read_write_dirs) > 0 ||
1316 strv_length(context->read_only_dirs) > 0 ||
1317 strv_length(context->inaccessible_dirs) > 0 ||
1318 context->mount_flags != 0 ||
1319 context->private_tmp) {
1320 err = setup_namespace(context->read_write_dirs,
1321 context->read_only_dirs,
1322 context->inaccessible_dirs,
1324 context->var_tmp_dir,
1325 context->private_tmp,
1326 context->mount_flags);
1334 if (context->root_directory)
1335 if (chroot(context->root_directory) < 0) {
1341 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1347 char _cleanup_free_ *d = NULL;
1349 if (asprintf(&d, "%s/%s",
1350 context->root_directory ? context->root_directory : "",
1351 context->working_directory ? context->working_directory : "") < 0) {
1364 /* We repeat the fd closing here, to make sure that
1365 * nothing is leaked from the PAM modules */
1366 err = close_all_fds(fds, n_fds);
1368 err = shift_fds(fds, n_fds);
1370 err = flags_fds(fds, n_fds, context->non_blocking);
1376 if (apply_permissions) {
1378 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1379 if (!context->rlimit[i])
1382 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1389 if (context->capability_bounding_set_drop) {
1390 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1392 r = EXIT_CAPABILITIES;
1397 if (context->user) {
1398 err = enforce_user(context, uid);
1405 /* PR_GET_SECUREBITS is not privileged, while
1406 * PR_SET_SECUREBITS is. So to suppress
1407 * potential EPERMs we'll try not to call
1408 * PR_SET_SECUREBITS unless necessary. */
1409 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1410 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1412 r = EXIT_SECUREBITS;
1416 if (context->capabilities)
1417 if (cap_set_proc(context->capabilities) < 0) {
1419 r = EXIT_CAPABILITIES;
1423 if (context->no_new_privileges)
1424 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1426 r = EXIT_NO_NEW_PRIVILEGES;
1430 if (context->syscall_filter) {
1431 err = apply_seccomp(context->syscall_filter);
1439 if (!(our_env = new0(char*, 7))) {
1446 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1447 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1454 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1461 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1462 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1468 if (is_terminal_input(context->std_input) ||
1469 context->std_output == EXEC_OUTPUT_TTY ||
1470 context->std_error == EXEC_OUTPUT_TTY)
1471 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1479 if (!(final_env = strv_env_merge(
1483 context->environment,
1492 if (!(final_argv = replace_env_argv(argv, final_env))) {
1498 final_env = strv_env_clean(final_env);
1500 execve(command->path, final_argv, final_env);
1507 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1508 "EXECUTABLE=%s", command->path,
1509 "MESSAGE=Failed at step %s spawning %s: %s",
1510 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1511 command->path, strerror(-err),
1520 log_struct_unit(LOG_DEBUG,
1522 "MESSAGE=Forked %s as %lu",
1523 command->path, (unsigned long) pid,
1526 /* We add the new process to the cgroup both in the child (so
1527 * that we can be sure that no user code is ever executed
1528 * outside of the cgroup) and in the parent (so that we can be
1529 * sure that when we kill the cgroup the process will be
1531 if (cgroup_bondings)
1532 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1534 exec_status_start(&command->exec_status, pid);
1540 void exec_context_init(ExecContext *c) {
1544 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1545 c->cpu_sched_policy = SCHED_OTHER;
1546 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1547 c->syslog_level_prefix = true;
1548 c->control_group_persistent = -1;
1549 c->ignore_sigpipe = true;
1550 c->timer_slack_nsec = (nsec_t) -1;
1553 void exec_context_tmp_dirs_done(ExecContext *c) {
1557 rm_rf_dangerous(c->tmp_dir, false, true, false);
1562 if (c->var_tmp_dir) {
1563 rm_rf_dangerous(c->var_tmp_dir, false, true, false);
1564 free(c->var_tmp_dir);
1565 c->var_tmp_dir = NULL;
1569 void exec_context_done(ExecContext *c, bool reloading_or_reexecuting) {
1574 strv_free(c->environment);
1575 c->environment = NULL;
1577 strv_free(c->environment_files);
1578 c->environment_files = NULL;
1580 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1582 c->rlimit[l] = NULL;
1585 free(c->working_directory);
1586 c->working_directory = NULL;
1587 free(c->root_directory);
1588 c->root_directory = NULL;
1593 free(c->tcpwrap_name);
1594 c->tcpwrap_name = NULL;
1596 free(c->syslog_identifier);
1597 c->syslog_identifier = NULL;
1605 strv_free(c->supplementary_groups);
1606 c->supplementary_groups = NULL;
1611 if (c->capabilities) {
1612 cap_free(c->capabilities);
1613 c->capabilities = NULL;
1616 strv_free(c->read_only_dirs);
1617 c->read_only_dirs = NULL;
1619 strv_free(c->read_write_dirs);
1620 c->read_write_dirs = NULL;
1622 strv_free(c->inaccessible_dirs);
1623 c->inaccessible_dirs = NULL;
1626 CPU_FREE(c->cpuset);
1631 free(c->syscall_filter);
1632 c->syscall_filter = NULL;
1634 if (!reloading_or_reexecuting)
1635 exec_context_tmp_dirs_done(c);
1638 void exec_command_done(ExecCommand *c) {
1648 void exec_command_done_array(ExecCommand *c, unsigned n) {
1651 for (i = 0; i < n; i++)
1652 exec_command_done(c+i);
1655 void exec_command_free_list(ExecCommand *c) {
1659 LIST_REMOVE(ExecCommand, command, c, i);
1660 exec_command_done(i);
1665 void exec_command_free_array(ExecCommand **c, unsigned n) {
1668 for (i = 0; i < n; i++) {
1669 exec_command_free_list(c[i]);
1674 int exec_context_load_environment(const ExecContext *c, char ***l) {
1675 char **i, **r = NULL;
1680 STRV_FOREACH(i, c->environment_files) {
1683 bool ignore = false;
1695 if (!path_is_absolute(fn)) {
1704 /* Filename supports globbing, take all matching files */
1707 if (glob(fn, 0, NULL, &pglob) != 0) {
1713 return errno ? -errno : -EINVAL;
1715 count = pglob.gl_pathc;
1724 for (n = 0; n < count; n++) {
1725 k = load_env_file(pglob.gl_pathv[n], &p);
1740 m = strv_env_merge(2, r, p);
1760 static bool tty_may_match_dev_console(const char *tty) {
1761 char *active = NULL, *console;
1764 if (startswith(tty, "/dev/"))
1767 /* trivial identity? */
1768 if (streq(tty, "console"))
1771 console = resolve_dev_console(&active);
1772 /* if we could not resolve, assume it may */
1776 /* "tty0" means the active VC, so it may be the same sometimes */
1777 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
1783 bool exec_context_may_touch_console(ExecContext *ec) {
1784 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1785 is_terminal_input(ec->std_input) ||
1786 is_terminal_output(ec->std_output) ||
1787 is_terminal_output(ec->std_error)) &&
1788 tty_may_match_dev_console(tty_path(ec));
1791 static void strv_fprintf(FILE *f, char **l) {
1797 fprintf(f, " %s", *g);
/* Writes a human-readable, line-oriented description of the execution
 * context c to the stream f. Every emitted line starts with prefix.
 *
 * Settings that are always meaningful (directories, boolean switches,
 * standard input/output/error modes) are printed first; most of the
 * remaining settings are only printed when they are actually configured. */
1800 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1812 "%sWorkingDirectory: %s\n"
1813 "%sRootDirectory: %s\n"
1814 "%sNonBlocking: %s\n"
1815 "%sPrivateTmp: %s\n"
1816 "%sControlGroupModify: %s\n"
1817 "%sControlGroupPersistent: %s\n"
1818 "%sPrivateNetwork: %s\n"
1819 "%sIgnoreSIGPIPE: %s\n",
1821 prefix, c->working_directory ? c->working_directory : "/",
1822 prefix, c->root_directory ? c->root_directory : "/",
1823 prefix, yes_no(c->non_blocking),
1824 prefix, yes_no(c->private_tmp),
1825 prefix, yes_no(c->control_group_modify),
1826 prefix, yes_no(c->control_group_persistent),
1827 prefix, yes_no(c->private_network),
1828 prefix, yes_no(c->ignore_sigpipe));
/* One output line per environment assignment and per environment file. */
1830 STRV_FOREACH(e, c->environment)
1831 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1833 STRV_FOREACH(e, c->environment_files)
1834 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1836 if (c->tcpwrap_name)
1838 "%sTCPWrapName: %s\n",
1839 prefix, c->tcpwrap_name);
1846 if (c->oom_score_adjust_set)
1848 "%sOOMScoreAdjust: %i\n",
1849 prefix, c->oom_score_adjust);
/* Resource limits: only the rlim_max member of each limit is printed. */
1851 for (i = 0; i < RLIM_NLIMITS; i++)
1853 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
/* I/O priority: class and data part are extracted from the packed ioprio
 * value; strna() covers the case where no class string was produced. */
1855 if (c->ioprio_set) {
1859 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1863 "%sIOSchedulingClass: %s\n"
1864 "%sIOPriority: %i\n",
1865 prefix, strna(class_str),
1866 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
/* CPU scheduling policy, priority and reset-on-fork flag. */
1870 if (c->cpu_sched_set) {
1874 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1878 "%sCPUSchedulingPolicy: %s\n"
1879 "%sCPUSchedulingPriority: %i\n"
1880 "%sCPUSchedulingResetOnFork: %s\n",
1881 prefix, strna(policy_str),
1882 prefix, c->cpu_sched_priority,
1883 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* CPU affinity: the index of every CPU set in the mask, space separated. */
1888 fprintf(f, "%sCPUAffinity:", prefix);
1889 for (i = 0; i < c->cpuset_ncpus; i++)
1890 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1891 fprintf(f, " %i", i);
/* (nsec_t) -1 marks the timer slack as not configured. */
1895 if (c->timer_slack_nsec != (nsec_t) -1)
1896 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1899 "%sStandardInput: %s\n"
1900 "%sStandardOutput: %s\n"
1901 "%sStandardError: %s\n",
1902 prefix, exec_input_to_string(c->std_input),
1903 prefix, exec_output_to_string(c->std_output),
1904 prefix, exec_output_to_string(c->std_error));
1910 "%sTTYVHangup: %s\n"
1911 "%sTTYVTDisallocate: %s\n",
1912 prefix, c->tty_path,
1913 prefix, yes_no(c->tty_reset),
1914 prefix, yes_no(c->tty_vhangup),
1915 prefix, yes_no(c->tty_vt_disallocate));
/* Syslog facility and level are only dumped when stdout or stderr is
 * connected to syslog, kmsg or the journal (with or without +console). */
1917 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1918 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1919 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1920 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1921 char *fac_str, *lvl_str;
/* The facility sits above the low three bits of syslog_priority, the
 * level is what LOG_PRI() extracts. */
1924 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1928 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1933 "%sSyslogFacility: %s\n"
1934 "%sSyslogLevel: %s\n",
1935 prefix, strna(fac_str),
1936 prefix, strna(lvl_str));
1941 if (c->capabilities) {
1943 if ((t = cap_to_text(c->capabilities, NULL))) {
1944 fprintf(f, "%sCapabilities: %s\n",
/* The format string has no separators between its %s fields, so every
 * flag name must carry its own leading space; otherwise two adjacent
 * flags run together in the output.
 * NOTE(review): the flags are tested with "secure_bits & SECURE_*". If
 * the SECURE_* constants from securebits.h are bit indices rather than
 * masks, these tests need "1 << SECURE_*" -- confirm against the header. */
1951 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1953 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1954 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1955 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1956 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1957 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1958 (c->secure_bits & SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* The context stores the capabilities to DROP from the bounding set; the
 * dump lists the ones whose drop bit is not set. */
1960 if (c->capability_bounding_set_drop) {
1962 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1964 for (l = 0; l <= cap_last_cap(); l++)
1965 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1968 if ((t = cap_to_name(l))) {
1969 fprintf(f, " %s", t);
1978 fprintf(f, "%sUser: %s\n", prefix, c->user);
1980 fprintf(f, "%sGroup: %s\n", prefix, c->group);
/* String lists: label first, then the entries via strv_fprintf(). */
1982 if (strv_length(c->supplementary_groups) > 0) {
1983 fprintf(f, "%sSupplementaryGroups:", prefix);
1984 strv_fprintf(f, c->supplementary_groups);
1989 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1991 if (strv_length(c->read_write_dirs) > 0) {
1992 fprintf(f, "%sReadWriteDirs:", prefix);
1993 strv_fprintf(f, c->read_write_dirs);
1997 if (strv_length(c->read_only_dirs) > 0) {
1998 fprintf(f, "%sReadOnlyDirs:", prefix);
1999 strv_fprintf(f, c->read_only_dirs);
2003 if (strv_length(c->inaccessible_dirs) > 0) {
2004 fprintf(f, "%sInaccessibleDirs:", prefix);
2005 strv_fprintf(f, c->inaccessible_dirs);
2011 "%sUtmpIdentifier: %s\n",
2012 prefix, c->utmp_id);
/* Marks in s that a process has been started: start_timestamp is filled in
 * through dual_timestamp_get().
 * NOTE(review): pid is presumably recorded in s as well -- confirm. */
2015 void exec_status_start(ExecStatus *s, pid_t pid) {
2020 dual_timestamp_get(&s->start_timestamp);
/* Records in s that process pid has terminated with the given SIGCHLD code
 * and status, and performs the follow-up work tied to the execution
 * context: the exit is passed to utmp_put_dead_process() when the context
 * has a utmp_id, and exec_context_tty_reset() is called for the context. */
2023 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* s already refers to a different, non-zero PID than the one reported. */
2026 if (s->pid && s->pid != pid)
2030 dual_timestamp_get(&s->exit_timestamp);
2036 if (context->utmp_id)
2037 utmp_put_dead_process(context->utmp_id, pid, code, status);
2039 exec_context_tty_reset(context);
/* Writes the contents of s to f in human-readable form, one item per line,
 * each line starting with prefix: the PID, then the start timestamp and the
 * exit timestamp/code/status. */
2043 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer for format_timestamp(); reused for both timestamps. */
2044 char buf[FORMAT_TIMESTAMP_MAX];
2057 prefix, (unsigned long) s->pid);
/* A realtime value of 0 means the timestamp was never taken, so the
 * corresponding lines are left out. */
2059 if (s->start_timestamp.realtime > 0)
2061 "%sStart Timestamp: %s\n",
2062 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2064 if (s->exit_timestamp.realtime > 0)
2066 "%sExit Timestamp: %s\n"
2068 "%sExit Status: %i\n",
2069 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2070 prefix, sigchld_code_to_string(s->code),
/* Builds a single command-line string from the argument vector argv in a
 * buffer allocated with new(). The vector is walked twice: once before the
 * allocation and once to fill the buffer.
 * NOTE(review): presumably returns NULL when the allocation fails (the
 * caller in exec_command_dump() checks for NULL) and the caller owns the
 * result -- confirm. */
2074 char *exec_command_line(char **argv) {
2082 STRV_FOREACH(a, argv)
2085 if (!(n = new(char, k)))
2089 STRV_FOREACH(a, argv) {
/* Arguments that contain whitespace take a separate path. */
2096 if (strpbrk(*a, WHITESPACE)) {
2107 /* FIXME: this doesn't really handle arguments that have
2108 * spaces and ticks in them */
/* Writes a description of the command c to f: its command line, prefixed
 * with prefix, followed by its exec_status dumped through
 * exec_status_dump(). */
2113 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2115 const char *prefix2;
/* The status is dumped with prefix plus a tab; if strappend() fails the
 * plain prefix is used instead. */
2124 p2 = strappend(prefix, "\t");
2125 prefix2 = p2 ? p2 : prefix;
2127 cmd = exec_command_line(c->argv);
/* If no command line could be built, the ENOMEM error text is printed in
 * its place. */
2130 "%sCommand Line: %s\n",
2131 prefix, cmd ? cmd : strerror(ENOMEM));
2135 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at c to f by calling
 * exec_command_dump() on each entry with the same prefix. */
2140 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* c doubles as the iteration cursor. */
2146 LIST_FOREACH(command, c, c)
2147 exec_command_dump(c, f, prefix);
/* Appends the command e to the command list *l: the current tail of the
 * list is looked up and e is inserted after it. */
2150 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2157 /* It's kind of important, that we keep the order here */
2158 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2159 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for the command c: a string vector is built from path and
 * the variadic arguments with strv_new_ap(), and path itself is duplicated
 * with strdup().
 * NOTE(review): presumably both results are stored in c and a negative
 * error code is returned on allocation failure -- confirm. */
2164 int exec_command_set(ExecCommand *c, const char *path, ...) {
2172 l = strv_new_ap(path, ap);
2178 if (!(p = strdup(path))) {
/* Textual names of the ExecInput values, indexed by the enum value. These
 * are the strings exec_context_dump() prints for StandardInput. */
2192 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2193 [EXEC_INPUT_NULL] = "null",
2194 [EXEC_INPUT_TTY] = "tty",
2195 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2196 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2197 [EXEC_INPUT_SOCKET] = "socket"
/* Generates the lookup helpers for the table above, among them the
 * exec_input_to_string() used by the dump code. */
2200 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Textual names of the ExecOutput values, indexed by the enum value. These
 * are the strings exec_context_dump() prints for StandardOutput and
 * StandardError. */
2202 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2203 [EXEC_OUTPUT_INHERIT] = "inherit",
2204 [EXEC_OUTPUT_NULL] = "null",
2205 [EXEC_OUTPUT_TTY] = "tty",
2206 [EXEC_OUTPUT_SYSLOG] = "syslog",
2207 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2208 [EXEC_OUTPUT_KMSG] = "kmsg",
2209 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2210 [EXEC_OUTPUT_JOURNAL] = "journal",
2211 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2212 [EXEC_OUTPUT_SOCKET] = "socket"
/* Generates the lookup helpers for the table above, among them the
 * exec_output_to_string() used by the dump code. */
2215 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);