1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
46 #include <security/pam_appl.h>
52 #include "capability.h"
55 #include "sd-messages.h"
57 #include "securebits.h"
59 #include "namespace.h"
61 #include "exit-status.h"
63 #include "utmp-wtmp.h"
65 #include "loopback-setup.h"
66 #include "path-util.h"
67 #include "syscall-list.h"
71 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
73 /* This assumes there is a 'tty' group */
/* Relocates the descriptors in fds[] so that entry i ends up as descriptor
 * number i+3. Each entry is duplicated with fcntl(F_DUPFD, i+3) and the old
 * descriptor is closed. When the duplicate does not land on i+3, the first
 * such index is remembered in restart_from and the scan starts again from
 * there.
 *
 * fds:   descriptors to move; the array is modified (see the note below).
 * n_fds: number of entries in fds. */
76 static int shift_fds(int fds[], unsigned n_fds) {
77 int start, restart_from;
82 /* Modifies the fds array! (sorts it) */
92 for (i = start; i < (int) n_fds; i++) {
95 /* Already at right index? */
99 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
102 close_nointr_nofail(fds[i]);
105 /* Hmm, the fd we wanted isn't free? Then
106 * let's remember that and try again from here*/
107 if (nfd != i+3 && restart_from < 0)
111 if (restart_from < 0)
114 start = restart_from;
/* Adjusts the flags of each of the n_fds descriptors in fds: O_NONBLOCK is
 * set or cleared according to nonblock via fd_nonblock(), and FD_CLOEXEC is
 * cleared via fd_cloexec(fd, false). Both helper results are checked for a
 * negative value. */
120 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
129 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
131 for (i = 0; i < n_fds; i++) {
133 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
136 /* We unconditionally drop FD_CLOEXEC from the fds,
137 * since after all we want to pass these fds to our
140 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when
 * context->tty_path is not set. The returned string is not a copy. */
147 _pure_ static const char *tty_path(const ExecContext *context) {
150 if (context->tty_path)
151 return context->tty_path;
153 return "/dev/console";
/* Runs the TTY clean-up steps that are enabled in the context. The vhangup
 * and reset steps act on tty_path(context) and therefore fall back to
 * /dev/console, whereas VT disallocation is only performed when an explicit
 * context->tty_path is configured. */
156 void exec_context_tty_reset(const ExecContext *context) {
159 if (context->tty_vhangup)
160 terminal_vhangup(tty_path(context));
162 if (context->tty_reset)
163 reset_terminal(tty_path(context));
165 if (context->tty_vt_disallocate && context->tty_path)
166 vt_disallocate(context->tty_path);
/* Tests whether o is one of the output modes that involve a terminal:
 * EXEC_OUTPUT_TTY or one of the three *_AND_CONSOLE modes. */
169 static bool is_terminal_output(ExecOutput o) {
171 o == EXEC_OUTPUT_TTY ||
172 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
173 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
174 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Writes the context's temporary directory paths for unit u to f using
 * unit_serialize_item(), under the keys "tmp-dir" and "var-tmp-dir". A path
 * that is not set is not written. */
177 void exec_context_serialize(const ExecContext *context, Unit *u, FILE *f) {
182 if (context->tmp_dir)
183 unit_serialize_item(u, f, "tmp-dir", context->tmp_dir);
185 if (context->var_tmp_dir)
186 unit_serialize_item(u, f, "var-tmp-dir", context->var_tmp_dir);
/* Opens /dev/null with the given flags (plus O_NOCTTY) and duplicates it
 * onto descriptor nfd with dup2(). r becomes nfd on success or -errno when
 * dup2() fails, and the descriptor returned by open() is closed.
 *
 * NOTE(review): confirm how the case where open() already returns nfd is
 * handled; dup2() followed by closing fd would be wrong in that case. */
189 static int open_null_as(int flags, int nfd) {
194 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
198 r = dup2(fd, nfd) < 0 ? -errno : nfd;
199 close_nointr_nofail(fd);
/* Connects an AF_UNIX stream socket to /run/systemd/journal/stdout, shuts
 * down its read side and finally duplicates it onto descriptor nfd (r is nfd
 * on success, -errno if dup2() fails). The socket is closed on the visible
 * failure paths and after the dup2().
 *
 * The argument list in the middle presumably belongs to a header that is
 * written to the socket: the identifier (context->syslog_identifier when set,
 * otherwise ident), the syslog priority, the level-prefix flag, and three
 * booleans derived from output that select forwarding to syslog, to kmsg and
 * to the console. */
206 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
208 union sockaddr_union sa = {
209 .un.sun_family = AF_UNIX,
210 .un.sun_path = "/run/systemd/journal/stdout",
214 assert(output < _EXEC_OUTPUT_MAX);
218 fd = socket(AF_UNIX, SOCK_STREAM, 0);
222 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
224 close_nointr_nofail(fd);
228 if (shutdown(fd, SHUT_RD) < 0) {
229 close_nointr_nofail(fd);
241 context->syslog_identifier ? context->syslog_identifier : ident,
243 context->syslog_priority,
244 !!context->syslog_level_prefix,
245 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
246 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
247 is_terminal_output(output));
250 r = dup2(fd, nfd) < 0 ? -errno : nfd;
251 close_nointr_nofail(fd);
/* Opens the terminal at path with open_terminal(), adding O_NOCTTY to mode,
 * and duplicates it onto descriptor nfd with dup2(). r becomes nfd on
 * success or -errno when dup2() fails, and the descriptor returned by
 * open_terminal() is closed. */
257 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
263 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
267 r = dup2(fd, nfd) < 0 ? -errno : nfd;
268 close_nointr_nofail(fd);
/* Tests whether i is one of the terminal input modes: EXEC_INPUT_TTY,
 * EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL. */
275 static bool is_terminal_input(ExecInput i) {
277 i == EXEC_INPUT_TTY ||
278 i == EXEC_INPUT_TTY_FORCE ||
279 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the requested stdin mode to EXEC_INPUT_NULL when it cannot be
 * honoured: a terminal input mode while apply_tty_stdin is false, or socket
 * input without a valid socket_fd (negative). Any other mode is presumably
 * passed through unchanged. */
282 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
284 if (is_terminal_input(std_input) && !apply_tty_stdin)
285 return EXEC_INPUT_NULL;
287 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
288 return EXEC_INPUT_NULL;
/* Replaces EXEC_OUTPUT_SOCKET by EXEC_OUTPUT_INHERIT when no valid socket_fd
 * (negative) is available. Any other mode is presumably passed through
 * unchanged. */
293 static int fixup_output(ExecOutput std_output, int socket_fd) {
295 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
296 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the effective input mode computed by
 * fixup_input():
 *   - EXEC_INPUT_NULL opens /dev/null read-only;
 *   - the TTY modes acquire the terminal (TTY_FAIL and TTY_FORCE are passed
 *     on as two boolean arguments of acquire_terminal()) and, if the
 *     acquired descriptor is not already stdin, dup2() it there and close
 *     the original;
 *   - EXEC_INPUT_SOCKET dup2()s socket_fd onto stdin.
 * The visible return paths yield STDIN_FILENO on success and -errno when
 * dup2() fails. */
301 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
306 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
310 case EXEC_INPUT_NULL:
311 return open_null_as(O_RDONLY, STDIN_FILENO);
314 case EXEC_INPUT_TTY_FORCE:
315 case EXEC_INPUT_TTY_FAIL: {
318 if ((fd = acquire_terminal(
320 i == EXEC_INPUT_TTY_FAIL,
321 i == EXEC_INPUT_TTY_FORCE,
326 if (fd != STDIN_FILENO) {
327 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
328 close_nointr_nofail(fd);
335 case EXEC_INPUT_SOCKET:
336 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
339 assert_not_reached("Unknown input type");
/* Sets up descriptor fileno (stdout or stderr) for the process to be
 * executed. i, o and e are the effective stdin, stdout and stderr modes
 * after fixup_input()/fixup_output().
 *
 * For stderr: when e equals o or e is INHERIT, stderr is duplicated from
 * stdout. For an inherited stdout: the terminal is reopened when stdin was
 * downgraded from a terminal mode, stdin is duplicated when it is not
 * /dev/null, and otherwise /dev/null is opened write-only.
 *
 * Explicit modes: NULL opens /dev/null; TTY duplicates stdin when stdin is a
 * terminal mode and otherwise opens tty_path(context); the syslog, kmsg and
 * journal modes go through connect_logger_as(), with a LOG_CRIT message and
 * an open_null_as() call visible as the apparent failure fallback; SOCKET
 * dup2()s socket_fd. The visible dup2() paths yield fileno or -errno.
 *
 * NOTE(review): the final assert_not_reached() message says "Unknown error
 * type" although this function dispatches on output types; it looks like a
 * leftover wording. */
343 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
351 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
352 o = fixup_output(context->std_output, socket_fd);
354 if (fileno == STDERR_FILENO) {
356 e = fixup_output(context->std_error, socket_fd);
358 /* This expects the input and output are already set up */
360 /* Don't change the stderr file descriptor if we inherit all
361 * the way and are not on a tty */
362 if (e == EXEC_OUTPUT_INHERIT &&
363 o == EXEC_OUTPUT_INHERIT &&
364 i == EXEC_INPUT_NULL &&
365 !is_terminal_input(context->std_input) &&
369 /* Duplicate from stdout if possible */
370 if (e == o || e == EXEC_OUTPUT_INHERIT)
371 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
375 } else if (o == EXEC_OUTPUT_INHERIT) {
376 /* If input got downgraded, inherit the original value */
377 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
378 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
380 /* If the input is connected to anything that's not a /dev/null, inherit that... */
381 if (i != EXEC_INPUT_NULL)
382 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
384 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
388 /* We need to open /dev/null here anew, to get the right access mode. */
389 return open_null_as(O_WRONLY, fileno);
394 case EXEC_OUTPUT_NULL:
395 return open_null_as(O_WRONLY, fileno);
397 case EXEC_OUTPUT_TTY:
398 if (is_terminal_input(i))
399 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
401 /* We don't reset the terminal if this is just about output */
402 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
404 case EXEC_OUTPUT_SYSLOG:
405 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
406 case EXEC_OUTPUT_KMSG:
407 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
408 case EXEC_OUTPUT_JOURNAL:
409 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
410 r = connect_logger_as(context, o, ident, unit_id, fileno);
412 log_struct_unit(LOG_CRIT, unit_id,
413 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
414 fileno == STDOUT_FILENO ? "out" : "err",
415 unit_id, strerror(-r),
418 r = open_null_as(O_WRONLY, fileno);
422 case EXEC_OUTPUT_SOCKET:
423 assert(socket_fd >= 0);
424 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
427 assert_not_reached("Unknown error type");
/* Tries to make uid the owner of the terminal fd and to set its mode to
 * TTY_MODE. The results of fchown() and fchmod() are deliberately ignored;
 * instead the outcome is verified with fstat(), comparing the owner and the
 * permission bits (st_mode & 0777) against the wanted values. */
431 static int chown_terminal(int fd, uid_t uid) {
436 /* This might fail. What matters are the results. */
437 (void) fchown(fd, uid, -1);
438 (void) fchmod(fd, TTY_MODE);
440 if (fstat(fd, &st) < 0)
443 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Redirects stdin and stdout to a terminal for the confirmation prompt.
 * Duplicates of the current stdin and stdout are made first (F_DUPFD, at
 * descriptor 3 or above), then a terminal is acquired (waiting at most
 * DEFAULT_CONFIRM_USEC), chowned to the current uid and dup2()ed onto
 * stdin and stdout. The duplicates are handed back through _saved_stdin
 * and _saved_stdout so that restore_confirm_stdio() can undo the change.
 * The closes at the end presumably form the error clean-up path. */
449 static int setup_confirm_stdio(int *_saved_stdin,
450 int *_saved_stdout) {
451 int fd = -1, saved_stdin, saved_stdout = -1, r;
453 assert(_saved_stdin);
454 assert(_saved_stdout);
456 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
460 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
461 if (saved_stdout < 0) {
466 fd = acquire_terminal(
471 DEFAULT_CONFIRM_USEC);
477 r = chown_terminal(fd, getuid());
481 if (dup2(fd, STDIN_FILENO) < 0) {
486 if (dup2(fd, STDOUT_FILENO) < 0) {
492 close_nointr_nofail(fd);
494 *_saved_stdin = saved_stdin;
495 *_saved_stdout = saved_stdout;
500 if (saved_stdout >= 0)
501 close_nointr_nofail(saved_stdout);
503 if (saved_stdin >= 0)
504 close_nointr_nofail(saved_stdin);
507 close_nointr_nofail(fd);
/* printf-style helper for the confirmation dialogue: opens /dev/console
 * write-only (with O_NOCTTY and O_CLOEXEC), formats the message to it with
 * vdprintf() and closes the descriptor again. */
512 _printf_attr_(1, 2) static int write_confirm_message(const char *format, ...) {
518 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
522 va_start(ap, format);
523 vdprintf(fd, format, ap);
526 close_nointr_nofail(fd);
/* Undoes setup_confirm_stdio(): for each saved descriptor that is valid
 * (>= 0), dup2() it back onto STDIN_FILENO / STDOUT_FILENO, then close the
 * saved descriptors. */
531 static int restore_confirm_stdio(int *saved_stdin,
537 assert(saved_stdout);
541 if (*saved_stdin >= 0)
542 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
545 if (*saved_stdout >= 0)
546 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
549 if (*saved_stdin >= 0)
550 close_nointr_nofail(*saved_stdin);
552 if (*saved_stdout >= 0)
553 close_nointr_nofail(*saved_stdout);
/* Asks interactively whether the command in argv should be executed.
 * Stdio is switched to the terminal with setup_confirm_stdio(), the command
 * line is rendered with exec_command_line(), and ask() prompts with the
 * accepted answers "yns" (yes / no / skip), storing the reply in *response.
 * Stdio is restored afterwards with restore_confirm_stdio(). */
558 static int ask_for_confirmation(char *response, char **argv) {
559 int saved_stdout = -1, saved_stdin = -1, r;
562 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
566 line = exec_command_line(argv);
570 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
573 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group identity for the process to be executed.
 *
 * When a group is configured in the context it is resolved with
 * get_group_creds() and overrides gid. If a username is given and gid is
 * non-zero, the group list is initialized with initgroups(). The real,
 * effective and saved gid are then set with setresgid(). Finally, when
 * supplementary groups are configured, the current list is fetched with
 * getgroups(), each configured name is resolved and appended (bounded by
 * _SC_NGROUPS_MAX), and the result is installed with setgroups().
 *
 * context:  execution context providing group and supplementary_groups.
 * username: user name for initgroups(); may be NULL.
 * gid:      primary gid to use unless context->group overrides it. */
578 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
579 bool keep_groups = false;
584 /* Lookup and set GID and supplementary group list. Here too
585 * we avoid NSS lookups for gid=0. */
587 if (context->group || username) {
589 if (context->group) {
590 const char *g = context->group;
592 if ((r = get_group_creds(&g, &gid)) < 0)
596 /* First step, initialize groups from /etc/group */
597 if (username && gid != 0) {
598 if (initgroups(username, gid) < 0)
604 /* Second step, set our gids */
605 if (setresgid(gid, gid, gid) < 0)
609 if (context->supplementary_groups) {
614 /* Final step, initialize any manually set supplementary groups */
615 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
617 if (!(gids = new(gid_t, ngroups_max)))
621 if ((k = getgroups(ngroups_max, gids)) < 0) {
628 STRV_FOREACH(i, context->supplementary_groups) {
631 if (k >= ngroups_max) {
637 r = get_group_creds(&g, gids+k);
646 if (setgroups(k, gids) < 0) {
/* Switches the process to uid with setresuid() while retaining configured
 * capabilities.
 *
 * When context->capabilities is set, the KEEP_CAPS secure bit is added to
 * the context's secure bits (PR_SET_SECUREBITS is only called when the
 * current value differs), and a copy of the configured capability set,
 * extended by CAP_SETUID and CAP_SETPCAP in both the effective and the
 * permitted sets, is installed with cap_set_proc() before the uid change.
 * The final capability set is applied by the caller afterwards. */
657 static int enforce_user(const ExecContext *context, uid_t uid) {
661 /* Sets (but doesn't lookup) the uid and make sure we keep the
662 * capabilities while doing so. */
664 if (context->capabilities) {
666 static const cap_value_t bits[] = {
667 CAP_SETUID, /* Necessary so that we can run setresuid() below */
668 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
671 /* First step: If we need to keep capabilities but
672 * drop privileges we need to make sure we keep our
673 * caps, while we drop privileges. */
675 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
677 if (prctl(PR_GET_SECUREBITS) != sb)
678 if (prctl(PR_SET_SECUREBITS, sb) < 0)
682 /* Second step: set the capabilities. This will reduce
683 * the capabilities to the minimum we need. */
685 if (!(d = cap_dup(context->capabilities)))
688 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
689 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
695 if (cap_set_proc(d) < 0) {
704 /* Third step: actually set the uids */
705 if (setresuid(uid, uid, uid) < 0)
708 /* At this point we should have all necessary capabilities but
709 are otherwise a normal user. However, the caps might have been
710 corrupted due to the setresuid() so we need to clean them up
711 later. This is done outside of this call. */
/* PAM conversation callback stub. As the comment below states,
 * conversations are not supported; the messages in msg are not examined in
 * the visible code. */
718 static int null_conv(
720 const struct pam_message **msg,
721 struct pam_response **resp,
724 /* We don't support conversations */
/* Opens a PAM session and forks a helper process ("(sd-pam)") that closes
 * the session again once the calling process has gone away.
 *
 * Caller side: pam_start(), PAM_TTY is set, pam_acct_mgmt(),
 * pam_open_session(), and the PAM environment is fetched with
 * pam_getenvlist(). SIGTERM is blocked before fork() and the previous mask
 * is restored afterwards.
 *
 * Helper side: renames itself, closes the passed fds, drops to uid with
 * setresuid(), requests SIGTERM on parent death via PR_SET_PDEATHSIG, waits
 * with sigwait() while the parent is still alive, and then, if the parent
 * is gone, calls pam_close_session() followed by pam_end().
 *
 * On failure, PAM errors are mapped to -EPERM, the session is closed and
 * the handle ended, and a forked helper is sent SIGTERM and SIGCONT.
 *
 * NOTE(review): the setresuid() failure message formats strerror(-r), but
 * setresuid() reports its error through errno and no assignment to r is
 * visible before that point. Confirm the intended error source. */
729 static int setup_pam(
735 int fds[], unsigned n_fds) {
737 static const struct pam_conv conv = {
742 pam_handle_t *handle = NULL;
744 int pam_code = PAM_SUCCESS;
747 bool close_session = false;
748 pid_t pam_pid = 0, parent_pid;
754 /* We set up PAM in the parent process, then fork. The child
755 * will then stay around until killed via PR_SET_PDEATHSIG or
756 * systemd via the cgroup logic. It will then remove the PAM
757 * session again. The parent process will exec() the actual
758 * daemon. We do things this way to ensure that the main PID
759 * of the daemon is the one we initially fork()ed. */
761 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
767 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
770 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
773 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
776 close_session = true;
778 if ((!(e = pam_getenvlist(handle)))) {
779 pam_code = PAM_BUF_ERR;
783 /* Block SIGTERM, so that we know that it won't get lost in
785 if (sigemptyset(&ss) < 0 ||
786 sigaddset(&ss, SIGTERM) < 0 ||
787 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
790 parent_pid = getpid();
792 if ((pam_pid = fork()) < 0)
799 /* The child's job is to reset the PAM session on
802 /* This string must fit in 10 chars (i.e. the length
803 * of "/sbin/init"), to look pretty in /bin/ps */
804 rename_process("(sd-pam)");
806 /* Make sure we don't keep open the passed fds in this
807 child. We assume that otherwise only those fds are
808 open here that have been opened by PAM. */
809 close_many(fds, n_fds);
811 /* Drop privileges - we don't need any to pam_close_session
812 * and this will make PR_SET_PDEATHSIG work in most cases.
813 * If this fails, ignore the error - but expect sd-pam threads
814 * to fail to exit normally */
815 if (setresuid(uid, uid, uid) < 0)
816 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
818 /* Wait until our parent died. This will only work if
819 * the above setresuid() succeeds, otherwise the kernel
820 * will not allow unprivileged parents to kill their privileged
821 * children this way. We rely on the control groups kill logic
822 * to do the rest for us. */
823 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
826 /* Check if our parent process might already have
828 if (getppid() == parent_pid) {
830 if (sigwait(&ss, &sig) < 0) {
837 assert(sig == SIGTERM);
842 /* If our parent died we'll end the session */
843 if (getppid() != parent_pid)
844 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
850 pam_end(handle, pam_code | PAM_DATA_SILENT);
854 /* If the child was forked off successfully it will do all the
855 * cleanups, so forget about the handle here. */
858 /* Unblock SIGTERM again in the parent */
859 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
862 /* We close the log explicitly here, since the PAM modules
863 * might have opened it, but we don't want this fd around. */
872 if (pam_code != PAM_SUCCESS)
873 err = -EPERM; /* PAM errors do not map to errno */
879 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
881 pam_end(handle, pam_code | PAM_DATA_SILENT);
889 kill(pam_pid, SIGTERM);
890 kill(pam_pid, SIGCONT);
/* Renames the current process after the file name component of path,
 * wrapped in parentheses, e.g. "(name)". The name is assembled in an
 * 11-byte buffer: '(', l bytes taken from the file name, ')' and the
 * terminating NUL. A fixed "(...)" name is used as a fallback, presumably
 * when no usable file name is found. */
897 static void rename_process_from_path(const char *path) {
898 char process_name[11];
902 /* This resulting string must fit in 10 chars (i.e. the length
903 * of "/sbin/init") to look pretty in /bin/ps */
905 p = path_get_file_name(path);
907 rename_process("(...)");
913 /* The end of the process name is usually more
914 * interesting, since the first bit might just be
920 process_name[0] = '(';
921 memcpy(process_name+1, p, l);
922 process_name[1+l] = ')';
923 process_name[1+l+1] = 0;
925 rename_process(process_name);
/* Builds a seccomp BPF program from the bitmap syscall_filter and installs
 * it with prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER).
 *
 * The program is laid out as: a fixed header (starting with the
 * architecture check), two instructions per selected syscall (a JEQ on
 * INDEX_TO_SYSCALL(i) followed by RET ALLOW), and a fixed footer. The set
 * bits are counted first so that the instruction array can be sized and
 * allocated on the stack with alloca().
 *
 * NOTE(review): a bit is looked up as word i >> 4, bit i & 31. For 32-bit
 * words the conventional word index would be i >> 5; this must match the
 * code that fills the bitmap, so verify both sides before changing it. */
928 static int apply_seccomp(uint32_t *syscall_filter) {
929 static const struct sock_filter header[] = {
930 VALIDATE_ARCHITECTURE,
933 static const struct sock_filter footer[] = {
939 struct sock_filter *f;
940 struct sock_fprog prog = {};
942 assert(syscall_filter);
944 /* First: count the syscalls to check for */
945 for (i = 0, n = 0; i < syscall_max(); i++)
946 if (syscall_filter[i >> 4] & (1 << (i & 31)))
949 /* Second: build the filter program from a header the syscall
950 * matches and the footer */
951 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
952 memcpy(f, header, sizeof(header));
954 for (i = 0, n = 0; i < syscall_max(); i++)
955 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
956 struct sock_filter item[] = {
957 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
958 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
961 assert_cc(ELEMENTSOF(item) == 2);
963 f[ELEMENTSOF(header) + 2*n] = item[0];
964 f[ELEMENTSOF(header) + 2*n+1] = item[1];
969 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
971 /* Third: install the filter */
972 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
974 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
/* Spawns command in a new process configured according to context.
 *
 * Before the new process runs: environment files are loaded, the command
 * line is logged, the cgroup bondings are realized and the cgroup attributes
 * applied, and private tmp directories are created when private_tmp is set
 * and none exist yet.
 *
 * In the new process, in this order: rename the process; reset signal
 * dispositions and the signal mask; wait on the idle pipe for at most
 * IDLE_TIMEOUT_USEC; close all fds except the passed ones; tcpwrap checks;
 * TTY reset; optional interactive confirmation; stdin/stdout/stderr setup;
 * cgroup membership; OOM score, nice level, CPU scheduler, CPU affinity, IO
 * priority and timer slack; utmp record; user lookup and terminal
 * ownership; cgroup access rights; groups and umask; PAM session; network
 * and mount namespaces; chroot/chdir; second fd close, shift_fds() and
 * flags_fds(); resource limits, capability bounding set, user switch,
 * secure bits, capabilities, no-new-privileges and seccomp; environment
 * assembly (LISTEN_PID, LISTEN_FDS, HOME, LOGNAME, USER, TERM; our_env has
 * room for six entries plus the terminating NULL); argv expansion; execve().
 * A failing step is logged together with its exit-status name.
 *
 * In the parent: the fork is logged, the new pid is added to the cgroups
 * as well, and the command's exec_status is started.
 *
 * command:           command to run (path and argv).
 * context:           execution settings; tmp_dir/var_tmp_dir may be filled.
 * fds, n_fds:        descriptors to pass on; fds may be NULL only when
 *                    n_fds is 0 (see the assert).
 * apply_permissions: gates groups, PAM, rlimits, and presumably the other
 *                    credential changes.
 * apply_tty_stdin:   whether terminal input modes may be honoured.
 * cgroup_bondings, cgroup_attributes, cgroup_suffix: cgroup placement. */
980 int exec_spawn(ExecCommand *command,
982 ExecContext *context,
983 int fds[], unsigned n_fds,
985 bool apply_permissions,
987 bool apply_tty_stdin,
989 CGroupBonding *cgroup_bondings,
990 CGroupAttribute *cgroup_attributes,
991 const char *cgroup_suffix,
1000 _cleanup_strv_free_ char **files_env = NULL;
1005 assert(fds || n_fds <= 0);
1007 if (context->std_input == EXEC_INPUT_SOCKET ||
1008 context->std_output == EXEC_OUTPUT_SOCKET ||
1009 context->std_error == EXEC_OUTPUT_SOCKET) {
1021 r = exec_context_load_environment(context, &files_env);
1023 log_struct_unit(LOG_ERR,
1025 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1032 argv = command->argv;
1034 line = exec_command_line(argv);
1038 log_struct_unit(LOG_DEBUG,
1040 "EXECUTABLE=%s", command->path,
1041 "MESSAGE=About to execute: %s", line,
1045 r = cgroup_bonding_realize_list(cgroup_bondings);
1049 /* We must initialize the attributes in the parent, before we
1050 fork, because we really need them initialized before making
1051 the process a member of the group (which we do in both the
1052 child and the parent), and we cannot really apply them twice
1053 (due to 'append' style attributes) */
1054 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1056 if (context->private_tmp && !context->tmp_dir && !context->var_tmp_dir) {
1057 r = setup_tmpdirs(&context->tmp_dir, &context->var_tmp_dir);
1069 const char *username = NULL, *home = NULL;
1070 uid_t uid = (uid_t) -1;
1071 gid_t gid = (gid_t) -1;
1072 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL,
1073 **final_env = NULL, **final_argv = NULL;
1075 bool set_access = false;
1079 rename_process_from_path(command->path);
1081 /* We reset exactly these signals, since they are the
1082 * only ones we set to SIG_IGN in the main daemon. All
1083 * others we leave untouched because we set them to
1084 * SIG_DFL or a valid handler initially, both of which
1085 * will be demoted to SIG_DFL. */
1086 default_signals(SIGNALS_CRASH_HANDLER,
1087 SIGNALS_IGNORE, -1);
1089 if (context->ignore_sigpipe)
1090 ignore_signals(SIGPIPE, -1);
1092 assert_se(sigemptyset(&ss) == 0);
1093 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1095 r = EXIT_SIGNAL_MASK;
1100 if (idle_pipe[1] >= 0)
1101 close_nointr_nofail(idle_pipe[1]);
1102 if (idle_pipe[0] >= 0) {
1103 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1104 close_nointr_nofail(idle_pipe[0]);
1108 /* Close sockets very early to make sure we don't
1109 * block init reexecution because it cannot bind its
1112 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1113 socket_fd >= 0 ? 1 : n_fds);
1119 if (!context->same_pgrp)
1126 if (context->tcpwrap_name) {
1128 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1134 for (i = 0; i < (int) n_fds; i++) {
1135 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1143 exec_context_tty_reset(context);
1145 if (confirm_spawn) {
1148 err = ask_for_confirmation(&response, argv);
1149 if (err == -ETIMEDOUT)
1150 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1152 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1153 else if (response == 's') {
1154 write_confirm_message("Skipping execution.\n");
1158 } else if (response == 'n') {
1159 write_confirm_message("Failing execution.\n");
1165 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1166 * must be sure to drop O_NONBLOCK */
1168 fd_nonblock(socket_fd, false);
1170 err = setup_input(context, socket_fd, apply_tty_stdin);
1176 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1182 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1188 if (cgroup_bondings) {
1189 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1196 if (context->oom_score_adjust_set) {
1199 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1202 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1204 r = EXIT_OOM_ADJUST;
1209 if (context->nice_set)
1210 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1216 if (context->cpu_sched_set) {
1217 struct sched_param param = {
1218 .sched_priority = context->cpu_sched_priority,
1221 r = sched_setscheduler(0,
1222 context->cpu_sched_policy |
1223 (context->cpu_sched_reset_on_fork ?
1224 SCHED_RESET_ON_FORK : 0),
1228 r = EXIT_SETSCHEDULER;
1233 if (context->cpuset)
1234 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1236 r = EXIT_CPUAFFINITY;
1240 if (context->ioprio_set)
1241 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1247 if (context->timer_slack_nsec != (nsec_t) -1)
1248 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1250 r = EXIT_TIMERSLACK;
1254 if (context->utmp_id)
1255 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1257 if (context->user) {
1258 username = context->user;
1259 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1265 if (is_terminal_input(context->std_input)) {
1266 err = chown_terminal(STDIN_FILENO, uid);
1273 if (cgroup_bondings && context->control_group_modify) {
1274 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1276 err = cgroup_bonding_set_task_access_list(
1281 context->control_group_persistent);
1291 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1292 err = cgroup_bonding_set_task_access_list(
1297 context->control_group_persistent);
1304 if (apply_permissions) {
1305 err = enforce_groups(context, username, gid);
1312 umask(context->umask);
1315 if (apply_permissions && context->pam_name && username) {
1316 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1323 if (context->private_network) {
1324 if (unshare(CLONE_NEWNET) < 0) {
1333 if (strv_length(context->read_write_dirs) > 0 ||
1334 strv_length(context->read_only_dirs) > 0 ||
1335 strv_length(context->inaccessible_dirs) > 0 ||
1336 context->mount_flags != 0 ||
1337 context->private_tmp) {
1338 err = setup_namespace(context->read_write_dirs,
1339 context->read_only_dirs,
1340 context->inaccessible_dirs,
1342 context->var_tmp_dir,
1343 context->private_tmp,
1344 context->mount_flags);
1352 if (context->root_directory)
1353 if (chroot(context->root_directory) < 0) {
1359 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1365 _cleanup_free_ char *d = NULL;
1367 if (asprintf(&d, "%s/%s",
1368 context->root_directory ? context->root_directory : "",
1369 context->working_directory ? context->working_directory : "") < 0) {
1382 /* We repeat the fd closing here, to make sure that
1383 * nothing is leaked from the PAM modules */
1384 err = close_all_fds(fds, n_fds);
1386 err = shift_fds(fds, n_fds);
1388 err = flags_fds(fds, n_fds, context->non_blocking);
1394 if (apply_permissions) {
1396 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1397 if (!context->rlimit[i])
1400 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1407 if (context->capability_bounding_set_drop) {
1408 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1410 r = EXIT_CAPABILITIES;
1415 if (context->user) {
1416 err = enforce_user(context, uid);
1423 /* PR_GET_SECUREBITS is not privileged, while
1424 * PR_SET_SECUREBITS is. So to suppress
1425 * potential EPERMs we'll try not to call
1426 * PR_SET_SECUREBITS unless necessary. */
1427 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1428 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1430 r = EXIT_SECUREBITS;
1434 if (context->capabilities)
1435 if (cap_set_proc(context->capabilities) < 0) {
1437 r = EXIT_CAPABILITIES;
1441 if (context->no_new_privileges)
1442 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1444 r = EXIT_NO_NEW_PRIVILEGES;
1448 if (context->syscall_filter) {
1449 err = apply_seccomp(context->syscall_filter);
1457 our_env = new0(char*, 7);
1465 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1466 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1473 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1480 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1481 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1487 if (is_terminal_input(context->std_input) ||
1488 context->std_output == EXEC_OUTPUT_TTY ||
1489 context->std_error == EXEC_OUTPUT_TTY)
1490 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1498 final_env = strv_env_merge(5,
1501 context->environment,
1511 final_argv = replace_env_argv(argv, final_env);
1518 final_env = strv_env_clean(final_env);
1520 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1521 line = exec_command_line(final_argv);
1524 log_struct_unit(LOG_DEBUG,
1526 "EXECUTABLE=%s", command->path,
1527 "MESSAGE=Executing: %s", line,
1534 execve(command->path, final_argv, final_env);
1541 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1542 "EXECUTABLE=%s", command->path,
1543 "MESSAGE=Failed at step %s spawning %s: %s",
1544 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1545 command->path, strerror(-err),
1554 log_struct_unit(LOG_DEBUG,
1556 "MESSAGE=Forked %s as %lu",
1557 command->path, (unsigned long) pid,
1560 /* We add the new process to the cgroup both in the child (so
1561 * that we can be sure that no user code is ever executed
1562 * outside of the cgroup) and in the parent (so that we can be
1563 * sure that when we kill the cgroup the process will be
1565 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1567 exec_status_start(&command->exec_status, pid);
/* Fills in the non-zero defaults of an execution context: best-effort IO
 * priority class with data 0, SCHED_OTHER CPU policy, syslog priority
 * LOG_DAEMON|LOG_INFO with level prefixes enabled, SIGPIPE ignored, and -1
 * for control_group_persistent and timer_slack_nsec. Elsewhere in this file
 * timer_slack_nsec is only applied when it differs from (nsec_t) -1 and
 * control_group_persistent is only acted upon when it is >= 0. */
1573 void exec_context_init(ExecContext *c) {
1577 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1578 c->cpu_sched_policy = SCHED_OTHER;
1579 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1580 c->syslog_level_prefix = true;
1581 c->control_group_persistent = -1;
1582 c->ignore_sigpipe = true;
1583 c->timer_slack_nsec = (nsec_t) -1;
/* Removes the context's private temporary directories. The dirs array is
 * packed so that set paths come first (tmp_dir, then var_tmp_dir), which
 * lets the loop stop at the first NULL. For each path, rm_rf_dangerous() is
 * called on the path itself and dirname() yields its parent; failures are
 * reported as warnings only. Both pointers are reset to NULL at the end. */
1586 void exec_context_tmp_dirs_done(ExecContext *c) {
1587 char* dirs[] = {c->tmp_dir ? c->tmp_dir : c->var_tmp_dir,
1588 c->tmp_dir ? c->var_tmp_dir : NULL,
1592 for(dirp = dirs; *dirp; dirp++) {
1596 r = rm_rf_dangerous(*dirp, false, true, false);
1597 dir = dirname(*dirp);
1599 log_warning("Failed to remove content of temporary directory %s: %s",
1604 log_warning("Failed to remove temporary directory %s: %s",
1611 c->tmp_dir = c->var_tmp_dir = NULL;
/* Releases the resources owned by an execution context and resets the
 * corresponding members to NULL: environment and environment-file lists,
 * resource limits, working and root directory, tcpwrap name, syslog
 * identifier, supplementary groups, capabilities, the read-only, read-write
 * and inaccessible directory lists, the CPU set and the syscall filter.
 *
 * The private temporary directories are only removed when
 * reloading_or_reexecuting is false. */
1614 void exec_context_done(ExecContext *c, bool reloading_or_reexecuting) {
1619 strv_free(c->environment);
1620 c->environment = NULL;
1622 strv_free(c->environment_files);
1623 c->environment_files = NULL;
1625 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1627 c->rlimit[l] = NULL;
1630 free(c->working_directory);
1631 c->working_directory = NULL;
1632 free(c->root_directory);
1633 c->root_directory = NULL;
1638 free(c->tcpwrap_name);
1639 c->tcpwrap_name = NULL;
1641 free(c->syslog_identifier);
1642 c->syslog_identifier = NULL;
1650 strv_free(c->supplementary_groups);
1651 c->supplementary_groups = NULL;
1656 if (c->capabilities) {
1657 cap_free(c->capabilities);
1658 c->capabilities = NULL;
1661 strv_free(c->read_only_dirs);
1662 c->read_only_dirs = NULL;
1664 strv_free(c->read_write_dirs);
1665 c->read_write_dirs = NULL;
1667 strv_free(c->inaccessible_dirs);
1668 c->inaccessible_dirs = NULL;
1671 CPU_FREE(c->cpuset);
1676 free(c->syscall_filter);
1677 c->syscall_filter = NULL;
1679 if (!reloading_or_reexecuting)
1680 exec_context_tmp_dirs_done(c);
/* NOTE(review): presumably releases the members of c without freeing c
 * itself, judging by the name and by how the array and list helpers below
 * use it; confirm against the function body. */
1683 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n commands stored contiguously
 * in the array c. */
1693 void exec_command_done_array(ExecCommand *c, unsigned n) {
1696 for (i = 0; i < n; i++)
1697 exec_command_done(c+i);
/* Tears down the command list headed by c: an entry i is unlinked with
 * LIST_REMOVE() and passed to exec_command_done(), presumably repeated
 * until the list is empty. */
1700 void exec_command_free_list(ExecCommand *c) {
1704 LIST_REMOVE(ExecCommand, command, c, i);
1705 exec_command_done(i);
/* Runs exec_command_free_list() on each of the n command lists whose heads
 * are stored in the array c. */
1710 void exec_command_free_array(ExecCommand **c, unsigned n) {
1713 for (i = 0; i < n; i++) {
1714 exec_command_free_list(c[i]);
/* Loads the environment files configured in c->environment_files and
 * merges their variables into one list.
 *
 * Each entry is checked for being an absolute path and is then expanded
 * with glob(), so one entry may match several files. A glob() failure
 * yields -errno, or -EINVAL when errno is 0; the ignore flag presumably
 * allows an entry to fail silently. Every matched file is read with
 * load_env_file(), invalid variables are dropped and logged together with
 * the file name by strv_env_clean_log(), and the result is merged into the
 * accumulated list r with strv_env_merge().
 *
 * c: execution context providing environment_files.
 * l: output parameter, presumably receiving the merged list. */
1719 int exec_context_load_environment(const ExecContext *c, char ***l) {
1720 char **i, **r = NULL;
1725 STRV_FOREACH(i, c->environment_files) {
1728 bool ignore = false;
1730 _cleanup_globfree_ glob_t pglob = {};
1740 if (!path_is_absolute(fn)) {
1748 /* Filename supports globbing, take all matching files */
1750 if (glob(fn, 0, NULL, &pglob) != 0) {
1755 return errno ? -errno : -EINVAL;
1757 count = pglob.gl_pathc;
1765 for (n = 0; n < count; n++) {
1766 k = load_env_file(pglob.gl_pathv[n], NULL, &p);
1774 /* Log invalid environment variables with filename */
1776 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
1783 m = strv_env_merge(2, r, p);
/* Decides whether the given tty may refer to the same device as
 * /dev/console. A leading "/dev/" is recognised (presumably stripped before
 * comparing), the name "console" matches trivially, and otherwise the
 * device behind /dev/console is looked up with resolve_dev_console(). If it
 * cannot be resolved, a possible match is assumed. A console of "tty0"
 * matches any virtual console, since tty0 denotes the active VC. */
1799 static bool tty_may_match_dev_console(const char *tty) {
1800 char *active = NULL, *console;
1803 if (startswith(tty, "/dev/"))
1806 /* trivial identity? */
1807 if (streq(tty, "console"))
1810 console = resolve_dev_console(&active);
1811 /* if we could not resolve, assume it may */
1815 /* "tty0" means the active VC, so it may be the same sometimes */
1816 b = streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when executing with this context may touch /dev/console:
 * some TTY handling is enabled (reset, vhangup or VT disallocation) or
 * stdin, stdout or stderr use a terminal mode, and in addition the
 * context's TTY path may refer to the console device. */
1822 bool exec_context_may_touch_console(ExecContext *ec) {
1823 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
1824 is_terminal_input(ec->std_input) ||
1825 is_terminal_output(ec->std_output) ||
1826 is_terminal_output(ec->std_error)) &&
1827 tty_may_match_dev_console(tty_path(ec));
/* Writes strings from l to f, each preceded by a single space; g is
 * presumably the iterator over the entries of l. */
1830 static void strv_fprintf(FILE *f, char **l) {
1836 fprintf(f, " %s", *g);
/* Writes a human-readable, line-oriented description of the execution
 * context c to the stream f. Every emitted line begins with the
 * caller-supplied prefix string.
 *
 * NOTE(review): this excerpt omits a number of lines of the function (local
 * declarations, several fprintf() openers and guards, cleanup of temporary
 * strings), so the comments below describe only what is visible. */
1839 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1851 "%sWorkingDirectory: %s\n"
1852 "%sRootDirectory: %s\n"
1853 "%sNonBlocking: %s\n"
1854 "%sPrivateTmp: %s\n"
1855 "%sControlGroupModify: %s\n"
1856 "%sControlGroupPersistent: %s\n"
1857 "%sPrivateNetwork: %s\n"
1858 "%sIgnoreSIGPIPE: %s\n",
/* An unset working or root directory is reported as "/". */
1860 prefix, c->working_directory ? c->working_directory : "/",
1861 prefix, c->root_directory ? c->root_directory : "/",
1862 prefix, yes_no(c->non_blocking),
1863 prefix, yes_no(c->private_tmp),
1864 prefix, yes_no(c->control_group_modify),
1865 prefix, yes_no(c->control_group_persistent),
1866 prefix, yes_no(c->private_network),
1867 prefix, yes_no(c->ignore_sigpipe));
/* One output line per environment entry and per environment file. */
1869 STRV_FOREACH(e, c->environment)
1870 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1872 STRV_FOREACH(e, c->environment_files)
1873 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1875 if (c->tcpwrap_name)
1877 "%sTCPWrapName: %s\n",
1878 prefix, c->tcpwrap_name);
1885 if (c->oom_score_adjust_set)
1887 "%sOOMScoreAdjust: %i\n",
1888 prefix, c->oom_score_adjust);
/* Resource limits: the value printed is rlim_max, labelled with the name
 * returned by rlimit_to_string(). */
1890 for (i = 0; i < RLIM_NLIMITS; i++)
1892 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1894 if (c->ioprio_set) {
1898 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1902 "%sIOSchedulingClass: %s\n"
1903 "%sIOPriority: %i\n",
1904 prefix, strna(class_str),
1905 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1909 if (c->cpu_sched_set) {
1913 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1917 "%sCPUSchedulingPolicy: %s\n"
1918 "%sCPUSchedulingPriority: %i\n"
1919 "%sCPUSchedulingResetOnFork: %s\n",
1920 prefix, strna(policy_str),
1921 prefix, c->cpu_sched_priority,
1922 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* CPU affinity: print the index of every CPU that is set in c->cpuset. */
1927 fprintf(f, "%sCPUAffinity:", prefix);
1928 for (i = 0; i < c->cpuset_ncpus; i++)
1929 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1930 fprintf(f, " %i", i);
/* (nsec_t) -1 marks "no timer slack configured". The value is widened to
 * unsigned long long, like the rlimit values above, so that it is not
 * truncated on platforms where unsigned long is only 32 bits wide
 * (nsec_t is declared elsewhere; presumably a 64-bit type -- confirm). */
1934 if (c->timer_slack_nsec != (nsec_t) -1)
1935 fprintf(f, "%sTimerSlackNSec: %llu\n", prefix, (unsigned long long) c->timer_slack_nsec);
1938 "%sStandardInput: %s\n"
1939 "%sStandardOutput: %s\n"
1940 "%sStandardError: %s\n",
1941 prefix, exec_input_to_string(c->std_input),
1942 prefix, exec_output_to_string(c->std_output),
1943 prefix, exec_output_to_string(c->std_error));
1949 "%sTTYVHangup: %s\n"
1950 "%sTTYVTDisallocate: %s\n",
1951 prefix, c->tty_path,
1952 prefix, yes_no(c->tty_reset),
1953 prefix, yes_no(c->tty_vhangup),
1954 prefix, yes_no(c->tty_vt_disallocate));
/* Syslog facility/level are only reported when stdout or stderr is routed
 * to syslog, kmsg or the journal (with or without the console). */
1956 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1957 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1958 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1959 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1960 char *fac_str, *lvl_str;
/* The facility is the priority value shifted right by 3; the level is
 * extracted with LOG_PRI(). */
1963 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1967 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1972 "%sSyslogFacility: %s\n"
1973 "%sSyslogLevel: %s\n",
1974 prefix, strna(fac_str),
1975 prefix, strna(lvl_str));
1980 if (c->capabilities) {
1982 if ((t = cap_to_text(c->capabilities, NULL))) {
1983 fprintf(f, "%sCapabilities: %s\n",
/* Secure bits: every flag name carries its own leading space so that the
 * flags that are set come out space-separated after the label. */
1990 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1992 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
1993 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1994 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1995 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1996 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
1997 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* capability_bounding_set_drop is a bit mask of capabilities to drop, so
 * the bounding set that is printed consists of the capabilities whose bit
 * is NOT set in it. */
1999 if (c->capability_bounding_set_drop) {
2001 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2003 for (l = 0; l <= cap_last_cap(); l++)
2004 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2007 if ((t = cap_to_name(l))) {
2008 fprintf(f, " %s", t);
2017 fprintf(f, "%sUser: %s\n", prefix, c->user);
2019 fprintf(f, "%sGroup: %s\n", prefix, c->group);
/* The list-valued settings below are printed on a single line each and
 * only when the list is non-empty. */
2021 if (strv_length(c->supplementary_groups) > 0) {
2022 fprintf(f, "%sSupplementaryGroups:", prefix);
2023 strv_fprintf(f, c->supplementary_groups);
2028 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2030 if (strv_length(c->read_write_dirs) > 0) {
2031 fprintf(f, "%sReadWriteDirs:", prefix);
2032 strv_fprintf(f, c->read_write_dirs);
2036 if (strv_length(c->read_only_dirs) > 0) {
2037 fprintf(f, "%sReadOnlyDirs:", prefix);
2038 strv_fprintf(f, c->read_only_dirs);
2042 if (strv_length(c->inaccessible_dirs) > 0) {
2043 fprintf(f, "%sInaccessibleDirs:", prefix);
2044 strv_fprintf(f, c->inaccessible_dirs);
2050 "%sUtmpIdentifier: %s\n",
2051 prefix, c->utmp_id);
/* Records the start of a process in the status object s.
 *
 * Visible here: s->start_timestamp is filled in via dual_timestamp_get().
 * NOTE(review): the remaining lines of the body (including whatever is done
 * with pid) are not visible in this excerpt. */
2054 void exec_status_start(ExecStatus *s, pid_t pid) {
2059 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid (with the given exit code and
 * status) in the status object s.
 *
 * NOTE(review): several lines of the body are not visible in this excerpt
 * (the statement guarded by the PID check, the stores into s, and any guard
 * around the accesses to "context"). */
2062 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* True when s already refers to a different, non-zero PID; the action taken
 * in that case is not shown here. */
2065 if (s->pid && s->pid != pid)
2069 dual_timestamp_get(&s->exit_timestamp);
/* If the context has a utmp identifier, report the dead process to
 * utmp_put_dead_process() under that identifier. */
2075 if (context->utmp_id)
2076 utmp_put_dead_process(context->utmp_id, pid, code, status);
2078 exec_context_tty_reset(context);
/* Writes a human-readable description of the status object s to f, each
 * line beginning with prefix.
 *
 * NOTE(review): several lines (fprintf() openers, part of the format strings
 * and of the argument lists) are not visible in this excerpt. */
2082 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() for both timestamps below. */
2083 char buf[FORMAT_TIMESTAMP_MAX];
2096 prefix, (unsigned long) s->pid);
/* The start timestamp is only printed when its realtime value is non-zero. */
2098 if (s->start_timestamp.realtime > 0)
2100 "%sStart Timestamp: %s\n",
2101 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise for the exit timestamp, which is printed together with the exit
 * code and status. */
2103 if (s->exit_timestamp.realtime > 0)
2105 "%sExit Timestamp: %s\n"
2107 "%sExit Status: %i\n",
2108 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2109 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from the argument vector argv and returns it as a
 * char pointer.
 *
 * NOTE(review): most of the body is not visible in this excerpt. What can be
 * seen is a first pass over argv, an allocation of k chars into n, and a
 * second pass in which arguments containing a WHITESPACE character are
 * treated specially (presumably quoted -- confirm). Ownership of the result
 * and the value returned on allocation failure are not visible here. */
2113 char *exec_command_line(char **argv) {
2121 STRV_FOREACH(a, argv)
/* Allocate the output buffer; k is computed on lines not shown here. */
2124 if (!(n = new(char, k)))
2128 STRV_FOREACH(a, argv) {
/* Arguments that contain whitespace take a separate branch. */
2135 if (strpbrk(*a, WHITESPACE)) {
2146 /* FIXME: this doesn't really handle arguments that have
2147 * spaces and ticks in them */
/* Writes a description of the command c to f: its command line, followed by
 * its execution status.
 *
 * NOTE(review): declarations, the fprintf() opener and the cleanup of the
 * temporary strings are not visible in this excerpt. */
2152 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2154 const char *prefix2;
/* The status dump uses prefix followed by a tab; if building that string
 * fails (p2 is NULL), the plain prefix is used instead. */
2163 p2 = strappend(prefix, "\t");
2164 prefix2 = p2 ? p2 : prefix;
2166 cmd = exec_command_line(c->argv);
2169 "%sCommand Line: %s\n",
/* If no command line string could be built, print the ENOMEM error text in
 * its place. */
2170 prefix, cmd ? cmd : strerror(ENOMEM));
2174 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command in the list starting at c to f by calling
 * exec_command_dump() on each element with the same prefix.
 *
 * NOTE(review): some lines of the body are not visible in this excerpt. */
2179 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c doubles as the iteration cursor; "command" names the list
 * link field passed to LIST_FOREACH. */
2185 LIST_FOREACH(command, c, c)
2186 exec_command_dump(c, f, prefix);
/* Appends the command e to the end of the list whose head pointer is *l.
 *
 * NOTE(review): declarations and any special handling (for example of an
 * empty list) are not visible in this excerpt. */
2189 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2196 /* It is important that we keep the order here, hence append at the tail */
2197 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2198 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up the command c from an executable path followed by a variable
 * argument list.
 *
 * NOTE(review): most of the body is not visible in this excerpt (va_start /
 * va_end, error returns, the stores into c). The visible lines build a
 * string vector from path and the variadic arguments, and duplicate path. */
2203 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* Collect path plus the variadic arguments into a newly built strv. */
2211 l = strv_new_ap(path, ap);
/* Take a private copy of path; the failure branch is not shown here. */
2217 if (!(p = strdup(path))) {
/* String names for the ExecInput values, indexed by enum value.
 * NOTE(review): the line closing the initializer is not visible in this
 * excerpt. */
2231 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2232 [EXEC_INPUT_NULL] = "null",
2233 [EXEC_INPUT_TTY] = "tty",
2234 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2235 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2236 [EXEC_INPUT_SOCKET] = "socket"
/* DEFINE_STRING_TABLE_LOOKUP is defined elsewhere; presumably it generates
 * the exec_input_to_string() lookup (and its inverse) from the table above
 * -- confirm against the macro definition. */
2239 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput values, indexed by enum value. The
 * "+console" entries name the *_AND_CONSOLE variants.
 * NOTE(review): the line closing the initializer is not visible in this
 * excerpt. */
2241 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2242 [EXEC_OUTPUT_INHERIT] = "inherit",
2243 [EXEC_OUTPUT_NULL] = "null",
2244 [EXEC_OUTPUT_TTY] = "tty",
2245 [EXEC_OUTPUT_SYSLOG] = "syslog",
2246 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2247 [EXEC_OUTPUT_KMSG] = "kmsg",
2248 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2249 [EXEC_OUTPUT_JOURNAL] = "journal",
2250 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2251 [EXEC_OUTPUT_SOCKET] = "socket"
/* DEFINE_STRING_TABLE_LOOKUP is defined elsewhere; presumably it generates
 * the exec_output_to_string() lookup (and its inverse) from the table above
 * -- confirm against the macro definition. */
2254 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);