1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
44 #include <security/pam_appl.h>
50 #include "capability.h"
54 #include "securebits.h"
56 #include "namespace.h"
58 #include "exit-status.h"
60 #include "utmp-wtmp.h"
62 #include "loopback-setup.h"
63 #include "path-util.h"
64 #include "syscall-list.h"
66 #include "sd-messages.h"
68 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
70 /* This assumes there is a 'tty' group */
/* Moves the n_fds descriptors listed in fds[] so that entry i ends up on
 * descriptor number i+3. An entry that is not yet in place is duplicated
 * with fcntl(F_DUPFD, i+3) and the old descriptor is closed. When the
 * duplicate did not land exactly on i+3, the first such index is kept in
 * restart_from and becomes the next value of start. */
73 static int shift_fds(int fds[], unsigned n_fds) {
74 int start, restart_from;
79 /* Modifies the fds array! (sorts it) */
89 for (i = start; i < (int) n_fds; i++) {
92 /* Already at right index? */
96 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
99 close_nointr_nofail(fds[i]);
102 /* Hmm, the fd we wanted isn't free? Then
103 * let's remember that and try again from here*/
104 if (nfd != i+3 && restart_from < 0)
108 if (restart_from < 0)
111 start = restart_from;
/* Walks the n_fds descriptors in fds[] and, for each one, applies the
 * requested O_NONBLOCK state (nonblock) via fd_nonblock() and clears
 * FD_CLOEXEC via fd_cloexec(fd, false). The result of each helper is
 * checked for a negative value. */
117 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
126 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
128 for (i = 0; i < n_fds; i++) {
130 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
133 /* We unconditionally drop FD_CLOEXEC from the fds,
134 * since after all we want to pass these fds to our
137 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in context, or "/dev/console" when
 * context->tty_path is not set. The returned string is not a copy. */
144 static const char *tty_path(const ExecContext *context) {
147 if (context->tty_path)
148 return context->tty_path;
150 return "/dev/console";
/* Applies the TTY clean-up options of context, each one independently:
 *   - tty_vhangup:        terminal_vhangup() on tty_path(context)
 *   - tty_reset:          reset_terminal() on tty_path(context)
 *   - tty_vt_disallocate: vt_disallocate(), but only when an explicit
 *                         context->tty_path is configured
 * The first two fall back to /dev/console through tty_path(). */
153 void exec_context_tty_reset(const ExecContext *context) {
156 if (context->tty_vhangup)
157 terminal_vhangup(tty_path(context));
159 if (context->tty_reset)
160 reset_terminal(tty_path(context));
162 if (context->tty_vt_disallocate && context->tty_path)
163 vt_disallocate(context->tty_path);
/* Opens /dev/null with flags | O_NOCTTY and duplicates it onto descriptor
 * nfd. The dup2() step yields nfd on success and -errno on failure; the
 * temporary descriptor is closed afterwards in either case. */
166 static int open_null_as(int flags, int nfd) {
171 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
175 r = dup2(fd, nfd) < 0 ? -errno : nfd;
176 close_nointr_nofail(fd);
/* Creates an AF_UNIX stream socket, connects it to
 * /run/systemd/journal/stdout, shuts down its read direction and installs
 * it as descriptor nfd with dup2() (nfd on success, -errno on failure).
 * The temporary descriptor is closed if shutdown() fails and again after
 * the dup2().
 *
 * The identifier handed on is context->syslog_identifier when set,
 * otherwise ident; context->syslog_priority and the level-prefix flag
 * follow. The three boolean expressions on output are true for the syslog
 * outputs, the kmsg outputs, and the *_AND_CONSOLE outputs respectively.
 *
 * output must be below _EXEC_OUTPUT_MAX. */
183 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
185 union sockaddr_union sa;
188 assert(output < _EXEC_OUTPUT_MAX);
192 fd = socket(AF_UNIX, SOCK_STREAM, 0);
197 sa.un.sun_family = AF_UNIX;
198 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
200 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
202 close_nointr_nofail(fd);
206 if (shutdown(fd, SHUT_RD) < 0) {
207 close_nointr_nofail(fd);
219 context->syslog_identifier ? context->syslog_identifier : ident,
221 context->syslog_priority,
222 !!context->syslog_level_prefix,
223 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
224 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
225 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
228 r = dup2(fd, nfd) < 0 ? -errno : nfd;
229 close_nointr_nofail(fd);
/* Opens the terminal at path through open_terminal() with mode | O_NOCTTY
 * and duplicates it onto descriptor nfd. The dup2() step yields nfd on
 * success and -errno on failure; the temporary descriptor is closed
 * afterwards. */
235 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
241 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
245 r = dup2(fd, nfd) < 0 ? -errno : nfd;
246 close_nointr_nofail(fd);
/* Tests whether input mode i is one of the three terminal-backed modes:
 * EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL. */
253 static bool is_terminal_input(ExecInput i) {
255 i == EXEC_INPUT_TTY ||
256 i == EXEC_INPUT_TTY_FORCE ||
257 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the configured stdin mode to EXEC_INPUT_NULL when it cannot
 * be honoured: a terminal input mode while apply_tty_stdin is false, or
 * EXEC_INPUT_SOCKET without a valid socket_fd (socket_fd < 0). */
260 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
262 if (is_terminal_input(std_input) && !apply_tty_stdin)
263 return EXEC_INPUT_NULL;
265 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
266 return EXEC_INPUT_NULL;
/* Downgrades EXEC_OUTPUT_SOCKET to EXEC_OUTPUT_INHERIT when there is no
 * valid socket_fd (socket_fd < 0). */
271 static int fixup_output(ExecOutput std_output, int socket_fd) {
273 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
274 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to context->std_input after it has been
 * passed through fixup_input():
 *   - EXEC_INPUT_NULL:   /dev/null opened read-only
 *   - TTY_FORCE/TTY_FAIL (and the case label preceding them): a terminal
 *     obtained from acquire_terminal(), with the fail/force flags derived
 *     from the mode; it is dup2()ed onto stdin and the original closed
 *     when it did not already land on STDIN_FILENO
 *   - EXEC_INPUT_SOCKET: socket_fd dup2()ed onto stdin
 * The dup2() branches yield STDIN_FILENO on success and -errno on failure.
 * Any other mode trips assert_not_reached(). */
279 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
284 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
288 case EXEC_INPUT_NULL:
289 return open_null_as(O_RDONLY, STDIN_FILENO);
292 case EXEC_INPUT_TTY_FORCE:
293 case EXEC_INPUT_TTY_FAIL: {
296 if ((fd = acquire_terminal(
298 i == EXEC_INPUT_TTY_FAIL,
299 i == EXEC_INPUT_TTY_FORCE,
304 if (fd != STDIN_FILENO) {
305 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306 close_nointr_nofail(fd);
313 case EXEC_INPUT_SOCKET:
314 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
317 assert_not_reached("Unknown input type");
/* Sets up STDOUT_FILENO from context->std_output (after fixup_output()),
 * taking the fixed-up input mode into account. Stdin must already be set
 * up, since several branches duplicate it.
 *   - INHERIT: if a terminal input was downgraded to NULL, the TTY is opened
 *     write-only; if the input is anything other than NULL, stdin is
 *     duplicated; otherwise stdout may be kept as inherited or replaced by
 *     /dev/null (see the comments in that branch)
 *   - NULL:    /dev/null opened write-only
 *   - TTY:     stdin duplicated when the input is a terminal, else the TTY
 *              is opened write-only without being reset
 *   - SYSLOG/KMSG/JOURNAL (with or without console): connect_logger_as()
 *   - SOCKET:  socket_fd duplicated (must be valid)
 * The dup2() branches yield STDOUT_FILENO on success, -errno on failure. */
321 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
328 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
329 o = fixup_output(context->std_output, socket_fd);
331 /* This expects the input is already set up */
335 case EXEC_OUTPUT_INHERIT:
337 /* If input got downgraded, inherit the original value */
338 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
339 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
341 /* If the input is connected to anything that's not a /dev/null, inherit that... */
342 if (i != EXEC_INPUT_NULL)
343 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
345 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
347 return STDOUT_FILENO;
349 /* We need to open /dev/null here anew, to get the
350 * right access mode. So we fall through */
352 case EXEC_OUTPUT_NULL:
353 return open_null_as(O_WRONLY, STDOUT_FILENO);
355 case EXEC_OUTPUT_TTY:
356 if (is_terminal_input(i))
357 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
359 /* We don't reset the terminal if this is just about output */
360 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
362 case EXEC_OUTPUT_SYSLOG:
363 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
364 case EXEC_OUTPUT_KMSG:
365 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
366 case EXEC_OUTPUT_JOURNAL:
367 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
368 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
370 case EXEC_OUTPUT_SOCKET:
371 assert(socket_fd >= 0);
372 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
375 assert_not_reached("Unknown output type");
/* Sets up STDERR_FILENO from context->std_error (after fixup_output()).
 * Stdin and stdout must already be set up, since they may be duplicated.
 *
 * Order of decisions:
 *   1. stderr is left untouched when stderr and stdout both inherit, the
 *      input is NULL and was not configured as a terminal (the condition
 *      has one further clause).
 *   2. stdout is duplicated when stderr uses the same mode as stdout or is
 *      set to inherit.
 *   3. Otherwise the mode is handled on its own: /dev/null, the TTY (stdin
 *      duplicated if it is a terminal, else opened write-only), a logger
 *      connection, or socket_fd.
 * The dup2() branches yield STDERR_FILENO on success, -errno on failure. */
379 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
386 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
387 o = fixup_output(context->std_output, socket_fd);
388 e = fixup_output(context->std_error, socket_fd);
390 /* This expects the input and output are already set up */
392 /* Don't change the stderr file descriptor if we inherit all
393 * the way and are not on a tty */
394 if (e == EXEC_OUTPUT_INHERIT &&
395 o == EXEC_OUTPUT_INHERIT &&
396 i == EXEC_INPUT_NULL &&
397 !is_terminal_input(context->std_input) &&
399 return STDERR_FILENO;
401 /* Duplicate from stdout if possible */
402 if (e == o || e == EXEC_OUTPUT_INHERIT)
403 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
407 case EXEC_OUTPUT_NULL:
408 return open_null_as(O_WRONLY, STDERR_FILENO);
410 case EXEC_OUTPUT_TTY:
411 if (is_terminal_input(i))
412 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
414 /* We don't reset the terminal if this is just about output */
415 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
417 case EXEC_OUTPUT_SYSLOG:
418 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
419 case EXEC_OUTPUT_KMSG:
420 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
421 case EXEC_OUTPUT_JOURNAL:
422 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
423 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
425 case EXEC_OUTPUT_SOCKET:
426 assert(socket_fd >= 0);
427 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
430 assert_not_reached("Unknown error type");
/* Tries to hand the terminal open on fd to uid with permission bits
 * TTY_MODE. The fchown()/fchmod() results are deliberately ignored (the
 * group is left unchanged, -1); instead the outcome is verified with
 * fstat(): the owner must be uid and the lower nine mode bits must equal
 * TTY_MODE. */
434 static int chown_terminal(int fd, uid_t uid) {
439 /* This might fail. What matters are the results. */
440 (void) fchown(fd, uid, -1);
441 (void) fchmod(fd, TTY_MODE);
443 if (fstat(fd, &st) < 0)
446 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily points stdin and stdout at a terminal so that a confirmation
 * question can be asked.
 *
 * Copies of the current stdin and stdout are made with fcntl(F_DUPFD, 3),
 * i.e. on descriptors >= 3. A terminal is then acquired (with
 * DEFAULT_CONFIRM_USEC as an argument), chowned to the calling uid,
 * duplicated onto STDIN_FILENO and STDOUT_FILENO, and the extra descriptor
 * is closed. The saved copies are returned through _saved_stdin and
 * _saved_stdout, both of which must be non-NULL.
 *
 * The code at the end closes the saved copies (when >= 0) and fd. */
452 static int setup_confirm_stdio(int *_saved_stdin,
453 int *_saved_stdout) {
454 int fd = -1, saved_stdin, saved_stdout = -1, r;
456 assert(_saved_stdin);
457 assert(_saved_stdout);
459 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
463 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
464 if (saved_stdout < 0) {
469 fd = acquire_terminal(
474 DEFAULT_CONFIRM_USEC);
480 r = chown_terminal(fd, getuid());
484 if (dup2(fd, STDIN_FILENO) < 0) {
489 if (dup2(fd, STDOUT_FILENO) < 0) {
495 close_nointr_nofail(fd);
497 *_saved_stdin = saved_stdin;
498 *_saved_stdout = saved_stdout;
503 if (saved_stdout >= 0)
504 close_nointr_nofail(saved_stdout);
506 if (saved_stdin >= 0)
507 close_nointr_nofail(saved_stdin);
510 close_nointr_nofail(fd);
/* Writes a printf-style message straight to /dev/console: the console is
 * opened write-only with O_NOCTTY|O_CLOEXEC, the message is formatted with
 * vdprintf(), and the descriptor is closed again. */
515 static int write_confirm_message(const char *format, ...) {
521 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
525 va_start(ap, format);
526 vdprintf(fd, format, ap);
529 close_nointr_nofail(fd);
/* Counterpart of setup_confirm_stdio(): for each saved descriptor that is
 * valid (>= 0), duplicates it back onto STDIN_FILENO / STDOUT_FILENO with
 * dup2() and then closes the saved copy. */
534 static int restore_confirm_stdio(int *saved_stdin,
540 assert(saved_stdout);
544 if (*saved_stdin >= 0)
545 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
548 if (*saved_stdout >= 0)
549 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
552 if (*saved_stdin >= 0)
553 close_nointr_nofail(*saved_stdin);
555 if (*saved_stdout >= 0)
556 close_nointr_nofail(*saved_stdout);
/* Asks on the terminal whether the command given by argv should be run.
 * Stdio is redirected with setup_confirm_stdio(), the command line is
 * rendered with exec_command_line(), and ask() prompts for one of the
 * characters "yns" (yes / no / skip), storing the answer in *response.
 * Stdio is restored with restore_confirm_stdio() afterwards. */
561 static int ask_for_confirmation(char *response, char **argv) {
562 int saved_stdout = -1, saved_stdin = -1, r;
565 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
569 line = exec_command_line(argv);
573 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
576 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Sets the group identity of the calling process.
 *
 * When context->group or username is given:
 *   - context->group, if set, is resolved with get_group_creds() and
 *     replaces the gid passed in
 *   - initgroups(username, gid) is called when a username is known and
 *     gid is not 0
 *   - setresgid(gid, gid, gid) sets real, effective and saved gid
 *
 * When context->supplementary_groups is set, a buffer sized by
 * sysconf(_SC_NGROUPS_MAX) is filled with the current groups via
 * getgroups(); each configured group is resolved with get_group_creds()
 * and appended (guarded by a k >= ngroups_max check), and the result is
 * installed with setgroups(). */
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582 bool keep_groups = false;
587 /* Lookup and set GID and supplementary group list. Here too
588 * we avoid NSS lookups for gid=0. */
590 if (context->group || username) {
592 if (context->group) {
593 const char *g = context->group;
595 if ((r = get_group_creds(&g, &gid)) < 0)
599 /* First step, initialize groups from /etc/groups */
600 if (username && gid != 0) {
601 if (initgroups(username, gid) < 0)
607 /* Second step, set our gids */
608 if (setresgid(gid, gid, gid) < 0)
612 if (context->supplementary_groups) {
617 /* Final step, initialize any manually set supplementary groups */
618 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
620 if (!(gids = new(gid_t, ngroups_max)))
624 if ((k = getgroups(ngroups_max, gids)) < 0) {
631 STRV_FOREACH(i, context->supplementary_groups) {
634 if (k >= ngroups_max) {
640 r = get_group_creds(&g, gids+k);
649 if (setgroups(k, gids) < 0) {
/* Switches real, effective and saved uid to uid with setresuid().
 *
 * When context->capabilities is set, two preparatory steps run first so
 * that capabilities survive the uid change:
 *   1. SECURE_KEEP_CAPS is OR-ed into context->secure_bits and written with
 *      PR_SET_SECUREBITS, but only if PR_GET_SECUREBITS reports a different
 *      value.
 *   2. A copy of context->capabilities gets CAP_SETUID and CAP_SETPCAP
 *      raised in both the effective and the permitted set and is applied
 *      with cap_set_proc().
 * Restoring the final capability set is left to the caller. */
660 static int enforce_user(const ExecContext *context, uid_t uid) {
664 /* Sets (but doesn't lookup) the uid and make sure we keep the
665 * capabilities while doing so. */
667 if (context->capabilities) {
669 static const cap_value_t bits[] = {
670 CAP_SETUID, /* Necessary so that we can run setresuid() below */
671 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
674 /* First step: If we need to keep capabilities but
675 * drop privileges we need to make sure we keep our
676 * caps, while we drop privileges. */
678 int sb = context->secure_bits|SECURE_KEEP_CAPS;
680 if (prctl(PR_GET_SECUREBITS) != sb)
681 if (prctl(PR_SET_SECUREBITS, sb) < 0)
685 /* Second step: set the capabilities. This will reduce
686 * the capabilities to the minimum we need. */
688 if (!(d = cap_dup(context->capabilities)))
691 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
698 if (cap_set_proc(d) < 0) {
707 /* Third step: actually set the uids */
708 if (setresuid(uid, uid, uid) < 0)
711 /* At this point we should have all necessary capabilities but
712 are otherwise a normal user. However, the caps might have got
713 corrupted due to the setresuid() so we need clean them up
714 later. This is done outside of this call. */
/* PAM conversation callback taking the usual message/response pointers.
 * Interactive conversations are not supported by this code. */
721 static int null_conv(
723 const struct pam_message **msg,
724 struct pam_response **resp,
727 /* We don't support conversations */
/* Opens a PAM session and forks a helper process that will close it again.
 *
 * In the calling process: pam_start() for (name, user), PAM_TTY set to tty,
 * pam_acct_mgmt(), pam_open_session(), and the PAM environment list is
 * fetched (a NULL list is treated as PAM_BUF_ERR). SIGTERM is then blocked
 * and the helper is forked; afterwards the previous signal mask is
 * restored.
 *
 * In the helper, renamed to "(sd-pam)": the passed fds[] are closed,
 * privileges are dropped to uid (a failure is only logged),
 * PR_SET_PDEATHSIG is set to SIGTERM, and, if the parent is still the
 * original one, it waits for SIGTERM with sigwait(). Once the parent has
 * changed, pam_close_session() is called, followed by pam_end().
 *
 * The clean-up code at the end maps a PAM failure to -EPERM, closes the
 * session and ends the handle, and sends SIGTERM followed by SIGCONT to
 * pam_pid.
 *
 * NOTE(review): the setresuid() failure message formats strerror(-r), but
 * r is not set by that call; errno is probably what was meant. */
732 static int setup_pam(
738 int fds[], unsigned n_fds) {
740 static const struct pam_conv conv = {
745 pam_handle_t *handle = NULL;
747 int pam_code = PAM_SUCCESS;
750 bool close_session = false;
751 pid_t pam_pid = 0, parent_pid;
757 /* We set up PAM in the parent process, then fork. The child
758 * will then stay around until killed via PR_GET_PDEATHSIG or
759 * systemd via the cgroup logic. It will then remove the PAM
760 * session again. The parent process will exec() the actual
761 * daemon. We do things this way to ensure that the main PID
762 * of the daemon is the one we initially fork()ed. */
764 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
770 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
773 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
776 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
779 close_session = true;
781 if ((!(e = pam_getenvlist(handle)))) {
782 pam_code = PAM_BUF_ERR;
786 /* Block SIGTERM, so that we know that it won't get lost in
788 if (sigemptyset(&ss) < 0 ||
789 sigaddset(&ss, SIGTERM) < 0 ||
790 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
793 parent_pid = getpid();
795 if ((pam_pid = fork()) < 0)
802 /* The child's job is to reset the PAM session on
805 /* This string must fit in 10 chars (i.e. the length
806 * of "/sbin/init"), to look pretty in /bin/ps */
807 rename_process("(sd-pam)");
809 /* Make sure we don't keep open the passed fds in this
810 child. We assume that otherwise only those fds are
811 open here that have been opened by PAM. */
812 close_many(fds, n_fds);
814 /* Drop privileges - we don't need any to pam_close_session
815 * and this will make PR_SET_PDEATHSIG work in most cases.
816 * If this fails, ignore the error - but expect sd-pam threads
817 * to fail to exit normally */
818 if (setresuid(uid, uid, uid) < 0)
819 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
821 /* Wait until our parent died. This will only work if
822 * the above setresuid() succeeds, otherwise the kernel
823 * will not allow unprivileged parents kill their privileged
824 * children this way. We rely on the control groups kill logic
825 * to do the rest for us. */
826 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
829 /* Check if our parent process might already have
831 if (getppid() == parent_pid) {
833 if (sigwait(&ss, &sig) < 0) {
840 assert(sig == SIGTERM);
845 /* If our parent died we'll end the session */
846 if (getppid() != parent_pid)
847 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
853 pam_end(handle, pam_code | PAM_DATA_SILENT);
857 /* If the child was forked off successfully it will do all the
858 * cleanups, so forget about the handle here. */
861 /* Unblock SIGTERM again in the parent */
862 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
865 /* We close the log explicitly here, since the PAM modules
866 * might have opened it, but we don't want this fd around. */
875 if (pam_code != PAM_SUCCESS)
876 err = -EPERM; /* PAM errors do not map to errno */
882 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
884 pam_end(handle, pam_code | PAM_DATA_SILENT);
892 kill(pam_pid, SIGTERM);
893 kill(pam_pid, SIGCONT);
/* Renames the current process after the file name component of path,
 * wrapped in parentheses: l bytes of the name are copied between '(' and
 * ')' into an 11-byte buffer, which is NUL-terminated and passed to
 * rename_process(). There is also a fallback that uses the fixed name
 * "(...)". */
900 static void rename_process_from_path(const char *path) {
901 char process_name[11];
905 /* This resulting string must fit in 10 chars (i.e. the length
906 * of "/sbin/init") to look pretty in /bin/ps */
908 p = path_get_file_name(path);
910 rename_process("(...)");
916 /* The end of the process name is usually more
917 * interesting, since the first bit might just be
923 process_name[0] = '(';
924 memcpy(process_name+1, p, l);
925 process_name[1+l] = ')';
926 process_name[1+l+1] = 0;
928 rename_process(process_name);
/* Builds a seccomp BPF program from the syscall_filter bitmap and installs
 * it with prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER).
 *
 * The program is laid out as: a fixed header (starting with
 * VALIDATE_ARCHITECTURE), then two instructions per syscall whose bit is
 * set (a jump-if-equal on the syscall number followed by a return of
 * SECCOMP_RET_ALLOW), then a fixed footer. A first pass over
 * 0..syscall_max()-1 sizes the program; it is allocated on the stack with
 * alloca() and filled in by a second pass.
 *
 * syscall_filter must be non-NULL.
 *
 * NOTE(review): the bitmap word is selected with i >> 4 while the bit is
 * selected with i & 31. For 32-bit words i >> 5 would be expected; check
 * the code that fills syscall_filter before changing either side. */
931 static int apply_seccomp(uint32_t *syscall_filter) {
932 static const struct sock_filter header[] = {
933 VALIDATE_ARCHITECTURE,
936 static const struct sock_filter footer[] = {
942 struct sock_filter *f;
943 struct sock_fprog prog;
945 assert(syscall_filter);
947 /* First: count the syscalls to check for */
948 for (i = 0, n = 0; i < syscall_max(); i++)
949 if (syscall_filter[i >> 4] & (1 << (i & 31)))
952 /* Second: build the filter program from a header the syscall
953 * matches and the footer */
954 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
955 memcpy(f, header, sizeof(header));
957 for (i = 0, n = 0; i < syscall_max(); i++)
958 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
959 struct sock_filter item[] = {
960 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
961 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
964 assert_cc(ELEMENTSOF(item) == 2);
966 f[ELEMENTSOF(header) + 2*n] = item[0];
967 f[ELEMENTSOF(header) + 2*n+1] = item[1];
972 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
974 /* Third: install the filter */
976 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
978 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
/* Spawns command as a new process set up according to context; the new
 * process ends in execve(command->path, ...), and the pid is recorded in
 * command->exec_status via exec_status_start().
 *
 * Work visible ahead of the child-side code: environment files are loaded
 * with exec_context_load_environment(), the command line is logged at
 * debug level, and the cgroups are realized and their attributes applied.
 *
 * Child-side steps, in the order they appear below: rename the process,
 * reset signal dispositions and the signal mask, wait on the idle pipe (up
 * to IDLE_TIMEOUT_USEC), close all descriptors except socket_fd or fds[],
 * tcpwrap checks, TTY reset, optional spawn confirmation, stdin / stdout /
 * stderr setup, cgroup installation, OOM score, nice level, CPU scheduler,
 * CPU affinity, IO priority, timer slack, utmp record, user lookup with
 * terminal and cgroup ownership, group setup, umask, PAM session, network
 * and mount namespaces, chroot and chdir, a second descriptor clean-up
 * followed by shift_fds() and flags_fds(), resource limits, capability
 * bounding set, user switch, securebits, capabilities, no_new_privs,
 * seccomp filter, and finally assembling environment and argv.
 *
 * A failure is logged as SD_MESSAGE_SPAWN_FAILED, naming the step (exit
 * status r) and the error (err).
 *
 * fds may only be NULL when n_fds is 0.
 *
 * NOTE(review): the last argument of the sched_setscheduler() call reads
 * "¶m", which looks like a mangled "&param".
 * NOTE(review): the fallback cgroup_bonding_set_task_access_list() call
 * passes (uid_t) -1 twice; the second one sits in the gid position. */
984 int exec_spawn(ExecCommand *command,
986 const ExecContext *context,
987 int fds[], unsigned n_fds,
989 bool apply_permissions,
991 bool apply_tty_stdin,
993 CGroupBonding *cgroup_bondings,
994 CGroupAttribute *cgroup_attributes,
995 const char *cgroup_suffix,
1004 char _cleanup_strv_free_ **files_env = NULL;
1009 assert(fds || n_fds <= 0);
1011 if (context->std_input == EXEC_INPUT_SOCKET ||
1012 context->std_output == EXEC_OUTPUT_SOCKET ||
1013 context->std_error == EXEC_OUTPUT_SOCKET) {
1025 r = exec_context_load_environment(context, &files_env);
1029 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1036 argv = command->argv;
1038 line = exec_command_line(argv);
1042 log_struct(LOG_DEBUG,
1044 "MESSAGE=About to execute %s", line,
1048 r = cgroup_bonding_realize_list(cgroup_bondings);
1052 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1061 const char *username = NULL, *home = NULL;
1062 uid_t uid = (uid_t) -1;
1063 gid_t gid = (gid_t) -1;
1064 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1065 **final_env = NULL, **final_argv = NULL;
1067 bool set_access = false;
1071 rename_process_from_path(command->path);
1073 /* We reset exactly these signals, since they are the
1074 * only ones we set to SIG_IGN in the main daemon. All
1075 * others we leave untouched because we set them to
1076 * SIG_DFL or a valid handler initially, both of which
1077 * will be demoted to SIG_DFL. */
1078 default_signals(SIGNALS_CRASH_HANDLER,
1079 SIGNALS_IGNORE, -1);
1081 if (context->ignore_sigpipe)
1082 ignore_signals(SIGPIPE, -1);
1084 assert_se(sigemptyset(&ss) == 0);
1085 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1087 r = EXIT_SIGNAL_MASK;
1092 if (idle_pipe[1] >= 0)
1093 close_nointr_nofail(idle_pipe[1]);
1094 if (idle_pipe[0] >= 0) {
1095 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1096 close_nointr_nofail(idle_pipe[0]);
1100 /* Close sockets very early to make sure we don't
1101 * block init reexecution because it cannot bind its
1104 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1105 socket_fd >= 0 ? 1 : n_fds);
1111 if (!context->same_pgrp)
1118 if (context->tcpwrap_name) {
1120 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1126 for (i = 0; i < (int) n_fds; i++) {
1127 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1135 exec_context_tty_reset(context);
1137 if (confirm_spawn) {
1140 err = ask_for_confirmation(&response, argv);
1141 if (err == -ETIMEDOUT)
1142 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1144 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1145 else if (response == 's') {
1146 write_confirm_message("Skipping execution.\n");
1150 } else if (response == 'n') {
1151 write_confirm_message("Failing execution.\n");
1157 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1158 * must be sure to drop O_NONBLOCK */
1160 fd_nonblock(socket_fd, false);
1162 err = setup_input(context, socket_fd, apply_tty_stdin);
1168 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1174 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1180 if (cgroup_bondings) {
1181 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1188 if (context->oom_score_adjust_set) {
1191 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1194 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1196 r = EXIT_OOM_ADJUST;
1201 if (context->nice_set)
1202 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1208 if (context->cpu_sched_set) {
1209 struct sched_param param;
1212 param.sched_priority = context->cpu_sched_priority;
1214 if (sched_setscheduler(0, context->cpu_sched_policy |
1215 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1217 r = EXIT_SETSCHEDULER;
1222 if (context->cpuset)
1223 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1225 r = EXIT_CPUAFFINITY;
1229 if (context->ioprio_set)
1230 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1236 if (context->timer_slack_nsec != (nsec_t) -1)
1237 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1239 r = EXIT_TIMERSLACK;
1243 if (context->utmp_id)
1244 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1246 if (context->user) {
1247 username = context->user;
1248 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1254 if (is_terminal_input(context->std_input)) {
1255 err = chown_terminal(STDIN_FILENO, uid);
1262 if (cgroup_bondings && context->control_group_modify) {
1263 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1265 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1275 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1276 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1283 if (apply_permissions) {
1284 err = enforce_groups(context, username, gid);
1291 umask(context->umask);
1294 if (apply_permissions && context->pam_name && username) {
1295 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1302 if (context->private_network) {
1303 if (unshare(CLONE_NEWNET) < 0) {
1312 if (strv_length(context->read_write_dirs) > 0 ||
1313 strv_length(context->read_only_dirs) > 0 ||
1314 strv_length(context->inaccessible_dirs) > 0 ||
1315 context->mount_flags != 0 ||
1316 context->private_tmp) {
1317 err = setup_namespace(context->read_write_dirs,
1318 context->read_only_dirs,
1319 context->inaccessible_dirs,
1320 context->private_tmp,
1321 context->mount_flags);
1329 if (context->root_directory)
1330 if (chroot(context->root_directory) < 0) {
1336 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1342 char _cleanup_free_ *d = NULL;
1344 if (asprintf(&d, "%s/%s",
1345 context->root_directory ? context->root_directory : "",
1346 context->working_directory ? context->working_directory : "") < 0) {
1359 /* We repeat the fd closing here, to make sure that
1360 * nothing is leaked from the PAM modules */
1361 err = close_all_fds(fds, n_fds);
1363 err = shift_fds(fds, n_fds);
1365 err = flags_fds(fds, n_fds, context->non_blocking);
1371 if (apply_permissions) {
1373 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1374 if (!context->rlimit[i])
1377 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1384 if (context->capability_bounding_set_drop) {
1385 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1387 r = EXIT_CAPABILITIES;
1392 if (context->user) {
1393 err = enforce_user(context, uid);
1400 /* PR_GET_SECUREBITS is not privileged, while
1401 * PR_SET_SECUREBITS is. So to suppress
1402 * potential EPERMs we'll try not to call
1403 * PR_SET_SECUREBITS unless necessary. */
1404 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1405 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1407 r = EXIT_SECUREBITS;
1411 if (context->capabilities)
1412 if (cap_set_proc(context->capabilities) < 0) {
1414 r = EXIT_CAPABILITIES;
1418 if (context->no_new_privileges)
1419 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1421 r = EXIT_NO_NEW_PRIVILEGES;
1425 if (context->syscall_filter) {
1426 err = apply_seccomp(context->syscall_filter);
1434 if (!(our_env = new0(char*, 7))) {
1441 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1442 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1449 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1456 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1457 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1463 if (is_terminal_input(context->std_input) ||
1464 context->std_output == EXEC_OUTPUT_TTY ||
1465 context->std_error == EXEC_OUTPUT_TTY)
1466 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1474 if (!(final_env = strv_env_merge(
1478 context->environment,
1487 if (!(final_argv = replace_env_argv(argv, final_env))) {
1493 final_env = strv_env_clean(final_env);
1495 execve(command->path, final_argv, final_env);
1502 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1503 "EXECUTABLE=%s", command->path,
1504 "MESSAGE=Failed at step %s spawning %s: %s",
1505 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1506 command->path, strerror(-err),
1515 log_struct(LOG_DEBUG,
1517 "MESSAGE=Forked %s as %lu",
1518 command->path, (unsigned long) pid,
1521 /* We add the new process to the cgroup both in the child (so
1522 * that we can be sure that no user code is ever executed
1523 * outside of the cgroup) and in the parent (so that we can be
1524 * sure that when we kill the cgroup the process will be
1526 if (cgroup_bondings)
1527 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1529 exec_status_start(&command->exec_status, pid);
/* Fills in the non-zero defaults of an ExecContext:
 *   - IO priority: best-effort class, data value 0
 *   - CPU scheduling policy: SCHED_OTHER
 *   - syslog priority: LOG_DAEMON|LOG_INFO, with level prefix parsing on
 *   - control_group_persistent: -1
 *   - SIGPIPE ignored
 *   - timer slack: (nsec_t) -1, the value exec_spawn() treats as
 *     "do not set" */
1535 void exec_context_init(ExecContext *c) {
1539 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540 c->cpu_sched_policy = SCHED_OTHER;
1541 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542 c->syslog_level_prefix = true;
1543 c->control_group_persistent = -1;
1544 c->ignore_sigpipe = true;
1545 c->timer_slack_nsec = (nsec_t) -1;
/* Releases the heap-allocated members of an ExecContext and resets the
 * freed pointers to NULL: string vectors via strv_free(), plain strings
 * and the syscall filter via free(), the capability set via cap_free(),
 * and the CPU set via CPU_FREE(). The rlimit slots are walked and set to
 * NULL as well. The ExecContext itself is not freed here. */
1548 void exec_context_done(ExecContext *c) {
1553 strv_free(c->environment);
1554 c->environment = NULL;
1556 strv_free(c->environment_files);
1557 c->environment_files = NULL;
1559 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1561 c->rlimit[l] = NULL;
1564 free(c->working_directory);
1565 c->working_directory = NULL;
1566 free(c->root_directory);
1567 c->root_directory = NULL;
1572 free(c->tcpwrap_name);
1573 c->tcpwrap_name = NULL;
1575 free(c->syslog_identifier);
1576 c->syslog_identifier = NULL;
1584 strv_free(c->supplementary_groups);
1585 c->supplementary_groups = NULL;
1590 if (c->capabilities) {
1591 cap_free(c->capabilities);
1592 c->capabilities = NULL;
1595 strv_free(c->read_only_dirs);
1596 c->read_only_dirs = NULL;
1598 strv_free(c->read_write_dirs);
1599 c->read_write_dirs = NULL;
1601 strv_free(c->inaccessible_dirs);
1602 c->inaccessible_dirs = NULL;
1605 CPU_FREE(c->cpuset);
1610 free(c->syscall_filter);
1611 c->syscall_filter = NULL;
/* Per-command clean-up: invoked on every element by
 * exec_command_done_array() and on every list entry by
 * exec_command_free_list(). */
1614 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n ExecCommand elements stored
 * contiguously starting at c. */
1624 void exec_command_done_array(ExecCommand *c, unsigned n) {
1627 for (i = 0; i < n; i++)
1628 exec_command_done(c+i);
/* Tears down the ExecCommand list headed by c: each entry i is unlinked
 * with LIST_REMOVE() and passed to exec_command_done(). */
1631 void exec_command_free_list(ExecCommand *c) {
1635 LIST_REMOVE(ExecCommand, command, c, i);
1636 exec_command_done(i);
/* Calls exec_command_free_list() on each of the n list heads stored in the
 * array c. */
1641 void exec_command_free_array(ExecCommand **c, unsigned n) {
1644 for (i = 0; i < n; i++) {
1645 exec_command_free_list(c[i]);
/* Loads the environment files listed in c->environment_files. For each
 * entry the path is checked with path_is_absolute(), the file is read with
 * load_env_file(), and the result is merged into the accumulated vector r
 * with strv_env_merge(). A per-entry ignore flag exists, initially false.
 * NOTE(review): the merged vector is presumably handed back through *l;
 * confirm against the end of the function. */
1650 int exec_context_load_environment(const ExecContext *c, char ***l) {
1651 char **i, **r = NULL;
1656 STRV_FOREACH(i, c->environment_files) {
1659 bool ignore = false;
1669 if (!path_is_absolute(fn)) {
1678 if ((k = load_env_file(fn, &p)) < 0) {
1692 m = strv_env_merge(2, r, p);
/* Writes string vector entries to f, each one preceded by a single space
 * and with no trailing newline. */
1708 static void strv_fprintf(FILE *f, char **l) {
1714 fprintf(f, " %s", *g);
1717 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1729 "%sWorkingDirectory: %s\n"
1730 "%sRootDirectory: %s\n"
1731 "%sNonBlocking: %s\n"
1732 "%sPrivateTmp: %s\n"
1733 "%sControlGroupModify: %s\n"
1734 "%sControlGroupPersistent: %s\n"
1735 "%sPrivateNetwork: %s\n"
1736 "%sIgnoreSIGPIPE: %s\n",
1738 prefix, c->working_directory ? c->working_directory : "/",
1739 prefix, c->root_directory ? c->root_directory : "/",
1740 prefix, yes_no(c->non_blocking),
1741 prefix, yes_no(c->private_tmp),
1742 prefix, yes_no(c->control_group_modify),
1743 prefix, yes_no(c->control_group_persistent),
1744 prefix, yes_no(c->private_network),
1745 prefix, yes_no(c->ignore_sigpipe));
1747 STRV_FOREACH(e, c->environment)
1748 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1750 STRV_FOREACH(e, c->environment_files)
1751 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1753 if (c->tcpwrap_name)
1755 "%sTCPWrapName: %s\n",
1756 prefix, c->tcpwrap_name);
1763 if (c->oom_score_adjust_set)
1765 "%sOOMScoreAdjust: %i\n",
1766 prefix, c->oom_score_adjust);
1768 for (i = 0; i < RLIM_NLIMITS; i++)
1770 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1772 if (c->ioprio_set) {
1776 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1780 "%sIOSchedulingClass: %s\n"
1781 "%sIOPriority: %i\n",
1782 prefix, strna(class_str),
1783 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1787 if (c->cpu_sched_set) {
1791 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1795 "%sCPUSchedulingPolicy: %s\n"
1796 "%sCPUSchedulingPriority: %i\n"
1797 "%sCPUSchedulingResetOnFork: %s\n",
1798 prefix, strna(policy_str),
1799 prefix, c->cpu_sched_priority,
1800 prefix, yes_no(c->cpu_sched_reset_on_fork));
1805 fprintf(f, "%sCPUAffinity:", prefix);
1806 for (i = 0; i < c->cpuset_ncpus; i++)
1807 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1808 fprintf(f, " %i", i);
1812 if (c->timer_slack_nsec != (nsec_t) -1)
1813 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1816 "%sStandardInput: %s\n"
1817 "%sStandardOutput: %s\n"
1818 "%sStandardError: %s\n",
1819 prefix, exec_input_to_string(c->std_input),
1820 prefix, exec_output_to_string(c->std_output),
1821 prefix, exec_output_to_string(c->std_error));
1827 "%sTTYVHangup: %s\n"
1828 "%sTTYVTDisallocate: %s\n",
1829 prefix, c->tty_path,
1830 prefix, yes_no(c->tty_reset),
1831 prefix, yes_no(c->tty_vhangup),
1832 prefix, yes_no(c->tty_vt_disallocate));
1834 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1835 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1836 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1837 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1838 char *fac_str, *lvl_str;
1841 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1845 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1850 "%sSyslogFacility: %s\n"
1851 "%sSyslogLevel: %s\n",
1852 prefix, strna(fac_str),
1853 prefix, strna(lvl_str));
1858 if (c->capabilities) {
1860 if ((t = cap_to_text(c->capabilities, NULL))) {
1861 fprintf(f, "%sCapabilities: %s\n",
1868 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1870 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1871 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1872 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1873 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1874 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1875 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1877 if (c->capability_bounding_set_drop) {
1879 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1881 for (l = 0; l <= cap_last_cap(); l++)
1882 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1885 if ((t = cap_to_name(l))) {
1886 fprintf(f, " %s", t);
1895 fprintf(f, "%sUser: %s\n", prefix, c->user);
1897 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1899 if (strv_length(c->supplementary_groups) > 0) {
1900 fprintf(f, "%sSupplementaryGroups:", prefix);
1901 strv_fprintf(f, c->supplementary_groups);
1906 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1908 if (strv_length(c->read_write_dirs) > 0) {
1909 fprintf(f, "%sReadWriteDirs:", prefix);
1910 strv_fprintf(f, c->read_write_dirs);
1914 if (strv_length(c->read_only_dirs) > 0) {
1915 fprintf(f, "%sReadOnlyDirs:", prefix);
1916 strv_fprintf(f, c->read_only_dirs);
1920 if (strv_length(c->inaccessible_dirs) > 0) {
1921 fprintf(f, "%sInaccessibleDirs:", prefix);
1922 strv_fprintf(f, c->inaccessible_dirs);
1928 "%sUtmpIdentifier: %s\n",
1929 prefix, c->utmp_id);
/* Marks the beginning of a process run in *s by taking the current time
 * into s->start_timestamp via dual_timestamp_get().
 * NOTE(review): the use of `pid` is not visible here; presumably it is
 * recorded in s->pid, which the exit/dump helpers read. Confirm. */
1932 void exec_status_start(ExecStatus *s, pid_t pid) {
1937 dual_timestamp_get(&s->start_timestamp);
/* Records in *s that process `pid` has terminated and performs the
 * context-level follow-up visible here: the exit timestamp is taken, a
 * utmp dead-process entry is written when the context carries a utmp
 * identifier, and the context's TTY is reset.
 * `code` and `status` are forwarded to utmp_put_dead_process(). */
1940 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* *s already holds a non-zero PID that differs from the one reported now.
 * NOTE(review): the statement guarded by this check is not visible here;
 * presumably the stale status is discarded. Confirm. */
1943 if (s->pid && s->pid != pid)
1947 dual_timestamp_get(&s->exit_timestamp);
/* Only contexts with a utmp identifier get a dead-process record. */
1953 if (context->utmp_id)
1954 utmp_put_dead_process(context->utmp_id, pid, code, status);
/* NOTE(review): `context` is dereferenced above; whether a NULL context is
 * guarded against is not visible here. Confirm. */
1956 exec_context_tty_reset(context);
/* Writes a human-readable summary of *s to f; each argument group starts
 * with `prefix`, which feeds the leading %s of every format line.
 * Visible fields: the PID, the start timestamp, and the exit timestamp
 * together with exit code/status details. */
1960 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer that format_timestamp() renders into. */
1961 char buf[FORMAT_TIMESTAMP_MAX];
/* The PID is cast to unsigned long before being passed as a variadic
 * argument. NOTE(review): the matching format string is not visible here;
 * presumably it uses %lu. Confirm. */
1974 prefix, (unsigned long) s->pid);
/* A realtime value of 0 means "no start timestamp"; nothing is printed then. */
1976 if (s->start_timestamp.realtime > 0)
1978 "%sStart Timestamp: %s\n",
1979 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise, exit details are only printed once an exit timestamp exists. */
1981 if (s->exit_timestamp.realtime > 0)
1983 "%sExit Timestamp: %s\n"
1985 "%sExit Status: %i\n",
1986 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
/* s->code is rendered symbolically through sigchld_code_to_string(). */
1987 prefix, sigchld_code_to_string(s->code),
/* Builds one string from the string vector argv and returns it in a
 * buffer allocated with new(char, k).
 * NOTE(review): the return statements are not visible here; presumably
 * NULL is returned on allocation failure and the caller owns the result.
 * Confirm. */
1991 char *exec_command_line(char **argv) {
/* First pass over argv. NOTE(review): the loop body is not visible here;
 * presumably it accumulates the required buffer size in k. Confirm. */
1999 STRV_FOREACH(a, argv)
/* Allocate the k-byte result buffer; this branch is taken when the
 * allocation fails. */
2002 if (!(n = new(char, k)))
/* Second pass over argv, handling one argument per iteration. */
2006 STRV_FOREACH(a, argv) {
/* Arguments containing any WHITESPACE character take a separate path.
 * NOTE(review): presumably they are wrapped in quotes, which is what the
 * FIXME below refers to. Confirm. */
2013 if (strpbrk(*a, WHITESPACE)) {
2024 /* FIXME: this doesn't really handle arguments that have
2025 * spaces and ticks in them */
/* Dumps one ExecCommand to f: first its command line (prefixed with
 * `prefix`), then its exec_status via exec_status_dump() using a prefix
 * that is one tab deeper. */
2030 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
/* Prefix handed to exec_status_dump(); see the fallback below. */
2032 const char *prefix2;
/* Build the deeper prefix. If strappend() yields NULL, fall back to the
 * caller's prefix so the dump still happens, just without the extra tab. */
2041 p2 = strappend(prefix, "\t");
2042 prefix2 = p2 ? p2 : prefix;
/* NOTE(review): cmd and p2 look like allocated strings; their release is
 * not visible here. Confirm both are freed before returning. */
2044 cmd = exec_command_line(c->argv);
2047 "%sCommand Line: %s\n",
/* A NULL command line is reported as the ENOMEM error text instead. */
2048 prefix, cmd ? cmd : strerror(ENOMEM));
2052 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every ExecCommand of the list that starts at c to f, calling
 * exec_command_dump() with the same prefix for each entry. */
2057 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c is passed as both the second and third argument of
 * LIST_FOREACH, i.e. it appears to serve as list head and loop cursor at
 * once; `command` names the list link field. */
2063 LIST_FOREACH(command, c, c)
2064 exec_command_dump(c, f, prefix);
/* Appends e at the end of the ExecCommand list whose head pointer is *l:
 * the current tail is located into `end` and e is inserted after it, so
 * existing entries keep their order and e becomes the last one.
 * NOTE(review): handling of an empty list (*l == NULL) is not visible
 * here. Confirm. */
2067 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2074 /* It's kind of important, that we keep the order here */
2075 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2076 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for an ExecCommand: `path` together with the variadic
 * arguments that follow is turned into a string vector by strv_new_ap(),
 * and a separate copy of `path` is made with strdup().
 * NOTE(review): how the results are stored in *c, and the return values,
 * are not visible here; presumably 0 on success and a negative errno on
 * allocation failure. Confirm. */
2081 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* path is passed as the first element, followed by the va_list contents. */
2089 l = strv_new_ap(path, ap);
/* strdup() failed. NOTE(review): l has already been assigned at this
 * point, so this branch should release it. Confirm. */
2095 if (!(p = strdup(path))) {
/* Names of the ExecInput values, indexed by enum constant through
 * designated initializers; any index below _EXEC_INPUT_MAX that is not
 * listed stays NULL. */
2109 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2110 [EXEC_INPUT_NULL] = "null",
2111 [EXEC_INPUT_TTY] = "tty",
2112 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2113 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2114 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably expands to the exec_input lookup helpers built
 * on the table above (exec_input_to_string() is used by the dump code).
 * Confirm against the macro definition. */
2117 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum constant through
 * designated initializers; any index below _EXEC_OUTPUT_MAX that is not
 * listed stays NULL. The "+console" entries name the *_AND_CONSOLE
 * variants of the syslog, kmsg and journal outputs. */
2119 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2120 [EXEC_OUTPUT_INHERIT] = "inherit",
2121 [EXEC_OUTPUT_NULL] = "null",
2122 [EXEC_OUTPUT_TTY] = "tty",
2123 [EXEC_OUTPUT_SYSLOG] = "syslog",
2124 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2125 [EXEC_OUTPUT_KMSG] = "kmsg",
2126 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2127 [EXEC_OUTPUT_JOURNAL] = "journal",
2128 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2129 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably expands to the exec_output lookup helpers built
 * on the table above (exec_output_to_string() is used by the dump code).
 * Confirm against the macro definition. */
2132 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);