1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 /* This assumes there is a 'tty' group */
75 static int shift_fds(int fds[], unsigned n_fds) {
76 int start, restart_from;
81 /* Modifies the fds array! (sorts it) */
91 for (i = start; i < (int) n_fds; i++) {
94 /* Already at right index? */
98 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
101 close_nointr_nofail(fds[i]);
104 /* Hmm, the fd we wanted isn't free? Then
105 * let's remember that and try again from here*/
106 if (nfd != i+3 && restart_from < 0)
110 if (restart_from < 0)
113 start = restart_from;
119 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
128 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
130 for (i = 0; i < n_fds; i++) {
132 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
135 /* We unconditionally drop FD_CLOEXEC from the fds,
136 * since after all we want to pass these fds to our
139 if ((r = fd_cloexec(fds[i], false)) < 0)
146 static const char *tty_path(const ExecContext *context) {
149 if (context->tty_path)
150 return context->tty_path;
152 return "/dev/console";
155 void exec_context_tty_reset(const ExecContext *context) {
158 if (context->tty_vhangup)
159 terminal_vhangup(tty_path(context));
161 if (context->tty_reset)
162 reset_terminal(tty_path(context));
164 if (context->tty_vt_disallocate && context->tty_path)
165 vt_disallocate(context->tty_path);
168 static int open_null_as(int flags, int nfd) {
173 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
177 r = dup2(fd, nfd) < 0 ? -errno : nfd;
178 close_nointr_nofail(fd);
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
187 union sockaddr_union sa;
190 assert(output < _EXEC_OUTPUT_MAX);
194 fd = socket(AF_UNIX, SOCK_STREAM, 0);
199 sa.un.sun_family = AF_UNIX;
200 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
202 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
204 close_nointr_nofail(fd);
208 if (shutdown(fd, SHUT_RD) < 0) {
209 close_nointr_nofail(fd);
221 context->syslog_identifier ? context->syslog_identifier : ident,
223 context->syslog_priority,
224 !!context->syslog_level_prefix,
225 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231 close_nointr_nofail(fd);
237 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
243 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
247 r = dup2(fd, nfd) < 0 ? -errno : nfd;
248 close_nointr_nofail(fd);
255 static bool is_terminal_input(ExecInput i) {
257 i == EXEC_INPUT_TTY ||
258 i == EXEC_INPUT_TTY_FORCE ||
259 i == EXEC_INPUT_TTY_FAIL;
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
264 if (is_terminal_input(std_input) && !apply_tty_stdin)
265 return EXEC_INPUT_NULL;
267 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268 return EXEC_INPUT_NULL;
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
275 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276 return EXEC_OUTPUT_INHERIT;
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
286 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
290 case EXEC_INPUT_NULL:
291 return open_null_as(O_RDONLY, STDIN_FILENO);
294 case EXEC_INPUT_TTY_FORCE:
295 case EXEC_INPUT_TTY_FAIL: {
298 if ((fd = acquire_terminal(
300 i == EXEC_INPUT_TTY_FAIL,
301 i == EXEC_INPUT_TTY_FORCE,
306 if (fd != STDIN_FILENO) {
307 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308 close_nointr_nofail(fd);
315 case EXEC_INPUT_SOCKET:
316 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
319 assert_not_reached("Unknown input type");
323 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
331 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
332 o = fixup_output(context->std_output, socket_fd);
334 if (fileno == STDERR_FILENO) {
336 e = fixup_output(context->std_error, socket_fd);
338 /* This expects the input and output are already set up */
340 /* Don't change the stderr file descriptor if we inherit all
341 * the way and are not on a tty */
342 if (e == EXEC_OUTPUT_INHERIT &&
343 o == EXEC_OUTPUT_INHERIT &&
344 i == EXEC_INPUT_NULL &&
345 !is_terminal_input(context->std_input) &&
349 /* Duplicate from stdout if possible */
350 if (e == o || e == EXEC_OUTPUT_INHERIT)
351 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
355 } else if (o == EXEC_OUTPUT_INHERIT) {
356 /* If input got downgraded, inherit the original value */
357 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
358 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
360 /* If the input is connected to anything that's not a /dev/null, inherit that... */
361 if (i != EXEC_INPUT_NULL)
362 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
364 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
368 /* We need to open /dev/null here anew, to get the right access mode. */
369 return open_null_as(O_WRONLY, fileno);
374 case EXEC_OUTPUT_NULL:
375 return open_null_as(O_WRONLY, fileno);
377 case EXEC_OUTPUT_TTY:
378 if (is_terminal_input(i))
379 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
381 /* We don't reset the terminal if this is just about output */
382 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
384 case EXEC_OUTPUT_SYSLOG:
385 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
386 case EXEC_OUTPUT_KMSG:
387 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
388 case EXEC_OUTPUT_JOURNAL:
389 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
390 r = connect_logger_as(context, o, ident, unit_id, fileno);
392 log_error("Failed to connect std%s of %s to the journal socket: %s",
393 fileno == STDOUT_FILENO ? "out" : "err",
394 unit_id, strerror(-r));
395 r = open_null_as(O_WRONLY, fileno);
399 case EXEC_OUTPUT_SOCKET:
400 assert(socket_fd >= 0);
401 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
404 assert_not_reached("Unknown error type");
408 static int chown_terminal(int fd, uid_t uid) {
413 /* This might fail. What matters are the results. */
414 (void) fchown(fd, uid, -1);
415 (void) fchmod(fd, TTY_MODE);
417 if (fstat(fd, &st) < 0)
420 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
426 static int setup_confirm_stdio(int *_saved_stdin,
427 int *_saved_stdout) {
428 int fd = -1, saved_stdin, saved_stdout = -1, r;
430 assert(_saved_stdin);
431 assert(_saved_stdout);
433 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
437 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
438 if (saved_stdout < 0) {
443 fd = acquire_terminal(
448 DEFAULT_CONFIRM_USEC);
454 r = chown_terminal(fd, getuid());
458 if (dup2(fd, STDIN_FILENO) < 0) {
463 if (dup2(fd, STDOUT_FILENO) < 0) {
469 close_nointr_nofail(fd);
471 *_saved_stdin = saved_stdin;
472 *_saved_stdout = saved_stdout;
477 if (saved_stdout >= 0)
478 close_nointr_nofail(saved_stdout);
480 if (saved_stdin >= 0)
481 close_nointr_nofail(saved_stdin);
484 close_nointr_nofail(fd);
489 static int write_confirm_message(const char *format, ...) {
495 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
499 va_start(ap, format);
500 vdprintf(fd, format, ap);
503 close_nointr_nofail(fd);
508 static int restore_confirm_stdio(int *saved_stdin,
514 assert(saved_stdout);
518 if (*saved_stdin >= 0)
519 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
522 if (*saved_stdout >= 0)
523 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
526 if (*saved_stdin >= 0)
527 close_nointr_nofail(*saved_stdin);
529 if (*saved_stdout >= 0)
530 close_nointr_nofail(*saved_stdout);
535 static int ask_for_confirmation(char *response, char **argv) {
536 int saved_stdout = -1, saved_stdin = -1, r;
539 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
543 line = exec_command_line(argv);
547 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
550 restore_confirm_stdio(&saved_stdin, &saved_stdout);
555 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
556 bool keep_groups = false;
561 /* Lookup and set GID and supplementary group list. Here too
562 * we avoid NSS lookups for gid=0. */
564 if (context->group || username) {
566 if (context->group) {
567 const char *g = context->group;
569 if ((r = get_group_creds(&g, &gid)) < 0)
573 /* First step, initialize groups from /etc/group */
574 if (username && gid != 0) {
575 if (initgroups(username, gid) < 0)
581 /* Second step, set our gids */
582 if (setresgid(gid, gid, gid) < 0)
586 if (context->supplementary_groups) {
591 /* Final step, initialize any manually set supplementary groups */
592 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
594 if (!(gids = new(gid_t, ngroups_max)))
598 if ((k = getgroups(ngroups_max, gids)) < 0) {
605 STRV_FOREACH(i, context->supplementary_groups) {
608 if (k >= ngroups_max) {
614 r = get_group_creds(&g, gids+k);
623 if (setgroups(k, gids) < 0) {
634 static int enforce_user(const ExecContext *context, uid_t uid) {
638 /* Sets (but doesn't lookup) the uid and make sure we keep the
639 * capabilities while doing so. */
641 if (context->capabilities) {
643 static const cap_value_t bits[] = {
644 CAP_SETUID, /* Necessary so that we can run setresuid() below */
645 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
648 /* First step: If we need to keep capabilities but
649 * drop privileges we need to make sure we keep our
650 * caps, while we drop privileges. */
652 int sb = context->secure_bits|SECURE_KEEP_CAPS;
654 if (prctl(PR_GET_SECUREBITS) != sb)
655 if (prctl(PR_SET_SECUREBITS, sb) < 0)
659 /* Second step: set the capabilities. This will reduce
660 * the capabilities to the minimum we need. */
662 if (!(d = cap_dup(context->capabilities)))
665 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
666 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
672 if (cap_set_proc(d) < 0) {
681 /* Third step: actually set the uids */
682 if (setresuid(uid, uid, uid) < 0)
685 /* At this point we should have all necessary capabilities but
686 are otherwise a normal user. However, the caps might have been
687 corrupted by the setresuid(), so we need to clean them up
688 later. This is done outside of this call. */
695 static int null_conv(
697 const struct pam_message **msg,
698 struct pam_response **resp,
701 /* We don't support conversations */
706 static int setup_pam(
712 int fds[], unsigned n_fds) {
714 static const struct pam_conv conv = {
719 pam_handle_t *handle = NULL;
721 int pam_code = PAM_SUCCESS;
724 bool close_session = false;
725 pid_t pam_pid = 0, parent_pid;
731 /* We set up PAM in the parent process, then fork. The child
732 * will then stay around until killed via PR_SET_PDEATHSIG or
733 * systemd via the cgroup logic. It will then remove the PAM
734 * session again. The parent process will exec() the actual
735 * daemon. We do things this way to ensure that the main PID
736 * of the daemon is the one we initially fork()ed. */
738 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
744 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
747 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
750 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
753 close_session = true;
755 if ((!(e = pam_getenvlist(handle)))) {
756 pam_code = PAM_BUF_ERR;
760 /* Block SIGTERM, so that we know that it won't get lost in
762 if (sigemptyset(&ss) < 0 ||
763 sigaddset(&ss, SIGTERM) < 0 ||
764 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
767 parent_pid = getpid();
769 if ((pam_pid = fork()) < 0)
776 /* The child's job is to reset the PAM session on
779 /* This string must fit in 10 chars (i.e. the length
780 * of "/sbin/init"), to look pretty in /bin/ps */
781 rename_process("(sd-pam)");
783 /* Make sure we don't keep open the passed fds in this
784 child. We assume that otherwise only those fds are
785 open here that have been opened by PAM. */
786 close_many(fds, n_fds);
788 /* Drop privileges - we don't need any to pam_close_session
789 * and this will make PR_SET_PDEATHSIG work in most cases.
790 * If this fails, ignore the error - but expect sd-pam threads
791 * to fail to exit normally */
792 if (setresuid(uid, uid, uid) < 0)
793 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(errno)); /* setresuid() reports its failure reason in errno; r is not assigned from this call */
795 /* Wait until our parent died. This will only work if
796 * the above setresuid() succeeds, otherwise the kernel
797 * will not allow unprivileged parents to kill their privileged
798 * children this way. We rely on the control groups kill logic
799 * to do the rest for us. */
800 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
803 /* Check if our parent process might already have
805 if (getppid() == parent_pid) {
807 if (sigwait(&ss, &sig) < 0) {
814 assert(sig == SIGTERM);
819 /* If our parent died we'll end the session */
820 if (getppid() != parent_pid)
821 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
827 pam_end(handle, pam_code | PAM_DATA_SILENT);
831 /* If the child was forked off successfully it will do all the
832 * cleanups, so forget about the handle here. */
835 /* Unblock SIGTERM again in the parent */
836 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
839 /* We close the log explicitly here, since the PAM modules
840 * might have opened it, but we don't want this fd around. */
849 if (pam_code != PAM_SUCCESS)
850 err = -EPERM; /* PAM errors do not map to errno */
856 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
858 pam_end(handle, pam_code | PAM_DATA_SILENT);
866 kill(pam_pid, SIGTERM);
867 kill(pam_pid, SIGCONT);
874 static void rename_process_from_path(const char *path) {
875 char process_name[11];
879 /* This resulting string must fit in 10 chars (i.e. the length
880 * of "/sbin/init") to look pretty in /bin/ps */
882 p = path_get_file_name(path);
884 rename_process("(...)");
890 /* The end of the process name is usually more
891 * interesting, since the first bit might just be
897 process_name[0] = '(';
898 memcpy(process_name+1, p, l);
899 process_name[1+l] = ')';
900 process_name[1+l+1] = 0;
902 rename_process(process_name);
905 static int apply_seccomp(uint32_t *syscall_filter) {
906 static const struct sock_filter header[] = {
907 VALIDATE_ARCHITECTURE,
910 static const struct sock_filter footer[] = {
916 struct sock_filter *f;
917 struct sock_fprog prog;
919 assert(syscall_filter);
921 /* First: count the syscalls to check for */
922 for (i = 0, n = 0; i < syscall_max(); i++)
923 if (syscall_filter[i >> 4] & (1 << (i & 31)))
926 /* Second: build the filter program from a header, the syscall
927 * matches and the footer */
928 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
929 memcpy(f, header, sizeof(header));
931 for (i = 0, n = 0; i < syscall_max(); i++)
932 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
933 struct sock_filter item[] = {
934 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
935 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
938 assert_cc(ELEMENTSOF(item) == 2);
940 f[ELEMENTSOF(header) + 2*n] = item[0];
941 f[ELEMENTSOF(header) + 2*n+1] = item[1];
946 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
948 /* Third: install the filter */
950 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
952 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
958 int exec_spawn(ExecCommand *command,
960 const ExecContext *context,
961 int fds[], unsigned n_fds,
963 bool apply_permissions,
965 bool apply_tty_stdin,
967 CGroupBonding *cgroup_bondings,
968 CGroupAttribute *cgroup_attributes,
969 const char *cgroup_suffix,
978 char _cleanup_strv_free_ **files_env = NULL;
983 assert(fds || n_fds <= 0);
985 if (context->std_input == EXEC_INPUT_SOCKET ||
986 context->std_output == EXEC_OUTPUT_SOCKET ||
987 context->std_error == EXEC_OUTPUT_SOCKET) {
999 r = exec_context_load_environment(context, &files_env);
1001 log_struct_unit(LOG_ERR,
1003 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1010 argv = command->argv;
1012 line = exec_command_line(argv);
1016 log_struct_unit(LOG_DEBUG,
1018 "MESSAGE=About to execute %s", line,
1022 r = cgroup_bonding_realize_list(cgroup_bondings);
1026 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1035 const char *username = NULL, *home = NULL;
1036 uid_t uid = (uid_t) -1;
1037 gid_t gid = (gid_t) -1;
1038 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1039 **final_env = NULL, **final_argv = NULL;
1041 bool set_access = false;
1045 rename_process_from_path(command->path);
1047 /* We reset exactly these signals, since they are the
1048 * only ones we set to SIG_IGN in the main daemon. All
1049 * others we leave untouched because we set them to
1050 * SIG_DFL or a valid handler initially, both of which
1051 * will be demoted to SIG_DFL. */
1052 default_signals(SIGNALS_CRASH_HANDLER,
1053 SIGNALS_IGNORE, -1);
1055 if (context->ignore_sigpipe)
1056 ignore_signals(SIGPIPE, -1);
1058 assert_se(sigemptyset(&ss) == 0);
1059 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1061 r = EXIT_SIGNAL_MASK;
1066 if (idle_pipe[1] >= 0)
1067 close_nointr_nofail(idle_pipe[1]);
1068 if (idle_pipe[0] >= 0) {
1069 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1070 close_nointr_nofail(idle_pipe[0]);
1074 /* Close sockets very early to make sure we don't
1075 * block init reexecution because it cannot bind its
1078 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1079 socket_fd >= 0 ? 1 : n_fds);
1085 if (!context->same_pgrp)
1092 if (context->tcpwrap_name) {
1094 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1100 for (i = 0; i < (int) n_fds; i++) {
1101 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1109 exec_context_tty_reset(context);
1111 if (confirm_spawn) {
1114 err = ask_for_confirmation(&response, argv);
1115 if (err == -ETIMEDOUT)
1116 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1118 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1119 else if (response == 's') {
1120 write_confirm_message("Skipping execution.\n");
1124 } else if (response == 'n') {
1125 write_confirm_message("Failing execution.\n");
1131 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1132 * must be sure to drop O_NONBLOCK */
1134 fd_nonblock(socket_fd, false);
1136 err = setup_input(context, socket_fd, apply_tty_stdin);
1142 err = setup_output(context, STDOUT_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1148 err = setup_output(context, STDERR_FILENO, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1154 if (cgroup_bondings) {
1155 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1162 if (context->oom_score_adjust_set) {
1165 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1168 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1170 r = EXIT_OOM_ADJUST;
1175 if (context->nice_set)
1176 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1182 if (context->cpu_sched_set) {
1183 struct sched_param param;
1186 param.sched_priority = context->cpu_sched_priority;
1188 if (sched_setscheduler(0, context->cpu_sched_policy |
1189 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1191 r = EXIT_SETSCHEDULER;
1196 if (context->cpuset)
1197 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1199 r = EXIT_CPUAFFINITY;
1203 if (context->ioprio_set)
1204 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1210 if (context->timer_slack_nsec != (nsec_t) -1)
1211 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1213 r = EXIT_TIMERSLACK;
1217 if (context->utmp_id)
1218 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1220 if (context->user) {
1221 username = context->user;
1222 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1228 if (is_terminal_input(context->std_input)) {
1229 err = chown_terminal(STDIN_FILENO, uid);
1236 if (cgroup_bondings && context->control_group_modify) {
1237 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1239 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1249 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1250 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1257 if (apply_permissions) {
1258 err = enforce_groups(context, username, gid);
1265 umask(context->umask);
1268 if (apply_permissions && context->pam_name && username) {
1269 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1276 if (context->private_network) {
1277 if (unshare(CLONE_NEWNET) < 0) {
1286 if (strv_length(context->read_write_dirs) > 0 ||
1287 strv_length(context->read_only_dirs) > 0 ||
1288 strv_length(context->inaccessible_dirs) > 0 ||
1289 context->mount_flags != 0 ||
1290 context->private_tmp) {
1291 err = setup_namespace(context->read_write_dirs,
1292 context->read_only_dirs,
1293 context->inaccessible_dirs,
1294 context->private_tmp,
1295 context->mount_flags);
1303 if (context->root_directory)
1304 if (chroot(context->root_directory) < 0) {
1310 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1316 char _cleanup_free_ *d = NULL;
1318 if (asprintf(&d, "%s/%s",
1319 context->root_directory ? context->root_directory : "",
1320 context->working_directory ? context->working_directory : "") < 0) {
1333 /* We repeat the fd closing here, to make sure that
1334 * nothing is leaked from the PAM modules */
1335 err = close_all_fds(fds, n_fds);
1337 err = shift_fds(fds, n_fds);
1339 err = flags_fds(fds, n_fds, context->non_blocking);
1345 if (apply_permissions) {
1347 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1348 if (!context->rlimit[i])
1351 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1358 if (context->capability_bounding_set_drop) {
1359 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1361 r = EXIT_CAPABILITIES;
1366 if (context->user) {
1367 err = enforce_user(context, uid);
1374 /* PR_GET_SECUREBITS is not privileged, while
1375 * PR_SET_SECUREBITS is. So to suppress
1376 * potential EPERMs we'll try not to call
1377 * PR_SET_SECUREBITS unless necessary. */
1378 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1379 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1381 r = EXIT_SECUREBITS;
1385 if (context->capabilities)
1386 if (cap_set_proc(context->capabilities) < 0) {
1388 r = EXIT_CAPABILITIES;
1392 if (context->no_new_privileges)
1393 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1395 r = EXIT_NO_NEW_PRIVILEGES;
1399 if (context->syscall_filter) {
1400 err = apply_seccomp(context->syscall_filter);
1408 if (!(our_env = new0(char*, 7))) {
1415 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1416 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1423 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1430 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1431 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1437 if (is_terminal_input(context->std_input) ||
1438 context->std_output == EXEC_OUTPUT_TTY ||
1439 context->std_error == EXEC_OUTPUT_TTY)
1440 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1448 if (!(final_env = strv_env_merge(
1452 context->environment,
1461 if (!(final_argv = replace_env_argv(argv, final_env))) {
1467 final_env = strv_env_clean(final_env);
1469 execve(command->path, final_argv, final_env);
1476 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1477 "EXECUTABLE=%s", command->path,
1478 "MESSAGE=Failed at step %s spawning %s: %s",
1479 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1480 command->path, strerror(-err),
1489 log_struct_unit(LOG_DEBUG,
1491 "MESSAGE=Forked %s as %lu",
1492 command->path, (unsigned long) pid,
1495 /* We add the new process to the cgroup both in the child (so
1496 * that we can be sure that no user code is ever executed
1497 * outside of the cgroup) and in the parent (so that we can be
1498 * sure that when we kill the cgroup the process will be
1500 if (cgroup_bondings)
1501 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1503 exec_status_start(&command->exec_status, pid);
1509 void exec_context_init(ExecContext *c) {
1513 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1514 c->cpu_sched_policy = SCHED_OTHER;
1515 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1516 c->syslog_level_prefix = true;
1517 c->control_group_persistent = -1;
1518 c->ignore_sigpipe = true;
1519 c->timer_slack_nsec = (nsec_t) -1;
1522 void exec_context_done(ExecContext *c) {
1527 strv_free(c->environment);
1528 c->environment = NULL;
1530 strv_free(c->environment_files);
1531 c->environment_files = NULL;
1533 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1535 c->rlimit[l] = NULL;
1538 free(c->working_directory);
1539 c->working_directory = NULL;
1540 free(c->root_directory);
1541 c->root_directory = NULL;
1546 free(c->tcpwrap_name);
1547 c->tcpwrap_name = NULL;
1549 free(c->syslog_identifier);
1550 c->syslog_identifier = NULL;
1558 strv_free(c->supplementary_groups);
1559 c->supplementary_groups = NULL;
1564 if (c->capabilities) {
1565 cap_free(c->capabilities);
1566 c->capabilities = NULL;
1569 strv_free(c->read_only_dirs);
1570 c->read_only_dirs = NULL;
1572 strv_free(c->read_write_dirs);
1573 c->read_write_dirs = NULL;
1575 strv_free(c->inaccessible_dirs);
1576 c->inaccessible_dirs = NULL;
1579 CPU_FREE(c->cpuset);
1584 free(c->syscall_filter);
1585 c->syscall_filter = NULL;
1588 void exec_command_done(ExecCommand *c) {
1598 void exec_command_done_array(ExecCommand *c, unsigned n) {
1601 for (i = 0; i < n; i++)
1602 exec_command_done(c+i);
1605 void exec_command_free_list(ExecCommand *c) {
1609 LIST_REMOVE(ExecCommand, command, c, i);
1610 exec_command_done(i);
1615 void exec_command_free_array(ExecCommand **c, unsigned n) {
1618 for (i = 0; i < n; i++) {
1619 exec_command_free_list(c[i]);
1624 int exec_context_load_environment(const ExecContext *c, char ***l) {
1625 char **i, **r = NULL;
1630 STRV_FOREACH(i, c->environment_files) {
1633 bool ignore = false;
1645 if (!path_is_absolute(fn)) {
1654 /* Filename supports globbing, take all matching files */
1657 if (glob(fn, 0, NULL, &pglob) != 0) {
1663 return errno ? -errno : -EINVAL;
1665 count = pglob.gl_pathc;
1674 for (n = 0; n < count; n++) {
1675 k = load_env_file(pglob.gl_pathv[n], &p);
1690 m = strv_env_merge(2, r, p);
1710 static void strv_fprintf(FILE *f, char **l) {
1716 fprintf(f, " %s", *g);
1719 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1731 "%sWorkingDirectory: %s\n"
1732 "%sRootDirectory: %s\n"
1733 "%sNonBlocking: %s\n"
1734 "%sPrivateTmp: %s\n"
1735 "%sControlGroupModify: %s\n"
1736 "%sControlGroupPersistent: %s\n"
1737 "%sPrivateNetwork: %s\n"
1738 "%sIgnoreSIGPIPE: %s\n",
1740 prefix, c->working_directory ? c->working_directory : "/",
1741 prefix, c->root_directory ? c->root_directory : "/",
1742 prefix, yes_no(c->non_blocking),
1743 prefix, yes_no(c->private_tmp),
1744 prefix, yes_no(c->control_group_modify),
1745 prefix, yes_no(c->control_group_persistent),
1746 prefix, yes_no(c->private_network),
1747 prefix, yes_no(c->ignore_sigpipe));
1749 STRV_FOREACH(e, c->environment)
1750 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1752 STRV_FOREACH(e, c->environment_files)
1753 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1755 if (c->tcpwrap_name)
1757 "%sTCPWrapName: %s\n",
1758 prefix, c->tcpwrap_name);
1765 if (c->oom_score_adjust_set)
1767 "%sOOMScoreAdjust: %i\n",
1768 prefix, c->oom_score_adjust);
1770 for (i = 0; i < RLIM_NLIMITS; i++)
1772 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1774 if (c->ioprio_set) {
1778 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1782 "%sIOSchedulingClass: %s\n"
1783 "%sIOPriority: %i\n",
1784 prefix, strna(class_str),
1785 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1789 if (c->cpu_sched_set) {
1793 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1797 "%sCPUSchedulingPolicy: %s\n"
1798 "%sCPUSchedulingPriority: %i\n"
1799 "%sCPUSchedulingResetOnFork: %s\n",
1800 prefix, strna(policy_str),
1801 prefix, c->cpu_sched_priority,
1802 prefix, yes_no(c->cpu_sched_reset_on_fork));
1807 fprintf(f, "%sCPUAffinity:", prefix);
1808 for (i = 0; i < c->cpuset_ncpus; i++)
1809 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1810 fprintf(f, " %i", i);
1814 if (c->timer_slack_nsec != (nsec_t) -1)
1815 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1818 "%sStandardInput: %s\n"
1819 "%sStandardOutput: %s\n"
1820 "%sStandardError: %s\n",
1821 prefix, exec_input_to_string(c->std_input),
1822 prefix, exec_output_to_string(c->std_output),
1823 prefix, exec_output_to_string(c->std_error));
1829 "%sTTYVHangup: %s\n"
1830 "%sTTYVTDisallocate: %s\n",
1831 prefix, c->tty_path,
1832 prefix, yes_no(c->tty_reset),
1833 prefix, yes_no(c->tty_vhangup),
1834 prefix, yes_no(c->tty_vt_disallocate));
1836 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1837 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1838 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1839 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1840 char *fac_str, *lvl_str;
1843 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1847 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1852 "%sSyslogFacility: %s\n"
1853 "%sSyslogLevel: %s\n",
1854 prefix, strna(fac_str),
1855 prefix, strna(lvl_str));
1860 if (c->capabilities) {
1862 if ((t = cap_to_text(c->capabilities, NULL))) {
1863 fprintf(f, "%sCapabilities: %s\n",
1870 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1872 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1873 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1874 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1875 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1876 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1877 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1879 if (c->capability_bounding_set_drop) {
1881 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1883 for (l = 0; l <= cap_last_cap(); l++)
1884 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1887 if ((t = cap_to_name(l))) {
1888 fprintf(f, " %s", t);
1897 fprintf(f, "%sUser: %s\n", prefix, c->user);
1899 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1901 if (strv_length(c->supplementary_groups) > 0) {
1902 fprintf(f, "%sSupplementaryGroups:", prefix);
1903 strv_fprintf(f, c->supplementary_groups);
1908 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1910 if (strv_length(c->read_write_dirs) > 0) {
1911 fprintf(f, "%sReadWriteDirs:", prefix);
1912 strv_fprintf(f, c->read_write_dirs);
1916 if (strv_length(c->read_only_dirs) > 0) {
1917 fprintf(f, "%sReadOnlyDirs:", prefix);
1918 strv_fprintf(f, c->read_only_dirs);
1922 if (strv_length(c->inaccessible_dirs) > 0) {
1923 fprintf(f, "%sInaccessibleDirs:", prefix);
1924 strv_fprintf(f, c->inaccessible_dirs);
1930 "%sUtmpIdentifier: %s\n",
1931 prefix, c->utmp_id);
/* Marks the start of a process in the given ExecStatus by filling in
 * s->start_timestamp via dual_timestamp_get().
 *
 * NOTE(review): the statements between the signature and the timestamp
 * call are not visible in this excerpt, so how 'pid' is recorded in *s
 * cannot be confirmed from here. */
1934 void exec_status_start(ExecStatus *s, pid_t pid) {
1939 dual_timestamp_get(&s->start_timestamp);
/* Records the exit of process 'pid' in the given ExecStatus and runs the
 * follow-up work that depends on the ExecContext: s->exit_timestamp is
 * filled in via dual_timestamp_get(), the exit is handed to
 * utmp_put_dead_process() when the context carries a utmp identifier, and
 * exec_context_tty_reset() is called on the context.
 *
 * NOTE(review): how 'code' and 'status' are stored in *s is not visible
 * in this excerpt. */
1942 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* A PID is already recorded in s and it differs from the one reported
 * now; the action taken for that case is not visible in this excerpt. */
1945 if (s->pid && s->pid != pid)
1949 dual_timestamp_get(&s->exit_timestamp);
/* Only involve utmp if this context has a utmp identifier configured */
1955 if (context->utmp_id)
1956 utmp_put_dead_process(context->utmp_id, pid, code, status);
1958 exec_context_tty_reset(context);
/* Writes a human-readable description of an ExecStatus, each line
 * starting with 'prefix': the PID (cast to unsigned long), the start
 * timestamp, and the exit timestamp together with the exit code string
 * and exit status.
 *
 * NOTE(review): the output calls themselves are not visible in this
 * excerpt; presumably they are fprintf() calls on 'f' -- confirm. */
1962 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() below */
1963 char buf[FORMAT_TIMESTAMP_MAX];
1976 prefix, (unsigned long) s->pid);
/* The start timestamp is only shown when its realtime value is non-zero */
1978 if (s->start_timestamp.realtime > 0)
1980 "%sStart Timestamp: %s\n",
1981 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise, the exit information is only shown when the exit timestamp
 * has a non-zero realtime value */
1983 if (s->exit_timestamp.realtime > 0)
1985 "%sExit Timestamp: %s\n"
1987 "%sExit Status: %i\n",
1988 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1989 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from the argument vector 'argv' and returns it.
 * The vector is walked twice with STRV_FOREACH(): once before the buffer
 * 'n' of 'k' chars is allocated and once afterwards; in the second pass,
 * arguments containing a WHITESPACE character take a separate branch.
 *
 * exec_command_dump() in this file treats a NULL result as ENOMEM.
 *
 * NOTE(review): the bodies of both loops are not visible in this excerpt;
 * presumably the first pass computes the size 'k' and the whitespace
 * branch adds quoting -- confirm. */
1993 char *exec_command_line(char **argv) {
2001 STRV_FOREACH(a, argv)
2004 if (!(n = new(char, k)))
2008 STRV_FOREACH(a, argv) {
2015 if (strpbrk(*a, WHITESPACE)) {
2026 /* FIXME: this doesn't really handle arguments that have
2027 * spaces and ticks in them */
2032 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2034 const char *prefix2;
2043 p2 = strappend(prefix, "\t");
2044 prefix2 = p2 ? p2 : prefix;
2046 cmd = exec_command_line(c->argv);
2049 "%sCommand Line: %s\n",
2050 prefix, cmd ? cmd : strerror(ENOMEM));
2054 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every ExecCommand of a list: calls exec_command_dump() with the
 * same 'f' and 'prefix' for each entry that LIST_FOREACH() yields over
 * the 'command' list field, starting at 'c'. The parameter 'c' doubles
 * as the iteration variable. */
2059 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2065 LIST_FOREACH(command, c, c)
2066 exec_command_dump(c, f, prefix);
/* Appends the ExecCommand 'e' to the list whose head pointer is '*l':
 * the current tail is looked up with LIST_FIND_TAIL() and 'e' is inserted
 * right after it.
 *
 * NOTE(review): the surrounding statements (e.g. handling of an empty
 * list) are not visible in this excerpt. */
2069 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2076 /* Keeping the existing order of the list matters here, so e goes after the current tail */
2077 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2078 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for an ExecCommand: a string vector 'l' is built from
 * 'path' and the variadic arguments via strv_new_ap(), and 'path' is
 * duplicated into 'p' with strdup().
 *
 * NOTE(review): the error handling, the assignment into *c and the
 * return values are not visible in this excerpt; presumably a negative
 * errno-style value is returned on allocation failure -- confirm. */
2083 int exec_command_set(ExecCommand *c, const char *path, ...) {
2091 l = strv_new_ap(path, ap);
/* strdup() failure branch; its body is not visible in this excerpt */
2097 if (!(p = strdup(path))) {
/* String names for the ExecInput values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX. */
2111 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2112 [EXEC_INPUT_NULL] = "null",
2113 [EXEC_INPUT_TTY] = "tty",
2114 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2115 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2116 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably generates the exec_input_to_string() lookup
 * used elsewhere in this file (and its from-string counterpart) on top of
 * the table above -- confirm against the macro definition. */
2119 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput values, indexed by enum value and
 * sized by _EXEC_OUTPUT_MAX. The "+console" entries name the
 * *_AND_CONSOLE variants of the syslog, kmsg and journal outputs. */
2121 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2122 [EXEC_OUTPUT_INHERIT] = "inherit",
2123 [EXEC_OUTPUT_NULL] = "null",
2124 [EXEC_OUTPUT_TTY] = "tty",
2125 [EXEC_OUTPUT_SYSLOG] = "syslog",
2126 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2127 [EXEC_OUTPUT_KMSG] = "kmsg",
2128 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2129 [EXEC_OUTPUT_JOURNAL] = "journal",
2130 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2131 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably generates the exec_output_to_string() lookup
 * used elsewhere in this file (and its from-string counterpart) on top of
 * the table above -- confirm against the macro definition. */
2134 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);