1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
69 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
71 /* This assumes there is a 'tty' group */
/* Relocates the passed file descriptors so that, as far as the visible
 * logic shows, fds[i] ends up at descriptor number i+3. An fd that is
 * not yet in place is duplicated with fcntl(F_DUPFD, i+3) and the old
 * descriptor is closed. If the kernel hands back a number other than
 * i+3, the first such index is remembered in restart_from and another
 * pass is started from there. */
74 static int shift_fds(int fds[], unsigned n_fds) {
75 int start, restart_from;
80 /* Modifies the fds array! (sorts it) */
90 for (i = start; i < (int) n_fds; i++) {
93 /* Already at right index? */
97 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
100 close_nointr_nofail(fds[i]);
103 /* Hmm, the fd we wanted isn't free? Then
104 * let's remember that and try again from here */
105 if (nfd != i+3 && restart_from < 0)
109 if (restart_from < 0)
112 start = restart_from;
/* Walks over all n_fds descriptors, applies the requested O_NONBLOCK
 * state via fd_nonblock() and clears FD_CLOEXEC via
 * fd_cloexec(fd, false). The result of either helper is captured in r
 * when negative; presumably it is propagated to the caller. */
118 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
127 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
129 for (i = 0; i < n_fds; i++) {
131 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
134 /* We unconditionally drop FD_CLOEXEC from the fds,
135 * since after all we want to pass these fds to our
138 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console"
 * when none is configured. The returned string is not a copy. */
145 static const char *tty_path(const ExecContext *context) {
148 if (context->tty_path)
149 return context->tty_path;
151 return "/dev/console";
/* Runs the TTY clean-up steps that are enabled in the context. The
 * vhangup and reset steps act on tty_path(context) and therefore fall
 * back to /dev/console; VT disallocation is only done when an explicit
 * tty_path is configured. The helpers' return values are not checked
 * here. */
154 void exec_context_tty_reset(const ExecContext *context) {
157 if (context->tty_vhangup)
158 terminal_vhangup(tty_path(context));
160 if (context->tty_reset)
161 reset_terminal(tty_path(context));
163 if (context->tty_vt_disallocate && context->tty_path)
164 vt_disallocate(context->tty_path);
/* Opens /dev/null with the given flags (O_NOCTTY is always added) and
 * duplicates it onto descriptor number nfd. After dup2(), r is nfd on
 * success or -errno on failure, and the temporary descriptor is
 * closed. */
167 static int open_null_as(int flags, int nfd) {
172 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
176 r = dup2(fd, nfd) < 0 ? -errno : nfd;
177 close_nointr_nofail(fd);
/* Connects an AF_UNIX stream socket to /run/systemd/journal/stdout,
 * shuts down its read side and finally dup2()s it onto descriptor nfd
 * (r is nfd on success, -errno otherwise).
 *
 * The expressions in the middle of the function are the arguments of a
 * call whose head is not shown here -- presumably the header line that
 * is written to the socket: the syslog identifier (falling back to
 * ident), the syslog priority, the level-prefix flag, and three
 * booleans derived from the output type (forward to syslog, forward to
 * kmsg, forward to console). */
184 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
186 union sockaddr_union sa;
189 assert(output < _EXEC_OUTPUT_MAX);
193 fd = socket(AF_UNIX, SOCK_STREAM, 0);
198 sa.un.sun_family = AF_UNIX;
199 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
201 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
203 close_nointr_nofail(fd);
207 if (shutdown(fd, SHUT_RD) < 0) {
208 close_nointr_nofail(fd);
220 context->syslog_identifier ? context->syslog_identifier : ident,
222 context->syslog_priority,
223 !!context->syslog_level_prefix,
224 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
225 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
226 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
229 r = dup2(fd, nfd) < 0 ? -errno : nfd;
230 close_nointr_nofail(fd);
/* Opens the terminal at path via open_terminal() (O_NOCTTY is always
 * added to mode) and duplicates it onto descriptor number nfd. After
 * dup2(), r is nfd on success or -errno on failure, and the temporary
 * descriptor is closed. */
236 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
242 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
246 r = dup2(fd, nfd) < 0 ? -errno : nfd;
247 close_nointr_nofail(fd);
/* Tells whether the input mode is one of the three TTY variants
 * (TTY, TTY_FORCE, TTY_FAIL). */
254 static bool is_terminal_input(ExecInput i) {
256 i == EXEC_INPUT_TTY ||
257 i == EXEC_INPUT_TTY_FORCE ||
258 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades an input mode that cannot be honoured to EXEC_INPUT_NULL:
 * terminal input when apply_tty_stdin is false, and socket input when
 * no socket fd is available (socket_fd < 0). Other cases are presumably
 * passed through unchanged. */
261 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
263 if (is_terminal_input(std_input) && !apply_tty_stdin)
264 return EXEC_INPUT_NULL;
266 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
267 return EXEC_INPUT_NULL;
/* Downgrades socket output to EXEC_OUTPUT_INHERIT when no socket fd is
 * available (socket_fd < 0). Other modes are presumably passed through
 * unchanged. */
272 static int fixup_output(ExecOutput std_output, int socket_fd) {
274 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
275 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the fixed-up input mode:
 *
 *   NULL    - /dev/null opened read-only
 *   TTY*    - a terminal obtained from acquire_terminal(); TTY_FAIL and
 *             TTY_FORCE select the corresponding acquire flags, and the
 *             fd is moved to STDIN_FILENO if it is not already there
 *   SOCKET  - socket_fd duplicated onto STDIN_FILENO
 *
 * The visible branches yield STDIN_FILENO on success or a negative
 * errno value. */
280 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
285 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
289 case EXEC_INPUT_NULL:
290 return open_null_as(O_RDONLY, STDIN_FILENO);
293 case EXEC_INPUT_TTY_FORCE:
294 case EXEC_INPUT_TTY_FAIL: {
297 if ((fd = acquire_terminal(
299 i == EXEC_INPUT_TTY_FAIL,
300 i == EXEC_INPUT_TTY_FORCE,
305 if (fd != STDIN_FILENO) {
306 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
307 close_nointr_nofail(fd);
314 case EXEC_INPUT_SOCKET:
315 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
318 assert_not_reached("Unknown input type");
/* Sets up STDOUT_FILENO according to the fixed-up output mode. stdin
 * must already be set up, because several branches duplicate
 * STDIN_FILENO.
 *
 *   INHERIT - if terminal input was downgraded to null, the TTY is
 *             opened write-only; otherwise, if stdin is not /dev/null,
 *             stdin is duplicated; otherwise stdout is either left
 *             alone or the code falls through to the NULL case (the
 *             deciding condition is not shown here)
 *   NULL    - /dev/null opened write-only
 *   TTY     - stdin duplicated if it is a terminal, else the TTY is
 *             opened write-only
 *   SYSLOG / KMSG / JOURNAL (and *_AND_CONSOLE) - connect_logger_as()
 *   SOCKET  - socket_fd duplicated
 *
 * The visible branches yield STDOUT_FILENO on success or a negative
 * errno value. */
322 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
329 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
330 o = fixup_output(context->std_output, socket_fd);
332 /* This expects the input is already set up */
336 case EXEC_OUTPUT_INHERIT:
338 /* If input got downgraded, inherit the original value */
339 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
340 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
342 /* If the input is connected to anything that's not a /dev/null, inherit that... */
343 if (i != EXEC_INPUT_NULL)
344 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
346 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
348 return STDOUT_FILENO;
350 /* We need to open /dev/null here anew, to get the
351 * right access mode. So we fall through */
353 case EXEC_OUTPUT_NULL:
354 return open_null_as(O_WRONLY, STDOUT_FILENO);
356 case EXEC_OUTPUT_TTY:
357 if (is_terminal_input(i))
358 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
360 /* We don't reset the terminal if this is just about output */
361 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
363 case EXEC_OUTPUT_SYSLOG:
364 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
365 case EXEC_OUTPUT_KMSG:
366 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
367 case EXEC_OUTPUT_JOURNAL:
368 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
369 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
371 case EXEC_OUTPUT_SOCKET:
372 assert(socket_fd >= 0);
373 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
376 assert_not_reached("Unknown output type");
/* Sets up STDERR_FILENO according to the fixed-up error output mode.
 * stdin and stdout must already be set up, because several branches
 * duplicate them.
 *
 * stderr is left untouched when everything is inherited and no terminal
 * is involved (one further condition of that test is not shown here).
 * If the error mode equals the output mode, or is INHERIT, stdout is
 * duplicated. Otherwise the mode is handled like in setup_output():
 * /dev/null, the TTY (or stdin if that is a terminal), the logger
 * connection, or the socket.
 *
 * The visible branches yield STDERR_FILENO on success or a negative
 * errno value. */
380 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
387 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
388 o = fixup_output(context->std_output, socket_fd);
389 e = fixup_output(context->std_error, socket_fd);
391 /* This expects the input and output are already set up */
393 /* Don't change the stderr file descriptor if we inherit all
394 * the way and are not on a tty */
395 if (e == EXEC_OUTPUT_INHERIT &&
396 o == EXEC_OUTPUT_INHERIT &&
397 i == EXEC_INPUT_NULL &&
398 !is_terminal_input(context->std_input) &&
400 return STDERR_FILENO;
402 /* Duplicate from stdout if possible */
403 if (e == o || e == EXEC_OUTPUT_INHERIT)
404 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
408 case EXEC_OUTPUT_NULL:
409 return open_null_as(O_WRONLY, STDERR_FILENO);
411 case EXEC_OUTPUT_TTY:
412 if (is_terminal_input(i))
413 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
415 /* We don't reset the terminal if this is just about output */
416 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
418 case EXEC_OUTPUT_SYSLOG:
419 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420 case EXEC_OUTPUT_KMSG:
421 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422 case EXEC_OUTPUT_JOURNAL:
423 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
426 case EXEC_OUTPUT_SOCKET:
427 assert(socket_fd >= 0);
428 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
431 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind fd over to uid: the owner is set
 * with fchown() (group left unchanged) and the mode with
 * fchmod(TTY_MODE). Both calls are best effort and their results are
 * deliberately ignored; what counts is the fstat() check afterwards,
 * which compares the actual owner and permission bits against the
 * wanted values. */
435 static int chown_terminal(int fd, uid_t uid) {
440 /* This might fail. What matters are the results. */
441 (void) fchown(fd, uid, -1);
442 (void) fchmod(fd, TTY_MODE);
444 if (fstat(fd, &st) < 0)
447 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily redirects stdin and stdout to a terminal so that a
 * confirmation question can be asked.
 *
 * Copies of the current stdin/stdout are parked at descriptors >= 3,
 * a terminal is acquired (DEFAULT_CONFIRM_USEC is passed to
 * acquire_terminal(), presumably as its timeout), chowned to the
 * current uid and dup2()ed onto STDIN_FILENO and STDOUT_FILENO. On
 * success the parked descriptors are handed back through _saved_stdin
 * and _saved_stdout for restore_confirm_stdio(). The closes at the end
 * are presumably the failure clean-up path. */
453 static int setup_confirm_stdio(int *_saved_stdin,
454 int *_saved_stdout) {
455 int fd = -1, saved_stdin, saved_stdout = -1, r;
457 assert(_saved_stdin);
458 assert(_saved_stdout);
460 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
464 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
465 if (saved_stdout < 0) {
470 fd = acquire_terminal(
475 DEFAULT_CONFIRM_USEC);
481 r = chown_terminal(fd, getuid());
485 if (dup2(fd, STDIN_FILENO) < 0) {
490 if (dup2(fd, STDOUT_FILENO) < 0) {
496 close_nointr_nofail(fd);
498 *_saved_stdin = saved_stdin;
499 *_saved_stdout = saved_stdout;
504 if (saved_stdout >= 0)
505 close_nointr_nofail(saved_stdout);
507 if (saved_stdin >= 0)
508 close_nointr_nofail(saved_stdin);
511 close_nointr_nofail(fd);
/* printf-style helper that writes a message directly to /dev/console.
 * The console is opened write-only with O_NOCTTY|O_CLOEXEC, the
 * formatted text is written with vdprintf(), and the descriptor is
 * closed again. */
516 static int write_confirm_message(const char *format, ...) {
522 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
526 va_start(ap, format);
527 vdprintf(fd, format, ap);
530 close_nointr_nofail(fd);
/* Undoes setup_confirm_stdio(): each saved descriptor that is valid
 * (>= 0) is dup2()ed back onto STDIN_FILENO / STDOUT_FILENO, and the
 * saved copies are closed afterwards. */
535 static int restore_confirm_stdio(int *saved_stdin,
541 assert(saved_stdout);
545 if (*saved_stdin >= 0)
546 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
549 if (*saved_stdout >= 0)
550 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
553 if (*saved_stdin >= 0)
554 close_nointr_nofail(*saved_stdin);
556 if (*saved_stdout >= 0)
557 close_nointr_nofail(*saved_stdout);
/* Asks on the terminal whether the command described by argv shall be
 * executed. stdin/stdout are redirected with setup_confirm_stdio(), the
 * question "Execute ...? [Yes, No, Skip]" is put via ask(), which
 * accepts one of the characters 'y', 'n' or 's' and stores it in
 * *response, and the original stdio is restored afterwards. */
562 static int ask_for_confirmation(char *response, char **argv) {
563 int saved_stdout = -1, saved_stdin = -1, r;
566 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
570 line = exec_command_line(argv);
574 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
577 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group credentials for the process that is about to be
 * executed:
 *
 *   1. If a group is configured it is resolved with get_group_creds()
 *      and overrides the passed gid.
 *   2. If a user name is known and gid is not 0, the supplementary
 *      groups are initialised with initgroups().
 *   3. Real, effective and saved gid are set with setresgid().
 *   4. Manually configured supplementary groups are resolved and
 *      appended to the current list (bounded by _SC_NGROUPS_MAX) and
 *      installed with setgroups().
 *
 * keep_groups presumably records that step 2 already installed a group
 * list which step 4 must extend rather than replace -- confirm. */
582 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
583 bool keep_groups = false;
588 /* Lookup and set GID and supplementary group list. Here too
589 * we avoid NSS lookups for gid=0. */
591 if (context->group || username) {
593 if (context->group) {
594 const char *g = context->group;
596 if ((r = get_group_creds(&g, &gid)) < 0)
600 /* First step, initialize groups from /etc/group */
601 if (username && gid != 0) {
602 if (initgroups(username, gid) < 0)
608 /* Second step, set our gids */
609 if (setresgid(gid, gid, gid) < 0)
613 if (context->supplementary_groups) {
618 /* Final step, initialize any manually set supplementary groups */
619 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
621 if (!(gids = new(gid_t, ngroups_max)))
625 if ((k = getgroups(ngroups_max, gids)) < 0) {
632 STRV_FOREACH(i, context->supplementary_groups) {
635 if (k >= ngroups_max) {
641 r = get_group_creds(&g, gids+k);
650 if (setgroups(k, gids) < 0) {
/* Switches the process to uid with setresuid(). When the context
 * carries a capability set, precautions are taken first so that the
 * capabilities survive the uid change: SECURE_KEEP_CAPS is added to the
 * secure bits (only written if the current value differs), and a copy
 * of the configured capabilities, extended by CAP_SETUID and
 * CAP_SETPCAP in the effective and permitted sets, is installed with
 * cap_set_proc(). */
661 static int enforce_user(const ExecContext *context, uid_t uid) {
665 /* Sets (but doesn't lookup) the uid and make sure we keep the
666 * capabilities while doing so. */
668 if (context->capabilities) {
670 static const cap_value_t bits[] = {
671 CAP_SETUID, /* Necessary so that we can run setresuid() below */
672 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
675 /* First step: If we need to keep capabilities but
676 * drop privileges we need to make sure we keep our
677 * caps, while we drop privileges. */
679 int sb = context->secure_bits|SECURE_KEEP_CAPS;
681 if (prctl(PR_GET_SECUREBITS) != sb)
682 if (prctl(PR_SET_SECUREBITS, sb) < 0)
686 /* Second step: set the capabilities. This will reduce
687 * the capabilities to the minimum we need. */
689 if (!(d = cap_dup(context->capabilities)))
692 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
693 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
699 if (cap_set_proc(d) < 0) {
708 /* Third step: actually set the uids */
709 if (setresuid(uid, uid, uid) < 0)
712 /* At this point we should have all necessary capabilities but
713 are otherwise a normal user. However, the caps might have been
714 corrupted due to the setresuid() so we need to clean them up
715 later. This is done outside of this call. */
/* PAM conversation callback for setup_pam(). Conversations are not
 * supported; the return statement is not shown here, presumably a PAM
 * error code is returned unconditionally. */
722 static int null_conv(
724 const struct pam_message **msg,
725 struct pam_response **resp,
728 /* We don't support conversations */
/* Opens a PAM session for user and forks off a helper process that
 * closes the session again once the caller's process is gone.
 *
 * In the calling process: pam_start(), PAM_TTY item, account
 * management check, pam_open_session() and pam_getenvlist() are run in
 * that order; SIGTERM is blocked around the fork() so the helper cannot
 * miss it, and the previous signal mask is restored afterwards.
 *
 * In the forked helper ("(sd-pam)"): the passed fds are closed,
 * privileges are dropped to uid, PR_SET_PDEATHSIG is set to SIGTERM,
 * and the helper waits for SIGTERM as long as its parent is still
 * parent_pid. If the parent is gone it closes the PAM session, then
 * ends the PAM handle.
 *
 * On the failure path a PAM error is mapped to -EPERM (PAM codes are
 * not errno values), an opened session is closed, and an already
 * forked helper is sent SIGTERM followed by SIGCONT. */
733 static int setup_pam(
739 int fds[], unsigned n_fds) {
741 static const struct pam_conv conv = {
746 pam_handle_t *handle = NULL;
748 int pam_code = PAM_SUCCESS;
751 bool close_session = false;
752 pid_t pam_pid = 0, parent_pid;
758 /* We set up PAM in the parent process, then fork. The child
759 * will then stay around until killed via PR_SET_PDEATHSIG or
760 * systemd via the cgroup logic. It will then remove the PAM
761 * session again. The parent process will exec() the actual
762 * daemon. We do things this way to ensure that the main PID
763 * of the daemon is the one we initially fork()ed. */
765 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
771 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
774 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
777 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
780 close_session = true;
782 if ((!(e = pam_getenvlist(handle)))) {
783 pam_code = PAM_BUF_ERR;
787 /* Block SIGTERM, so that we know that it won't get lost in
789 if (sigemptyset(&ss) < 0 ||
790 sigaddset(&ss, SIGTERM) < 0 ||
791 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
794 parent_pid = getpid();
796 if ((pam_pid = fork()) < 0)
803 /* The child's job is to reset the PAM session on
806 /* This string must fit in 10 chars (i.e. the length
807 * of "/sbin/init"), to look pretty in /bin/ps */
808 rename_process("(sd-pam)");
810 /* Make sure we don't keep open the passed fds in this
811 child. We assume that otherwise only those fds are
812 open here that have been opened by PAM. */
813 close_many(fds, n_fds);
815 /* Drop privileges - we don't need any to pam_close_session
816 * and this will make PR_SET_PDEATHSIG work in most cases.
817 * If this fails, ignore the error - but expect sd-pam threads
818 * to fail to exit normally */
819 if (setresuid(uid, uid, uid) < 0)
820 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(errno)); /* setresuid() reports its failure in errno, not in r */
822 /* Wait until our parent died. This will only work if
823 * the above setresuid() succeeds, otherwise the kernel
824 * will not allow unprivileged parents to kill their privileged
825 * children this way. We rely on the control groups kill logic
826 * to do the rest for us. */
827 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
830 /* Check if our parent process might already have
832 if (getppid() == parent_pid) {
834 if (sigwait(&ss, &sig) < 0) {
841 assert(sig == SIGTERM);
846 /* If our parent died we'll end the session */
847 if (getppid() != parent_pid)
848 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
854 pam_end(handle, pam_code | PAM_DATA_SILENT);
858 /* If the child was forked off successfully it will do all the
859 * cleanups, so forget about the handle here. */
862 /* Unblock SIGTERM again in the parent */
863 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
866 /* We close the log explicitly here, since the PAM modules
867 * might have opened it, but we don't want this fd around. */
876 if (pam_code != PAM_SUCCESS)
877 err = -EPERM; /* PAM errors do not map to errno */
883 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
885 pam_end(handle, pam_code | PAM_DATA_SILENT);
893 kill(pam_pid, SIGTERM);
894 kill(pam_pid, SIGCONT);
/* Renames the current process to the file name of path wrapped in
 * parentheses, e.g. "(foo)". The local buffer holds at most 10
 * characters plus the terminating NUL, so the name stays within the
 * length of "/sbin/init". "(...)" is used on a path that is not shown
 * here -- presumably when no file name can be derived. Per the existing
 * comment the end of an over-long name is preferred; the truncation
 * code itself is not shown here. */
901 static void rename_process_from_path(const char *path) {
902 char process_name[11];
906 /* This resulting string must fit in 10 chars (i.e. the length
907 * of "/sbin/init") to look pretty in /bin/ps */
909 p = path_get_file_name(path);
911 rename_process("(...)");
917 /* The end of the process name is usually more
918 * interesting, since the first bit might just be
924 process_name[0] = '(';
925 memcpy(process_name+1, p, l);
926 process_name[1+l] = ')';
927 process_name[1+l+1] = 0;
929 rename_process(process_name);
/* Builds a seccomp BPF program from the syscall bitmap and installs it
 * with prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER).
 *
 * The program consists of a fixed header (architecture validation
 * etc.), two instructions per permitted syscall ("jump if equal" plus
 * "return SECCOMP_RET_ALLOW"), and a fixed footer. The bitmap is walked
 * twice: once to count the set bits so the program can be sized, and
 * once to emit the instruction pairs. The program buffer lives on the
 * stack (alloca).
 *
 * NOTE(review): the word index is i >> 4 while the bit index is i & 31;
 * this is kept as is because it has to match whatever code fills the
 * bitmap -- confirm against the setter. */
932 static int apply_seccomp(uint32_t *syscall_filter) {
933 static const struct sock_filter header[] = {
934 VALIDATE_ARCHITECTURE,
937 static const struct sock_filter footer[] = {
943 struct sock_filter *f;
944 struct sock_fprog prog;
946 assert(syscall_filter);
948 /* First: count the syscalls to check for */
949 for (i = 0, n = 0; i < syscall_max(); i++)
950 if (syscall_filter[i >> 4] & (1U << (i & 31))) /* unsigned: bit 31 must not shift into a sign bit */
953 /* Second: build the filter program from a header the syscall
954 * matches and the footer */
955 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
956 memcpy(f, header, sizeof(header));
958 for (i = 0, n = 0; i < syscall_max(); i++)
959 if (syscall_filter[i >> 4] & (1U << (i & 31))) {
960 struct sock_filter item[] = {
961 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
962 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
965 assert_cc(ELEMENTSOF(item) == 2);
967 f[ELEMENTSOF(header) + 2*n] = item[0];
968 f[ELEMENTSOF(header) + 2*n+1] = item[1];
973 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
975 /* Third: install the filter */
977 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
979 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
/* Forks off a child process and executes command in it, after applying
 * the settings of context.
 *
 * Before forking: the environment files are loaded, the command line is
 * logged and the cgroups are realized.
 *
 * In the child, in this order as far as shown: process rename, signal
 * dispositions and mask reset, wait on the idle pipe, closing of all
 * fds except the passed ones, TCP wrappers check, TTY reset, optional
 * interactive confirmation, stdin/stdout/stderr setup, cgroup
 * membership, OOM score, nice level, CPU scheduler, CPU affinity, I/O
 * priority, timer slack, utmp record, user lookup and terminal
 * ownership, cgroup access rights, group credentials, umask, PAM
 * session, network and mount namespaces, chroot/chdir, fd shifting and
 * flags, resource limits, capability bounding set, user switch, secure
 * bits, capabilities, no-new-privileges, seccomp filter, environment
 * assembly (LISTEN_PID, LISTEN_FDS, HOME, LOGNAME, USER, TERM plus
 * configured and PAM environment) and finally execve().
 *
 * Failing steps store a negative errno-style value in err and an exit
 * status constant in r; both are reported through the "Failed at step"
 * log message.
 *
 * In the parent: the fork is logged, the child is added to the cgroups
 * a second time (see the comment below) and the start time is
 * recorded in the command's exec_status. */
985 int exec_spawn(ExecCommand *command,
987 const ExecContext *context,
988 int fds[], unsigned n_fds,
990 bool apply_permissions,
992 bool apply_tty_stdin,
994 CGroupBonding *cgroup_bondings,
995 CGroupAttribute *cgroup_attributes,
996 const char *cgroup_suffix,
1005 char _cleanup_strv_free_ **files_env = NULL;
1010 assert(fds || n_fds <= 0);
1012 if (context->std_input == EXEC_INPUT_SOCKET ||
1013 context->std_output == EXEC_OUTPUT_SOCKET ||
1014 context->std_error == EXEC_OUTPUT_SOCKET) {
1026 r = exec_context_load_environment(context, &files_env);
1028 log_struct_unit(LOG_ERR,
1030 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1037 argv = command->argv;
1039 line = exec_command_line(argv);
1043 log_struct_unit(LOG_DEBUG,
1045 "MESSAGE=About to execute %s", line,
1049 r = cgroup_bonding_realize_list(cgroup_bondings);
1053 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1062 const char *username = NULL, *home = NULL;
1063 uid_t uid = (uid_t) -1;
1064 gid_t gid = (gid_t) -1;
1065 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1066 **final_env = NULL, **final_argv = NULL;
1068 bool set_access = false;
1072 rename_process_from_path(command->path);
1074 /* We reset exactly these signals, since they are the
1075 * only ones we set to SIG_IGN in the main daemon. All
1076 * others we leave untouched because we set them to
1077 * SIG_DFL or a valid handler initially, both of which
1078 * will be demoted to SIG_DFL. */
1079 default_signals(SIGNALS_CRASH_HANDLER,
1080 SIGNALS_IGNORE, -1);
1082 if (context->ignore_sigpipe)
1083 ignore_signals(SIGPIPE, -1);
1085 assert_se(sigemptyset(&ss) == 0);
1086 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1088 r = EXIT_SIGNAL_MASK;
1093 if (idle_pipe[1] >= 0)
1094 close_nointr_nofail(idle_pipe[1]);
1095 if (idle_pipe[0] >= 0) {
1096 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1097 close_nointr_nofail(idle_pipe[0]);
1101 /* Close sockets very early to make sure we don't
1102 * block init reexecution because it cannot bind its
1105 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1106 socket_fd >= 0 ? 1 : n_fds);
1112 if (!context->same_pgrp)
1119 if (context->tcpwrap_name) {
1121 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1127 for (i = 0; i < (int) n_fds; i++) {
1128 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1136 exec_context_tty_reset(context);
1138 if (confirm_spawn) {
1141 err = ask_for_confirmation(&response, argv);
1142 if (err == -ETIMEDOUT)
1143 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1145 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1146 else if (response == 's') {
1147 write_confirm_message("Skipping execution.\n");
1151 } else if (response == 'n') {
1152 write_confirm_message("Failing execution.\n");
1158 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1159 * must be sure to drop O_NONBLOCK */
1161 fd_nonblock(socket_fd, false);
1163 err = setup_input(context, socket_fd, apply_tty_stdin);
1169 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1175 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1181 if (cgroup_bondings) {
1182 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1189 if (context->oom_score_adjust_set) {
1192 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1195 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1197 r = EXIT_OOM_ADJUST;
1202 if (context->nice_set)
1203 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1209 if (context->cpu_sched_set) {
1210 struct sched_param param;
1213 param.sched_priority = context->cpu_sched_priority;
1215 if (sched_setscheduler(0, context->cpu_sched_policy |
1216 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1218 r = EXIT_SETSCHEDULER;
1223 if (context->cpuset)
1224 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1226 r = EXIT_CPUAFFINITY;
1230 if (context->ioprio_set)
1231 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1237 if (context->timer_slack_nsec != (nsec_t) -1)
1238 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1240 r = EXIT_TIMERSLACK;
1244 if (context->utmp_id)
1245 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1247 if (context->user) {
1248 username = context->user;
1249 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1255 if (is_terminal_input(context->std_input)) {
1256 err = chown_terminal(STDIN_FILENO, uid);
1263 if (cgroup_bondings && context->control_group_modify) {
1264 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1266 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1276 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1277 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (gid_t) -1, context->control_group_persistent);
1284 if (apply_permissions) {
1285 err = enforce_groups(context, username, gid);
1292 umask(context->umask);
1295 if (apply_permissions && context->pam_name && username) {
1296 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1303 if (context->private_network) {
1304 if (unshare(CLONE_NEWNET) < 0) {
1313 if (strv_length(context->read_write_dirs) > 0 ||
1314 strv_length(context->read_only_dirs) > 0 ||
1315 strv_length(context->inaccessible_dirs) > 0 ||
1316 context->mount_flags != 0 ||
1317 context->private_tmp) {
1318 err = setup_namespace(context->read_write_dirs,
1319 context->read_only_dirs,
1320 context->inaccessible_dirs,
1321 context->private_tmp,
1322 context->mount_flags);
1330 if (context->root_directory)
1331 if (chroot(context->root_directory) < 0) {
1337 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1343 char _cleanup_free_ *d = NULL;
1345 if (asprintf(&d, "%s/%s",
1346 context->root_directory ? context->root_directory : "",
1347 context->working_directory ? context->working_directory : "") < 0) {
1360 /* We repeat the fd closing here, to make sure that
1361 * nothing is leaked from the PAM modules */
1362 err = close_all_fds(fds, n_fds);
1364 err = shift_fds(fds, n_fds);
1366 err = flags_fds(fds, n_fds, context->non_blocking);
1372 if (apply_permissions) {
1374 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1375 if (!context->rlimit[i])
1378 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1385 if (context->capability_bounding_set_drop) {
1386 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1388 r = EXIT_CAPABILITIES;
1393 if (context->user) {
1394 err = enforce_user(context, uid);
1401 /* PR_GET_SECUREBITS is not privileged, while
1402 * PR_SET_SECUREBITS is. So to suppress
1403 * potential EPERMs we'll try not to call
1404 * PR_SET_SECUREBITS unless necessary. */
1405 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1406 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1408 r = EXIT_SECUREBITS;
1412 if (context->capabilities)
1413 if (cap_set_proc(context->capabilities) < 0) {
1415 r = EXIT_CAPABILITIES;
1419 if (context->no_new_privileges)
1420 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1422 r = EXIT_NO_NEW_PRIVILEGES;
1426 if (context->syscall_filter) {
1427 err = apply_seccomp(context->syscall_filter);
1435 if (!(our_env = new0(char*, 7))) {
1442 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1443 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1450 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1457 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1458 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1464 if (is_terminal_input(context->std_input) ||
1465 context->std_output == EXEC_OUTPUT_TTY ||
1466 context->std_error == EXEC_OUTPUT_TTY)
1467 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1475 if (!(final_env = strv_env_merge(
1479 context->environment,
1488 if (!(final_argv = replace_env_argv(argv, final_env))) {
1494 final_env = strv_env_clean(final_env);
1496 execve(command->path, final_argv, final_env);
1503 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1504 "EXECUTABLE=%s", command->path,
1505 "MESSAGE=Failed at step %s spawning %s: %s",
1506 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1507 command->path, strerror(-err),
1516 log_struct_unit(LOG_DEBUG,
1518 "MESSAGE=Forked %s as %lu",
1519 command->path, (unsigned long) pid,
1522 /* We add the new process to the cgroup both in the child (so
1523 * that we can be sure that no user code is ever executed
1524 * outside of the cgroup) and in the parent (so that we can be
1525 * sure that when we kill the cgroup the process will be
1527 if (cgroup_bondings)
1528 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1530 exec_status_start(&command->exec_status, pid);
/* Sets the defaults of an ExecContext that are not simply zero:
 * best-effort I/O scheduling class with priority data 0, SCHED_OTHER
 * CPU policy, LOG_DAEMON|LOG_INFO syslog priority with level prefix
 * parsing enabled, SIGPIPE ignored, and -1 as the "unset" marker for
 * both control_group_persistent and timer_slack_nsec. */
1536 void exec_context_init(ExecContext *c) {
1540 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1541 c->cpu_sched_policy = SCHED_OTHER;
1542 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1543 c->syslog_level_prefix = true;
1544 c->control_group_persistent = -1;
1545 c->ignore_sigpipe = true;
1546 c->timer_slack_nsec = (nsec_t) -1;
/* Releases the dynamically allocated members of an ExecContext
 * (string vectors, strings, resource limits, capability set, CPU set,
 * syscall filter). Most pointers are reset to NULL right after being
 * freed. The context structure itself is not freed here. */
1549 void exec_context_done(ExecContext *c) {
1554 strv_free(c->environment);
1555 c->environment = NULL;
1557 strv_free(c->environment_files);
1558 c->environment_files = NULL;
1560 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1562 c->rlimit[l] = NULL;
1565 free(c->working_directory);
1566 c->working_directory = NULL;
1567 free(c->root_directory);
1568 c->root_directory = NULL;
1573 free(c->tcpwrap_name);
1574 c->tcpwrap_name = NULL;
1576 free(c->syslog_identifier);
1577 c->syslog_identifier = NULL;
1585 strv_free(c->supplementary_groups);
1586 c->supplementary_groups = NULL;
1591 if (c->capabilities) {
1592 cap_free(c->capabilities);
1593 c->capabilities = NULL;
1596 strv_free(c->read_only_dirs);
1597 c->read_only_dirs = NULL;
1599 strv_free(c->read_write_dirs);
1600 c->read_write_dirs = NULL;
1602 strv_free(c->inaccessible_dirs);
1603 c->inaccessible_dirs = NULL;
1606 CPU_FREE(c->cpuset);
1611 free(c->syscall_filter);
1612 c->syscall_filter = NULL;
/* NOTE(review): the body is not shown here. Judging by the name and by
 * its callers exec_command_done_array() and exec_command_free_list(),
 * this presumably releases the members of *c without freeing c itself
 * -- confirm. */
1615 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n commands stored
 * contiguously in the array starting at c. */
1625 void exec_command_done_array(ExecCommand *c, unsigned n) {
1628 for (i = 0; i < n; i++)
1629 exec_command_done(c+i);
/* Tears down a linked list of commands: each entry i is unlinked from
 * the list headed by c with LIST_REMOVE() and passed to
 * exec_command_done(). The loop construct and any final free of the
 * entry are not shown here. */
1632 void exec_command_free_list(ExecCommand *c) {
1636 LIST_REMOVE(ExecCommand, command, c, i);
1637 exec_command_done(i);
/* Calls exec_command_free_list() on each of the n list heads stored in
 * the array c. */
1642 void exec_command_free_array(ExecCommand **c, unsigned n) {
1645 for (i = 0; i < n; i++) {
1646 exec_command_free_list(c[i]);
/* Loads the environment files configured in the context.
 *
 * Each entry of c->environment_files is checked for being an absolute
 * path and expanded with glob(), so one entry may name several files.
 * Every match is read with load_env_file() and merged into the
 * accumulated result r with strv_env_merge(). The "ignore" flag is
 * presumably set for entries whose failure to load shall be tolerated
 * -- the code that sets it is not shown here. A glob() failure is
 * reported as -errno, or -EINVAL when errno is not set. The result is
 * presumably handed back through *l. */
1651 int exec_context_load_environment(const ExecContext *c, char ***l) {
1652 char **i, **r = NULL;
1657 STRV_FOREACH(i, c->environment_files) {
1660 bool ignore = false;
1672 if (!path_is_absolute(fn)) {
1681 /* Filename supports globbing, take all matching files */
1684 if (glob(fn, 0, NULL, &pglob) != 0) {
1690 return errno ? -errno : -EINVAL;
1692 count = pglob.gl_pathc;
1701 for (n = 0; n < count; n++) {
1702 k = load_env_file(pglob.gl_pathv[n], &p);
1717 m = strv_env_merge(2, r, p);
/* Writes the strings of l to f, each preceded by a single space. The
 * iteration over l is not shown here. */
1737 static void strv_fprintf(FILE *f, char **l) {
1743 fprintf(f, " %s", *g);
1746 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1758 "%sWorkingDirectory: %s\n"
1759 "%sRootDirectory: %s\n"
1760 "%sNonBlocking: %s\n"
1761 "%sPrivateTmp: %s\n"
1762 "%sControlGroupModify: %s\n"
1763 "%sControlGroupPersistent: %s\n"
1764 "%sPrivateNetwork: %s\n"
1765 "%sIgnoreSIGPIPE: %s\n",
1767 prefix, c->working_directory ? c->working_directory : "/",
1768 prefix, c->root_directory ? c->root_directory : "/",
1769 prefix, yes_no(c->non_blocking),
1770 prefix, yes_no(c->private_tmp),
1771 prefix, yes_no(c->control_group_modify),
1772 prefix, yes_no(c->control_group_persistent),
1773 prefix, yes_no(c->private_network),
1774 prefix, yes_no(c->ignore_sigpipe));
1776 STRV_FOREACH(e, c->environment)
1777 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1779 STRV_FOREACH(e, c->environment_files)
1780 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1782 if (c->tcpwrap_name)
1784 "%sTCPWrapName: %s\n",
1785 prefix, c->tcpwrap_name);
1792 if (c->oom_score_adjust_set)
1794 "%sOOMScoreAdjust: %i\n",
1795 prefix, c->oom_score_adjust);
1797 for (i = 0; i < RLIM_NLIMITS; i++)
1799 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1801 if (c->ioprio_set) {
1805 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1809 "%sIOSchedulingClass: %s\n"
1810 "%sIOPriority: %i\n",
1811 prefix, strna(class_str),
1812 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1816 if (c->cpu_sched_set) {
1820 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1824 "%sCPUSchedulingPolicy: %s\n"
1825 "%sCPUSchedulingPriority: %i\n"
1826 "%sCPUSchedulingResetOnFork: %s\n",
1827 prefix, strna(policy_str),
1828 prefix, c->cpu_sched_priority,
1829 prefix, yes_no(c->cpu_sched_reset_on_fork));
1834 fprintf(f, "%sCPUAffinity:", prefix);
1835 for (i = 0; i < c->cpuset_ncpus; i++)
1836 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1837 fprintf(f, " %i", i);
1841 if (c->timer_slack_nsec != (nsec_t) -1)
1842 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1845 "%sStandardInput: %s\n"
1846 "%sStandardOutput: %s\n"
1847 "%sStandardError: %s\n",
1848 prefix, exec_input_to_string(c->std_input),
1849 prefix, exec_output_to_string(c->std_output),
1850 prefix, exec_output_to_string(c->std_error));
1856 "%sTTYVHangup: %s\n"
1857 "%sTTYVTDisallocate: %s\n",
1858 prefix, c->tty_path,
1859 prefix, yes_no(c->tty_reset),
1860 prefix, yes_no(c->tty_vhangup),
1861 prefix, yes_no(c->tty_vt_disallocate));
1863 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1864 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1865 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1866 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1867 char *fac_str, *lvl_str;
1870 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1874 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1879 "%sSyslogFacility: %s\n"
1880 "%sSyslogLevel: %s\n",
1881 prefix, strna(fac_str),
1882 prefix, strna(lvl_str));
1887 if (c->capabilities) {
1889 if ((t = cap_to_text(c->capabilities, NULL))) {
1890 fprintf(f, "%sCapabilities: %s\n",
1897 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1899 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1900 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1901 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1902 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1903 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1904 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1906 if (c->capability_bounding_set_drop) {
1908 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1910 for (l = 0; l <= cap_last_cap(); l++)
1911 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1914 if ((t = cap_to_name(l))) {
1915 fprintf(f, " %s", t);
1924 fprintf(f, "%sUser: %s\n", prefix, c->user);
1926 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1928 if (strv_length(c->supplementary_groups) > 0) {
1929 fprintf(f, "%sSupplementaryGroups:", prefix);
1930 strv_fprintf(f, c->supplementary_groups);
1935 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1937 if (strv_length(c->read_write_dirs) > 0) {
1938 fprintf(f, "%sReadWriteDirs:", prefix);
1939 strv_fprintf(f, c->read_write_dirs);
1943 if (strv_length(c->read_only_dirs) > 0) {
1944 fprintf(f, "%sReadOnlyDirs:", prefix);
1945 strv_fprintf(f, c->read_only_dirs);
1949 if (strv_length(c->inaccessible_dirs) > 0) {
1950 fprintf(f, "%sInaccessibleDirs:", prefix);
1951 strv_fprintf(f, c->inaccessible_dirs);
1957 "%sUtmpIdentifier: %s\n",
1958 prefix, c->utmp_id);
/* Marks the start of a process in an ExecStatus record.
 *
 * s:   status record to update
 * pid: PID of the process that was started
 *
 * The visible code stamps s->start_timestamp with the current time via
 * dual_timestamp_get().
 *
 * NOTE(review): this listing omits part of the body (the embedded line
 * numbers jump from 1961 to 1966), so how pid is recorded in *s is not
 * visible here -- confirm against the full source. */
1961 void exec_status_start(ExecStatus *s, pid_t pid) {
1966 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of a process in an ExecStatus record.
 *
 * s:       status record to update
 * context: execution context of the process; its utmp_id and TTY settings
 *          are consulted below
 * pid:     PID of the process that exited
 * code:    passed on to utmp_put_dead_process()
 * status:  passed on to utmp_put_dead_process()
 *
 * NOTE(review): several lines of the body are missing from this listing;
 * the comments below only describe what is visible. */
1969 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record already carries a PID and it differs from the one that just
 * exited. The statement guarded by this check is not visible here. */
1972 if (s->pid && s->pid != pid)
/* Stamp the exit time. */
1976 dual_timestamp_get(&s->exit_timestamp);
/* Only contexts with a utmp identifier get a dead-process utmp entry. */
1982 if (context->utmp_id)
1983 utmp_put_dead_process(context->utmp_id, pid, code, status);
/* Hand the context to exec_context_tty_reset() now that the process is gone. */
1985 exec_context_tty_reset(context);
/* Writes a human-readable description of an ExecStatus record to a stream.
 *
 * s:      status record to describe
 * f:      output stream
 * prefix: string placed in front of every emitted line (each format string
 *         below starts with "%s" fed from prefix)
 *
 * Visible output: the PID (cast to unsigned long), then "Start Timestamp"
 * and "Exit Timestamp"/"Exit Status" sections, each guarded by its
 * timestamp being set.
 *
 * NOTE(review): the lines that open the fprintf() calls, and at least one
 * format line of the exit section, are missing from this listing. */
1989 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp(); used for both timestamps. */
1990 char buf[FORMAT_TIMESTAMP_MAX];
2003 prefix, (unsigned long) s->pid);
/* A realtime value of 0 means no start was recorded; skip the line then. */
2005 if (s->start_timestamp.realtime > 0)
2007 "%sStart Timestamp: %s\n",
2008 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise, exit details are only printed once an exit time was recorded. */
2010 if (s->exit_timestamp.realtime > 0)
2012 "%sExit Timestamp: %s\n"
2014 "%sExit Status: %i\n",
2015 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2016 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from an argument vector.
 *
 * argv: string vector (iterated with STRV_FOREACH)
 *
 * Returns a char buffer allocated with new(char, k). The allocation is
 * checked for failure, but the statement in that branch is not visible in
 * this listing.
 *
 * NOTE(review): both loop bodies are missing from this listing. Presumably
 * the first pass computes the required size k and the second pass copies
 * the arguments, quoting those that contain whitespace -- confirm against
 * the full source. */
2020 char *exec_command_line(char **argv) {
/* First pass over all arguments (body not visible). */
2028 STRV_FOREACH(a, argv)
/* Allocate the k-byte result buffer; the branch is taken when allocation fails. */
2031 if (!(n = new(char, k)))
/* Second pass over all arguments. */
2035 STRV_FOREACH(a, argv) {
/* Arguments containing any WHITESPACE character get special treatment. */
2042 if (strpbrk(*a, WHITESPACE)) {
2053 /* FIXME: this doesn't really handle arguments that have
2054 * spaces and ticks in them */
/* Writes a description of one ExecCommand to a stream: its command line
 * followed by its execution status.
 *
 * c:      command to describe
 * f:      output stream
 * prefix: string placed in front of the "Command Line" line; the status
 *         section uses prefix plus a tab where that could be allocated
 *
 * NOTE(review): some lines (declarations, the fprintf() head, cleanup) are
 * missing from this listing. */
2059 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2061 const char *prefix2;
/* Build the deeper prefix; if strappend() returned NULL, fall back to the
 * plain prefix instead of failing. */
2070 p2 = strappend(prefix, "\t");
2071 prefix2 = p2 ? p2 : prefix;
2073 cmd = exec_command_line(c->argv);
/* If the command line could not be built (cmd is NULL), print the ENOMEM
 * error text in its place. */
2076 "%sCommand Line: %s\n",
2077 prefix, cmd ? cmd : strerror(ENOMEM));
2081 exec_status_dump(&c->exec_status, f, prefix2);
/* Writes a description of every command in a list to a stream.
 *
 * c:      first command to describe; the loop walks the list through the
 *         "command" list field, reusing c as the iterator
 * f:      output stream
 * prefix: passed unchanged to exec_command_dump() for each entry */
2086 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2092 LIST_FOREACH(command, c, c)
2093 exec_command_dump(c, f, prefix);
/* Appends a command to the end of a command list.
 *
 * l: pointer to the list head
 * e: command to append
 *
 * The tail of *l is located first and e is inserted after it.
 *
 * NOTE(review): lines between the signature and the visible statements are
 * missing from this listing, so any special handling (e.g. of an empty
 * list) cannot be seen here. */
2096 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2103 /* It's kind of important, that we keep the order here */
2104 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2105 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up an ExecCommand from a path and a variadic argument list.
 *
 * c:    command to fill in
 * path: first fixed argument; it is both passed to strv_new_ap() and
 *       duplicated with strdup()
 * ...:  further arguments, consumed through the va_list handed to
 *       strv_new_ap()
 *
 * Returns an int.
 *
 * NOTE(review): the va_start/va_end handling, the error paths and the
 * assignments into *c are missing from this listing. Presumably the return
 * value is 0 on success and a negative errno-style code on allocation
 * failure -- confirm against the full source. */
2110 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* Build the string vector l from path and the variadic arguments. */
2118 l = strv_new_ap(path, ap);
/* Take a private copy of path; the branch handles strdup() failure. */
2124 if (!(p = strdup(path))) {
/* String names for the ExecInput enum values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX. */
2138 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2139 [EXEC_INPUT_NULL] = "null",
2140 [EXEC_INPUT_TTY] = "tty",
2141 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2142 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2143 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably expands to the string lookup helpers for this
 * table, such as the exec_input_to_string() used by the dump code in this
 * file -- confirm against the macro definition. */
2146 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum values, indexed by enum value and
 * sized by _EXEC_OUTPUT_MAX. The "+console" names belong to the
 * *_AND_CONSOLE variants of the syslog, kmsg and journal outputs. */
2148 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2149 [EXEC_OUTPUT_INHERIT] = "inherit",
2150 [EXEC_OUTPUT_NULL] = "null",
2151 [EXEC_OUTPUT_TTY] = "tty",
2152 [EXEC_OUTPUT_SYSLOG] = "syslog",
2153 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2154 [EXEC_OUTPUT_KMSG] = "kmsg",
2155 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2156 [EXEC_OUTPUT_JOURNAL] = "journal",
2157 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2158 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably expands to the string lookup helpers for this
 * table, such as the exec_output_to_string() used by the dump code in this
 * file -- confirm against the macro definition. */
2161 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);