1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
41 #include <linux/seccomp-bpf.h>
45 #include <security/pam_appl.h>
51 #include "capability.h"
54 #include "sd-messages.h"
56 #include "securebits.h"
58 #include "namespace.h"
60 #include "exit-status.h"
62 #include "utmp-wtmp.h"
64 #include "loopback-setup.h"
65 #include "path-util.h"
66 #include "syscall-list.h"
70 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
72 /* This assumes there is a 'tty' group */
/* Relocates the passed file descriptors so that fds[i] ends up on
 * descriptor number i+3 (see the F_DUPFD call and the nfd != i+3 check
 * below). When a wanted slot turns out to be occupied, the situation is
 * recorded through restart_from and another pass starts from there. */
75 static int shift_fds(int fds[], unsigned n_fds) {
76 int start, restart_from;
81 /* Modifies the fds array! (sorts it) */
91 for (i = start; i < (int) n_fds; i++) {
94 /* Already at right index? */
98 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
101 close_nointr_nofail(fds[i]);
104 /* Hmm, the fd we wanted isn't free? Then
105 * let's remember that and try again from here*/
106 if (nfd != i+3 && restart_from < 0)
110 if (restart_from < 0)
113 start = restart_from;
/* For each of the n_fds descriptors: applies the requested O_NONBLOCK
 * state through fd_nonblock() and clears FD_CLOEXEC through
 * fd_cloexec(fd, false). A negative result of either helper is treated
 * as an error (handling not visible in this excerpt). */
119 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
128 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
130 for (i = 0; i < n_fds; i++) {
132 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
135 /* We unconditionally drop FD_CLOEXEC from the fds,
136 * since after all we want to pass these fds to our
139 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the terminal path to use for this context: the configured
 * context->tty_path when it is set, otherwise "/dev/console". */
146 static const char *tty_path(const ExecContext *context) {
149 if (context->tty_path)
150 return context->tty_path;
152 return "/dev/console";
/* Performs the TTY cleanup steps enabled in the context: terminal_vhangup()
 * and/or reset_terminal() on the path returned by tty_path(), and
 * vt_disallocate(), which is only attempted when an explicit
 * context->tty_path is configured. */
155 void exec_context_tty_reset(const ExecContext *context) {
158 if (context->tty_vhangup)
159 terminal_vhangup(tty_path(context));
161 if (context->tty_reset)
162 reset_terminal(tty_path(context));
164 if (context->tty_vt_disallocate && context->tty_path)
165 vt_disallocate(context->tty_path);
/* Opens /dev/null with the given flags plus O_NOCTTY and, on the visible
 * path, duplicates it onto descriptor nfd with dup2() and closes the
 * temporary descriptor. r becomes nfd on success or -errno if dup2()
 * fails. NOTE(review): any guard around the dup2()/close pair is not
 * visible in this excerpt. */
168 static int open_null_as(int flags, int nfd) {
173 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
177 r = dup2(fd, nfd) < 0 ? -errno : nfd;
178 close_nointr_nofail(fd);
/* Connects an AF_UNIX stream socket to /run/systemd/journal/stdout, shuts
 * down its read side, and finally duplicates it onto descriptor nfd
 * (r is nfd on success, -errno if dup2() fails). The argument list in the
 * middle carries the identifier (context->syslog_identifier, falling back
 * to ident), the syslog priority, the level-prefix flag and three
 * booleans derived from the output mode (syslog, kmsg, console
 * forwarding); the call consuming these arguments is not visible in this
 * excerpt. */
185 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
187 union sockaddr_union sa;
190 assert(output < _EXEC_OUTPUT_MAX);
194 fd = socket(AF_UNIX, SOCK_STREAM, 0);
199 sa.un.sun_family = AF_UNIX;
200 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
202 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
204 close_nointr_nofail(fd);
208 if (shutdown(fd, SHUT_RD) < 0) {
209 close_nointr_nofail(fd);
221 context->syslog_identifier ? context->syslog_identifier : ident,
223 context->syslog_priority,
224 !!context->syslog_level_prefix,
225 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
226 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
227 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
230 r = dup2(fd, nfd) < 0 ? -errno : nfd;
231 close_nointr_nofail(fd);
/* Opens the terminal at path through open_terminal() with mode plus
 * O_NOCTTY and, on the visible path, duplicates it onto descriptor nfd
 * and closes the temporary descriptor. r becomes nfd on success or
 * -errno if dup2() fails. NOTE(review): any guard around the
 * dup2()/close pair is not visible in this excerpt. */
237 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
243 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
247 r = dup2(fd, nfd) < 0 ? -errno : nfd;
248 close_nointr_nofail(fd);
/* Tells whether the input mode is one of the terminal-backed modes:
 * EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL. */
255 static bool is_terminal_input(ExecInput i) {
257 i == EXEC_INPUT_TTY ||
258 i == EXEC_INPUT_TTY_FORCE ||
259 i == EXEC_INPUT_TTY_FAIL;
/* Computes the input mode that can actually be honoured: terminal input
 * is downgraded to EXEC_INPUT_NULL when apply_tty_stdin is false, and
 * socket input is downgraded to EXEC_INPUT_NULL when no socket was passed
 * (socket_fd < 0). */
262 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
264 if (is_terminal_input(std_input) && !apply_tty_stdin)
265 return EXEC_INPUT_NULL;
267 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
268 return EXEC_INPUT_NULL;
/* Computes the output mode that can actually be honoured: socket output
 * falls back to EXEC_OUTPUT_INHERIT when no socket was passed
 * (socket_fd < 0). */
273 static int fixup_output(ExecOutput std_output, int socket_fd) {
275 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
276 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the effective input mode computed by
 * fixup_input(): /dev/null opened read-only, a terminal obtained through
 * acquire_terminal() (with the fail/force flags derived from the mode)
 * and moved onto stdin if it is not already there, or the passed socket
 * duplicated onto stdin. The dup2()-based branches yield STDIN_FILENO on
 * success or -errno on failure. */
281 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
286 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
290 case EXEC_INPUT_NULL:
291 return open_null_as(O_RDONLY, STDIN_FILENO);
294 case EXEC_INPUT_TTY_FORCE:
295 case EXEC_INPUT_TTY_FAIL: {
298 if ((fd = acquire_terminal(
300 i == EXEC_INPUT_TTY_FAIL,
301 i == EXEC_INPUT_TTY_FORCE,
306 if (fd != STDIN_FILENO) {
307 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308 close_nointr_nofail(fd);
315 case EXEC_INPUT_SOCKET:
316 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
319 assert_not_reached("Unknown input type");
/* Sets up STDOUT_FILENO according to the effective output mode computed
 * by fixup_output(), taking the effective input mode into account:
 * inherit (from the original terminal, from stdin, or left untouched),
 * /dev/null opened write-only, a terminal (reusing stdin when that is a
 * terminal), a logger connection through connect_logger_as(), or the
 * passed socket. The dup2()-based branches yield STDOUT_FILENO on success
 * or -errno on failure. */
323 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
330 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
331 o = fixup_output(context->std_output, socket_fd);
333 /* This expects the input is already set up */
337 case EXEC_OUTPUT_INHERIT:
339 /* If input got downgraded, inherit the original value */
340 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
341 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
343 /* If the input is connected to anything that's not a /dev/null, inherit that... */
344 if (i != EXEC_INPUT_NULL)
345 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
347 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
349 return STDOUT_FILENO;
351 /* We need to open /dev/null here anew, to get the
352 * right access mode. So we fall through */
354 case EXEC_OUTPUT_NULL:
355 return open_null_as(O_WRONLY, STDOUT_FILENO);
357 case EXEC_OUTPUT_TTY:
358 if (is_terminal_input(i))
359 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
361 /* We don't reset the terminal if this is just about output */
362 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
364 case EXEC_OUTPUT_SYSLOG:
365 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
366 case EXEC_OUTPUT_KMSG:
367 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
368 case EXEC_OUTPUT_JOURNAL:
369 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
370 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
372 case EXEC_OUTPUT_SOCKET:
373 assert(socket_fd >= 0);
374 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
377 assert_not_reached("Unknown output type");
/* Sets up STDERR_FILENO according to the effective stderr mode. stderr is
 * left untouched when everything is inherited and no terminal is
 * involved; it is duplicated from stdout when the stderr mode equals the
 * stdout mode or is "inherit"; otherwise it is connected to /dev/null, a
 * terminal, a logger connection or the passed socket, mirroring the
 * corresponding stdout cases. The dup2()-based branches yield
 * STDERR_FILENO on success or -errno on failure. */
381 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
388 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
389 o = fixup_output(context->std_output, socket_fd);
390 e = fixup_output(context->std_error, socket_fd);
392 /* This expects the input and output are already set up */
394 /* Don't change the stderr file descriptor if we inherit all
395 * the way and are not on a tty */
396 if (e == EXEC_OUTPUT_INHERIT &&
397 o == EXEC_OUTPUT_INHERIT &&
398 i == EXEC_INPUT_NULL &&
399 !is_terminal_input(context->std_input) &&
401 return STDERR_FILENO;
403 /* Duplicate from stdout if possible */
404 if (e == o || e == EXEC_OUTPUT_INHERIT)
405 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
409 case EXEC_OUTPUT_NULL:
410 return open_null_as(O_WRONLY, STDERR_FILENO);
412 case EXEC_OUTPUT_TTY:
413 if (is_terminal_input(i))
414 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
416 /* We don't reset the terminal if this is just about output */
417 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
419 case EXEC_OUTPUT_SYSLOG:
420 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
421 case EXEC_OUTPUT_KMSG:
422 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
423 case EXEC_OUTPUT_JOURNAL:
424 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
425 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
427 case EXEC_OUTPUT_SOCKET:
428 assert(socket_fd >= 0);
429 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
432 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind fd over to uid: fchown() to uid (group
 * left unchanged via -1) and fchmod() to TTY_MODE are attempted with their
 * return values deliberately ignored; fstat() is then used to verify that
 * the owner and the permission bits (mode & 0777) really match. */
436 static int chown_terminal(int fd, uid_t uid) {
441 /* This might fail. What matters are the results. */
442 (void) fchown(fd, uid, -1);
443 (void) fchmod(fd, TTY_MODE);
445 if (fstat(fd, &st) < 0)
448 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Prepares stdin/stdout for an interactive confirmation prompt: saves
 * copies of the current STDIN_FILENO and STDOUT_FILENO on descriptors >= 3
 * (F_DUPFD), acquires a terminal (with DEFAULT_CONFIRM_USEC passed as the
 * last argument), chowns it to the current uid and duplicates it onto
 * stdin and stdout. On success the saved descriptors are handed back
 * through _saved_stdin/_saved_stdout; the trailing lines close whatever
 * was opened (presumably the failure path - the label is not visible in
 * this excerpt). */
454 static int setup_confirm_stdio(int *_saved_stdin,
455 int *_saved_stdout) {
456 int fd = -1, saved_stdin, saved_stdout = -1, r;
458 assert(_saved_stdin);
459 assert(_saved_stdout);
461 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
465 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
466 if (saved_stdout < 0) {
471 fd = acquire_terminal(
476 DEFAULT_CONFIRM_USEC);
482 r = chown_terminal(fd, getuid());
486 if (dup2(fd, STDIN_FILENO) < 0) {
491 if (dup2(fd, STDOUT_FILENO) < 0) {
497 close_nointr_nofail(fd);
499 *_saved_stdin = saved_stdin;
500 *_saved_stdout = saved_stdout;
505 if (saved_stdout >= 0)
506 close_nointr_nofail(saved_stdout);
508 if (saved_stdin >= 0)
509 close_nointr_nofail(saved_stdin);
512 close_nointr_nofail(fd);
/* Writes a printf-style formatted message to /dev/console: the console is
 * opened write-only (O_NOCTTY|O_CLOEXEC) through open_terminal(), the
 * message is emitted with vdprintf(), and the descriptor is closed
 * again. */
517 static int write_confirm_message(const char *format, ...) {
523 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
527 va_start(ap, format);
528 vdprintf(fd, format, ap);
531 close_nointr_nofail(fd);
/* Undoes the stdio redirection used for the confirmation prompt: each saved
 * descriptor that is valid (>= 0) is duplicated back onto STDIN_FILENO /
 * STDOUT_FILENO and then closed. */
536 static int restore_confirm_stdio(int *saved_stdin,
542 assert(saved_stdout);
546 if (*saved_stdin >= 0)
547 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
550 if (*saved_stdout >= 0)
551 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
554 if (*saved_stdin >= 0)
555 close_nointr_nofail(*saved_stdin);
557 if (*saved_stdout >= 0)
558 close_nointr_nofail(*saved_stdout);
/* Asks on the terminal whether the command in argv shall be executed:
 * stdio is redirected with setup_confirm_stdio(), the command line is
 * rendered with exec_command_line(), ask() stores one of the replies
 * 'y', 'n' or 's' in *response, and the original stdio is put back with
 * restore_confirm_stdio(). */
563 static int ask_for_confirmation(char *response, char **argv) {
564 int saved_stdout = -1, saved_stdin = -1, r;
567 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
571 line = exec_command_line(argv);
575 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
578 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group configuration of the context. When a group is
 * configured it is resolved with get_group_creds(), overriding the passed
 * gid; for a named user with a non-zero gid the supplementary groups are
 * initialized with initgroups(); the real, effective and saved gid are set
 * with setresgid(). Afterwards any context->supplementary_groups are
 * resolved and appended to the current group list (getgroups()), bounded
 * by sysconf(_SC_NGROUPS_MAX), and installed with setgroups(). */
583 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
584 bool keep_groups = false;
589 /* Lookup and set GID and supplementary group list. Here too
590 * we avoid NSS lookups for gid=0. */
592 if (context->group || username) {
594 if (context->group) {
595 const char *g = context->group;
597 if ((r = get_group_creds(&g, &gid)) < 0)
601 /* First step, initialize groups from /etc/group */
602 if (username && gid != 0) {
603 if (initgroups(username, gid) < 0)
609 /* Second step, set our gids */
610 if (setresgid(gid, gid, gid) < 0)
614 if (context->supplementary_groups) {
619 /* Final step, initialize any manually set supplementary groups */
620 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
622 if (!(gids = new(gid_t, ngroups_max)))
626 if ((k = getgroups(ngroups_max, gids)) < 0) {
633 STRV_FOREACH(i, context->supplementary_groups) {
636 if (k >= ngroups_max) {
642 r = get_group_creds(&g, gids+k);
651 if (setgroups(k, gids) < 0) {
/* Switches the process to the given uid with setresuid(). When the
 * context carries a capability set, SECURE_KEEP_CAPS is first added to the
 * secure bits (only calling PR_SET_SECUREBITS if the current value
 * differs) and a copy of the capabilities, extended by CAP_SETUID and
 * CAP_SETPCAP in the effective and permitted sets, is installed with
 * cap_set_proc() so the capabilities are retained across the uid
 * change. */
662 static int enforce_user(const ExecContext *context, uid_t uid) {
666 /* Sets (but doesn't lookup) the uid and make sure we keep the
667 * capabilities while doing so. */
669 if (context->capabilities) {
671 static const cap_value_t bits[] = {
672 CAP_SETUID, /* Necessary so that we can run setresuid() below */
673 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
676 /* First step: If we need to keep capabilities but
677 * drop privileges we need to make sure we keep our
678 * caps, while we drop privileges. */
680 int sb = context->secure_bits|SECURE_KEEP_CAPS;
682 if (prctl(PR_GET_SECUREBITS) != sb)
683 if (prctl(PR_SET_SECUREBITS, sb) < 0)
687 /* Second step: set the capabilities. This will reduce
688 * the capabilities to the minimum we need. */
690 if (!(d = cap_dup(context->capabilities)))
693 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
694 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
700 if (cap_set_proc(d) < 0) {
709 /* Third step: actually set the uids */
710 if (setresuid(uid, uid, uid) < 0)
713 /* At this point we should have all necessary capabilities but
714 are otherwise a normal user. However, the caps might have gotten
715 corrupted due to the setresuid() so we need clean them up
716 later. This is done outside of this call. */
723 static int null_conv(
725 const struct pam_message **msg,
726 struct pam_response **resp,
729 /* We don't support conversations */
734 static int setup_pam(
740 int fds[], unsigned n_fds) {
742 static const struct pam_conv conv = {
747 pam_handle_t *handle = NULL;
749 int pam_code = PAM_SUCCESS;
752 bool close_session = false;
753 pid_t pam_pid = 0, parent_pid;
759 /* We set up PAM in the parent process, then fork. The child
760 * will then stay around until killed via PR_SET_PDEATHSIG or
761 * systemd via the cgroup logic. It will then remove the PAM
762 * session again. The parent process will exec() the actual
763 * daemon. We do things this way to ensure that the main PID
764 * of the daemon is the one we initially fork()ed. */
766 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
772 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
775 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
778 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
781 close_session = true;
783 if ((!(e = pam_getenvlist(handle)))) {
784 pam_code = PAM_BUF_ERR;
788 /* Block SIGTERM, so that we know that it won't get lost in
790 if (sigemptyset(&ss) < 0 ||
791 sigaddset(&ss, SIGTERM) < 0 ||
792 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
795 parent_pid = getpid();
797 if ((pam_pid = fork()) < 0)
804 /* The child's job is to reset the PAM session on
807 /* This string must fit in 10 chars (i.e. the length
808 * of "/sbin/init"), to look pretty in /bin/ps */
809 rename_process("(sd-pam)");
811 /* Make sure we don't keep open the passed fds in this
812 child. We assume that otherwise only those fds are
813 open here that have been opened by PAM. */
814 close_many(fds, n_fds);
816 /* Drop privileges - we don't need any to pam_close_session
817 * and this will make PR_SET_PDEATHSIG work in most cases.
818 * If this fails, ignore the error - but expect sd-pam threads
819 * to fail to exit normally */
/* setresuid() reports its failure through errno; its result is not stored
 * in any local variable, so errno is what has to be logged here. */
820 if (setresuid(uid, uid, uid) < 0)
821 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(errno));
823 /* Wait until our parent died. This will only work if
824 * the above setresuid() succeeds, otherwise the kernel
825 * will not allow unprivileged parents kill their privileged
826 * children this way. We rely on the control groups kill logic
827 * to do the rest for us. */
828 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
831 /* Check if our parent process might already have
833 if (getppid() == parent_pid) {
835 if (sigwait(&ss, &sig) < 0) {
842 assert(sig == SIGTERM);
847 /* If our parent died we'll end the session */
848 if (getppid() != parent_pid)
849 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
855 pam_end(handle, pam_code | PAM_DATA_SILENT);
859 /* If the child was forked off successfully it will do all the
860 * cleanups, so forget about the handle here. */
863 /* Unblock SIGTERM again in the parent */
864 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
867 /* We close the log explicitly here, since the PAM modules
868 * might have opened it, but we don't want this fd around. */
877 if (pam_code != PAM_SUCCESS)
878 err = -EPERM; /* PAM errors do not map to errno */
884 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
886 pam_end(handle, pam_code | PAM_DATA_SILENT);
894 kill(pam_pid, SIGTERM);
895 kill(pam_pid, SIGCONT);
/* Renames the current process after the file name component of path
 * (path_get_file_name()), wrapped in parentheses; "(...)" is used in one
 * fallback branch. process_name has 11 bytes, i.e. room for '(' + l bytes
 * of the name + ')' + NUL, so l must not exceed 8 - the code that bounds l
 * is not visible in this excerpt (TODO confirm). */
902 static void rename_process_from_path(const char *path) {
903 char process_name[11];
907 /* This resulting string must fit in 10 chars (i.e. the length
908 * of "/sbin/init") to look pretty in /bin/ps */
910 p = path_get_file_name(path);
912 rename_process("(...)");
918 /* The end of the process name is usually more
919 * interesting, since the first bit might just be
925 process_name[0] = '(';
926 memcpy(process_name+1, p, l);
927 process_name[1+l] = ')';
928 process_name[1+l+1] = 0;
930 rename_process(process_name);
/* Builds and installs a seccomp BPF filter from the syscall bitmap. The
 * program is laid out as: the static header, then two instructions per
 * selected syscall (a JEQ on the syscall number followed by
 * RET SECCOMP_RET_ALLOW), then the static footer. It is assembled in an
 * alloca() buffer and installed with
 * prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER).
 * NOTE(review): the bitmap is read as word (i >> 4), bit (i & 31). With
 * 32-bit words one would expect (i >> 5); check the code that fills
 * syscall_filter and its allocation size before changing either side. */
933 static int apply_seccomp(uint32_t *syscall_filter) {
934 static const struct sock_filter header[] = {
935 VALIDATE_ARCHITECTURE,
938 static const struct sock_filter footer[] = {
944 struct sock_filter *f;
945 struct sock_fprog prog;
947 assert(syscall_filter);
949 /* First: count the syscalls to check for */
950 for (i = 0, n = 0; i < syscall_max(); i++)
951 if (syscall_filter[i >> 4] & (1 << (i & 31)))
954 /* Second: build the filter program from a header, the syscall
955 * matches, and the footer */
956 f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
957 memcpy(f, header, sizeof(header));
959 for (i = 0, n = 0; i < syscall_max(); i++)
960 if (syscall_filter[i >> 4] & (1 << (i & 31))) {
961 struct sock_filter item[] = {
962 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
963 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
966 assert_cc(ELEMENTSOF(item) == 2);
968 f[ELEMENTSOF(header) + 2*n] = item[0];
969 f[ELEMENTSOF(header) + 2*n+1] = item[1];
974 memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
976 /* Third: install the filter */
978 prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
980 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
986 int exec_spawn(ExecCommand *command,
988 const ExecContext *context,
989 int fds[], unsigned n_fds,
991 bool apply_permissions,
993 bool apply_tty_stdin,
995 CGroupBonding *cgroup_bondings,
996 CGroupAttribute *cgroup_attributes,
997 const char *cgroup_suffix,
1006 char _cleanup_strv_free_ **files_env = NULL;
1011 assert(fds || n_fds <= 0);
1013 if (context->std_input == EXEC_INPUT_SOCKET ||
1014 context->std_output == EXEC_OUTPUT_SOCKET ||
1015 context->std_error == EXEC_OUTPUT_SOCKET) {
1027 r = exec_context_load_environment(context, &files_env);
1029 log_struct_unit(LOG_ERR,
1031 "MESSAGE=Failed to load environment files: %s", strerror(-r),
1038 argv = command->argv;
1040 line = exec_command_line(argv);
1044 log_struct_unit(LOG_DEBUG,
1046 "MESSAGE=About to execute %s", line,
1050 r = cgroup_bonding_realize_list(cgroup_bondings);
1054 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1063 const char *username = NULL, *home = NULL;
1064 uid_t uid = (uid_t) -1;
1065 gid_t gid = (gid_t) -1;
1066 char _cleanup_strv_free_ **our_env = NULL, **pam_env = NULL,
1067 **final_env = NULL, **final_argv = NULL;
1069 bool set_access = false;
1073 rename_process_from_path(command->path);
1075 /* We reset exactly these signals, since they are the
1076 * only ones we set to SIG_IGN in the main daemon. All
1077 * others we leave untouched because we set them to
1078 * SIG_DFL or a valid handler initially, both of which
1079 * will be demoted to SIG_DFL. */
1080 default_signals(SIGNALS_CRASH_HANDLER,
1081 SIGNALS_IGNORE, -1);
1083 if (context->ignore_sigpipe)
1084 ignore_signals(SIGPIPE, -1);
1086 assert_se(sigemptyset(&ss) == 0);
1087 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1089 r = EXIT_SIGNAL_MASK;
1094 if (idle_pipe[1] >= 0)
1095 close_nointr_nofail(idle_pipe[1]);
1096 if (idle_pipe[0] >= 0) {
1097 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1098 close_nointr_nofail(idle_pipe[0]);
1102 /* Close sockets very early to make sure we don't
1103 * block init reexecution because it cannot bind its
1106 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1107 socket_fd >= 0 ? 1 : n_fds);
1113 if (!context->same_pgrp)
1120 if (context->tcpwrap_name) {
1122 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1128 for (i = 0; i < (int) n_fds; i++) {
1129 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1137 exec_context_tty_reset(context);
1139 if (confirm_spawn) {
1142 err = ask_for_confirmation(&response, argv);
1143 if (err == -ETIMEDOUT)
1144 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1146 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1147 else if (response == 's') {
1148 write_confirm_message("Skipping execution.\n");
1152 } else if (response == 'n') {
1153 write_confirm_message("Failing execution.\n");
1159 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1160 * must make sure to drop O_NONBLOCK */
1162 fd_nonblock(socket_fd, false);
1164 err = setup_input(context, socket_fd, apply_tty_stdin);
1170 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1176 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1182 if (cgroup_bondings) {
1183 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1190 if (context->oom_score_adjust_set) {
1193 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1196 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1198 r = EXIT_OOM_ADJUST;
1203 if (context->nice_set)
1204 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1210 if (context->cpu_sched_set) {
1211 struct sched_param param;
1214 param.sched_priority = context->cpu_sched_priority;
/* Apply the configured policy (optionally OR-ed with SCHED_RESET_ON_FORK),
 * passing the address of the sched_param filled in above. */
1216 if (sched_setscheduler(0, context->cpu_sched_policy |
1217 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1219 r = EXIT_SETSCHEDULER;
1224 if (context->cpuset)
1225 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1227 r = EXIT_CPUAFFINITY;
1231 if (context->ioprio_set)
1232 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1238 if (context->timer_slack_nsec != (nsec_t) -1)
1239 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1241 r = EXIT_TIMERSLACK;
1245 if (context->utmp_id)
1246 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1248 if (context->user) {
1249 username = context->user;
1250 err = get_user_creds(&username, &uid, &gid, &home, NULL);
1256 if (is_terminal_input(context->std_input)) {
1257 err = chown_terminal(STDIN_FILENO, uid);
1264 if (cgroup_bondings && context->control_group_modify) {
1265 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1267 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1277 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
/* Leave mode, owner and group untouched (all -1) and only apply the
 * persistence setting; the fourth argument is the gid, hence gid_t. */
1278 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (gid_t) -1, context->control_group_persistent);
1285 if (apply_permissions) {
1286 err = enforce_groups(context, username, gid);
1293 umask(context->umask);
1296 if (apply_permissions && context->pam_name && username) {
1297 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1304 if (context->private_network) {
1305 if (unshare(CLONE_NEWNET) < 0) {
1314 if (strv_length(context->read_write_dirs) > 0 ||
1315 strv_length(context->read_only_dirs) > 0 ||
1316 strv_length(context->inaccessible_dirs) > 0 ||
1317 context->mount_flags != 0 ||
1318 context->private_tmp) {
1319 err = setup_namespace(context->read_write_dirs,
1320 context->read_only_dirs,
1321 context->inaccessible_dirs,
1322 context->private_tmp,
1323 context->mount_flags);
1331 if (context->root_directory)
1332 if (chroot(context->root_directory) < 0) {
1338 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1344 char _cleanup_free_ *d = NULL;
1346 if (asprintf(&d, "%s/%s",
1347 context->root_directory ? context->root_directory : "",
1348 context->working_directory ? context->working_directory : "") < 0) {
1361 /* We repeat the fd closing here, to make sure that
1362 * nothing is leaked from the PAM modules */
1363 err = close_all_fds(fds, n_fds);
1365 err = shift_fds(fds, n_fds);
1367 err = flags_fds(fds, n_fds, context->non_blocking);
1373 if (apply_permissions) {
1375 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1376 if (!context->rlimit[i])
1379 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1386 if (context->capability_bounding_set_drop) {
1387 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1389 r = EXIT_CAPABILITIES;
1394 if (context->user) {
1395 err = enforce_user(context, uid);
1402 /* PR_GET_SECUREBITS is not privileged, while
1403 * PR_SET_SECUREBITS is. So to suppress
1404 * potential EPERMs we'll try not to call
1405 * PR_SET_SECUREBITS unless necessary. */
1406 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1407 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1409 r = EXIT_SECUREBITS;
1413 if (context->capabilities)
1414 if (cap_set_proc(context->capabilities) < 0) {
1416 r = EXIT_CAPABILITIES;
1420 if (context->no_new_privileges)
1421 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1423 r = EXIT_NO_NEW_PRIVILEGES;
1427 if (context->syscall_filter) {
1428 err = apply_seccomp(context->syscall_filter);
1436 if (!(our_env = new0(char*, 7))) {
1443 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1444 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1451 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1458 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1459 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1465 if (is_terminal_input(context->std_input) ||
1466 context->std_output == EXEC_OUTPUT_TTY ||
1467 context->std_error == EXEC_OUTPUT_TTY)
1468 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1476 if (!(final_env = strv_env_merge(
1480 context->environment,
1489 if (!(final_argv = replace_env_argv(argv, final_env))) {
1495 final_env = strv_env_clean(final_env);
1497 execve(command->path, final_argv, final_env);
1504 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1505 "EXECUTABLE=%s", command->path,
1506 "MESSAGE=Failed at step %s spawning %s: %s",
1507 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1508 command->path, strerror(-err),
1517 log_struct_unit(LOG_DEBUG,
1519 "MESSAGE=Forked %s as %lu",
1520 command->path, (unsigned long) pid,
1523 /* We add the new process to the cgroup both in the child (so
1524 * that we can be sure that no user code is ever executed
1525 * outside of the cgroup) and in the parent (so that we can be
1526 * sure that when we kill the cgroup the process will be
1528 if (cgroup_bondings)
1529 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1531 exec_status_start(&command->exec_status, pid);
/* Fills in the non-zero defaults of an ExecContext: I/O priority class
 * IOPRIO_CLASS_BE with data 0, CPU scheduling policy SCHED_OTHER, syslog
 * priority LOG_DAEMON|LOG_INFO with syslog_level_prefix enabled,
 * control_group_persistent set to -1, ignore_sigpipe enabled and
 * timer_slack_nsec set to (nsec_t) -1. */
1537 void exec_context_init(ExecContext *c) {
1541 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1542 c->cpu_sched_policy = SCHED_OTHER;
1543 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1544 c->syslog_level_prefix = true;
1545 c->control_group_persistent = -1;
1546 c->ignore_sigpipe = true;
1547 c->timer_slack_nsec = (nsec_t) -1;
/* Releases the heap-allocated members of an ExecContext and resets the
 * pointers to NULL so the structure can be disposed of or reused safely:
 * string vectors through strv_free(), plain strings and the syscall
 * filter through free(), the capability set through cap_free() and the
 * CPU set through CPU_FREE(). The rlimit slots are reset to NULL as
 * well. */
1550 void exec_context_done(ExecContext *c) {
1555 strv_free(c->environment);
1556 c->environment = NULL;
1558 strv_free(c->environment_files);
1559 c->environment_files = NULL;
1561 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1563 c->rlimit[l] = NULL;
1566 free(c->working_directory);
1567 c->working_directory = NULL;
1568 free(c->root_directory);
1569 c->root_directory = NULL;
1574 free(c->tcpwrap_name);
1575 c->tcpwrap_name = NULL;
1577 free(c->syslog_identifier);
1578 c->syslog_identifier = NULL;
1586 strv_free(c->supplementary_groups);
1587 c->supplementary_groups = NULL;
1592 if (c->capabilities) {
1593 cap_free(c->capabilities);
1594 c->capabilities = NULL;
1597 strv_free(c->read_only_dirs);
1598 c->read_only_dirs = NULL;
1600 strv_free(c->read_write_dirs);
1601 c->read_write_dirs = NULL;
1603 strv_free(c->inaccessible_dirs);
1604 c->inaccessible_dirs = NULL;
1607 CPU_FREE(c->cpuset);
1612 free(c->syscall_filter);
1613 c->syscall_filter = NULL;
1616 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n ExecCommand entries of the
 * array starting at c. */
1626 void exec_command_done_array(ExecCommand *c, unsigned n) {
1629 for (i = 0; i < n; i++)
1630 exec_command_done(c+i);
/* Tears down a linked list of ExecCommand entries: each entry is unlinked
 * with LIST_REMOVE() and passed to exec_command_done(). NOTE(review): the
 * loop header and the release of the entry itself are not visible in this
 * excerpt. */
1633 void exec_command_free_list(ExecCommand *c) {
1637 LIST_REMOVE(ExecCommand, command, c, i);
1638 exec_command_done(i);
/* Calls exec_command_free_list() on each of the n list heads stored in
 * the array c. */
1643 void exec_command_free_array(ExecCommand **c, unsigned n) {
1646 for (i = 0; i < n; i++) {
1647 exec_command_free_list(c[i]);
/* Loads the environment files listed in c->environment_files. Each entry
 * is checked with path_is_absolute() and expanded with glob(); every
 * matching file is parsed with load_env_file() and the result is merged
 * into the accumulated list with strv_env_merge(). A glob() failure maps
 * to -errno, or -EINVAL when errno is unset. NOTE(review): the handling
 * of the per-entry "ignore" flag and of relative paths is not visible in
 * this excerpt. */
1652 int exec_context_load_environment(const ExecContext *c, char ***l) {
1653 char **i, **r = NULL;
1658 STRV_FOREACH(i, c->environment_files) {
1661 bool ignore = false;
1673 if (!path_is_absolute(fn)) {
1682 /* Filename supports globbing, take all matching files */
1685 if (glob(fn, 0, NULL, &pglob) != 0) {
1691 return errno ? -errno : -EINVAL;
1693 count = pglob.gl_pathc;
1702 for (n = 0; n < count; n++) {
1703 k = load_env_file(pglob.gl_pathv[n], &p);
1718 m = strv_env_merge(2, r, p);
1738 static void strv_fprintf(FILE *f, char **l) {
1744 fprintf(f, " %s", *g);
1747 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1759 "%sWorkingDirectory: %s\n"
1760 "%sRootDirectory: %s\n"
1761 "%sNonBlocking: %s\n"
1762 "%sPrivateTmp: %s\n"
1763 "%sControlGroupModify: %s\n"
1764 "%sControlGroupPersistent: %s\n"
1765 "%sPrivateNetwork: %s\n"
1766 "%sIgnoreSIGPIPE: %s\n",
1768 prefix, c->working_directory ? c->working_directory : "/",
1769 prefix, c->root_directory ? c->root_directory : "/",
1770 prefix, yes_no(c->non_blocking),
1771 prefix, yes_no(c->private_tmp),
1772 prefix, yes_no(c->control_group_modify),
1773 prefix, yes_no(c->control_group_persistent),
1774 prefix, yes_no(c->private_network),
1775 prefix, yes_no(c->ignore_sigpipe));
1777 STRV_FOREACH(e, c->environment)
1778 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1780 STRV_FOREACH(e, c->environment_files)
1781 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1783 if (c->tcpwrap_name)
1785 "%sTCPWrapName: %s\n",
1786 prefix, c->tcpwrap_name);
1793 if (c->oom_score_adjust_set)
1795 "%sOOMScoreAdjust: %i\n",
1796 prefix, c->oom_score_adjust);
1798 for (i = 0; i < RLIM_NLIMITS; i++)
1800 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1802 if (c->ioprio_set) {
1806 r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
1810 "%sIOSchedulingClass: %s\n"
1811 "%sIOPriority: %i\n",
1812 prefix, strna(class_str),
1813 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1817 if (c->cpu_sched_set) {
1821 r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
1825 "%sCPUSchedulingPolicy: %s\n"
1826 "%sCPUSchedulingPriority: %i\n"
1827 "%sCPUSchedulingResetOnFork: %s\n",
1828 prefix, strna(policy_str),
1829 prefix, c->cpu_sched_priority,
1830 prefix, yes_no(c->cpu_sched_reset_on_fork));
1835 fprintf(f, "%sCPUAffinity:", prefix);
1836 for (i = 0; i < c->cpuset_ncpus; i++)
1837 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1838 fprintf(f, " %i", i);
1842 if (c->timer_slack_nsec != (nsec_t) -1)
1843 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1846 "%sStandardInput: %s\n"
1847 "%sStandardOutput: %s\n"
1848 "%sStandardError: %s\n",
1849 prefix, exec_input_to_string(c->std_input),
1850 prefix, exec_output_to_string(c->std_output),
1851 prefix, exec_output_to_string(c->std_error));
1857 "%sTTYVHangup: %s\n"
1858 "%sTTYVTDisallocate: %s\n",
1859 prefix, c->tty_path,
1860 prefix, yes_no(c->tty_reset),
1861 prefix, yes_no(c->tty_vhangup),
1862 prefix, yes_no(c->tty_vt_disallocate));
1864 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1865 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1866 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1867 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
1868 char *fac_str, *lvl_str;
1871 r = log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
1875 r = log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
1880 "%sSyslogFacility: %s\n"
1881 "%sSyslogLevel: %s\n",
1882 prefix, strna(fac_str),
1883 prefix, strna(lvl_str));
1888 if (c->capabilities) {
1890 if ((t = cap_to_text(c->capabilities, NULL))) {
1891 fprintf(f, "%sCapabilities: %s\n",
1898 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1900 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1901 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1902 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1903 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1904 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1905 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1907 if (c->capability_bounding_set_drop) {
1909 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1911 for (l = 0; l <= cap_last_cap(); l++)
1912 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1915 if ((t = cap_to_name(l))) {
1916 fprintf(f, " %s", t);
1925 fprintf(f, "%sUser: %s\n", prefix, c->user);
1927 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1929 if (strv_length(c->supplementary_groups) > 0) {
1930 fprintf(f, "%sSupplementaryGroups:", prefix);
1931 strv_fprintf(f, c->supplementary_groups);
1936 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1938 if (strv_length(c->read_write_dirs) > 0) {
1939 fprintf(f, "%sReadWriteDirs:", prefix);
1940 strv_fprintf(f, c->read_write_dirs);
1944 if (strv_length(c->read_only_dirs) > 0) {
1945 fprintf(f, "%sReadOnlyDirs:", prefix);
1946 strv_fprintf(f, c->read_only_dirs);
1950 if (strv_length(c->inaccessible_dirs) > 0) {
1951 fprintf(f, "%sInaccessibleDirs:", prefix);
1952 strv_fprintf(f, c->inaccessible_dirs);
1958 "%sUtmpIdentifier: %s\n",
1959 prefix, c->utmp_id);
/* Marks the start of a process in the status record *s.
 *
 * The line visible here fills s->start_timestamp through
 * dual_timestamp_get(). How pid is consumed is on lines not visible in this
 * excerpt (presumably it is stored in s->pid -- confirm). */
1962 void exec_status_start(ExecStatus *s, pid_t pid) {
/* Stamp the start time (presumably "now"; confirm in dual_timestamp_get()) */
1967 dual_timestamp_get(&s->start_timestamp);
/* Records the exit of process pid in the status record *s.
 *
 * Visible effects: s->exit_timestamp is filled via dual_timestamp_get(); if
 * the context carries a utmp_id, a dead-process record is written with
 * utmp_put_dead_process(); finally exec_context_tty_reset() is called on the
 * context. How code and status are stored in *s is not visible here. */
1970 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* Guard for the case where *s already tracks a non-zero PID different from
 * pid; the action taken in that case is on a line not shown here. */
1973 if (s->pid && s->pid != pid)
1977 dual_timestamp_get(&s->exit_timestamp);
/* Only contexts with a utmp identifier get a dead-process record */
1983 if (context->utmp_id)
1984 utmp_put_dead_process(context->utmp_id, pid, code, status);
1986 exec_context_tty_reset(context);
/* Writes a human-readable description of the status record *s.
 *
 * Every format string shown starts with "%s" fed from prefix, so each output
 * line begins with prefix. The PID is printed cast to unsigned long. The
 * start and exit sections are each printed only when the respective
 * realtime timestamp is greater than zero. */
1990 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() for both timestamps */
1991 char buf[FORMAT_TIMESTAMP_MAX];
2004 prefix, (unsigned long) s->pid);
/* Skip the start timestamp unless its realtime value is positive */
2006 if (s->start_timestamp.realtime > 0)
2008 "%sStart Timestamp: %s\n",
2009 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit timestamp, exit code (as a string via sigchld_code_to_string()) and
 * exit status are printed together, and only once an exit time is recorded */
2011 if (s->exit_timestamp.realtime > 0)
2013 "%sExit Timestamp: %s\n"
2015 "%sExit Status: %i\n",
2016 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2017 prefix, sigchld_code_to_string(s->code),
/* Builds a command-line string from the argument vector argv.
 *
 * The visible lines make two passes over argv: one before a buffer n of k
 * chars is allocated with new(), and one afterwards in which arguments
 * containing any WHITESPACE character are detected with strpbrk()
 * (presumably so they can be quoted -- confirm against the full body).
 * NOTE(review): the return value and the allocation-failure path are not
 * visible in this excerpt. */
2021 char *exec_command_line(char **argv) {
/* First pass over the arguments (presumably accumulating the size k) */
2029 STRV_FOREACH(a, argv)
2032 if (!(n = new(char, k)))
/* Second pass over the arguments, after the buffer has been allocated */
2036 STRV_FOREACH(a, argv) {
/* Arguments that contain whitespace take a separate branch */
2043 if (strpbrk(*a, WHITESPACE)) {
2054 /* FIXME: this doesn't really handle arguments that have
2055 * spaces and ticks in them */
/* Dumps a single command c to f: its command line, then its exec status.
 *
 * The command line is printed with prefix. The status is printed via
 * exec_status_dump() with prefix2, which is prefix plus a tab when the
 * concatenation succeeds and plain prefix otherwise. */
2060 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2062 const char *prefix2;
/* Build a one-tab-deeper prefix; if strappend() yields NULL, fall back to
 * the caller's prefix rather than failing. */
2071 p2 = strappend(prefix, "\t");
2072 prefix2 = p2 ? p2 : prefix;
2074 cmd = exec_command_line(c->argv);
/* If no command line string could be produced, print the ENOMEM error text
 * in its place. */
2077 "%sCommand Line: %s\n",
2078 prefix, cmd ? cmd : strerror(ENOMEM));
2082 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list that starts at c by calling
 * exec_command_dump() on each element with the same f and prefix. */
2087 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c doubles as both list head and iteration variable */
2093 LIST_FOREACH(command, c, c)
2094 exec_command_dump(c, f, prefix);
/* Appends command e to the end of the command list *l.
 *
 * The visible lines locate the current tail with LIST_FIND_TAIL() and insert
 * e after it. NOTE(review): how an empty list is handled is on lines not
 * visible in this excerpt. */
2097 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2104 /* Order matters here: find the current tail and insert the new
 * command after it, so the existing ordering is preserved */
2105 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2106 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for a command: takes path followed by further arguments.
 *
 * The visible lines build a string vector l from path and the variadic
 * arguments via strv_new_ap(), and duplicate path into p with strdup().
 * NOTE(review): how l and p are stored in *c, and the values returned on
 * success and on allocation failure, are not visible in this excerpt. */
2111 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* path is passed as the first element of the new vector */
2119 l = strv_new_ap(path, ap);
/* A separate copy of path is taken; this branch handles strdup() failing */
2125 if (!(p = strdup(path))) {
/* String names for the ExecInput values, indexed by the enum constant and
 * sized by _EXEC_INPUT_MAX. */
2139 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2140 [EXEC_INPUT_NULL] = "null",
2141 [EXEC_INPUT_TTY] = "tty",
2142 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2143 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2144 [EXEC_INPUT_SOCKET] = "socket"
/* Presumably generates the exec_input_to_string() helper used elsewhere in
 * this file (and likely a string-to-enum counterpart) -- confirm against
 * the macro definition. */
2147 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput values, indexed by the enum constant and
 * sized by _EXEC_OUTPUT_MAX. The "+console" entries name the *_AND_CONSOLE
 * variants of the syslog, kmsg and journal outputs. */
2149 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2150 [EXEC_OUTPUT_INHERIT] = "inherit",
2151 [EXEC_OUTPUT_NULL] = "null",
2152 [EXEC_OUTPUT_TTY] = "tty",
2153 [EXEC_OUTPUT_SYSLOG] = "syslog",
2154 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2155 [EXEC_OUTPUT_KMSG] = "kmsg",
2156 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2157 [EXEC_OUTPUT_JOURNAL] = "journal",
2158 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2159 [EXEC_OUTPUT_SOCKET] = "socket"
/* Presumably generates the exec_output_to_string() helper used elsewhere in
 * this file (and likely a string-to-enum counterpart) -- confirm against
 * the macro definition. */
2162 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);