1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
43 #include <security/pam_appl.h>
49 #include "capability.h"
53 #include "securebits.h"
55 #include "namespace.h"
57 #include "exit-status.h"
59 #include "utmp-wtmp.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
64 /* This assumes there is a 'tty' group */
67 static int shift_fds(int fds[], unsigned n_fds) {
68 int start, restart_from;
73 /* Modifies the fds array! (sorts it) */
83 for (i = start; i < (int) n_fds; i++) {
86 /* Already at right index? */
90 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
93 close_nointr_nofail(fds[i]);
96 /* Hmm, the fd we wanted isn't free? Then
97 * let's remember that and try again from here*/
98 if (nfd != i+3 && restart_from < 0)
102 if (restart_from < 0)
105 start = restart_from;
111 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
120 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
122 for (i = 0; i < n_fds; i++) {
124 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
127 /* We unconditionally drop FD_CLOEXEC from the fds,
128 * since after all we want to pass these fds to our
131 if ((r = fd_cloexec(fds[i], false)) < 0)
138 static const char *tty_path(const ExecContext *context) {
141 if (context->tty_path)
142 return context->tty_path;
144 return "/dev/console";
147 void exec_context_tty_reset(const ExecContext *context) {
150 if (context->tty_vhangup)
151 terminal_vhangup(tty_path(context));
153 if (context->tty_reset)
154 reset_terminal(tty_path(context));
156 if (context->tty_vt_disallocate && context->tty_path)
157 vt_disallocate(context->tty_path);
160 static int open_null_as(int flags, int nfd) {
165 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
169 r = dup2(fd, nfd) < 0 ? -errno : nfd;
170 close_nointr_nofail(fd);
177 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
179 union sockaddr_union sa;
182 assert(output < _EXEC_OUTPUT_MAX);
186 fd = socket(AF_UNIX, SOCK_STREAM, 0);
191 sa.un.sun_family = AF_UNIX;
192 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
194 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
196 close_nointr_nofail(fd);
200 if (shutdown(fd, SHUT_RD) < 0) {
201 close_nointr_nofail(fd);
212 context->syslog_identifier ? context->syslog_identifier : ident,
213 context->syslog_priority,
214 !!context->syslog_level_prefix,
215 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
216 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
217 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
220 r = dup2(fd, nfd) < 0 ? -errno : nfd;
221 close_nointr_nofail(fd);
227 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
233 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
237 r = dup2(fd, nfd) < 0 ? -errno : nfd;
238 close_nointr_nofail(fd);
245 static bool is_terminal_input(ExecInput i) {
247 i == EXEC_INPUT_TTY ||
248 i == EXEC_INPUT_TTY_FORCE ||
249 i == EXEC_INPUT_TTY_FAIL;
252 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
254 if (is_terminal_input(std_input) && !apply_tty_stdin)
255 return EXEC_INPUT_NULL;
257 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
258 return EXEC_INPUT_NULL;
263 static int fixup_output(ExecOutput std_output, int socket_fd) {
265 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
266 return EXEC_OUTPUT_INHERIT;
271 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
276 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
280 case EXEC_INPUT_NULL:
281 return open_null_as(O_RDONLY, STDIN_FILENO);
284 case EXEC_INPUT_TTY_FORCE:
285 case EXEC_INPUT_TTY_FAIL: {
288 if ((fd = acquire_terminal(
290 i == EXEC_INPUT_TTY_FAIL,
291 i == EXEC_INPUT_TTY_FORCE,
295 if (fd != STDIN_FILENO) {
296 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
297 close_nointr_nofail(fd);
304 case EXEC_INPUT_SOCKET:
305 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
308 assert_not_reached("Unknown input type");
312 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
319 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
320 o = fixup_output(context->std_output, socket_fd);
322 /* This expects the input is already set up */
326 case EXEC_OUTPUT_INHERIT:
328 /* If input got downgraded, inherit the original value */
329 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
330 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
332 /* If the input is connected to anything that's not a /dev/null, inherit that... */
333 if (i != EXEC_INPUT_NULL)
334 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
336 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
338 return STDOUT_FILENO;
340 /* We need to open /dev/null here anew, to get the
341 * right access mode. So we fall through */
343 case EXEC_OUTPUT_NULL:
344 return open_null_as(O_WRONLY, STDOUT_FILENO);
346 case EXEC_OUTPUT_TTY:
347 if (is_terminal_input(i))
348 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
350 /* We don't reset the terminal if this is just about output */
351 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
353 case EXEC_OUTPUT_SYSLOG:
354 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
355 case EXEC_OUTPUT_KMSG:
356 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
357 case EXEC_OUTPUT_JOURNAL:
358 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
359 return connect_logger_as(context, o, ident, STDOUT_FILENO);
361 case EXEC_OUTPUT_SOCKET:
362 assert(socket_fd >= 0);
363 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
366 assert_not_reached("Unknown output type");
370 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
377 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
378 o = fixup_output(context->std_output, socket_fd);
379 e = fixup_output(context->std_error, socket_fd);
381 /* This expects the input and output are already set up */
383 /* Don't change the stderr file descriptor if we inherit all
384 * the way and are not on a tty */
385 if (e == EXEC_OUTPUT_INHERIT &&
386 o == EXEC_OUTPUT_INHERIT &&
387 i == EXEC_INPUT_NULL &&
388 !is_terminal_input(context->std_input) &&
390 return STDERR_FILENO;
392 /* Duplicate from stdout if possible */
393 if (e == o || e == EXEC_OUTPUT_INHERIT)
394 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
398 case EXEC_OUTPUT_NULL:
399 return open_null_as(O_WRONLY, STDERR_FILENO);
401 case EXEC_OUTPUT_TTY:
402 if (is_terminal_input(i))
403 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
405 /* We don't reset the terminal if this is just about output */
406 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
408 case EXEC_OUTPUT_SYSLOG:
409 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
410 case EXEC_OUTPUT_KMSG:
411 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
412 case EXEC_OUTPUT_JOURNAL:
413 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
414 return connect_logger_as(context, e, ident, STDERR_FILENO);
416 case EXEC_OUTPUT_SOCKET:
417 assert(socket_fd >= 0);
418 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
421 assert_not_reached("Unknown error type");
425 static int chown_terminal(int fd, uid_t uid) {
430 /* This might fail. What matters are the results. */
431 (void) fchown(fd, uid, -1);
432 (void) fchmod(fd, TTY_MODE);
434 if (fstat(fd, &st) < 0)
437 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
443 static int setup_confirm_stdio(const ExecContext *context,
445 int *_saved_stdout) {
446 int fd = -1, saved_stdin, saved_stdout = -1, r;
449 assert(_saved_stdin);
450 assert(_saved_stdout);
452 /* This returns positive EXIT_xxx return values instead of
453 * negative errno style values! */
455 if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
458 if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
463 if ((fd = acquire_terminal(
465 context->std_input == EXEC_INPUT_TTY_FAIL,
466 context->std_input == EXEC_INPUT_TTY_FORCE,
472 if (chown_terminal(fd, getuid()) < 0) {
477 if (dup2(fd, STDIN_FILENO) < 0) {
482 if (dup2(fd, STDOUT_FILENO) < 0) {
488 close_nointr_nofail(fd);
490 *_saved_stdin = saved_stdin;
491 *_saved_stdout = saved_stdout;
496 if (saved_stdout >= 0)
497 close_nointr_nofail(saved_stdout);
499 if (saved_stdin >= 0)
500 close_nointr_nofail(saved_stdin);
503 close_nointr_nofail(fd);
508 static int restore_confirm_stdio(const ExecContext *context,
516 assert(*saved_stdin >= 0);
517 assert(saved_stdout);
518 assert(*saved_stdout >= 0);
520 /* This returns positive EXIT_xxx return values instead of
521 * negative errno style values! */
523 if (is_terminal_input(context->std_input)) {
525 /* The service wants terminal input. */
529 context->std_output == EXEC_OUTPUT_INHERIT ||
530 context->std_output == EXEC_OUTPUT_TTY;
533 /* If the service doesn't want a controlling terminal,
534 * then we need to get rid entirely of what we have
537 if (release_terminal() < 0)
540 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
543 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
546 *keep_stdout = *keep_stdin = false;
552 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
553 bool keep_groups = false;
558 /* Look up and set GID and supplementary group list. Here too
559 * we avoid NSS lookups for gid=0. */
561 if (context->group || username) {
563 if (context->group) {
564 const char *g = context->group;
566 if ((r = get_group_creds(&g, &gid)) < 0)
570 /* First step, initialize groups from /etc/group */
571 if (username && gid != 0) {
572 if (initgroups(username, gid) < 0)
578 /* Second step, set our gids */
579 if (setresgid(gid, gid, gid) < 0)
583 if (context->supplementary_groups) {
588 /* Final step, initialize any manually set supplementary groups */
589 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
591 if (!(gids = new(gid_t, ngroups_max)))
595 if ((k = getgroups(ngroups_max, gids)) < 0) {
602 STRV_FOREACH(i, context->supplementary_groups) {
605 if (k >= ngroups_max) {
611 r = get_group_creds(&g, gids+k);
620 if (setgroups(k, gids) < 0) {
631 static int enforce_user(const ExecContext *context, uid_t uid) {
635 /* Sets (but doesn't look up) the uid and makes sure we keep the
636 * capabilities while doing so. */
638 if (context->capabilities) {
640 static const cap_value_t bits[] = {
641 CAP_SETUID, /* Necessary so that we can run setresuid() below */
642 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
645 /* First step: If we need to keep capabilities but
646 * drop privileges we need to make sure we keep our
647 * caps, while we drop privileges. */
/* NOTE(review): SECURE_KEEP_CAPS is OR-ed in as if it were a bit mask. In
 * <linux/securebits.h> SECURE_* are bit indices and SECBIT_* are the masks;
 * confirm which convention the project's securebits.h follows. */
649 int sb = context->secure_bits|SECURE_KEEP_CAPS;
651 if (prctl(PR_GET_SECUREBITS) != sb)
652 if (prctl(PR_SET_SECUREBITS, sb) < 0)
656 /* Second step: set the capabilities. This will reduce
657 * the capabilities to the minimum we need. */
659 if (!(d = cap_dup(context->capabilities)))
662 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
663 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
669 if (cap_set_proc(d) < 0) {
678 /* Third step: actually set the uids */
679 if (setresuid(uid, uid, uid) < 0)
682 /* At this point we should have all necessary capabilities but
683 are otherwise a normal user. However, the caps might have been
684 corrupted due to the setresuid() so we need to clean them up
685 later. This is done outside of this call. */
692 static int null_conv(
694 const struct pam_message **msg,
695 struct pam_response **resp,
698 /* We don't support conversations */
/* Opens a PAM session (pam_start, PAM_TTY item, pam_acct_mgmt,
 * pam_open_session), fetches the PAM environment list, and forks a helper
 * child renamed "(sd-pam)". The child closes the passed fds, tries to drop
 * to `uid`, waits for SIGTERM or the death of its parent, and then closes
 * the session; the parent carries on.
 * NOTE(review): this excerpt is missing interior lines (the remaining
 * parameters, local declarations, and the error/cleanup paths), so the
 * exact return contract cannot be stated from here. */
703 static int setup_pam(
709 int fds[], unsigned n_fds) {
711 static const struct pam_conv conv = {
716 pam_handle_t *handle = NULL;
718 int pam_code = PAM_SUCCESS;
721 bool close_session = false;
722 pid_t pam_pid = 0, parent_pid;
728 /* We set up PAM in the parent process, then fork. The child
729 * will then stay around until killed via PR_SET_PDEATHSIG or
730 * systemd via the cgroup logic. It will then remove the PAM
731 * session again. The parent process will exec() the actual
732 * daemon. We do things this way to ensure that the main PID
733 * of the daemon is the one we initially fork()ed. */
735 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
741 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
744 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
747 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
750 close_session = true;
752 if ((!(e = pam_getenvlist(handle)))) {
753 pam_code = PAM_BUF_ERR;
757 /* Block SIGTERM, so that we know that it won't get lost in
759 if (sigemptyset(&ss) < 0 ||
760 sigaddset(&ss, SIGTERM) < 0 ||
761 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
764 parent_pid = getpid();
766 if ((pam_pid = fork()) < 0)
773 /* The child's job is to reset the PAM session on
776 /* This string must fit in 10 chars (i.e. the length
777 * of "/sbin/init"), to look pretty in /bin/ps */
778 rename_process("(sd-pam)");
780 /* Make sure we don't keep open the passed fds in this
781 child. We assume that otherwise only those fds are
782 open here that have been opened by PAM. */
783 close_many(fds, n_fds);
785 /* Drop privileges - we don't need any to pam_close_session
786 * and this will make PR_SET_PDEATHSIG work in most cases.
787 * If this fails, ignore the error - but expect sd-pam threads
788 * to fail to exit normally. setresuid() reports the reason in errno. */
789 if (setresuid(uid, uid, uid) < 0)
790 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(errno));
792 /* Wait until our parent died. This will only work if
793 * the above setresuid() succeeds, otherwise the kernel
794 * will not allow unprivileged parents kill their privileged
795 * children this way. We rely on the control groups kill logic
796 * to do the rest for us. */
797 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
800 /* Check if our parent process might already have
802 if (getppid() == parent_pid) {
804 if (sigwait(&ss, &sig) < 0) {
811 assert(sig == SIGTERM);
816 /* If our parent died we'll end the session */
817 if (getppid() != parent_pid)
818 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
824 pam_end(handle, pam_code | PAM_DATA_SILENT);
828 /* If the child was forked off successfully it will do all the
829 * cleanups, so forget about the handle here. */
832 /* Unblock SIGTERM again in the parent */
833 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
836 /* We close the log explicitly here, since the PAM modules
837 * might have opened it, but we don't want this fd around. */
846 if (pam_code != PAM_SUCCESS)
847 err = -EPERM; /* PAM errors do not map to errno */
853 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
855 pam_end(handle, pam_code | PAM_DATA_SILENT);
863 kill(pam_pid, SIGTERM);
864 kill(pam_pid, SIGCONT);
871 static int do_capability_bounding_set_drop(uint64_t drop) {
873 cap_t old_cap = NULL, new_cap = NULL;
877 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
878 * in the effective set (yes, the kernel drops that when
879 * executing init!), so get it back temporarily so that we can
880 * call PR_CAPBSET_DROP. */
882 old_cap = cap_get_proc();
886 if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
892 static const cap_value_t v = CAP_SETPCAP;
894 new_cap = cap_dup(old_cap);
900 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
905 if (cap_set_proc(new_cap) < 0) {
911 for (i = 0; i <= cap_last_cap(); i++)
912 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
913 if (prctl(PR_CAPBSET_DROP, i) < 0) {
926 cap_set_proc(old_cap);
933 static void rename_process_from_path(const char *path) {
934 char process_name[11];
938 /* This resulting string must fit in 10 chars (i.e. the length
939 * of "/sbin/init") to look pretty in /bin/ps */
941 p = path_get_file_name(path);
943 rename_process("(...)");
949 /* The end of the process name is usually more
950 * interesting, since the first bit might just be
956 process_name[0] = '(';
957 memcpy(process_name+1, p, l);
958 process_name[1+l] = ')';
959 process_name[1+l+1] = 0;
961 rename_process(process_name);
/* Forks a child for `command`. In the child the ExecContext is applied step
 * by step (signal setup, fd closing, stdio, cgroups, OOM/nice/scheduler/
 * affinity/ioprio/timer-slack settings, user and group credentials, PAM,
 * namespaces, chroot/chdir, rlimits, capabilities, environment) and the
 * command is started with execve(). The parent adds the child to the
 * cgroups and records it via exec_status_start().
 * NOTE(review): this excerpt is missing interior lines (remaining
 * parameters, local declarations, error labels and returns); only the
 * visible lines were changed. */
964 int exec_spawn(ExecCommand *command,
966 const ExecContext *context,
967 int fds[], unsigned n_fds,
969 bool apply_permissions,
971 bool apply_tty_stdin,
973 CGroupBonding *cgroup_bondings,
974 CGroupAttribute *cgroup_attributes,
975 const char *cgroup_suffix,
983 char **files_env = NULL;
988 assert(fds || n_fds <= 0);
990 if (context->std_input == EXEC_INPUT_SOCKET ||
991 context->std_output == EXEC_OUTPUT_SOCKET ||
992 context->std_error == EXEC_OUTPUT_SOCKET) {
1004 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
1005 log_error("Failed to load environment files: %s", strerror(-r));
1010 argv = command->argv;
1012 if (!(line = exec_command_line(argv))) {
1017 log_debug("About to execute: %s", line);
1020 r = cgroup_bonding_realize_list(cgroup_bondings);
1024 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1026 if ((pid = fork()) < 0) {
1034 const char *username = NULL, *home = NULL;
1035 uid_t uid = (uid_t) -1;
1036 gid_t gid = (gid_t) -1;
1037 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1039 int saved_stdout = -1, saved_stdin = -1;
1040 bool keep_stdout = false, keep_stdin = false, set_access = false;
1044 rename_process_from_path(command->path);
1046 /* We reset exactly these signals, since they are the
1047 * only ones we set to SIG_IGN in the main daemon. All
1048 * others we leave untouched because we set them to
1049 * SIG_DFL or a valid handler initially, both of which
1050 * will be demoted to SIG_DFL. */
1051 default_signals(SIGNALS_CRASH_HANDLER,
1052 SIGNALS_IGNORE, -1);
1054 if (context->ignore_sigpipe)
1055 ignore_signals(SIGPIPE, -1);
1057 assert_se(sigemptyset(&ss) == 0);
1058 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1060 r = EXIT_SIGNAL_MASK;
1065 if (idle_pipe[1] >= 0)
1066 close_nointr_nofail(idle_pipe[1]);
1067 if (idle_pipe[0] >= 0) {
1068 fd_wait_for_event(idle_pipe[0], POLLHUP, DEFAULT_TIMEOUT_USEC);
1069 close_nointr_nofail(idle_pipe[0]);
1073 /* Close sockets very early to make sure we don't
1074 * block init reexecution because it cannot bind its
1077 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1078 socket_fd >= 0 ? 1 : n_fds);
1084 if (!context->same_pgrp)
1091 if (context->tcpwrap_name) {
1093 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1099 for (i = 0; i < (int) n_fds; i++) {
1100 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1108 exec_context_tty_reset(context);
1110 /* We skip the confirmation step if we shall not apply the TTY */
1111 if (confirm_spawn &&
1112 (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1115 /* Set up terminal for the question */
1116 if ((r = setup_confirm_stdio(context,
1117 &saved_stdin, &saved_stdout))) {
1122 /* Now ask the question. */
1123 if (!(line = exec_command_line(argv))) {
1129 r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1132 if (r < 0 || response == 'n') {
1136 } else if (response == 's') {
1141 /* Release terminal for the question */
1142 if ((r = restore_confirm_stdio(context,
1143 &saved_stdin, &saved_stdout,
1144 &keep_stdin, &keep_stdout))) {
1150 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1151 * must be sure to drop O_NONBLOCK */
1153 fd_nonblock(socket_fd, false);
1156 err = setup_input(context, socket_fd, apply_tty_stdin);
1164 err = setup_output(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1171 err = setup_error(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1177 if (cgroup_bondings) {
1178 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1185 if (context->oom_score_adjust_set) {
1188 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1191 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1192 /* Compatibility with Linux <= 2.6.35 */
1196 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1197 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1199 snprintf(t, sizeof(t), "%i", adj);
1202 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1203 && errno != EACCES) {
1205 r = EXIT_OOM_ADJUST;
1211 if (context->nice_set)
1212 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1218 if (context->cpu_sched_set) {
1219 struct sched_param param;
1222 param.sched_priority = context->cpu_sched_priority;
1224 if (sched_setscheduler(0, context->cpu_sched_policy |
1225 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1227 r = EXIT_SETSCHEDULER;
1232 if (context->cpuset)
1233 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1235 r = EXIT_CPUAFFINITY;
1239 if (context->ioprio_set)
1240 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1246 if (context->timer_slack_nsec_set)
1247 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1249 r = EXIT_TIMERSLACK;
1253 if (context->utmp_id)
1254 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1256 if (context->user) {
1257 username = context->user;
1258 err = get_user_creds(&username, &uid, &gid, &home);
1264 if (is_terminal_input(context->std_input)) {
1265 err = chown_terminal(STDIN_FILENO, uid);
1272 if (cgroup_bondings && context->control_group_modify) {
1273 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1275 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1285 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1286 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (gid_t) -1, context->control_group_persistent);
1293 if (apply_permissions) {
1294 err = enforce_groups(context, username, gid);
1301 umask(context->umask);
1304 if (context->pam_name && username) {
1305 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1312 if (context->private_network) {
1313 if (unshare(CLONE_NEWNET) < 0) {
1322 if (strv_length(context->read_write_dirs) > 0 ||
1323 strv_length(context->read_only_dirs) > 0 ||
1324 strv_length(context->inaccessible_dirs) > 0 ||
1325 context->mount_flags != MS_SHARED ||
1326 context->private_tmp) {
1327 err = setup_namespace(context->read_write_dirs,
1328 context->read_only_dirs,
1329 context->inaccessible_dirs,
1330 context->private_tmp,
1331 context->mount_flags);
1339 if (context->root_directory)
1340 if (chroot(context->root_directory) < 0) {
1346 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1355 if (asprintf(&d, "%s/%s",
1356 context->root_directory ? context->root_directory : "",
1357 context->working_directory ? context->working_directory : "") < 0) {
1373 /* We repeat the fd closing here, to make sure that
1374 * nothing is leaked from the PAM modules */
1375 err = close_all_fds(fds, n_fds);
1377 err = shift_fds(fds, n_fds);
1379 err = flags_fds(fds, n_fds, context->non_blocking);
1385 if (apply_permissions) {
1387 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1388 if (!context->rlimit[i])
1391 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1398 if (context->capability_bounding_set_drop) {
1399 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1401 r = EXIT_CAPABILITIES;
1406 if (context->user) {
1407 err = enforce_user(context, uid);
1414 /* PR_GET_SECUREBITS is not privileged, while
1415 * PR_SET_SECUREBITS is. So to suppress
1416 * potential EPERMs we'll try not to call
1417 * PR_SET_SECUREBITS unless necessary. */
1418 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1419 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1421 r = EXIT_SECUREBITS;
1425 if (context->capabilities)
1426 if (cap_set_proc(context->capabilities) < 0) {
1428 r = EXIT_CAPABILITIES;
1433 if (!(our_env = new0(char*, 7))) {
1440 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1441 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1448 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1455 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1456 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1462 if (is_terminal_input(context->std_input) ||
1463 context->std_output == EXEC_OUTPUT_TTY ||
1464 context->std_error == EXEC_OUTPUT_TTY)
1465 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1473 if (!(final_env = strv_env_merge(
1477 context->environment,
1486 if (!(final_argv = replace_env_argv(argv, final_env))) {
1492 final_env = strv_env_clean(final_env);
1494 execve(command->path, final_argv, final_env);
1501 log_warning("Failed at step %s spawning %s: %s",
1502 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1503 command->path, strerror(-err));
1507 strv_free(final_env);
1509 strv_free(files_env);
1510 strv_free(final_argv);
1512 if (saved_stdin >= 0)
1513 close_nointr_nofail(saved_stdin);
1515 if (saved_stdout >= 0)
1516 close_nointr_nofail(saved_stdout);
1521 strv_free(files_env);
1523 /* We add the new process to the cgroup both in the child (so
1524 * that we can be sure that no user code is ever executed
1525 * outside of the cgroup) and in the parent (so that we can be
1526 * sure that when we kill the cgroup the process will be
1528 if (cgroup_bondings)
1529 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1531 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1533 exec_status_start(&command->exec_status, pid);
1539 strv_free(files_env);
1544 void exec_context_init(ExecContext *c) {
1548 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1549 c->cpu_sched_policy = SCHED_OTHER;
1550 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1551 c->syslog_level_prefix = true;
1552 c->mount_flags = MS_SHARED;
1553 c->kill_signal = SIGTERM;
1554 c->send_sigkill = true;
1555 c->control_group_persistent = -1;
1556 c->ignore_sigpipe = true;
1559 void exec_context_done(ExecContext *c) {
1564 strv_free(c->environment);
1565 c->environment = NULL;
1567 strv_free(c->environment_files);
1568 c->environment_files = NULL;
1570 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1572 c->rlimit[l] = NULL;
1575 free(c->working_directory);
1576 c->working_directory = NULL;
1577 free(c->root_directory);
1578 c->root_directory = NULL;
1583 free(c->tcpwrap_name);
1584 c->tcpwrap_name = NULL;
1586 free(c->syslog_identifier);
1587 c->syslog_identifier = NULL;
1595 strv_free(c->supplementary_groups);
1596 c->supplementary_groups = NULL;
1601 if (c->capabilities) {
1602 cap_free(c->capabilities);
1603 c->capabilities = NULL;
1606 strv_free(c->read_only_dirs);
1607 c->read_only_dirs = NULL;
1609 strv_free(c->read_write_dirs);
1610 c->read_write_dirs = NULL;
1612 strv_free(c->inaccessible_dirs);
1613 c->inaccessible_dirs = NULL;
1616 CPU_FREE(c->cpuset);
1622 void exec_command_done(ExecCommand *c) {
1632 void exec_command_done_array(ExecCommand *c, unsigned n) {
1635 for (i = 0; i < n; i++)
1636 exec_command_done(c+i);
1639 void exec_command_free_list(ExecCommand *c) {
1643 LIST_REMOVE(ExecCommand, command, c, i);
1644 exec_command_done(i);
1649 void exec_command_free_array(ExecCommand **c, unsigned n) {
1652 for (i = 0; i < n; i++) {
1653 exec_command_free_list(c[i]);
1658 int exec_context_load_environment(const ExecContext *c, char ***l) {
1659 char **i, **r = NULL;
1664 STRV_FOREACH(i, c->environment_files) {
1667 bool ignore = false;
1677 if (!path_is_absolute(fn)) {
1686 if ((k = load_env_file(fn, &p)) < 0) {
1700 m = strv_env_merge(2, r, p);
1716 static void strv_fprintf(FILE *f, char **l) {
1722 fprintf(f, " %s", *g);
/* Writes a human-readable dump of the ExecContext `c` to `f`, one
 * "Key: value" line per setting, each line starting with `prefix`.
 * NOTE(review): this excerpt is missing interior lines (most fprintf(f,
 * openers, several conditions and closing braces).
 * NOTE(review): control_group_persistent is initialised to -1 and compared
 * with >= 0 elsewhere in this file, so it looks like a tristate; passing it
 * to yes_no() presumably reports the unset state as a plain boolean -
 * confirm.
 * NOTE(review): the secure_bits tests below use SECURE_* as bit masks. In
 * <linux/securebits.h> SECURE_* are bit indices and SECBIT_* are the masks;
 * confirm which convention the project's securebits.h follows. */
1725 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1737 "%sWorkingDirectory: %s\n"
1738 "%sRootDirectory: %s\n"
1739 "%sNonBlocking: %s\n"
1740 "%sPrivateTmp: %s\n"
1741 "%sControlGroupModify: %s\n"
1742 "%sControlGroupPersistent: %s\n"
1743 "%sPrivateNetwork: %s\n",
1745 prefix, c->working_directory ? c->working_directory : "/",
1746 prefix, c->root_directory ? c->root_directory : "/",
1747 prefix, yes_no(c->non_blocking),
1748 prefix, yes_no(c->private_tmp),
1749 prefix, yes_no(c->control_group_modify),
1750 prefix, yes_no(c->control_group_persistent),
1751 prefix, yes_no(c->private_network));
1753 STRV_FOREACH(e, c->environment)
1754 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1756 STRV_FOREACH(e, c->environment_files)
1757 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1759 if (c->tcpwrap_name)
1761 "%sTCPWrapName: %s\n",
1762 prefix, c->tcpwrap_name);
1769 if (c->oom_score_adjust_set)
1771 "%sOOMScoreAdjust: %i\n",
1772 prefix, c->oom_score_adjust);
1774 for (i = 0; i < RLIM_NLIMITS; i++)
1776 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1780 "%sIOSchedulingClass: %s\n"
1781 "%sIOPriority: %i\n",
1782 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1783 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1785 if (c->cpu_sched_set)
1787 "%sCPUSchedulingPolicy: %s\n"
1788 "%sCPUSchedulingPriority: %i\n"
1789 "%sCPUSchedulingResetOnFork: %s\n",
1790 prefix, sched_policy_to_string(c->cpu_sched_policy),
1791 prefix, c->cpu_sched_priority,
1792 prefix, yes_no(c->cpu_sched_reset_on_fork));
1795 fprintf(f, "%sCPUAffinity:", prefix);
1796 for (i = 0; i < c->cpuset_ncpus; i++)
1797 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1798 fprintf(f, " %i", i);
1802 if (c->timer_slack_nsec_set)
1803 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1806 "%sStandardInput: %s\n"
1807 "%sStandardOutput: %s\n"
1808 "%sStandardError: %s\n",
1809 prefix, exec_input_to_string(c->std_input),
1810 prefix, exec_output_to_string(c->std_output),
1811 prefix, exec_output_to_string(c->std_error));
1817 "%sTTYVHangup: %s\n"
1818 "%sTTYVTDisallocate: %s\n",
1819 prefix, c->tty_path,
1820 prefix, yes_no(c->tty_reset),
1821 prefix, yes_no(c->tty_vhangup),
1822 prefix, yes_no(c->tty_vt_disallocate));
1824 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1825 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1826 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1827 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1829 "%sSyslogFacility: %s\n"
1830 "%sSyslogLevel: %s\n",
1831 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1832 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1834 if (c->capabilities) {
1836 if ((t = cap_to_text(c->capabilities, NULL))) {
1837 fprintf(f, "%sCapabilities: %s\n",
1844 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1846 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1847 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1848 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1849 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1850 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1851 (c->secure_bits & SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
1853 if (c->capability_bounding_set_drop) {
1855 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1857 for (l = 0; l <= cap_last_cap(); l++)
1858 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1861 if ((t = cap_to_name(l))) {
1862 fprintf(f, " %s", t);
1871 fprintf(f, "%sUser: %s\n", prefix, c->user);
1873 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1875 if (strv_length(c->supplementary_groups) > 0) {
1876 fprintf(f, "%sSupplementaryGroups:", prefix);
1877 strv_fprintf(f, c->supplementary_groups);
1882 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1884 if (strv_length(c->read_write_dirs) > 0) {
1885 fprintf(f, "%sReadWriteDirs:", prefix);
1886 strv_fprintf(f, c->read_write_dirs);
1890 if (strv_length(c->read_only_dirs) > 0) {
1891 fprintf(f, "%sReadOnlyDirs:", prefix);
1892 strv_fprintf(f, c->read_only_dirs);
1896 if (strv_length(c->inaccessible_dirs) > 0) {
1897 fprintf(f, "%sInaccessibleDirs:", prefix);
1898 strv_fprintf(f, c->inaccessible_dirs);
1904 "%sKillSignal: SIG%s\n"
1905 "%sSendSIGKILL: %s\n"
1906 "%sIgnoreSIGPIPE: %s\n",
1907 prefix, kill_mode_to_string(c->kill_mode),
1908 prefix, signal_to_string(c->kill_signal),
1909 prefix, yes_no(c->send_sigkill),
1910 prefix, yes_no(c->ignore_sigpipe));
1914 "%sUtmpIdentifier: %s\n",
1915 prefix, c->utmp_id);
/* Begins a status record for a started process: the visible code fills
 * s->start_timestamp via dual_timestamp_get(). The statements between the
 * signature and that call are not part of this listing; presumably they
 * store pid in *s -- TODO confirm against the full source. */
1918 void exec_status_start(ExecStatus *s, pid_t pid) {
1923 dual_timestamp_get(&s->start_timestamp);
/* Records the exit of process pid in *s and then runs the exit handling
 * for the execution context. Only part of the body appears in this
 * listing: the statement guarded by the pid comparison and any stores of
 * pid, code and status into *s fall in the gaps. */
1926 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* True when *s already holds a non-zero pid that differs from the one now
 * exiting. The guarded statement is not visible here (it is NOT the
 * dual_timestamp_get() call below); presumably it resets *s -- TODO
 * confirm. */
1929 if (s->pid && s->pid != pid)
1933 dual_timestamp_get(&s->exit_timestamp);
/* A context with a utmp identifier gets the exit reported through
 * utmp_put_dead_process(). */
1939 if (context->utmp_id)
1940 utmp_put_dead_process(context->utmp_id, pid, code, status);
/* NOTE(review): context is dereferenced above and passed on here; whether
 * a NULL check guards these lines cannot be seen in this listing. */
1942 exec_context_tty_reset(context);
/* Prints the contents of *s as human-readable "Key: value" lines, each
 * beginning with prefix. The fprintf(f, ...) openers of the statements
 * below fall in gaps of this listing, as does at least one format line of
 * the exit block. */
1946 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to both format_timestamp() calls. */
1947 char buf[FORMAT_TIMESTAMP_MAX];
/* The pid is cast to unsigned long for printing. */
1960 prefix, (unsigned long) s->pid);
/* The start timestamp is printed only when its realtime value is
 * non-zero. */
1962 if (s->start_timestamp.realtime > 0)
1964 "%sStart Timestamp: %s\n",
1965 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise, exit information is printed only when an exit timestamp has
 * been recorded. */
1967 if (s->exit_timestamp.realtime > 0)
1969 "%sExit Timestamp: %s\n"
1971 "%sExit Status: %i\n",
1972 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1973 prefix, sigchld_code_to_string(s->code),
/* Produces a single string for the argument vector argv. From what is
 * visible here: argv is walked twice with STRV_FOREACH, a char buffer of k
 * elements is allocated with new(), and in the second pass arguments that
 * contain any WHITESPACE character take a separate branch. The first pass
 * presumably computes k and the branch presumably quotes the argument, but
 * neither body is visible -- TODO confirm. The caller presumably owns the
 * returned buffer; verify against the full source. */
1977 char *exec_command_line(char **argv) {
1985 STRV_FOREACH(a, argv)
/* Allocation failure is detected here; the handling falls in a gap. */
1988 if (!(n = new(char, k)))
1992 STRV_FOREACH(a, argv) {
1999 if (strpbrk(*a, WHITESPACE)) {
2010 /* FIXME: this doesn't really handle arguments that have
2011 * spaces and ticks in them */
/* Dumps a single ExecCommand: first its command line, then its
 * exec_status via exec_status_dump() using a prefix extended by one tab.
 * The fprintf opener and any release of p2/cmd fall in gaps of this
 * listing. */
2016 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2018 const char *prefix2;
/* prefix2 is prefix plus "\t"; if strappend() returns NULL the plain
 * prefix is used instead, so the dump still proceeds. */
2027 p2 = strappend(prefix, "\t");
2028 prefix2 = p2 ? p2 : prefix;
2030 cmd = exec_command_line(c->argv);
/* If no command line string could be built, the ENOMEM error text is
 * printed in its place. */
2033 "%sCommand Line: %s\n",
2034 prefix, cmd ? cmd : strerror(ENOMEM));
2038 exec_status_dump(&c->exec_status, f, prefix2);
/* Calls exec_command_dump() with f and prefix for each entry that
 * LIST_FOREACH visits on the "command" list. The parameter c is passed to
 * the macro both as the cursor and as the list head. */
2043 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2049 LIST_FOREACH(command, c, c)
2050 exec_command_dump(c, f, prefix);
/* Adds e at the end of the command list *l: LIST_FIND_TAIL locates the
 * current last element (end) and LIST_INSERT_AFTER links e behind it.
 * NOTE(review): the declaration of end and any special case for an empty
 * list fall in gaps of this listing. */
2053 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2060 /* It's kind of important, that we keep the order here */
2061 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2062 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for an ExecCommand. The visible code builds a string
 * vector from path and the variadic arguments with strv_new_ap() and takes
 * a strdup() copy of path. How the results are stored in c, the va_list
 * setup, and the return values fall in gaps of this listing.
 * NOTE(review): confirm that l is released when strdup() fails. */
2067 int exec_command_set(ExecCommand *c, const char *path, ...) {
2075 l = strv_new_ap(path, ap);
2081 if (!(p = strdup(path))) {
/* String names for the ExecInput values, indexed by enum value and sized
 * by _EXEC_INPUT_MAX. DEFINE_STRING_TABLE_LOOKUP is presumably what
 * provides exec_input_to_string(), which the dump code in this file
 * calls -- confirm against the macro definition. */
2095 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2096 [EXEC_INPUT_NULL] = "null",
2097 [EXEC_INPUT_TTY] = "tty",
2098 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2099 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2100 [EXEC_INPUT_SOCKET] = "socket"
2103 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput values, indexed by enum value and sized
 * by _EXEC_OUTPUT_MAX. Each of the syslog, kmsg and journal targets has a
 * matching "+console" variant. DEFINE_STRING_TABLE_LOOKUP is presumably
 * what provides exec_output_to_string(), which the dump code in this file
 * calls -- confirm against the macro definition. */
2105 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2106 [EXEC_OUTPUT_INHERIT] = "inherit",
2107 [EXEC_OUTPUT_NULL] = "null",
2108 [EXEC_OUTPUT_TTY] = "tty",
2109 [EXEC_OUTPUT_SYSLOG] = "syslog",
2110 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2111 [EXEC_OUTPUT_KMSG] = "kmsg",
2112 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2113 [EXEC_OUTPUT_JOURNAL] = "journal",
2114 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2115 [EXEC_OUTPUT_SOCKET] = "socket"
2118 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* String names for the KillMode values, indexed by enum value and sized by
 * _KILL_MODE_MAX. DEFINE_STRING_TABLE_LOOKUP is presumably what provides
 * kill_mode_to_string(), which the dump code in this file calls -- confirm
 * against the macro definition. */
2120 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2121 [KILL_CONTROL_GROUP] = "control-group",
2122 [KILL_PROCESS] = "process",
2123 [KILL_NONE] = "none"
2126 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
/* String names for the KillWho values, indexed by enum value and sized by
 * _KILL_WHO_MAX. NOTE(review): this listing skips lines after the
 * "control" entry, so the table may hold further entries that are not
 * shown here. DEFINE_STRING_TABLE_LOOKUP presumably generates the
 * kill_who string conversion helpers -- confirm against the macro
 * definition. */
2128 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2129 [KILL_MAIN] = "main",
2130 [KILL_CONTROL] = "control",
2134 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);