1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
43 #include <security/pam_appl.h>
49 #include "capability.h"
53 #include "securebits.h"
55 #include "namespace.h"
57 #include "exit-status.h"
59 #include "utmp-wtmp.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
64 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
66 /* This assumes there is a 'tty' group */
69 static int shift_fds(int fds[], unsigned n_fds) {
70 int start, restart_from;
75 /* Modifies the fds array! (sorts it) */
85 for (i = start; i < (int) n_fds; i++) {
88 /* Already at right index? */
92 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
95 close_nointr_nofail(fds[i]);
98 /* Hmm, the fd we wanted isn't free? Then
99 * let's remember that and try again from here. */
100 if (nfd != i+3 && restart_from < 0)
104 if (restart_from < 0)
107 start = restart_from;
113 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
122 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
124 for (i = 0; i < n_fds; i++) {
126 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
129 /* We unconditionally drop FD_CLOEXEC from the fds,
130 * since after all we want to pass these fds to our
133 if ((r = fd_cloexec(fds[i], false)) < 0)
140 static const char *tty_path(const ExecContext *context) {
143 if (context->tty_path)
144 return context->tty_path;
146 return "/dev/console";
149 void exec_context_tty_reset(const ExecContext *context) {
152 if (context->tty_vhangup)
153 terminal_vhangup(tty_path(context));
155 if (context->tty_reset)
156 reset_terminal(tty_path(context));
158 if (context->tty_vt_disallocate && context->tty_path)
159 vt_disallocate(context->tty_path);
162 static int open_null_as(int flags, int nfd) {
167 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
171 r = dup2(fd, nfd) < 0 ? -errno : nfd;
172 close_nointr_nofail(fd);
179 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
181 union sockaddr_union sa;
184 assert(output < _EXEC_OUTPUT_MAX);
188 fd = socket(AF_UNIX, SOCK_STREAM, 0);
193 sa.un.sun_family = AF_UNIX;
194 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
196 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
198 close_nointr_nofail(fd);
202 if (shutdown(fd, SHUT_RD) < 0) {
203 close_nointr_nofail(fd);
214 context->syslog_identifier ? context->syslog_identifier : ident,
215 context->syslog_priority,
216 !!context->syslog_level_prefix,
217 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
218 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
219 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
222 r = dup2(fd, nfd) < 0 ? -errno : nfd;
223 close_nointr_nofail(fd);
229 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
235 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
239 r = dup2(fd, nfd) < 0 ? -errno : nfd;
240 close_nointr_nofail(fd);
247 static bool is_terminal_input(ExecInput i) {
249 i == EXEC_INPUT_TTY ||
250 i == EXEC_INPUT_TTY_FORCE ||
251 i == EXEC_INPUT_TTY_FAIL;
254 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
256 if (is_terminal_input(std_input) && !apply_tty_stdin)
257 return EXEC_INPUT_NULL;
259 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
260 return EXEC_INPUT_NULL;
265 static int fixup_output(ExecOutput std_output, int socket_fd) {
267 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
268 return EXEC_OUTPUT_INHERIT;
273 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
278 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
282 case EXEC_INPUT_NULL:
283 return open_null_as(O_RDONLY, STDIN_FILENO);
286 case EXEC_INPUT_TTY_FORCE:
287 case EXEC_INPUT_TTY_FAIL: {
290 if ((fd = acquire_terminal(
292 i == EXEC_INPUT_TTY_FAIL,
293 i == EXEC_INPUT_TTY_FORCE,
297 if (fd != STDIN_FILENO) {
298 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
299 close_nointr_nofail(fd);
306 case EXEC_INPUT_SOCKET:
307 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
310 assert_not_reached("Unknown input type");
314 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
321 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
322 o = fixup_output(context->std_output, socket_fd);
324 /* This expects the input is already set up */
328 case EXEC_OUTPUT_INHERIT:
330 /* If input got downgraded, inherit the original value */
331 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
332 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
334 /* If the input is connected to anything other than /dev/null, inherit that... */
335 if (i != EXEC_INPUT_NULL)
336 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
338 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
340 return STDOUT_FILENO;
342 /* We need to open /dev/null here anew, to get the
343 * right access mode. So we fall through */
345 case EXEC_OUTPUT_NULL:
346 return open_null_as(O_WRONLY, STDOUT_FILENO);
348 case EXEC_OUTPUT_TTY:
349 if (is_terminal_input(i))
350 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
352 /* We don't reset the terminal if this is just about output */
353 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
355 case EXEC_OUTPUT_SYSLOG:
356 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
357 case EXEC_OUTPUT_KMSG:
358 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
359 case EXEC_OUTPUT_JOURNAL:
360 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
361 return connect_logger_as(context, o, ident, STDOUT_FILENO);
363 case EXEC_OUTPUT_SOCKET:
364 assert(socket_fd >= 0);
365 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
368 assert_not_reached("Unknown output type");
372 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
379 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
380 o = fixup_output(context->std_output, socket_fd);
381 e = fixup_output(context->std_error, socket_fd);
383 /* This expects the input and output are already set up */
385 /* Don't change the stderr file descriptor if we inherit all
386 * the way and are not on a tty */
387 if (e == EXEC_OUTPUT_INHERIT &&
388 o == EXEC_OUTPUT_INHERIT &&
389 i == EXEC_INPUT_NULL &&
390 !is_terminal_input(context->std_input) &&
392 return STDERR_FILENO;
394 /* Duplicate from stdout if possible */
395 if (e == o || e == EXEC_OUTPUT_INHERIT)
396 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
400 case EXEC_OUTPUT_NULL:
401 return open_null_as(O_WRONLY, STDERR_FILENO);
403 case EXEC_OUTPUT_TTY:
404 if (is_terminal_input(i))
405 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
407 /* We don't reset the terminal if this is just about output */
408 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
410 case EXEC_OUTPUT_SYSLOG:
411 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
412 case EXEC_OUTPUT_KMSG:
413 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
414 case EXEC_OUTPUT_JOURNAL:
415 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
416 return connect_logger_as(context, e, ident, STDERR_FILENO);
418 case EXEC_OUTPUT_SOCKET:
419 assert(socket_fd >= 0);
420 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
423 assert_not_reached("Unknown error type");
427 static int chown_terminal(int fd, uid_t uid) {
432 /* This might fail. What matters are the results. */
433 (void) fchown(fd, uid, -1);
434 (void) fchmod(fd, TTY_MODE);
436 if (fstat(fd, &st) < 0)
439 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
445 static int setup_confirm_stdio(const ExecContext *context,
447 int *_saved_stdout) {
448 int fd = -1, saved_stdin, saved_stdout = -1, r;
451 assert(_saved_stdin);
452 assert(_saved_stdout);
454 /* This returns positive EXIT_xxx return values instead of
455 * negative errno style values! */
457 if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
460 if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
465 if ((fd = acquire_terminal(
467 context->std_input == EXEC_INPUT_TTY_FAIL,
468 context->std_input == EXEC_INPUT_TTY_FORCE,
474 if (chown_terminal(fd, getuid()) < 0) {
479 if (dup2(fd, STDIN_FILENO) < 0) {
484 if (dup2(fd, STDOUT_FILENO) < 0) {
490 close_nointr_nofail(fd);
492 *_saved_stdin = saved_stdin;
493 *_saved_stdout = saved_stdout;
498 if (saved_stdout >= 0)
499 close_nointr_nofail(saved_stdout);
501 if (saved_stdin >= 0)
502 close_nointr_nofail(saved_stdin);
505 close_nointr_nofail(fd);
510 static int restore_confirm_stdio(const ExecContext *context,
518 assert(*saved_stdin >= 0);
519 assert(saved_stdout);
520 assert(*saved_stdout >= 0);
522 /* This returns positive EXIT_xxx return values instead of
523 * negative errno style values! */
525 if (is_terminal_input(context->std_input)) {
527 /* The service wants terminal input. */
531 context->std_output == EXEC_OUTPUT_INHERIT ||
532 context->std_output == EXEC_OUTPUT_TTY;
535 /* If the service doesn't want a controlling terminal,
536 * then we need to get rid entirely of what we have
539 if (release_terminal() < 0)
542 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
545 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
548 *keep_stdout = *keep_stdin = false;
554 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
555 bool keep_groups = false;
560 /* Look up and set GID and supplementary group list. Here too
561 * we avoid NSS lookups for gid=0. */
563 if (context->group || username) {
565 if (context->group) {
566 const char *g = context->group;
568 if ((r = get_group_creds(&g, &gid)) < 0)
572 /* First step, initialize groups from /etc/group */
573 if (username && gid != 0) {
574 if (initgroups(username, gid) < 0)
580 /* Second step, set our gids */
581 if (setresgid(gid, gid, gid) < 0)
585 if (context->supplementary_groups) {
590 /* Final step, initialize any manually set supplementary groups */
591 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
593 if (!(gids = new(gid_t, ngroups_max)))
597 if ((k = getgroups(ngroups_max, gids)) < 0) {
604 STRV_FOREACH(i, context->supplementary_groups) {
607 if (k >= ngroups_max) {
613 r = get_group_creds(&g, gids+k);
622 if (setgroups(k, gids) < 0) {
633 static int enforce_user(const ExecContext *context, uid_t uid) {
637 /* Sets (but doesn't look up) the uid and makes sure we keep the
638 * capabilities while doing so. */
640 if (context->capabilities) {
642 static const cap_value_t bits[] = {
643 CAP_SETUID, /* Necessary so that we can run setresuid() below */
644 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
647 /* First step: If we need to keep capabilities but
648 * drop privileges we need to make sure we keep our
649 * caps while we drop privileges. */
651 int sb = context->secure_bits|SECURE_KEEP_CAPS;
653 if (prctl(PR_GET_SECUREBITS) != sb)
654 if (prctl(PR_SET_SECUREBITS, sb) < 0)
658 /* Second step: set the capabilities. This will reduce
659 * the capabilities to the minimum we need. */
661 if (!(d = cap_dup(context->capabilities)))
664 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
665 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
671 if (cap_set_proc(d) < 0) {
680 /* Third step: actually set the uids */
681 if (setresuid(uid, uid, uid) < 0)
684 /* At this point we should have all necessary capabilities but
685 are otherwise a normal user. However, the caps might have been
686 corrupted by the setresuid(), so we need to clean them up
687 later. This is done outside of this call. */
694 static int null_conv(
696 const struct pam_message **msg,
697 struct pam_response **resp,
700 /* We don't support conversations */
705 static int setup_pam(
711 int fds[], unsigned n_fds) {
713 static const struct pam_conv conv = {
718 pam_handle_t *handle = NULL;
720 int pam_code = PAM_SUCCESS;
723 bool close_session = false;
724 pid_t pam_pid = 0, parent_pid;
730 /* We set up PAM in the parent process, then fork. The child
731 * will then stay around until killed via PR_SET_PDEATHSIG or
732 * systemd via the cgroup logic. It will then remove the PAM
733 * session again. The parent process will exec() the actual
734 * daemon. We do things this way to ensure that the main PID
735 * of the daemon is the one we initially fork()ed. */
737 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
743 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
746 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
749 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
752 close_session = true;
754 if ((!(e = pam_getenvlist(handle)))) {
755 pam_code = PAM_BUF_ERR;
759 /* Block SIGTERM, so that we know that it won't get lost in
761 if (sigemptyset(&ss) < 0 ||
762 sigaddset(&ss, SIGTERM) < 0 ||
763 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
766 parent_pid = getpid();
768 if ((pam_pid = fork()) < 0)
775 /* The child's job is to reset the PAM session on
778 /* This string must fit in 10 chars (i.e. the length
779 * of "/sbin/init"), to look pretty in /bin/ps */
780 rename_process("(sd-pam)");
782 /* Make sure we don't keep open the passed fds in this
783 child. We assume that otherwise only those fds are
784 open here that have been opened by PAM. */
785 close_many(fds, n_fds);
787 /* Drop privileges - we don't need any to pam_close_session
788 * and this will make PR_SET_PDEATHSIG work in most cases.
789 * If this fails, ignore the error - but expect sd-pam threads
790 * to fail to exit normally */
791 if (setresuid(uid, uid, uid) < 0)
792 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
794 /* Wait until our parent died. This will only work if
795 * the above setresuid() succeeds, otherwise the kernel
796 * will not allow unprivileged parents to kill their privileged
797 * children this way. We rely on the control groups kill logic
798 * to do the rest for us. */
799 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
802 /* Check if our parent process might already have
804 if (getppid() == parent_pid) {
806 if (sigwait(&ss, &sig) < 0) {
813 assert(sig == SIGTERM);
818 /* If our parent died we'll end the session */
819 if (getppid() != parent_pid)
820 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
826 pam_end(handle, pam_code | PAM_DATA_SILENT);
830 /* If the child was forked off successfully it will do all the
831 * cleanups, so forget about the handle here. */
834 /* Unblock SIGTERM again in the parent */
835 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
838 /* We close the log explicitly here, since the PAM modules
839 * might have opened it, but we don't want this fd around. */
848 if (pam_code != PAM_SUCCESS)
849 err = -EPERM; /* PAM errors do not map to errno */
855 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
857 pam_end(handle, pam_code | PAM_DATA_SILENT);
865 kill(pam_pid, SIGTERM);
866 kill(pam_pid, SIGCONT);
873 static int do_capability_bounding_set_drop(uint64_t drop) {
875 cap_t old_cap = NULL, new_cap = NULL;
879 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
880 * in the effective set (yes, the kernel drops that when
881 * executing init!), so get it back temporarily so that we can
882 * call PR_CAPBSET_DROP. */
884 old_cap = cap_get_proc();
888 if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
894 static const cap_value_t v = CAP_SETPCAP;
896 new_cap = cap_dup(old_cap);
902 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
907 if (cap_set_proc(new_cap) < 0) {
913 for (i = 0; i <= cap_last_cap(); i++)
914 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
915 if (prctl(PR_CAPBSET_DROP, i) < 0) {
928 cap_set_proc(old_cap);
935 static void rename_process_from_path(const char *path) {
936 char process_name[11];
940 /* This resulting string must fit in 10 chars (i.e. the length
941 * of "/sbin/init") to look pretty in /bin/ps */
943 p = path_get_file_name(path);
945 rename_process("(...)");
951 /* The end of the process name is usually more
952 * interesting, since the first bit might just be
958 process_name[0] = '(';
959 memcpy(process_name+1, p, l);
960 process_name[1+l] = ')';
961 process_name[1+l+1] = 0;
963 rename_process(process_name);
966 int exec_spawn(ExecCommand *command,
968 const ExecContext *context,
969 int fds[], unsigned n_fds,
971 bool apply_permissions,
973 bool apply_tty_stdin,
975 CGroupBonding *cgroup_bondings,
976 CGroupAttribute *cgroup_attributes,
977 const char *cgroup_suffix,
985 char **files_env = NULL;
990 assert(fds || n_fds <= 0);
992 if (context->std_input == EXEC_INPUT_SOCKET ||
993 context->std_output == EXEC_OUTPUT_SOCKET ||
994 context->std_error == EXEC_OUTPUT_SOCKET) {
1006 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
1007 log_error("Failed to load environment files: %s", strerror(-r));
1012 argv = command->argv;
1014 if (!(line = exec_command_line(argv))) {
1019 log_debug("About to execute: %s", line);
1022 r = cgroup_bonding_realize_list(cgroup_bondings);
1026 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1028 if ((pid = fork()) < 0) {
1036 const char *username = NULL, *home = NULL;
1037 uid_t uid = (uid_t) -1;
1038 gid_t gid = (gid_t) -1;
1039 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1041 int saved_stdout = -1, saved_stdin = -1;
1042 bool keep_stdout = false, keep_stdin = false, set_access = false;
1046 rename_process_from_path(command->path);
1048 /* We reset exactly these signals, since they are the
1049 * only ones we set to SIG_IGN in the main daemon. All
1050 * others we leave untouched because we set them to
1051 * SIG_DFL or a valid handler initially, both of which
1052 * will be demoted to SIG_DFL. */
1053 default_signals(SIGNALS_CRASH_HANDLER,
1054 SIGNALS_IGNORE, -1);
1056 if (context->ignore_sigpipe)
1057 ignore_signals(SIGPIPE, -1);
1059 assert_se(sigemptyset(&ss) == 0);
1060 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1062 r = EXIT_SIGNAL_MASK;
1067 if (idle_pipe[1] >= 0)
1068 close_nointr_nofail(idle_pipe[1]);
1069 if (idle_pipe[0] >= 0) {
1070 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1071 close_nointr_nofail(idle_pipe[0]);
1075 /* Close sockets very early to make sure we don't
1076 * block init reexecution because it cannot bind its
1079 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1080 socket_fd >= 0 ? 1 : n_fds);
1086 if (!context->same_pgrp)
1093 if (context->tcpwrap_name) {
1095 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1101 for (i = 0; i < (int) n_fds; i++) {
1102 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1110 exec_context_tty_reset(context);
1112 /* We skip the confirmation step if we shall not apply the TTY */
1113 if (confirm_spawn &&
1114 (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1117 /* Set up terminal for the question */
1118 if ((r = setup_confirm_stdio(context,
1119 &saved_stdin, &saved_stdout))) {
1124 /* Now ask the question. */
1125 if (!(line = exec_command_line(argv))) {
1131 r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1134 if (r < 0 || response == 'n') {
1138 } else if (response == 's') {
1143 /* Release terminal for the question */
1144 if ((r = restore_confirm_stdio(context,
1145 &saved_stdin, &saved_stdout,
1146 &keep_stdin, &keep_stdout))) {
1152 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1153 * must be sure to drop O_NONBLOCK */
1155 fd_nonblock(socket_fd, false);
1158 err = setup_input(context, socket_fd, apply_tty_stdin);
1166 err = setup_output(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1173 err = setup_error(context, socket_fd, path_get_file_name(command->path), apply_tty_stdin);
1179 if (cgroup_bondings) {
1180 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1187 if (context->oom_score_adjust_set) {
1190 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1193 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1194 /* Compatibility with Linux <= 2.6.35 */
1198 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1199 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1201 snprintf(t, sizeof(t), "%i", adj);
1204 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1205 && errno != EACCES) {
1207 r = EXIT_OOM_ADJUST;
1213 if (context->nice_set)
1214 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1220 if (context->cpu_sched_set) {
1221 struct sched_param param;
1224 param.sched_priority = context->cpu_sched_priority;
1226 if (sched_setscheduler(0, context->cpu_sched_policy |
1227 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1229 r = EXIT_SETSCHEDULER;
1234 if (context->cpuset)
1235 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1237 r = EXIT_CPUAFFINITY;
1241 if (context->ioprio_set)
1242 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1248 if (context->timer_slack_nsec_set)
1249 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1251 r = EXIT_TIMERSLACK;
1255 if (context->utmp_id)
1256 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1258 if (context->user) {
1259 username = context->user;
1260 err = get_user_creds(&username, &uid, &gid, &home);
1266 if (is_terminal_input(context->std_input)) {
1267 err = chown_terminal(STDIN_FILENO, uid);
1274 if (cgroup_bondings && context->control_group_modify) {
1275 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1277 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1287 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1288 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1295 if (apply_permissions) {
1296 err = enforce_groups(context, username, gid);
1303 umask(context->umask);
1306 if (context->pam_name && username) {
1307 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1314 if (context->private_network) {
1315 if (unshare(CLONE_NEWNET) < 0) {
1324 if (strv_length(context->read_write_dirs) > 0 ||
1325 strv_length(context->read_only_dirs) > 0 ||
1326 strv_length(context->inaccessible_dirs) > 0 ||
1327 context->mount_flags != MS_SHARED ||
1328 context->private_tmp) {
1329 err = setup_namespace(context->read_write_dirs,
1330 context->read_only_dirs,
1331 context->inaccessible_dirs,
1332 context->private_tmp,
1333 context->mount_flags);
1341 if (context->root_directory)
1342 if (chroot(context->root_directory) < 0) {
1348 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1357 if (asprintf(&d, "%s/%s",
1358 context->root_directory ? context->root_directory : "",
1359 context->working_directory ? context->working_directory : "") < 0) {
1375 /* We repeat the fd closing here, to make sure that
1376 * nothing is leaked from the PAM modules */
1377 err = close_all_fds(fds, n_fds);
1379 err = shift_fds(fds, n_fds);
1381 err = flags_fds(fds, n_fds, context->non_blocking);
1387 if (apply_permissions) {
1389 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1390 if (!context->rlimit[i])
1393 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1400 if (context->capability_bounding_set_drop) {
1401 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1403 r = EXIT_CAPABILITIES;
1408 if (context->user) {
1409 err = enforce_user(context, uid);
1416 /* PR_GET_SECUREBITS is not privileged, while
1417 * PR_SET_SECUREBITS is. So to suppress
1418 * potential EPERMs we'll try not to call
1419 * PR_SET_SECUREBITS unless necessary. */
1420 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1421 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1423 r = EXIT_SECUREBITS;
1427 if (context->capabilities)
1428 if (cap_set_proc(context->capabilities) < 0) {
1430 r = EXIT_CAPABILITIES;
1435 if (!(our_env = new0(char*, 7))) {
1442 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1443 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1450 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1457 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1458 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1464 if (is_terminal_input(context->std_input) ||
1465 context->std_output == EXEC_OUTPUT_TTY ||
1466 context->std_error == EXEC_OUTPUT_TTY)
1467 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1475 if (!(final_env = strv_env_merge(
1479 context->environment,
1488 if (!(final_argv = replace_env_argv(argv, final_env))) {
1494 final_env = strv_env_clean(final_env);
1496 execve(command->path, final_argv, final_env);
1503 log_warning("Failed at step %s spawning %s: %s",
1504 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1505 command->path, strerror(-err));
1509 strv_free(final_env);
1511 strv_free(files_env);
1512 strv_free(final_argv);
1514 if (saved_stdin >= 0)
1515 close_nointr_nofail(saved_stdin);
1517 if (saved_stdout >= 0)
1518 close_nointr_nofail(saved_stdout);
1523 strv_free(files_env);
1525 /* We add the new process to the cgroup both in the child (so
1526 * that we can be sure that no user code is ever executed
1527 * outside of the cgroup) and in the parent (so that we can be
1528 * sure that when we kill the cgroup the process will be
1530 if (cgroup_bondings)
1531 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1533 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1535 exec_status_start(&command->exec_status, pid);
1541 strv_free(files_env);
1546 void exec_context_init(ExecContext *c) {
1550 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1551 c->cpu_sched_policy = SCHED_OTHER;
1552 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1553 c->syslog_level_prefix = true;
1554 c->mount_flags = MS_SHARED;
1555 c->kill_signal = SIGTERM;
1556 c->send_sigkill = true;
1557 c->control_group_persistent = -1;
1558 c->ignore_sigpipe = true;
1561 void exec_context_done(ExecContext *c) {
1566 strv_free(c->environment);
1567 c->environment = NULL;
1569 strv_free(c->environment_files);
1570 c->environment_files = NULL;
1572 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1574 c->rlimit[l] = NULL;
1577 free(c->working_directory);
1578 c->working_directory = NULL;
1579 free(c->root_directory);
1580 c->root_directory = NULL;
1585 free(c->tcpwrap_name);
1586 c->tcpwrap_name = NULL;
1588 free(c->syslog_identifier);
1589 c->syslog_identifier = NULL;
1597 strv_free(c->supplementary_groups);
1598 c->supplementary_groups = NULL;
1603 if (c->capabilities) {
1604 cap_free(c->capabilities);
1605 c->capabilities = NULL;
1608 strv_free(c->read_only_dirs);
1609 c->read_only_dirs = NULL;
1611 strv_free(c->read_write_dirs);
1612 c->read_write_dirs = NULL;
1614 strv_free(c->inaccessible_dirs);
1615 c->inaccessible_dirs = NULL;
1618 CPU_FREE(c->cpuset);
1624 void exec_command_done(ExecCommand *c) {
1634 void exec_command_done_array(ExecCommand *c, unsigned n) {
1637 for (i = 0; i < n; i++)
1638 exec_command_done(c+i);
1641 void exec_command_free_list(ExecCommand *c) {
1645 LIST_REMOVE(ExecCommand, command, c, i);
1646 exec_command_done(i);
1651 void exec_command_free_array(ExecCommand **c, unsigned n) {
1654 for (i = 0; i < n; i++) {
1655 exec_command_free_list(c[i]);
1660 int exec_context_load_environment(const ExecContext *c, char ***l) {
1661 char **i, **r = NULL;
1666 STRV_FOREACH(i, c->environment_files) {
1669 bool ignore = false;
1679 if (!path_is_absolute(fn)) {
1688 if ((k = load_env_file(fn, &p)) < 0) {
1702 m = strv_env_merge(2, r, p);
1718 static void strv_fprintf(FILE *f, char **l) {
1724 fprintf(f, " %s", *g);
1727 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1739 "%sWorkingDirectory: %s\n"
1740 "%sRootDirectory: %s\n"
1741 "%sNonBlocking: %s\n"
1742 "%sPrivateTmp: %s\n"
1743 "%sControlGroupModify: %s\n"
1744 "%sControlGroupPersistent: %s\n"
1745 "%sPrivateNetwork: %s\n",
1747 prefix, c->working_directory ? c->working_directory : "/",
1748 prefix, c->root_directory ? c->root_directory : "/",
1749 prefix, yes_no(c->non_blocking),
1750 prefix, yes_no(c->private_tmp),
1751 prefix, yes_no(c->control_group_modify),
1752 prefix, yes_no(c->control_group_persistent),
1753 prefix, yes_no(c->private_network));
1755 STRV_FOREACH(e, c->environment)
1756 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1758 STRV_FOREACH(e, c->environment_files)
1759 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1761 if (c->tcpwrap_name)
1763 "%sTCPWrapName: %s\n",
1764 prefix, c->tcpwrap_name);
1771 if (c->oom_score_adjust_set)
1773 "%sOOMScoreAdjust: %i\n",
1774 prefix, c->oom_score_adjust);
1776 for (i = 0; i < RLIM_NLIMITS; i++)
1778 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1782 "%sIOSchedulingClass: %s\n"
1783 "%sIOPriority: %i\n",
1784 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1785 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1787 if (c->cpu_sched_set)
1789 "%sCPUSchedulingPolicy: %s\n"
1790 "%sCPUSchedulingPriority: %i\n"
1791 "%sCPUSchedulingResetOnFork: %s\n",
1792 prefix, sched_policy_to_string(c->cpu_sched_policy),
1793 prefix, c->cpu_sched_priority,
1794 prefix, yes_no(c->cpu_sched_reset_on_fork));
1797 fprintf(f, "%sCPUAffinity:", prefix);
1798 for (i = 0; i < c->cpuset_ncpus; i++)
1799 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1800 fprintf(f, " %i", i);
1804 if (c->timer_slack_nsec_set)
1805 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1808 "%sStandardInput: %s\n"
1809 "%sStandardOutput: %s\n"
1810 "%sStandardError: %s\n",
1811 prefix, exec_input_to_string(c->std_input),
1812 prefix, exec_output_to_string(c->std_output),
1813 prefix, exec_output_to_string(c->std_error));
1819 "%sTTYVHangup: %s\n"
1820 "%sTTYVTDisallocate: %s\n",
1821 prefix, c->tty_path,
1822 prefix, yes_no(c->tty_reset),
1823 prefix, yes_no(c->tty_vhangup),
1824 prefix, yes_no(c->tty_vt_disallocate));
1826 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1827 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1828 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1829 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1831 "%sSyslogFacility: %s\n"
1832 "%sSyslogLevel: %s\n",
1833 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1834 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1836 if (c->capabilities) {
1838 if ((t = cap_to_text(c->capabilities, NULL))) {
1839 fprintf(f, "%sCapabilities: %s\n",
1846 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1848 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1849 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1850 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1851 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1852 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1853 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1855 if (c->capability_bounding_set_drop) {
1857 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1859 for (l = 0; l <= cap_last_cap(); l++)
1860 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1863 if ((t = cap_to_name(l))) {
1864 fprintf(f, " %s", t);
1873 fprintf(f, "%sUser: %s\n", prefix, c->user);
1875 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1877 if (strv_length(c->supplementary_groups) > 0) {
1878 fprintf(f, "%sSupplementaryGroups:", prefix);
1879 strv_fprintf(f, c->supplementary_groups);
1884 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1886 if (strv_length(c->read_write_dirs) > 0) {
1887 fprintf(f, "%sReadWriteDirs:", prefix);
1888 strv_fprintf(f, c->read_write_dirs);
1892 if (strv_length(c->read_only_dirs) > 0) {
1893 fprintf(f, "%sReadOnlyDirs:", prefix);
1894 strv_fprintf(f, c->read_only_dirs);
1898 if (strv_length(c->inaccessible_dirs) > 0) {
1899 fprintf(f, "%sInaccessibleDirs:", prefix);
1900 strv_fprintf(f, c->inaccessible_dirs);
1906 "%sKillSignal: SIG%s\n"
1907 "%sSendSIGKILL: %s\n"
1908 "%sIgnoreSIGPIPE: %s\n",
1909 prefix, kill_mode_to_string(c->kill_mode),
1910 prefix, signal_to_string(c->kill_signal),
1911 prefix, yes_no(c->send_sigkill),
1912 prefix, yes_no(c->ignore_sigpipe));
1916 "%sUtmpIdentifier: %s\n",
1917 prefix, c->utmp_id);
/* Fills in the start side of an ExecStatus record: s->start_timestamp is
 * set through dual_timestamp_get().
 *
 * NOTE(review): this excerpt does not show how "pid" is consumed or whether
 * *s is cleared first -- confirm against the complete function. */
1920 void exec_status_start(ExecStatus *s, pid_t pid) {
1925 dual_timestamp_get(&s->start_timestamp);
/* Fills in the exit side of an ExecStatus record for process "pid" and runs
 * the context-related follow-ups visible below.
 *
 *   s        status record; its exit_timestamp is set via dual_timestamp_get()
 *   context  execution context; supplies utmp_id and is handed to
 *            exec_context_tty_reset()
 *   pid, code, status
 *            forwarded unchanged to utmp_put_dead_process()
 */
1928 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* True only when a non-zero PID is already recorded and it differs from the
 * PID reported now. NOTE(review): the statement guarded by this condition is
 * not visible in this excerpt. */
1931 if (s->pid && s->pid != pid)
1935 dual_timestamp_get(&s->exit_timestamp);
/* A dead-process utmp record is written only when an utmp identifier is
 * configured on the context.
 * NOTE(review): context is dereferenced here; whether a NULL check precedes
 * this line cannot be seen from this excerpt. */
1941 if (context->utmp_id)
1942 utmp_put_dead_process(context->utmp_id, pid, code, status);
1944 exec_context_tty_reset(context);
/* Writes a human-readable description of an ExecStatus to f, every output
 * line starting with "prefix".
 *
 * The PID is printed (cast to unsigned long). The start timestamp is printed
 * only if start_timestamp.realtime is > 0; the exit timestamp, exit code and
 * exit status are printed only if exit_timestamp.realtime is > 0.
 */
1948 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp(); the same buffer is used for
 * both the start and the exit timestamp. */
1949 char buf[FORMAT_TIMESTAMP_MAX];
1962 prefix, (unsigned long) s->pid);
1964 if (s->start_timestamp.realtime > 0)
1966 "%sStart Timestamp: %s\n",
1967 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1969 if (s->exit_timestamp.realtime > 0)
1971 "%sExit Timestamp: %s\n"
1973 "%sExit Status: %i\n",
1974 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
/* s->code is rendered symbolically through sigchld_code_to_string().
 * NOTE(review): the format line that consumes this argument is not part of
 * this excerpt. */
1975 prefix, sigchld_code_to_string(s->code),
/* Builds a single string out of the argument vector argv.
 *
 * argv is walked twice with STRV_FOREACH: a first pass before the
 * allocation and a second pass afterwards. The result buffer n is obtained
 * with new(char, k).
 *
 * NOTE(review): how k is computed, what is returned when the allocation
 * fails, and who frees the result are not visible in this excerpt --
 * presumably the caller owns the returned string; confirm. */
1979 char *exec_command_line(char **argv) {
1987 STRV_FOREACH(a, argv)
1990 if (!(n = new(char, k)))
1994 STRV_FOREACH(a, argv) {
/* Arguments containing any character from WHITESPACE take a separate path.
 * NOTE(review): presumably they get quoted; the branch body is not visible
 * here. */
2001 if (strpbrk(*a, WHITESPACE)) {
2012 /* FIXME: this doesn't really handle arguments that have
2013 * spaces and ticks in them */
/* Writes a description of one ExecCommand to f: its command line (prefixed
 * with "prefix") followed by its exec_status, dumped one tab level deeper. */
2018 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2020 const char *prefix2;
/* prefix2 is "prefix" plus a tab. If strappend() returns NULL the plain
 * prefix is used instead, so the dump degrades to flatter indentation
 * rather than failing. */
2029 p2 = strappend(prefix, "\t");
2030 prefix2 = p2 ? p2 : prefix;
2032 cmd = exec_command_line(c->argv);
/* If no command line string could be built, the ENOMEM error text is printed
 * in its place. */
2035 "%sCommand Line: %s\n",
2036 prefix, cmd ? cmd : strerror(ENOMEM));
2040 exec_status_dump(&c->exec_status, f, prefix2);
/* Calls exec_command_dump() with the same f and prefix for each entry of the
 * "command" list starting at c. The parameter c itself serves as the
 * iteration cursor. */
2045 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2051 LIST_FOREACH(command, c, c)
2052 exec_command_dump(c, f, prefix);
/* Appends the command e to the end of the "command" list whose head is *l.
 *
 * NOTE(review): handling of an empty list (*l == NULL) is not visible in
 * this excerpt. */
2055 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
/* Order must be preserved, so the current tail is located first and e is
 * linked in right after it. */
2062 /* It's kind of important, that we keep the order here */
2063 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2064 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for an ExecCommand: "path" is followed by further
 * arguments that are collected into a string vector.
 *
 * NOTE(review): the va_start()/va_end() calls, the error paths, the return
 * values and the way l and p are stored into c are not visible in this
 * excerpt; presumably the variadic list is NULL-terminated -- confirm. */
2069 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* The vector l is built from path plus the variadic arguments. */
2077 l = strv_new_ap(path, ap);
/* A separate heap copy of path is taken; the branch below is the
 * allocation-failure case. */
2083 if (!(p = strdup(path))) {
/* String names for the ExecInput enum values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX.
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP is defined elsewhere; presumably
 * it generates the exec_input_to_string() helper called elsewhere in this
 * file (and a matching from-string lookup) -- confirm. */
2097 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2098 [EXEC_INPUT_NULL] = "null",
2099 [EXEC_INPUT_TTY] = "tty",
2100 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2101 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2102 [EXEC_INPUT_SOCKET] = "socket"
2105 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum values, indexed by enum value and
 * sized by _EXEC_OUTPUT_MAX. The "+console" names belong to the
 * *_AND_CONSOLE variants of the syslog, kmsg and journal outputs.
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP is defined elsewhere; presumably
 * it generates the exec_output_to_string() helper called elsewhere in this
 * file (and a matching from-string lookup) -- confirm. */
2107 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2108 [EXEC_OUTPUT_INHERIT] = "inherit",
2109 [EXEC_OUTPUT_NULL] = "null",
2110 [EXEC_OUTPUT_TTY] = "tty",
2111 [EXEC_OUTPUT_SYSLOG] = "syslog",
2112 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2113 [EXEC_OUTPUT_KMSG] = "kmsg",
2114 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2115 [EXEC_OUTPUT_JOURNAL] = "journal",
2116 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2117 [EXEC_OUTPUT_SOCKET] = "socket"
2120 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* String names for the KillMode enum values, indexed by enum value and sized
 * by _KILL_MODE_MAX.
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP is defined elsewhere; presumably
 * it generates the kill_mode_to_string() helper called elsewhere in this
 * file (and a matching from-string lookup) -- confirm. */
2122 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2123 [KILL_CONTROL_GROUP] = "control-group",
2124 [KILL_PROCESS] = "process",
2125 [KILL_NONE] = "none"
2128 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
/* String names for the KillWho enum values, indexed by enum value and sized
 * by _KILL_WHO_MAX.
 * NOTE(review): only the "main" and "control" entries are visible in this
 * excerpt; the table may contain further entries. DEFINE_STRING_TABLE_LOOKUP
 * is defined elsewhere and presumably generates the to-string/from-string
 * helpers for this table -- confirm. */
2130 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2131 [KILL_MAIN] = "main",
2132 [KILL_CONTROL] = "control",
2136 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);