1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
43 #include <security/pam_appl.h>
49 #include "capability.h"
53 #include "securebits.h"
55 #include "namespace.h"
57 #include "exit-status.h"
59 #include "utmp-wtmp.h"
61 #include "loopback-setup.h"
63 /* This assumes there is a 'tty' group */
66 static int shift_fds(int fds[], unsigned n_fds) {
67 int start, restart_from;
72 /* Modifies the fds array! (sorts it) */
82 for (i = start; i < (int) n_fds; i++) {
85 /* Already at right index? */
89 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
92 close_nointr_nofail(fds[i]);
95 /* Hmm, the fd we wanted isn't free? Then
96 * let's remember that and try again from here*/
97 if (nfd != i+3 && restart_from < 0)
101 if (restart_from < 0)
104 start = restart_from;
110 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
119 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
121 for (i = 0; i < n_fds; i++) {
123 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
126 /* We unconditionally drop FD_CLOEXEC from the fds,
127 * since after all we want to pass these fds to our
130 if ((r = fd_cloexec(fds[i], false)) < 0)
137 static const char *tty_path(const ExecContext *context) {
140 if (context->tty_path)
141 return context->tty_path;
143 return "/dev/console";
146 void exec_context_tty_reset(const ExecContext *context) {
149 if (context->tty_vhangup)
150 terminal_vhangup(tty_path(context));
152 if (context->tty_reset)
153 reset_terminal(tty_path(context));
155 if (context->tty_vt_disallocate && context->tty_path)
156 vt_disallocate(context->tty_path);
159 static int open_null_as(int flags, int nfd) {
164 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
168 r = dup2(fd, nfd) < 0 ? -errno : nfd;
169 close_nointr_nofail(fd);
176 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
178 union sockaddr_union sa;
181 assert(output < _EXEC_OUTPUT_MAX);
185 fd = socket(AF_UNIX, SOCK_STREAM, 0);
190 sa.un.sun_family = AF_UNIX;
191 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
193 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
195 close_nointr_nofail(fd);
199 if (shutdown(fd, SHUT_RD) < 0) {
200 close_nointr_nofail(fd);
211 context->syslog_identifier ? context->syslog_identifier : ident,
212 context->syslog_priority,
213 !!context->syslog_level_prefix,
214 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
215 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
216 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
219 r = dup2(fd, nfd) < 0 ? -errno : nfd;
220 close_nointr_nofail(fd);
226 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
232 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
236 r = dup2(fd, nfd) < 0 ? -errno : nfd;
237 close_nointr_nofail(fd);
244 static bool is_terminal_input(ExecInput i) {
246 i == EXEC_INPUT_TTY ||
247 i == EXEC_INPUT_TTY_FORCE ||
248 i == EXEC_INPUT_TTY_FAIL;
251 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
253 if (is_terminal_input(std_input) && !apply_tty_stdin)
254 return EXEC_INPUT_NULL;
256 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
257 return EXEC_INPUT_NULL;
262 static int fixup_output(ExecOutput std_output, int socket_fd) {
264 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
265 return EXEC_OUTPUT_INHERIT;
270 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
275 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
279 case EXEC_INPUT_NULL:
280 return open_null_as(O_RDONLY, STDIN_FILENO);
283 case EXEC_INPUT_TTY_FORCE:
284 case EXEC_INPUT_TTY_FAIL: {
287 if ((fd = acquire_terminal(
289 i == EXEC_INPUT_TTY_FAIL,
290 i == EXEC_INPUT_TTY_FORCE,
294 if (fd != STDIN_FILENO) {
295 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
296 close_nointr_nofail(fd);
303 case EXEC_INPUT_SOCKET:
304 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
307 assert_not_reached("Unknown input type");
311 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
318 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
319 o = fixup_output(context->std_output, socket_fd);
321 /* This expects the input is already set up */
325 case EXEC_OUTPUT_INHERIT:
327 /* If input got downgraded, inherit the original value */
328 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
329 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
331 /* If the input is connected to anything that's not a /dev/null, inherit that... */
332 if (i != EXEC_INPUT_NULL)
333 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
335 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
337 return STDOUT_FILENO;
339 /* We need to open /dev/null here anew, to get the
340 * right access mode. So we fall through */
342 case EXEC_OUTPUT_NULL:
343 return open_null_as(O_WRONLY, STDOUT_FILENO);
345 case EXEC_OUTPUT_TTY:
346 if (is_terminal_input(i))
347 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
349 /* We don't reset the terminal if this is just about output */
350 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
352 case EXEC_OUTPUT_SYSLOG:
353 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
354 case EXEC_OUTPUT_KMSG:
355 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
356 case EXEC_OUTPUT_JOURNAL:
357 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
358 return connect_logger_as(context, o, ident, STDOUT_FILENO);
360 case EXEC_OUTPUT_SOCKET:
361 assert(socket_fd >= 0);
362 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
365 assert_not_reached("Unknown output type");
369 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
376 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
377 o = fixup_output(context->std_output, socket_fd);
378 e = fixup_output(context->std_error, socket_fd);
380 /* This expects the input and output are already set up */
382 /* Don't change the stderr file descriptor if we inherit all
383 * the way and are not on a tty */
384 if (e == EXEC_OUTPUT_INHERIT &&
385 o == EXEC_OUTPUT_INHERIT &&
386 i == EXEC_INPUT_NULL &&
387 !is_terminal_input(context->std_input) &&
389 return STDERR_FILENO;
391 /* Duplicate from stdout if possible */
392 if (e == o || e == EXEC_OUTPUT_INHERIT)
393 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
397 case EXEC_OUTPUT_NULL:
398 return open_null_as(O_WRONLY, STDERR_FILENO);
400 case EXEC_OUTPUT_TTY:
401 if (is_terminal_input(i))
402 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
404 /* We don't reset the terminal if this is just about output */
405 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
407 case EXEC_OUTPUT_SYSLOG:
408 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
409 case EXEC_OUTPUT_KMSG:
410 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
411 case EXEC_OUTPUT_JOURNAL:
412 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
413 return connect_logger_as(context, e, ident, STDERR_FILENO);
415 case EXEC_OUTPUT_SOCKET:
416 assert(socket_fd >= 0);
417 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
420 assert_not_reached("Unknown error type");
424 static int chown_terminal(int fd, uid_t uid) {
429 /* This might fail. What matters are the results. */
430 (void) fchown(fd, uid, -1);
431 (void) fchmod(fd, TTY_MODE);
433 if (fstat(fd, &st) < 0)
436 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
442 static int setup_confirm_stdio(const ExecContext *context,
444 int *_saved_stdout) {
445 int fd = -1, saved_stdin, saved_stdout = -1, r;
448 assert(_saved_stdin);
449 assert(_saved_stdout);
451 /* This returns positive EXIT_xxx return values instead of
452 * negative errno style values! */
454 if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
457 if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
462 if ((fd = acquire_terminal(
464 context->std_input == EXEC_INPUT_TTY_FAIL,
465 context->std_input == EXEC_INPUT_TTY_FORCE,
471 if (chown_terminal(fd, getuid()) < 0) {
476 if (dup2(fd, STDIN_FILENO) < 0) {
481 if (dup2(fd, STDOUT_FILENO) < 0) {
487 close_nointr_nofail(fd);
489 *_saved_stdin = saved_stdin;
490 *_saved_stdout = saved_stdout;
495 if (saved_stdout >= 0)
496 close_nointr_nofail(saved_stdout);
498 if (saved_stdin >= 0)
499 close_nointr_nofail(saved_stdin);
502 close_nointr_nofail(fd);
507 static int restore_confirm_stdio(const ExecContext *context,
515 assert(*saved_stdin >= 0);
516 assert(saved_stdout);
517 assert(*saved_stdout >= 0);
519 /* This returns positive EXIT_xxx return values instead of
520 * negative errno style values! */
522 if (is_terminal_input(context->std_input)) {
524 /* The service wants terminal input. */
528 context->std_output == EXEC_OUTPUT_INHERIT ||
529 context->std_output == EXEC_OUTPUT_TTY;
532 /* If the service doesn't want a controlling terminal,
533 * then we need to get rid entirely of what we have
536 if (release_terminal() < 0)
539 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
542 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
545 *keep_stdout = *keep_stdin = false;
551 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
552 bool keep_groups = false;
557 /* Lookup and set GID and supplementary group list. Here too
558 * we avoid NSS lookups for gid=0. */
560 if (context->group || username) {
562 if (context->group) {
563 const char *g = context->group;
565 if ((r = get_group_creds(&g, &gid)) < 0)
569 /* First step, initialize groups from /etc/group */
570 if (username && gid != 0) {
571 if (initgroups(username, gid) < 0)
577 /* Second step, set our gids */
578 if (setresgid(gid, gid, gid) < 0)
582 if (context->supplementary_groups) {
587 /* Final step, initialize any manually set supplementary groups */
588 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
590 if (!(gids = new(gid_t, ngroups_max)))
594 if ((k = getgroups(ngroups_max, gids)) < 0) {
601 STRV_FOREACH(i, context->supplementary_groups) {
604 if (k >= ngroups_max) {
610 r = get_group_creds(&g, gids+k);
619 if (setgroups(k, gids) < 0) {
630 static int enforce_user(const ExecContext *context, uid_t uid) {
634 /* Sets (but doesn't lookup) the uid and make sure we keep the
635 * capabilities while doing so. */
637 if (context->capabilities) {
639 static const cap_value_t bits[] = {
640 CAP_SETUID, /* Necessary so that we can run setresuid() below */
641 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
644 /* First step: If we need to keep capabilities but
645 * drop privileges we need to make sure we keep our
646 * caps, while we drop privileges. */
648 int sb = context->secure_bits|SECURE_KEEP_CAPS;
650 if (prctl(PR_GET_SECUREBITS) != sb)
651 if (prctl(PR_SET_SECUREBITS, sb) < 0)
655 /* Second step: set the capabilities. This will reduce
656 * the capabilities to the minimum we need. */
658 if (!(d = cap_dup(context->capabilities)))
661 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
662 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
668 if (cap_set_proc(d) < 0) {
677 /* Third step: actually set the uids */
678 if (setresuid(uid, uid, uid) < 0)
681 /* At this point we should have all necessary capabilities but
682 are otherwise a normal user. However, the caps might have gotten
683 corrupted due to the setresuid() so we need to clean them up
684 later. This is done outside of this call. */
691 static int null_conv(
693 const struct pam_message **msg,
694 struct pam_response **resp,
697 /* We don't support conversations */
702 static int setup_pam(
707 int fds[], unsigned n_fds) {
709 static const struct pam_conv conv = {
714 pam_handle_t *handle = NULL;
716 int pam_code = PAM_SUCCESS;
719 bool close_session = false;
720 pid_t pam_pid = 0, parent_pid;
726 /* We set up PAM in the parent process, then fork. The child
727 * will then stay around until killed via PR_SET_PDEATHSIG or
728 * systemd via the cgroup logic. It will then remove the PAM
729 * session again. The parent process will exec() the actual
730 * daemon. We do things this way to ensure that the main PID
731 * of the daemon is the one we initially fork()ed. */
733 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
739 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
742 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
745 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
748 close_session = true;
750 if ((!(e = pam_getenvlist(handle)))) {
751 pam_code = PAM_BUF_ERR;
755 /* Block SIGTERM, so that we know that it won't get lost in
757 if (sigemptyset(&ss) < 0 ||
758 sigaddset(&ss, SIGTERM) < 0 ||
759 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
762 parent_pid = getpid();
764 if ((pam_pid = fork()) < 0)
771 /* The child's job is to reset the PAM session on
774 /* This string must fit in 10 chars (i.e. the length
775 * of "/sbin/init"), to look pretty in /bin/ps */
776 rename_process("(sd-pam)");
778 /* Make sure we don't keep open the passed fds in this
779 child. We assume that otherwise only those fds are
780 open here that have been opened by PAM. */
781 close_many(fds, n_fds);
783 /* Wait until our parent died. This will most likely
784 * not work since the kernel does not allow
785 * unprivileged parents to kill their privileged children
786 * this way. We rely on the control groups kill logic
787 * to do the rest for us. */
788 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
791 /* Check if our parent process might already have
793 if (getppid() == parent_pid) {
795 if (sigwait(&ss, &sig) < 0) {
802 assert(sig == SIGTERM);
807 /* If our parent died we'll end the session */
808 if (getppid() != parent_pid)
809 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
815 pam_end(handle, pam_code | PAM_DATA_SILENT);
819 /* If the child was forked off successfully it will do all the
820 * cleanups, so forget about the handle here. */
823 /* Unblock SIGTERM again in the parent */
824 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
827 /* We close the log explicitly here, since the PAM modules
828 * might have opened it, but we don't want this fd around. */
837 if (pam_code != PAM_SUCCESS)
838 err = -EPERM; /* PAM errors do not map to errno */
844 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
846 pam_end(handle, pam_code | PAM_DATA_SILENT);
854 kill(pam_pid, SIGTERM);
855 kill(pam_pid, SIGCONT);
862 static int do_capability_bounding_set_drop(uint64_t drop) {
864 cap_t old_cap = NULL, new_cap = NULL;
868 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
869 * in the effective set (yes, the kernel drops that when
870 * executing init!), so get it back temporarily so that we can
871 * call PR_CAPBSET_DROP. */
873 old_cap = cap_get_proc();
877 if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
883 static const cap_value_t v = CAP_SETPCAP;
885 new_cap = cap_dup(old_cap);
891 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
896 if (cap_set_proc(new_cap) < 0) {
902 for (i = 0; i <= cap_last_cap(); i++)
903 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
904 if (prctl(PR_CAPBSET_DROP, i) < 0) {
917 cap_set_proc(old_cap);
924 static void rename_process_from_path(const char *path) {
925 char process_name[11];
929 /* This resulting string must fit in 10 chars (i.e. the length
930 * of "/sbin/init") to look pretty in /bin/ps */
932 p = file_name_from_path(path);
934 rename_process("(...)");
940 /* The end of the process name is usually more
941 * interesting, since the first bit might just be
947 process_name[0] = '(';
948 memcpy(process_name+1, p, l);
949 process_name[1+l] = ')';
950 process_name[1+l+1] = 0;
952 rename_process(process_name);
955 int exec_spawn(ExecCommand *command,
957 const ExecContext *context,
958 int fds[], unsigned n_fds,
960 bool apply_permissions,
962 bool apply_tty_stdin,
964 CGroupBonding *cgroup_bondings,
965 CGroupAttribute *cgroup_attributes,
966 const char *cgroup_suffix,
974 char **files_env = NULL;
979 assert(fds || n_fds <= 0);
981 if (context->std_input == EXEC_INPUT_SOCKET ||
982 context->std_output == EXEC_OUTPUT_SOCKET ||
983 context->std_error == EXEC_OUTPUT_SOCKET) {
995 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
996 log_error("Failed to load environment files: %s", strerror(-r));
1001 argv = command->argv;
1003 if (!(line = exec_command_line(argv))) {
1008 log_debug("About to execute: %s", line);
1011 r = cgroup_bonding_realize_list(cgroup_bondings);
1015 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1017 if ((pid = fork()) < 0) {
1025 const char *username = NULL, *home = NULL;
1026 uid_t uid = (uid_t) -1;
1027 gid_t gid = (gid_t) -1;
1028 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1030 int saved_stdout = -1, saved_stdin = -1;
1031 bool keep_stdout = false, keep_stdin = false, set_access = false;
1035 rename_process_from_path(command->path);
1037 /* We reset exactly these signals, since they are the
1038 * only ones we set to SIG_IGN in the main daemon. All
1039 * others we leave untouched because we set them to
1040 * SIG_DFL or a valid handler initially, both of which
1041 * will be demoted to SIG_DFL. */
1042 default_signals(SIGNALS_CRASH_HANDLER,
1043 SIGNALS_IGNORE, -1);
1045 if (context->ignore_sigpipe)
1046 ignore_signals(SIGPIPE, -1);
1048 assert_se(sigemptyset(&ss) == 0);
1049 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1051 r = EXIT_SIGNAL_MASK;
1056 if (idle_pipe[1] >= 0)
1057 close_nointr_nofail(idle_pipe[1]);
1058 if (idle_pipe[0] >= 0) {
1059 fd_wait_for_event(idle_pipe[0], POLLHUP, DEFAULT_TIMEOUT_USEC);
1060 close_nointr_nofail(idle_pipe[0]);
1064 /* Close sockets very early to make sure we don't
1065 * block init reexecution because it cannot bind its
1068 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1069 socket_fd >= 0 ? 1 : n_fds);
1075 if (!context->same_pgrp)
1082 if (context->tcpwrap_name) {
1084 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1090 for (i = 0; i < (int) n_fds; i++) {
1091 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1099 exec_context_tty_reset(context);
1101 /* We skip the confirmation step if we shall not apply the TTY */
1102 if (confirm_spawn &&
1103 (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1106 /* Set up terminal for the question */
1107 if ((r = setup_confirm_stdio(context,
1108 &saved_stdin, &saved_stdout))) {
1113 /* Now ask the question. */
1114 if (!(line = exec_command_line(argv))) {
1120 r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1123 if (r < 0 || response == 'n') {
1127 } else if (response == 's') {
1132 /* Release terminal for the question */
1133 if ((r = restore_confirm_stdio(context,
1134 &saved_stdin, &saved_stdout,
1135 &keep_stdin, &keep_stdout))) {
1141 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1142 * must be sure to drop O_NONBLOCK */
1144 fd_nonblock(socket_fd, false);
1147 err = setup_input(context, socket_fd, apply_tty_stdin);
1155 err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1162 err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1168 if (cgroup_bondings) {
1169 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1176 if (context->oom_score_adjust_set) {
1179 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1182 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1183 /* Compatibility with Linux <= 2.6.35 */
1187 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1188 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1190 snprintf(t, sizeof(t), "%i", adj);
1193 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1194 && errno != EACCES) {
1196 r = EXIT_OOM_ADJUST;
1202 if (context->nice_set)
1203 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1209 if (context->cpu_sched_set) {
1210 struct sched_param param;
1213 param.sched_priority = context->cpu_sched_priority;
1215 if (sched_setscheduler(0, context->cpu_sched_policy |
1216 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1218 r = EXIT_SETSCHEDULER;
1223 if (context->cpuset)
1224 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1226 r = EXIT_CPUAFFINITY;
1230 if (context->ioprio_set)
1231 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1237 if (context->timer_slack_nsec_set)
1238 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1240 r = EXIT_TIMERSLACK;
1244 if (context->utmp_id)
1245 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1247 if (context->user) {
1248 username = context->user;
1249 err = get_user_creds(&username, &uid, &gid, &home);
1255 if (is_terminal_input(context->std_input)) {
1256 err = chown_terminal(STDIN_FILENO, uid);
1263 if (cgroup_bondings && context->control_group_modify) {
1264 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1266 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1276 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1277 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1284 if (apply_permissions) {
1285 err = enforce_groups(context, username, gid);
1292 umask(context->umask);
1295 if (context->pam_name && username) {
1296 err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
1303 if (context->private_network) {
1304 if (unshare(CLONE_NEWNET) < 0) {
1313 if (strv_length(context->read_write_dirs) > 0 ||
1314 strv_length(context->read_only_dirs) > 0 ||
1315 strv_length(context->inaccessible_dirs) > 0 ||
1316 context->mount_flags != MS_SHARED ||
1317 context->private_tmp) {
1318 err = setup_namespace(context->read_write_dirs,
1319 context->read_only_dirs,
1320 context->inaccessible_dirs,
1321 context->private_tmp,
1322 context->mount_flags);
1330 if (context->root_directory)
1331 if (chroot(context->root_directory) < 0) {
1337 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1346 if (asprintf(&d, "%s/%s",
1347 context->root_directory ? context->root_directory : "",
1348 context->working_directory ? context->working_directory : "") < 0) {
1364 /* We repeat the fd closing here, to make sure that
1365 * nothing is leaked from the PAM modules */
1366 err = close_all_fds(fds, n_fds);
1368 err = shift_fds(fds, n_fds);
1370 err = flags_fds(fds, n_fds, context->non_blocking);
1376 if (apply_permissions) {
1378 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1379 if (!context->rlimit[i])
1382 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1389 if (context->capability_bounding_set_drop) {
1390 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1392 r = EXIT_CAPABILITIES;
1397 if (context->user) {
1398 err = enforce_user(context, uid);
1405 /* PR_GET_SECUREBITS is not privileged, while
1406 * PR_SET_SECUREBITS is. So to suppress
1407 * potential EPERMs we'll try not to call
1408 * PR_SET_SECUREBITS unless necessary. */
1409 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1410 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1412 r = EXIT_SECUREBITS;
1416 if (context->capabilities)
1417 if (cap_set_proc(context->capabilities) < 0) {
1419 r = EXIT_CAPABILITIES;
1424 if (!(our_env = new0(char*, 7))) {
1431 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1432 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1439 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1446 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1447 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1453 if (is_terminal_input(context->std_input) ||
1454 context->std_output == EXEC_OUTPUT_TTY ||
1455 context->std_error == EXEC_OUTPUT_TTY)
1456 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1464 if (!(final_env = strv_env_merge(
1468 context->environment,
1477 if (!(final_argv = replace_env_argv(argv, final_env))) {
1483 final_env = strv_env_clean(final_env);
1485 execve(command->path, final_argv, final_env);
1492 log_warning("Failed at step %s spawning %s: %s",
1493 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1494 command->path, strerror(-err));
1498 strv_free(final_env);
1500 strv_free(files_env);
1501 strv_free(final_argv);
1503 if (saved_stdin >= 0)
1504 close_nointr_nofail(saved_stdin);
1506 if (saved_stdout >= 0)
1507 close_nointr_nofail(saved_stdout);
1512 strv_free(files_env);
1514 /* We add the new process to the cgroup both in the child (so
1515 * that we can be sure that no user code is ever executed
1516 * outside of the cgroup) and in the parent (so that we can be
1517 * sure that when we kill the cgroup the process will be
1519 if (cgroup_bondings)
1520 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1522 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1524 exec_status_start(&command->exec_status, pid);
1530 strv_free(files_env);
1535 void exec_context_init(ExecContext *c) {
1539 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1540 c->cpu_sched_policy = SCHED_OTHER;
1541 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1542 c->syslog_level_prefix = true;
1543 c->mount_flags = MS_SHARED;
1544 c->kill_signal = SIGTERM;
1545 c->send_sigkill = true;
1546 c->control_group_persistent = -1;
1547 c->ignore_sigpipe = true;
1550 void exec_context_done(ExecContext *c) {
1555 strv_free(c->environment);
1556 c->environment = NULL;
1558 strv_free(c->environment_files);
1559 c->environment_files = NULL;
1561 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1563 c->rlimit[l] = NULL;
1566 free(c->working_directory);
1567 c->working_directory = NULL;
1568 free(c->root_directory);
1569 c->root_directory = NULL;
1574 free(c->tcpwrap_name);
1575 c->tcpwrap_name = NULL;
1577 free(c->syslog_identifier);
1578 c->syslog_identifier = NULL;
1586 strv_free(c->supplementary_groups);
1587 c->supplementary_groups = NULL;
1592 if (c->capabilities) {
1593 cap_free(c->capabilities);
1594 c->capabilities = NULL;
1597 strv_free(c->read_only_dirs);
1598 c->read_only_dirs = NULL;
1600 strv_free(c->read_write_dirs);
1601 c->read_write_dirs = NULL;
1603 strv_free(c->inaccessible_dirs);
1604 c->inaccessible_dirs = NULL;
1607 CPU_FREE(c->cpuset);
1613 void exec_command_done(ExecCommand *c) {
1623 void exec_command_done_array(ExecCommand *c, unsigned n) {
1626 for (i = 0; i < n; i++)
1627 exec_command_done(c+i);
1630 void exec_command_free_list(ExecCommand *c) {
1634 LIST_REMOVE(ExecCommand, command, c, i);
1635 exec_command_done(i);
1640 void exec_command_free_array(ExecCommand **c, unsigned n) {
1643 for (i = 0; i < n; i++) {
1644 exec_command_free_list(c[i]);
1649 int exec_context_load_environment(const ExecContext *c, char ***l) {
1650 char **i, **r = NULL;
1655 STRV_FOREACH(i, c->environment_files) {
1658 bool ignore = false;
1668 if (!path_is_absolute(fn)) {
1677 if ((k = load_env_file(fn, &p)) < 0) {
1691 m = strv_env_merge(2, r, p);
1707 static void strv_fprintf(FILE *f, char **l) {
1713 fprintf(f, " %s", *g);
1716 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1728 "%sWorkingDirectory: %s\n"
1729 "%sRootDirectory: %s\n"
1730 "%sNonBlocking: %s\n"
1731 "%sPrivateTmp: %s\n"
1732 "%sControlGroupModify: %s\n"
1733 "%sControlGroupPersistent: %s\n"
1734 "%sPrivateNetwork: %s\n",
1736 prefix, c->working_directory ? c->working_directory : "/",
1737 prefix, c->root_directory ? c->root_directory : "/",
1738 prefix, yes_no(c->non_blocking),
1739 prefix, yes_no(c->private_tmp),
1740 prefix, yes_no(c->control_group_modify),
1741 prefix, yes_no(c->control_group_persistent),
1742 prefix, yes_no(c->private_network));
1744 STRV_FOREACH(e, c->environment)
1745 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1747 STRV_FOREACH(e, c->environment_files)
1748 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1750 if (c->tcpwrap_name)
1752 "%sTCPWrapName: %s\n",
1753 prefix, c->tcpwrap_name);
1760 if (c->oom_score_adjust_set)
1762 "%sOOMScoreAdjust: %i\n",
1763 prefix, c->oom_score_adjust);
1765 for (i = 0; i < RLIM_NLIMITS; i++)
1767 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1771 "%sIOSchedulingClass: %s\n"
1772 "%sIOPriority: %i\n",
1773 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1774 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1776 if (c->cpu_sched_set)
1778 "%sCPUSchedulingPolicy: %s\n"
1779 "%sCPUSchedulingPriority: %i\n"
1780 "%sCPUSchedulingResetOnFork: %s\n",
1781 prefix, sched_policy_to_string(c->cpu_sched_policy),
1782 prefix, c->cpu_sched_priority,
1783 prefix, yes_no(c->cpu_sched_reset_on_fork));
1786 fprintf(f, "%sCPUAffinity:", prefix);
1787 for (i = 0; i < c->cpuset_ncpus; i++)
1788 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1789 fprintf(f, " %i", i);
1793 if (c->timer_slack_nsec_set)
1794 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1797 "%sStandardInput: %s\n"
1798 "%sStandardOutput: %s\n"
1799 "%sStandardError: %s\n",
1800 prefix, exec_input_to_string(c->std_input),
1801 prefix, exec_output_to_string(c->std_output),
1802 prefix, exec_output_to_string(c->std_error));
1808 "%sTTYVHangup: %s\n"
1809 "%sTTYVTDisallocate: %s\n",
1810 prefix, c->tty_path,
1811 prefix, yes_no(c->tty_reset),
1812 prefix, yes_no(c->tty_vhangup),
1813 prefix, yes_no(c->tty_vt_disallocate));
1815 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1816 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1817 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1818 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1820 "%sSyslogFacility: %s\n"
1821 "%sSyslogLevel: %s\n",
1822 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1823 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1825 if (c->capabilities) {
1827 if ((t = cap_to_text(c->capabilities, NULL))) {
1828 fprintf(f, "%sCapabilities: %s\n",
1835 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1837 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1838 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1839 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1840 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1841 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1842 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1844 if (c->capability_bounding_set_drop) {
1846 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1848 for (l = 0; l <= cap_last_cap(); l++)
1849 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1852 if ((t = cap_to_name(l))) {
1853 fprintf(f, " %s", t);
1862 fprintf(f, "%sUser: %s\n", prefix, c->user);
1864 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1866 if (strv_length(c->supplementary_groups) > 0) {
1867 fprintf(f, "%sSupplementaryGroups:", prefix);
1868 strv_fprintf(f, c->supplementary_groups);
1873 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1875 if (strv_length(c->read_write_dirs) > 0) {
1876 fprintf(f, "%sReadWriteDirs:", prefix);
1877 strv_fprintf(f, c->read_write_dirs);
1881 if (strv_length(c->read_only_dirs) > 0) {
1882 fprintf(f, "%sReadOnlyDirs:", prefix);
1883 strv_fprintf(f, c->read_only_dirs);
1887 if (strv_length(c->inaccessible_dirs) > 0) {
1888 fprintf(f, "%sInaccessibleDirs:", prefix);
1889 strv_fprintf(f, c->inaccessible_dirs);
1895 "%sKillSignal: SIG%s\n"
1896 "%sSendSIGKILL: %s\n"
1897 "%sIgnoreSIGPIPE: %s\n",
1898 prefix, kill_mode_to_string(c->kill_mode),
1899 prefix, signal_to_string(c->kill_signal),
1900 prefix, yes_no(c->send_sigkill),
1901 prefix, yes_no(c->ignore_sigpipe));
1905 "%sUtmpIdentifier: %s\n",
1906 prefix, c->utmp_id);
/* Marks the beginning of a process run in the status record *s.
 *
 * s:   status record to update; its start_timestamp is filled in below.
 * pid: PID of the process that was started.
 *      NOTE(review): presumably stored in s->pid (exec_status_exit() compares
 *      s->pid against its own pid argument) -- confirm against the full body. */
1909 void exec_status_start(ExecStatus *s, pid_t pid) {
/* Let dual_timestamp_get() fill in the start timestamp of this run. */
1914 dual_timestamp_get(&s->start_timestamp);
/* Marks the end of a process run in the status record *s and performs the
 * per-context follow-up work that is visible here (utmp and TTY handling).
 *
 * s:       status record to update; its exit_timestamp is filled in below.
 * context: execution context of the process; its utmp_id is consulted and it
 *          is handed to exec_context_tty_reset().
 * pid:     PID of the process that exited.
 * code:    exit code value, forwarded to utmp_put_dead_process().
 *          NOTE(review): presumably a SIGCHLD si_code, since exec_status_dump()
 *          formats s->code with sigchld_code_to_string() -- confirm.
 * status:  exit status value, forwarded to utmp_put_dead_process(). */
1917 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record already tracks a non-zero PID that differs from the one that
 * exited. NOTE(review): the statement guarded by this check is not visible
 * here -- confirm what happens to *s in that case. */
1920 if (s->pid && s->pid != pid)
/* Let dual_timestamp_get() fill in the exit timestamp of this run. */
1924 dual_timestamp_get(&s->exit_timestamp);
/* Only contexts that carry a utmp identifier get the process reported via
 * utmp_put_dead_process(). */
1930 if (context->utmp_id)
1931 utmp_put_dead_process(context->utmp_id, pid, code, status);
/* Hand the context to exec_context_tty_reset() now that the process is gone. */
1933 exec_context_tty_reset(context);
/* Prints a human-readable description of the status record *s.
 *
 * s:      status record to describe (PID, start/exit timestamps, exit code).
 * f:      output stream (presumably the target of the formatted output below).
 * prefix: string placed at the start of every output line (each format
 *         string below begins with "%s" fed from prefix). */
1937 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer that format_timestamp() renders into; reused for both
 * timestamps. */
1938 char buf[FORMAT_TIMESTAMP_MAX];
/* The PID is cast to unsigned long for printing. */
1951 prefix, (unsigned long) s->pid);
/* The start timestamp is only printed when its realtime value is greater
 * than zero. */
1953 if (s->start_timestamp.realtime > 0)
1955 "%sStart Timestamp: %s\n",
1956 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit details are likewise only printed when the exit timestamp's realtime
 * value is greater than zero. */
1958 if (s->exit_timestamp.realtime > 0)
1960 "%sExit Timestamp: %s\n"
1962 "%sExit Status: %i\n",
1963 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
/* The exit code is rendered as a string via sigchld_code_to_string(). */
1964 prefix, sigchld_code_to_string(s->code),
/* Builds one string out of the string vector argv.
 *
 * argv: NULL-terminated string vector (iterated with STRV_FOREACH).
 *
 * Returns a char buffer obtained with new(char, k).
 * NOTE(review): presumably NULL is returned when that allocation fails
 * (exec_command_dump() substitutes strerror(ENOMEM) for a NULL result), and
 * the caller presumably owns the returned buffer -- confirm. */
1968 char *exec_command_line(char **argv) {
/* First pass over the arguments; it precedes the allocation of k chars below.
 * NOTE(review): presumably this pass computes k -- confirm. */
1976 STRV_FOREACH(a, argv)
/* Allocate the output buffer of k chars. */
1979 if (!(n = new(char, k)))
/* Second pass over the arguments. */
1983 STRV_FOREACH(a, argv) {
/* Arguments that contain any character from WHITESPACE take a separate
 * branch. NOTE(review): presumably they get quoted -- confirm. */
1990 if (strpbrk(*a, WHITESPACE)) {
2001 /* FIXME: this doesn't really handle arguments that have
2002 * spaces and ticks in them */
/* Prints a human-readable description of a single command: its command line
 * followed by its execution status.
 *
 * c:      command to describe; c->argv and c->exec_status are read.
 * f:      output stream, passed on to exec_status_dump().
 * prefix: string placed at the start of the command-line output line. */
2007 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
/* Prefix handed to exec_status_dump() for the nested status output. */
2009 const char *prefix2;
/* Build prefix + "\t" for the nested output; if strappend() returns NULL,
 * fall back to the unmodified prefix instead of failing. */
2018 p2 = strappend(prefix, "\t");
2019 prefix2 = p2 ? p2 : prefix;
/* Render the argument vector as a single string; the result may be NULL. */
2021 cmd = exec_command_line(c->argv);
/* A NULL command line is shown as the strerror(ENOMEM) message text. */
2024 "%sCommand Line: %s\n",
2025 prefix, cmd ? cmd : strerror(ENOMEM));
/* Dump the command's status using the extended prefix. */
2029 exec_status_dump(&c->exec_status, f, prefix2);
/* Calls exec_command_dump() for each command of a list.
 *
 * c:      first command of the list to walk; the parameter itself is reused
 *         as the loop cursor by LIST_FOREACH.
 * f:      output stream, passed on to exec_command_dump().
 * prefix: line prefix, passed on unchanged to exec_command_dump(). */
2034 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* "command" is the first argument to LIST_FOREACH.
 * NOTE(review): presumably the name of the list-link member of ExecCommand
 * -- confirm against the LIST_* macro definitions. */
2040 LIST_FOREACH(command, c, c)
2041 exec_command_dump(c, f, prefix);
/* Adds the command e to the list whose head pointer is *l, placing it after
 * the current tail.
 *
 * l: address of the list head pointer.
 * e: command to insert.
 * NOTE(review): how an empty list (*l == NULL) is handled is not visible in
 * the lines shown here -- confirm against the full body. */
2044 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2051 /* The order of the commands matters, so insert after the tail of the list. */
2052 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2053 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up the command c from a path and a variadic list of further strings.
 *
 * c:    command to fill in.
 *       NOTE(review): which fields of c receive the values built below is not
 *       visible in the lines shown here -- confirm.
 * path: first string; passed to strv_new_ap() together with the variadic
 *       arguments, and additionally duplicated with strdup().
 * ...:  further strings consumed by strv_new_ap() through the va_list ap.
 *
 * Returns an int. NOTE(review): presumably 0 on success and a negative
 * errno-style value when an allocation fails -- confirm. */
2058 int exec_command_set(ExecCommand *c, const char *path, ...) {
/* Build a string vector from path followed by the variadic arguments. */
2066 l = strv_new_ap(path, ap);
/* Take a private copy of path; the branch below handles strdup() failure. */
2072 if (!(p = strdup(path))) {
/* String names of the ExecInput values, indexed by enum value and sized by
 * _EXEC_INPUT_MAX. */
2086 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2087 [EXEC_INPUT_NULL] = "null",
2088 [EXEC_INPUT_TTY] = "tty",
2089 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2090 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2091 [EXEC_INPUT_SOCKET] = "socket"
/* NOTE(review): presumably this macro generates the lookup helpers for the
 * table above, such as the exec_input_to_string() used by the dump code in
 * this file -- confirm against the macro definition. */
2094 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names of the ExecOutput values, indexed by enum value and sized by
 * _EXEC_OUTPUT_MAX. The "+console" names belong to the *_AND_CONSOLE
 * variants of the syslog, kmsg and journal outputs. */
2096 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2097 [EXEC_OUTPUT_INHERIT] = "inherit",
2098 [EXEC_OUTPUT_NULL] = "null",
2099 [EXEC_OUTPUT_TTY] = "tty",
2100 [EXEC_OUTPUT_SYSLOG] = "syslog",
2101 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2102 [EXEC_OUTPUT_KMSG] = "kmsg",
2103 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2104 [EXEC_OUTPUT_JOURNAL] = "journal",
2105 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2106 [EXEC_OUTPUT_SOCKET] = "socket"
/* NOTE(review): presumably this macro generates the lookup helpers for the
 * table above, such as the exec_output_to_string() used by the dump code in
 * this file -- confirm against the macro definition. */
2109 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* String names of the KillMode values, indexed by enum value and sized by
 * _KILL_MODE_MAX. */
2111 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2112 [KILL_CONTROL_GROUP] = "control-group",
2113 [KILL_PROCESS] = "process",
2114 [KILL_NONE] = "none"
/* NOTE(review): presumably this macro generates the lookup helpers for the
 * table above, such as the kill_mode_to_string() used by the dump code in
 * this file -- confirm against the macro definition. */
2117 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
/* String names of the KillWho values, indexed by enum value and sized by
 * _KILL_WHO_MAX.
 * NOTE(review): only the KILL_MAIN and KILL_CONTROL entries are visible here;
 * confirm whether further KillWho values have entries in the full table. */
2119 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2120 [KILL_MAIN] = "main",
2121 [KILL_CONTROL] = "control",
/* NOTE(review): presumably this macro generates the to-string/from-string
 * lookup helpers for the table above -- confirm against the macro
 * definition. */
2125 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);