1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
43 #include <security/pam_appl.h>
49 #include "capability.h"
53 #include "securebits.h"
55 #include "namespace.h"
57 #include "exit-status.h"
59 #include "utmp-wtmp.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
64 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
66 /* This assumes there is a 'tty' group */
/* Renumbers the passed descriptors so that fds[i] lives at descriptor i+3.
 * Each entry is duplicated with fcntl(F_DUPFD, i+3) and the old descriptor
 * is closed. If the kernel hands back a number other than i+3 (the wanted
 * slot was still busy), the first such index is remembered in restart_from
 * and the scan is repeated starting there. The caller's array is modified. */
69 static int shift_fds(int fds[], unsigned n_fds) {
70 int start, restart_from;
75 /* Modifies the fds array! (sorts it) */
85 for (i = start; i < (int) n_fds; i++) {
88 /* Already at right index? */
92 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
95 close_nointr_nofail(fds[i]);
98 /* Hmm, the fd we wanted isn't free? Then
99 * let's remember that and try again from here*/
100 if (nfd != i+3 && restart_from < 0)
104 if (restart_from < 0)
107 start = restart_from;
/* Prepares every descriptor in fds for being handed to the child: the
 * O_NONBLOCK state is set to `nonblock` through fd_nonblock() and
 * FD_CLOEXEC is cleared through fd_cloexec(fd, false). A negative result
 * from either helper is captured in r. */
113 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
122 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
124 for (i = 0; i < n_fds; i++) {
126 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
129 /* We unconditionally drop FD_CLOEXEC from the fds,
130 * since after all we want to pass these fds to our
133 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when
 * context->tty_path is unset. The returned string is not a copy. */
140 static const char *tty_path(const ExecContext *context) {
143 if (context->tty_path)
144 return context->tty_path;
146 return "/dev/console";
/* Runs the TTY clean-up steps that are enabled in the context:
 *  - tty_vhangup: terminal_vhangup() on tty_path(context)
 *  - tty_reset: reset_terminal() on tty_path(context)
 *  - tty_vt_disallocate: vt_disallocate(), but only when an explicit
 *    context->tty_path is configured (no /dev/console fallback here). */
149 void exec_context_tty_reset(const ExecContext *context) {
152 if (context->tty_vhangup)
153 terminal_vhangup(tty_path(context));
155 if (context->tty_reset)
156 reset_terminal(tty_path(context));
158 if (context->tty_vt_disallocate && context->tty_path)
159 vt_disallocate(context->tty_path);
/* Opens /dev/null with `flags` (O_NOCTTY is always added) and duplicates it
 * onto descriptor nfd. r becomes nfd on success or -errno if dup2() fails;
 * the temporary descriptor is closed in both cases. */
162 static int open_null_as(int flags, int nfd) {
167 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
171 r = dup2(fd, nfd) < 0 ? -errno : nfd;
172 close_nointr_nofail(fd);
/* Connects descriptor nfd to the journal's stdout stream socket.
 *
 * A SOCK_STREAM AF_UNIX socket is connected to /run/systemd/journal/stdout
 * and its read side is shut down. The identifier (context->syslog_identifier,
 * falling back to `ident`), the syslog priority, the level-prefix flag and
 * three booleans derived from `output` (forward to syslog, forward to kmsg,
 * forward to console) are then handed to a formatting call, presumably the
 * stream header written to the socket (confirm). Finally the socket is
 * dup2()ed onto nfd and the temporary descriptor is closed; r is nfd on
 * success or -errno when dup2() fails. */
179 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
181 union sockaddr_union sa;
184 assert(output < _EXEC_OUTPUT_MAX);
188 fd = socket(AF_UNIX, SOCK_STREAM, 0);
193 sa.un.sun_family = AF_UNIX;
194 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
196 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
198 close_nointr_nofail(fd);
202 if (shutdown(fd, SHUT_RD) < 0) {
203 close_nointr_nofail(fd);
215 context->syslog_identifier ? context->syslog_identifier : ident,
217 context->syslog_priority,
218 !!context->syslog_level_prefix,
219 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
220 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
221 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
224 r = dup2(fd, nfd) < 0 ? -errno : nfd;
225 close_nointr_nofail(fd);
/* Opens the terminal at `path` through open_terminal() with `mode` plus
 * O_NOCTTY and duplicates it onto descriptor nfd. r becomes nfd on success
 * or -errno if dup2() fails; the temporary descriptor is closed either way. */
231 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
237 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
241 r = dup2(fd, nfd) < 0 ? -errno : nfd;
242 close_nointr_nofail(fd);
/* True when the input type is one of the terminal variants:
 * EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE or EXEC_INPUT_TTY_FAIL. */
249 static bool is_terminal_input(ExecInput i) {
251 i == EXEC_INPUT_TTY ||
252 i == EXEC_INPUT_TTY_FORCE ||
253 i == EXEC_INPUT_TTY_FAIL;
/* Computes the input type that will actually be used. The request is
 * downgraded to EXEC_INPUT_NULL when terminal input was asked for but
 * apply_tty_stdin is false, or when socket input was asked for without a
 * valid socket_fd. Presumably std_input is returned unchanged otherwise
 * (confirm). */
256 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
258 if (is_terminal_input(std_input) && !apply_tty_stdin)
259 return EXEC_INPUT_NULL;
261 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
262 return EXEC_INPUT_NULL;
/* Computes the output type that will actually be used: socket output
 * without a valid socket_fd is downgraded to EXEC_OUTPUT_INHERIT.
 * Presumably std_output is returned unchanged otherwise (confirm). */
267 static int fixup_output(ExecOutput std_output, int socket_fd) {
269 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
270 return EXEC_OUTPUT_INHERIT;
/* Installs the child's stdin according to the effective input type (after
 * fixup_input()):
 *  - EXEC_INPUT_NULL: /dev/null opened read-only
 *  - terminal variants: the terminal is acquired via acquire_terminal()
 *    (the FAIL/FORCE flags follow the input type) and moved onto
 *    STDIN_FILENO if it is not already there
 *  - EXEC_INPUT_SOCKET: socket_fd is dup2()ed onto STDIN_FILENO
 * The visible return statements yield STDIN_FILENO on success or a
 * negative errno value. */
275 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
280 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
284 case EXEC_INPUT_NULL:
285 return open_null_as(O_RDONLY, STDIN_FILENO);
288 case EXEC_INPUT_TTY_FORCE:
289 case EXEC_INPUT_TTY_FAIL: {
292 if ((fd = acquire_terminal(
294 i == EXEC_INPUT_TTY_FAIL,
295 i == EXEC_INPUT_TTY_FORCE,
299 if (fd != STDIN_FILENO) {
300 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
301 close_nointr_nofail(fd);
308 case EXEC_INPUT_SOCKET:
309 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
312 assert_not_reached("Unknown input type");
/* Installs the child's stdout; stdin must already be set up.
 *
 * The effective input and output types are recomputed with fixup_input()
 * and fixup_output(), then:
 *  - INHERIT: if terminal input was downgraded to NULL, the TTY is opened
 *    write-only; if input is anything but NULL, stdin is duplicated onto
 *    stdout; in one further case stdout is left untouched; otherwise
 *    control falls through to the NULL case
 *  - NULL: /dev/null opened write-only
 *  - TTY: stdin is duplicated when it is a terminal, else the TTY is
 *    opened write-only (without resetting it)
 *  - SYSLOG/KMSG/JOURNAL (with or without console): connect_logger_as()
 *  - SOCKET: socket_fd is dup2()ed onto stdout
 * The visible return statements yield STDOUT_FILENO on success or a
 * negative errno value. */
316 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
323 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324 o = fixup_output(context->std_output, socket_fd);
326 /* This expects the input is already set up */
330 case EXEC_OUTPUT_INHERIT:
332 /* If input got downgraded, inherit the original value */
333 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
334 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
336 /* If the input is connected to anything that's not a /dev/null, inherit that... */
337 if (i != EXEC_INPUT_NULL)
338 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
340 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
342 return STDOUT_FILENO;
344 /* We need to open /dev/null here anew, to get the
345 * right access mode. So we fall through */
347 case EXEC_OUTPUT_NULL:
348 return open_null_as(O_WRONLY, STDOUT_FILENO);
350 case EXEC_OUTPUT_TTY:
351 if (is_terminal_input(i))
352 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
354 /* We don't reset the terminal if this is just about output */
355 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
357 case EXEC_OUTPUT_SYSLOG:
358 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
359 case EXEC_OUTPUT_KMSG:
360 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
361 case EXEC_OUTPUT_JOURNAL:
362 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
363 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
365 case EXEC_OUTPUT_SOCKET:
366 assert(socket_fd >= 0);
367 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
370 assert_not_reached("Unknown output type");
/* Installs the child's stderr; stdin and stdout must already be set up.
 *
 * The effective input, output and error types are recomputed first. stderr
 * is left untouched when error and output both inherit, input is NULL and
 * the configured input is not a terminal (one further condition applies).
 * When the error type equals the output type, or is INHERIT, stdout is
 * duplicated onto stderr. The remaining types are handled like in
 * setup_output(): /dev/null, the TTY (duplicated from stdin when that is a
 * terminal), the logger connection, or the socket.
 * The visible return statements yield STDERR_FILENO on success or a
 * negative errno value. */
374 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
381 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
382 o = fixup_output(context->std_output, socket_fd);
383 e = fixup_output(context->std_error, socket_fd);
385 /* This expects the input and output are already set up */
387 /* Don't change the stderr file descriptor if we inherit all
388 * the way and are not on a tty */
389 if (e == EXEC_OUTPUT_INHERIT &&
390 o == EXEC_OUTPUT_INHERIT &&
391 i == EXEC_INPUT_NULL &&
392 !is_terminal_input(context->std_input) &&
394 return STDERR_FILENO;
396 /* Duplicate from stdout if possible */
397 if (e == o || e == EXEC_OUTPUT_INHERIT)
398 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
402 case EXEC_OUTPUT_NULL:
403 return open_null_as(O_WRONLY, STDERR_FILENO);
405 case EXEC_OUTPUT_TTY:
406 if (is_terminal_input(i))
407 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
409 /* We don't reset the terminal if this is just about output */
410 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
412 case EXEC_OUTPUT_SYSLOG:
413 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
414 case EXEC_OUTPUT_KMSG:
415 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
416 case EXEC_OUTPUT_JOURNAL:
417 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
418 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
420 case EXEC_OUTPUT_SOCKET:
421 assert(socket_fd >= 0);
422 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
425 assert_not_reached("Unknown error type");
/* Hands the terminal behind fd over to `uid`. fchown() (group unchanged)
 * and fchmod(TTY_MODE) are attempted on a best-effort basis with their
 * results ignored; what counts is the fstat() check afterwards, which
 * compares the owner and the permission bits against uid and TTY_MODE. */
429 static int chown_terminal(int fd, uid_t uid) {
434 /* This might fail. What matters are the results. */
435 (void) fchown(fd, uid, -1);
436 (void) fchmod(fd, TTY_MODE);
438 if (fstat(fd, &st) < 0)
441 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily points stdin/stdout at the terminal so the confirmation
 * question can be asked.
 *
 * The current stdin and stdout are first saved as duplicates numbered 3 or
 * higher. The terminal is then acquired, chowned to the calling uid and
 * dup2()ed onto STDIN_FILENO and STDOUT_FILENO. On success the saved
 * descriptors are handed back through _saved_stdin/_saved_stdout; the
 * clean-up tail at the end closes the saved descriptors and the terminal
 * fd again.
 *
 * Note the return convention: positive EXIT_xxx codes, not negative
 * errno values. */
447 static int setup_confirm_stdio(const ExecContext *context,
449 int *_saved_stdout) {
450 int fd = -1, saved_stdin, saved_stdout = -1, r;
453 assert(_saved_stdin);
454 assert(_saved_stdout);
456 /* This returns positive EXIT_xxx return values instead of
457 * negative errno style values! */
459 if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
462 if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
467 if ((fd = acquire_terminal(
469 context->std_input == EXEC_INPUT_TTY_FAIL,
470 context->std_input == EXEC_INPUT_TTY_FORCE,
476 if (chown_terminal(fd, getuid()) < 0) {
481 if (dup2(fd, STDIN_FILENO) < 0) {
486 if (dup2(fd, STDOUT_FILENO) < 0) {
492 close_nointr_nofail(fd);
494 *_saved_stdin = saved_stdin;
495 *_saved_stdout = saved_stdout;
500 if (saved_stdout >= 0)
501 close_nointr_nofail(saved_stdout);
503 if (saved_stdin >= 0)
504 close_nointr_nofail(saved_stdin);
507 close_nointr_nofail(fd);
/* Counterpart of setup_confirm_stdio(), run after the question was asked.
 *
 * If the service itself wants terminal input, a keep flag is derived from
 * std_output being INHERIT or TTY (presumably stored in *keep_stdout;
 * confirm). Otherwise the terminal is released, the saved stdin/stdout are
 * dup2()ed back into place and both *keep_stdin and *keep_stdout are set
 * to false.
 *
 * Note the return convention: positive EXIT_xxx codes, not negative
 * errno values. */
512 static int restore_confirm_stdio(const ExecContext *context,
520 assert(*saved_stdin >= 0);
521 assert(saved_stdout);
522 assert(*saved_stdout >= 0);
524 /* This returns positive EXIT_xxx return values instead of
525 * negative errno style values! */
527 if (is_terminal_input(context->std_input)) {
529 /* The service wants terminal input. */
533 context->std_output == EXEC_OUTPUT_INHERIT ||
534 context->std_output == EXEC_OUTPUT_TTY;
537 /* If the service doesn't want a controlling terminal,
538 * then we need to get rid entirely of what we have
541 if (release_terminal() < 0)
544 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
547 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
550 *keep_stdout = *keep_stdin = false;
/* Applies the group identity for the child process.
 *
 * When a group or a user name is configured: an explicit context->group is
 * resolved with get_group_creds() (overriding `gid`), initgroups() is run
 * for a named user unless gid is 0, and setresgid() switches real,
 * effective and saved gid. When supplementary groups are configured they
 * are resolved one by one and appended to a buffer of at most
 * sysconf(_SC_NGROUPS_MAX) entries that is first filled by getgroups(),
 * and the result is installed with setgroups(). */
556 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
557 bool keep_groups = false;
562 /* Lookup and set GID and supplementary group list. Here too
563 * we avoid NSS lookups for gid=0. */
565 if (context->group || username) {
567 if (context->group) {
568 const char *g = context->group;
570 if ((r = get_group_creds(&g, &gid)) < 0)
574 /* First step, initialize groups from /etc/group */
575 if (username && gid != 0) {
576 if (initgroups(username, gid) < 0)
582 /* Second step, set our gids */
583 if (setresgid(gid, gid, gid) < 0)
587 if (context->supplementary_groups) {
592 /* Final step, initialize any manually set supplementary groups */
593 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
595 if (!(gids = new(gid_t, ngroups_max)))
599 if ((k = getgroups(ngroups_max, gids)) < 0) {
606 STRV_FOREACH(i, context->supplementary_groups) {
609 if (k >= ngroups_max) {
615 r = get_group_creds(&g, gids+k);
624 if (setgroups(k, gids) < 0) {
/* Switches the process to `uid` (no name lookup happens here) while
 * keeping the configured capabilities alive across the switch.
 *
 * Only when context->capabilities is set: SECURE_KEEP_CAPS is added to the
 * secure bits if not already in effect, and a copy of the capability set
 * with CAP_SETUID and CAP_SETPCAP raised in the effective and permitted
 * sets is applied. In all cases setresuid() then changes real, effective
 * and saved uid. Reducing the capabilities to their final value is left
 * to the caller. */
635 static int enforce_user(const ExecContext *context, uid_t uid) {
639 /* Sets (but doesn't lookup) the uid and make sure we keep the
640 * capabilities while doing so. */
642 if (context->capabilities) {
644 static const cap_value_t bits[] = {
645 CAP_SETUID, /* Necessary so that we can run setresuid() below */
646 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
649 /* First step: If we need to keep capabilities but
650 * drop privileges we need to make sure we keep our
651 * caps, while we drop privileges. */
653 int sb = context->secure_bits|SECURE_KEEP_CAPS;
655 if (prctl(PR_GET_SECUREBITS) != sb)
656 if (prctl(PR_SET_SECUREBITS, sb) < 0)
660 /* Second step: set the capabilities. This will reduce
661 * the capabilities to the minimum we need. */
663 if (!(d = cap_dup(context->capabilities)))
666 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
667 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
673 if (cap_set_proc(d) < 0) {
682 /* Third step: actually set the uids */
683 if (setresuid(uid, uid, uid) < 0)
686 /* At this point we should have all necessary capabilities but
687 are otherwise a normal user. However, the caps might have got
688 corrupted due to the setresuid() so we need to clean them up
689 later. This is done outside of this call. */
/* PAM conversation callback. Interactive conversations are not supported;
 * presumably every request is answered with a PAM conversation error
 * (confirm). */
696 static int null_conv(
698 const struct pam_message **msg,
699 struct pam_response **resp,
702 /* We don't support conversations */
/* Opens a PAM session for the service and forks a helper that closes it.
 *
 * In the calling process: pam_start(), PAM_TTY item, pam_acct_mgmt(),
 * pam_open_session() and pam_getenvlist() are run, SIGTERM is blocked and
 * a child is forked.
 *
 * The child renames itself "(sd-pam)", closes the passed fds, tries to
 * drop to `uid`, asks for SIGTERM on parent death and, if the parent is
 * still alive, waits for that signal. Once the parent is gone it closes
 * the PAM session and ends the handle.
 *
 * The caller afterwards restores its signal mask and goes on to exec the
 * daemon, so the daemon keeps the originally forked PID. On failure the
 * PAM state is torn down again, a PAM error is reported as -EPERM, and a
 * helper that was already forked is sent SIGTERM and SIGCONT.
 *
 * NOTE(review): after the child's setresuid() fails, the log message prints
 * strerror(-r), but r is not assigned from that call; errno looks like the
 * intended source. Confirm. */
707 static int setup_pam(
713 int fds[], unsigned n_fds) {
715 static const struct pam_conv conv = {
720 pam_handle_t *handle = NULL;
722 int pam_code = PAM_SUCCESS;
725 bool close_session = false;
726 pid_t pam_pid = 0, parent_pid;
732 /* We set up PAM in the parent process, then fork. The child
733 * will then stay around until killed via PR_GET_PDEATHSIG or
734 * systemd via the cgroup logic. It will then remove the PAM
735 * session again. The parent process will exec() the actual
736 * daemon. We do things this way to ensure that the main PID
737 * of the daemon is the one we initially fork()ed. */
739 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
745 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
748 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
751 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
754 close_session = true;
756 if ((!(e = pam_getenvlist(handle)))) {
757 pam_code = PAM_BUF_ERR;
761 /* Block SIGTERM, so that we know that it won't get lost in
763 if (sigemptyset(&ss) < 0 ||
764 sigaddset(&ss, SIGTERM) < 0 ||
765 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
768 parent_pid = getpid();
770 if ((pam_pid = fork()) < 0)
777 /* The child's job is to reset the PAM session on
780 /* This string must fit in 10 chars (i.e. the length
781 * of "/sbin/init"), to look pretty in /bin/ps */
782 rename_process("(sd-pam)");
784 /* Make sure we don't keep open the passed fds in this
785 child. We assume that otherwise only those fds are
786 open here that have been opened by PAM. */
787 close_many(fds, n_fds);
789 /* Drop privileges - we don't need any to pam_close_session
790 * and this will make PR_SET_PDEATHSIG work in most cases.
791 * If this fails, ignore the error - but expect sd-pam threads
792 * to fail to exit normally */
793 if (setresuid(uid, uid, uid) < 0)
794 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
796 /* Wait until our parent died. This will only work if
797 * the above setresuid() succeeds, otherwise the kernel
798 * will not allow unprivileged parents to kill their privileged
799 * children this way. We rely on the control groups kill logic
800 * to do the rest for us. */
801 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
804 /* Check if our parent process might already have
806 if (getppid() == parent_pid) {
808 if (sigwait(&ss, &sig) < 0) {
815 assert(sig == SIGTERM);
820 /* If our parent died we'll end the session */
821 if (getppid() != parent_pid)
822 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
828 pam_end(handle, pam_code | PAM_DATA_SILENT);
832 /* If the child was forked off successfully it will do all the
833 * cleanups, so forget about the handle here. */
836 /* Unblock SIGTERM again in the parent */
837 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
840 /* We close the log explicitly here, since the PAM modules
841 * might have opened it, but we don't want this fd around. */
850 if (pam_code != PAM_SUCCESS)
851 err = -EPERM; /* PAM errors do not map to errno */
857 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
859 pam_end(handle, pam_code | PAM_DATA_SILENT);
867 kill(pam_pid, SIGTERM);
868 kill(pam_pid, SIGCONT);
/* Renames the current process after the binary it is about to execute.
 * The file name component of `path` is wrapped in parentheses inside an
 * 11-byte buffer (10 visible characters plus NUL) and passed to
 * rename_process(); "(...)" is used as a fallback name. l is the number of
 * bytes of the file name that get copied; per the comment below, the tail
 * of a long name is preferred over its head. */
875 static void rename_process_from_path(const char *path) {
876 char process_name[11];
880 /* This resulting string must fit in 10 chars (i.e. the length
881 * of "/sbin/init") to look pretty in /bin/ps */
883 p = path_get_file_name(path);
885 rename_process("(...)");
891 /* The end of the process name is usually more
892 * interesting, since the first bit might just be
898 process_name[0] = '(';
899 memcpy(process_name+1, p, l);
900 process_name[1+l] = ')';
901 process_name[1+l+1] = 0;
903 rename_process(process_name);
/* Forks a child and executes `command` in it with the execution
 * environment described by `context`.
 *
 * Before forking: the environment files are loaded, the command line is
 * formatted for the debug log, and the cgroup bondings are realized and
 * their attributes applied.
 *
 * In the child, in this order: process rename; signal dispositions and
 * mask reset; wait on the idle pipe (bounded by IDLE_TIMEOUT_USEC); early
 * close of all descriptors except the passed ones; optional TCP wrapper
 * check; TTY reset; optional confirmation prompt; stdin/stdout/stderr
 * set-up; cgroup membership; OOM score adjustment, nice level, CPU
 * scheduler, CPU affinity, I/O priority, timer slack; utmp record; user
 * lookup, terminal ownership and cgroup access rights; groups and umask
 * (only with apply_permissions); PAM session; private network and mount
 * namespace; chroot/chdir; second descriptor close, renumbering via
 * shift_fds() and flag fix-up via flags_fds(); resource limits,
 * capability bounding set, uid switch, secure bits and capabilities (only
 * with apply_permissions); environment assembly (LISTEN_PID, LISTEN_FDS,
 * HOME, LOGNAME, USER, a TERM default, merged with the configured
 * environment); finally execve(). A failing step is logged together with
 * its EXIT_xxx step name.
 *
 * In the parent: the new pid is also added to the cgroups, the fork is
 * logged and the start is recorded in command->exec_status.
 *
 * NOTE(review): the last argument of the sched_setscheduler() call below
 * reads as a pilcrow followed by "m", which looks like a mis-encoded
 * "&param". Confirm against the pristine file. */
906 int exec_spawn(ExecCommand *command,
908 const ExecContext *context,
909 int fds[], unsigned n_fds,
911 bool apply_permissions,
913 bool apply_tty_stdin,
915 CGroupBonding *cgroup_bondings,
916 CGroupAttribute *cgroup_attributes,
917 const char *cgroup_suffix,
926 char **files_env = NULL;
931 assert(fds || n_fds <= 0);
933 if (context->std_input == EXEC_INPUT_SOCKET ||
934 context->std_output == EXEC_OUTPUT_SOCKET ||
935 context->std_error == EXEC_OUTPUT_SOCKET) {
947 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
948 log_error("Failed to load environment files: %s", strerror(-r));
953 argv = command->argv;
955 if (!(line = exec_command_line(argv))) {
960 log_debug("About to execute: %s", line);
963 r = cgroup_bonding_realize_list(cgroup_bondings);
967 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
969 if ((pid = fork()) < 0) {
977 const char *username = NULL, *home = NULL;
978 uid_t uid = (uid_t) -1;
979 gid_t gid = (gid_t) -1;
980 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
982 int saved_stdout = -1, saved_stdin = -1;
983 bool keep_stdout = false, keep_stdin = false, set_access = false;
987 rename_process_from_path(command->path);
989 /* We reset exactly these signals, since they are the
990 * only ones we set to SIG_IGN in the main daemon. All
991 * others we leave untouched because we set them to
992 * SIG_DFL or a valid handler initially, both of which
993 * will be demoted to SIG_DFL. */
994 default_signals(SIGNALS_CRASH_HANDLER,
997 if (context->ignore_sigpipe)
998 ignore_signals(SIGPIPE, -1);
1000 assert_se(sigemptyset(&ss) == 0);
1001 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1003 r = EXIT_SIGNAL_MASK;
1008 if (idle_pipe[1] >= 0)
1009 close_nointr_nofail(idle_pipe[1]);
1010 if (idle_pipe[0] >= 0) {
1011 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1012 close_nointr_nofail(idle_pipe[0]);
1016 /* Close sockets very early to make sure we don't
1017 * block init reexecution because it cannot bind its
1020 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1021 socket_fd >= 0 ? 1 : n_fds);
1027 if (!context->same_pgrp)
1034 if (context->tcpwrap_name) {
1036 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1042 for (i = 0; i < (int) n_fds; i++) {
1043 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1051 exec_context_tty_reset(context);
1053 /* We skip the confirmation step if we shall not apply the TTY */
1054 if (confirm_spawn &&
1055 (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1058 /* Set up terminal for the question */
1059 if ((r = setup_confirm_stdio(context,
1060 &saved_stdin, &saved_stdout))) {
1065 /* Now ask the question. */
1066 if (!(line = exec_command_line(argv))) {
1072 r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1075 if (r < 0 || response == 'n') {
1079 } else if (response == 's') {
1084 /* Release terminal for the question */
1085 if ((r = restore_confirm_stdio(context,
1086 &saved_stdin, &saved_stdout,
1087 &keep_stdin, &keep_stdout))) {
1093 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1094 * must make sure to drop O_NONBLOCK */
1096 fd_nonblock(socket_fd, false);
1099 err = setup_input(context, socket_fd, apply_tty_stdin);
1107 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1114 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1120 if (cgroup_bondings) {
1121 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1128 if (context->oom_score_adjust_set) {
1131 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1134 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1136 r = EXIT_OOM_ADJUST;
1141 if (context->nice_set)
1142 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1148 if (context->cpu_sched_set) {
1149 struct sched_param param;
1152 param.sched_priority = context->cpu_sched_priority;
1154 if (sched_setscheduler(0, context->cpu_sched_policy |
1155 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1157 r = EXIT_SETSCHEDULER;
1162 if (context->cpuset)
1163 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1165 r = EXIT_CPUAFFINITY;
1169 if (context->ioprio_set)
1170 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1176 if (context->timer_slack_nsec != (nsec_t) -1)
1177 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1179 r = EXIT_TIMERSLACK;
1183 if (context->utmp_id)
1184 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1186 if (context->user) {
1187 username = context->user;
1188 err = get_user_creds(&username, &uid, &gid, &home);
1194 if (is_terminal_input(context->std_input)) {
1195 err = chown_terminal(STDIN_FILENO, uid);
1202 if (cgroup_bondings && context->control_group_modify) {
1203 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1205 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1215 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1216 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1223 if (apply_permissions) {
1224 err = enforce_groups(context, username, gid);
1231 umask(context->umask);
1234 if (context->pam_name && username) {
1235 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1242 if (context->private_network) {
1243 if (unshare(CLONE_NEWNET) < 0) {
1252 if (strv_length(context->read_write_dirs) > 0 ||
1253 strv_length(context->read_only_dirs) > 0 ||
1254 strv_length(context->inaccessible_dirs) > 0 ||
1255 context->mount_flags != MS_SHARED ||
1256 context->private_tmp) {
1257 err = setup_namespace(context->read_write_dirs,
1258 context->read_only_dirs,
1259 context->inaccessible_dirs,
1260 context->private_tmp,
1261 context->mount_flags);
1269 if (context->root_directory)
1270 if (chroot(context->root_directory) < 0) {
1276 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1285 if (asprintf(&d, "%s/%s",
1286 context->root_directory ? context->root_directory : "",
1287 context->working_directory ? context->working_directory : "") < 0) {
1303 /* We repeat the fd closing here, to make sure that
1304 * nothing is leaked from the PAM modules */
1305 err = close_all_fds(fds, n_fds);
1307 err = shift_fds(fds, n_fds);
1309 err = flags_fds(fds, n_fds, context->non_blocking);
1315 if (apply_permissions) {
1317 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1318 if (!context->rlimit[i])
1321 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1328 if (context->capability_bounding_set_drop) {
1329 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1331 r = EXIT_CAPABILITIES;
1336 if (context->user) {
1337 err = enforce_user(context, uid);
1344 /* PR_GET_SECUREBITS is not privileged, while
1345 * PR_SET_SECUREBITS is. So to suppress
1346 * potential EPERMs we'll try not to call
1347 * PR_SET_SECUREBITS unless necessary. */
1348 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1349 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1351 r = EXIT_SECUREBITS;
1355 if (context->capabilities)
1356 if (cap_set_proc(context->capabilities) < 0) {
1358 r = EXIT_CAPABILITIES;
1363 if (!(our_env = new0(char*, 7))) {
1370 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1371 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1378 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1385 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1386 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1392 if (is_terminal_input(context->std_input) ||
1393 context->std_output == EXEC_OUTPUT_TTY ||
1394 context->std_error == EXEC_OUTPUT_TTY)
1395 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1403 if (!(final_env = strv_env_merge(
1407 context->environment,
1416 if (!(final_argv = replace_env_argv(argv, final_env))) {
1422 final_env = strv_env_clean(final_env);
1424 execve(command->path, final_argv, final_env);
1431 log_warning("Failed at step %s spawning %s: %s",
1432 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1433 command->path, strerror(-err));
1437 strv_free(final_env);
1439 strv_free(files_env);
1440 strv_free(final_argv);
1442 if (saved_stdin >= 0)
1443 close_nointr_nofail(saved_stdin);
1445 if (saved_stdout >= 0)
1446 close_nointr_nofail(saved_stdout);
1451 strv_free(files_env);
1453 /* We add the new process to the cgroup both in the child (so
1454 * that we can be sure that no user code is ever executed
1455 * outside of the cgroup) and in the parent (so that we can be
1456 * sure that when we kill the cgroup the process will be
1458 if (cgroup_bondings)
1459 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1461 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1463 exec_status_start(&command->exec_status, pid);
1469 strv_free(files_env);
/* Fills an ExecContext with its defaults: best-effort I/O class at
 * priority data 0, SCHED_OTHER, syslog priority LOG_DAEMON|LOG_INFO with
 * level prefixes enabled, MS_SHARED mount propagation, SIGTERM as kill
 * signal with SIGKILL follow-up enabled, SIGPIPE ignored, and -1 as the
 * "unset" marker for control_group_persistent and timer_slack_nsec. */
1474 void exec_context_init(ExecContext *c) {
1478 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1479 c->cpu_sched_policy = SCHED_OTHER;
1480 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1481 c->syslog_level_prefix = true;
1482 c->mount_flags = MS_SHARED;
1483 c->kill_signal = SIGTERM;
1484 c->send_sigkill = true;
1485 c->control_group_persistent = -1;
1486 c->ignore_sigpipe = true;
1487 c->timer_slack_nsec = (nsec_t) -1;
/* Releases everything the context owns and resets the freed members to
 * NULL: the environment and environment-file lists, the rlimit entries,
 * the directory and identifier strings, the supplementary group list, the
 * capability set, the three directory lists of the namespace set-up and
 * the CPU set. The ExecContext structure itself is not freed here. */
1490 void exec_context_done(ExecContext *c) {
1495 strv_free(c->environment);
1496 c->environment = NULL;
1498 strv_free(c->environment_files);
1499 c->environment_files = NULL;
1501 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1503 c->rlimit[l] = NULL;
1506 free(c->working_directory);
1507 c->working_directory = NULL;
1508 free(c->root_directory);
1509 c->root_directory = NULL;
1514 free(c->tcpwrap_name);
1515 c->tcpwrap_name = NULL;
1517 free(c->syslog_identifier);
1518 c->syslog_identifier = NULL;
1526 strv_free(c->supplementary_groups);
1527 c->supplementary_groups = NULL;
1532 if (c->capabilities) {
1533 cap_free(c->capabilities);
1534 c->capabilities = NULL;
1537 strv_free(c->read_only_dirs);
1538 c->read_only_dirs = NULL;
1540 strv_free(c->read_write_dirs);
1541 c->read_write_dirs = NULL;
1543 strv_free(c->inaccessible_dirs);
1544 c->inaccessible_dirs = NULL;
1547 CPU_FREE(c->cpuset);
/* Tears down a single ExecCommand in place.
 * NOTE(review): it is invoked on elements of a plain array as well as on
 * list members that are free()d separately, so it presumably releases the
 * command's members but not the structure itself. Confirm. */
1553 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n commands stored contiguously
 * starting at c. */
1563 void exec_command_done_array(ExecCommand *c, unsigned n) {
1566 for (i = 0; i < n; i++)
1567 exec_command_done(c+i);
/* Dismantles a linked list of commands: each entry is unlinked with
 * LIST_REMOVE() and passed to exec_command_done(). */
1570 void exec_command_free_list(ExecCommand *c) {
1574 LIST_REMOVE(ExecCommand, command, c, i);
1575 exec_command_done(i);
/* Treats c as an array of n command-list heads and frees every list with
 * exec_command_free_list(). */
1580 void exec_command_free_array(ExecCommand **c, unsigned n) {
1583 for (i = 0; i < n; i++) {
1584 exec_command_free_list(c[i]);
/* Reads all files listed in c->environment_files and accumulates their
 * variables. Each entry must name an absolute path; it is parsed with
 * load_env_file() and the result is merged into the accumulated list with
 * strv_env_merge(). An `ignore` flag is tracked per entry, presumably so
 * that optional files may fail silently (confirm). The merged list is
 * presumably handed back through *l (confirm). */
1589 int exec_context_load_environment(const ExecContext *c, char ***l) {
1590 char **i, **r = NULL;
1595 STRV_FOREACH(i, c->environment_files) {
1598 bool ignore = false;
1608 if (!path_is_absolute(fn)) {
1617 if ((k = load_env_file(fn, &p)) < 0) {
1631 m = strv_env_merge(2, r, p);
/* Writes the strings of l to f, each one preceded by a single space. */
1647 static void strv_fprintf(FILE *f, char **l) {
1653 fprintf(f, " %s", *g);
/* Writes a human-readable description of the execution context to f, one
 * "Key: value" line per setting, each line starting with `prefix`.
 * Optional settings (TCP wrapper name, OOM score adjustment, resource
 * limits, CPU scheduling, timer slack, syslog details, capabilities, the
 * directory lists, ...) are only printed when they are configured.
 *
 * NOTE(review): the "noroot-locked" literal in the secure-bits line has no
 * leading space, unlike its five siblings, so it is printed fused to the
 * preceding label. This looks unintended.
 * NOTE(review): control_group_persistent is initialised to -1 and tested
 * with ">= 0" elsewhere in this file, yet it is passed to yes_no() here;
 * the unset state would presumably be printed as "yes". Confirm. */
1656 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1668 "%sWorkingDirectory: %s\n"
1669 "%sRootDirectory: %s\n"
1670 "%sNonBlocking: %s\n"
1671 "%sPrivateTmp: %s\n"
1672 "%sControlGroupModify: %s\n"
1673 "%sControlGroupPersistent: %s\n"
1674 "%sPrivateNetwork: %s\n",
1676 prefix, c->working_directory ? c->working_directory : "/",
1677 prefix, c->root_directory ? c->root_directory : "/",
1678 prefix, yes_no(c->non_blocking),
1679 prefix, yes_no(c->private_tmp),
1680 prefix, yes_no(c->control_group_modify),
1681 prefix, yes_no(c->control_group_persistent),
1682 prefix, yes_no(c->private_network));
1684 STRV_FOREACH(e, c->environment)
1685 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1687 STRV_FOREACH(e, c->environment_files)
1688 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1690 if (c->tcpwrap_name)
1692 "%sTCPWrapName: %s\n",
1693 prefix, c->tcpwrap_name);
1700 if (c->oom_score_adjust_set)
1702 "%sOOMScoreAdjust: %i\n",
1703 prefix, c->oom_score_adjust);
1705 for (i = 0; i < RLIM_NLIMITS; i++)
1707 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1711 "%sIOSchedulingClass: %s\n"
1712 "%sIOPriority: %i\n",
1713 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1714 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1716 if (c->cpu_sched_set)
1718 "%sCPUSchedulingPolicy: %s\n"
1719 "%sCPUSchedulingPriority: %i\n"
1720 "%sCPUSchedulingResetOnFork: %s\n",
1721 prefix, sched_policy_to_string(c->cpu_sched_policy),
1722 prefix, c->cpu_sched_priority,
1723 prefix, yes_no(c->cpu_sched_reset_on_fork));
1726 fprintf(f, "%sCPUAffinity:", prefix);
1727 for (i = 0; i < c->cpuset_ncpus; i++)
1728 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1729 fprintf(f, " %i", i);
1733 if (c->timer_slack_nsec != (nsec_t) -1)
1734 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1737 "%sStandardInput: %s\n"
1738 "%sStandardOutput: %s\n"
1739 "%sStandardError: %s\n",
1740 prefix, exec_input_to_string(c->std_input),
1741 prefix, exec_output_to_string(c->std_output),
1742 prefix, exec_output_to_string(c->std_error));
1748 "%sTTYVHangup: %s\n"
1749 "%sTTYVTDisallocate: %s\n",
1750 prefix, c->tty_path,
1751 prefix, yes_no(c->tty_reset),
1752 prefix, yes_no(c->tty_vhangup),
1753 prefix, yes_no(c->tty_vt_disallocate));
1755 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1756 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1757 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1758 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1760 "%sSyslogFacility: %s\n"
1761 "%sSyslogLevel: %s\n",
1762 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1763 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1765 if (c->capabilities) {
1767 if ((t = cap_to_text(c->capabilities, NULL))) {
1768 fprintf(f, "%sCapabilities: %s\n",
1775 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1777 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1778 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1779 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1780 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1781 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1782 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1784 if (c->capability_bounding_set_drop) {
1786 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1788 for (l = 0; l <= cap_last_cap(); l++)
1789 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1792 if ((t = cap_to_name(l))) {
1793 fprintf(f, " %s", t);
1802 fprintf(f, "%sUser: %s\n", prefix, c->user);
1804 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1806 if (strv_length(c->supplementary_groups) > 0) {
1807 fprintf(f, "%sSupplementaryGroups:", prefix);
1808 strv_fprintf(f, c->supplementary_groups);
1813 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1815 if (strv_length(c->read_write_dirs) > 0) {
1816 fprintf(f, "%sReadWriteDirs:", prefix);
1817 strv_fprintf(f, c->read_write_dirs);
1821 if (strv_length(c->read_only_dirs) > 0) {
1822 fprintf(f, "%sReadOnlyDirs:", prefix);
1823 strv_fprintf(f, c->read_only_dirs);
1827 if (strv_length(c->inaccessible_dirs) > 0) {
1828 fprintf(f, "%sInaccessibleDirs:", prefix);
1829 strv_fprintf(f, c->inaccessible_dirs);
1835 "%sKillSignal: SIG%s\n"
1836 "%sSendSIGKILL: %s\n"
1837 "%sIgnoreSIGPIPE: %s\n",
1838 prefix, kill_mode_to_string(c->kill_mode),
1839 prefix, signal_to_string(c->kill_signal),
1840 prefix, yes_no(c->send_sigkill),
1841 prefix, yes_no(c->ignore_sigpipe));
1845 "%sUtmpIdentifier: %s\n",
1846 prefix, c->utmp_id);
/* Marks the start of a process in the ExecStatus record s.
 *
 * Only part of the body is visible in this excerpt. The visible statement
 * fills s->start_timestamp through dual_timestamp_get() (presumably with the
 * current time -- confirm against that helper). How pid is stored in s is on
 * lines not shown here. */
1849 void exec_status_start(ExecStatus *s, pid_t pid) {
1854 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in the ExecStatus record s.
 *
 * Visible steps (other lines of the body are not shown in this excerpt):
 *  - a check for s already holding a different, non-zero pid,
 *  - s->exit_timestamp is filled through dual_timestamp_get(),
 *  - if context->utmp_id is set, utmp_put_dead_process() is called with that
 *    id together with pid, code and status,
 *  - exec_context_tty_reset() is called on context.
 *
 * NOTE(review): whether context may be NULL is decided on lines not visible
 * here -- confirm before relying on it. */
1857 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* s already refers to some other pid; the action taken in that case is on a
 * line not visible in this excerpt. */
1860 if (s->pid && s->pid != pid)
1864 dual_timestamp_get(&s->exit_timestamp);
1870 if (context->utmp_id)
1871 utmp_put_dead_process(context->utmp_id, pid, code, status);
1873 exec_context_tty_reset(context);
/* Produces a human-readable dump of the ExecStatus record s.
 *
 * Every visible format string starts with "%s" fed from prefix, so each
 * emitted line begins with prefix. The output calls themselves are on lines
 * not shown in this excerpt; presumably they write to f.
 *
 * Visible output: the pid (cast to unsigned long), a "Start Timestamp" line
 * and an "Exit Timestamp"/"Exit Status" group. */
1877 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer that format_timestamp() renders into; it is reused for both
 * timestamps. */
1878 char buf[FORMAT_TIMESTAMP_MAX];
1891 prefix, (unsigned long) s->pid);
/* The start timestamp is only printed when its realtime value is greater
 * than zero. */
1893 if (s->start_timestamp.realtime > 0)
1895 "%sStart Timestamp: %s\n",
1896 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Likewise, the exit information is only printed when the exit timestamp's
 * realtime value is greater than zero. */
1898 if (s->exit_timestamp.realtime > 0)
1900 "%sExit Timestamp: %s\n"
1902 "%sExit Status: %i\n",
1903 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1904 prefix, sigchld_code_to_string(s->code),
/* Builds a single command line string from the string vector argv.
 *
 * Only fragments of the body are visible in this excerpt:
 *  - a first pass over argv (presumably accumulating the required size in
 *    k -- confirm against the hidden lines),
 *  - allocation of k chars into n,
 *  - a second pass over argv in which arguments containing a WHITESPACE
 *    character (detected with strpbrk()) take a separate branch, presumably
 *    to quote them.
 *
 * The return value on allocation failure and the ownership of the returned
 * string are decided on lines not shown here. */
1908 char *exec_command_line(char **argv) {
1916 STRV_FOREACH(a, argv)
1919 if (!(n = new(char, k)))
1923 STRV_FOREACH(a, argv) {
/* Arguments that contain whitespace are handled specially. */
1930 if (strpbrk(*a, WHITESPACE)) {
1941 /* FIXME: this doesn't really handle arguments that have
1942 * spaces and ticks in them */
/* Dumps a single ExecCommand: its command line, followed by its exec_status
 * via exec_status_dump().
 *
 * The "Command Line" format string takes prefix as its leading "%s"; the
 * nested status dump is given prefix2 instead. The output call that uses the
 * format string is on a line not shown in this excerpt; presumably it writes
 * to f. */
1947 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1949 const char *prefix2;
/* prefix2 is prefix with a tab appended; if strappend() returned NULL the
 * plain prefix is used instead. */
1958 p2 = strappend(prefix, "\t");
1959 prefix2 = p2 ? p2 : prefix;
1961 cmd = exec_command_line(c->argv);
/* If the command line could not be built (cmd is NULL), the error text for
 * ENOMEM is printed in its place. */
1964 "%sCommand Line: %s\n",
1965 prefix, cmd ? cmd : strerror(ENOMEM));
1969 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every ExecCommand in the list starting at c by calling
 * exec_command_dump() on each entry with the same f and prefix.
 *
 * The parameter c doubles as the loop cursor of LIST_FOREACH over the
 * "command" list field. */
1974 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
1980 LIST_FOREACH(command, c, c)
1981 exec_command_dump(c, f, prefix);
/* Appends the command e to the list whose head pointer is *l.
 *
 * The visible statements locate the current tail of the list and insert e
 * after it, so existing entries keep their order and e comes last.
 * NOTE(review): handling of an empty list (*l == NULL) is on lines not shown
 * in this excerpt -- confirm. */
1984 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
1991 /* It's kind of important, that we keep the order here */
1992 LIST_FIND_TAIL(ExecCommand, command, *l, end);
1993 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up the ExecCommand c from path plus a variadic argument list.
 *
 * Only two statements of the body are visible in this excerpt: a string
 * vector l is created from path and the va_list with strv_new_ap(), and path
 * is duplicated into p with strdup(), with a failure branch for a NULL
 * result. How l and p are stored in c, how the variadic list is expected to
 * be terminated, and which values are returned are on lines not shown here. */
1998 int exec_command_set(ExecCommand *c, const char *path, ...) {
2006 l = strv_new_ap(path, ap);
2012 if (!(p = strdup(path))) {
/* String names for the ExecInput enum, indexed by enum value and sized by
 * _EXEC_INPUT_MAX. The table is handed to DEFINE_STRING_TABLE_LOOKUP under
 * the name "exec_input"; presumably this is what provides the
 * exec_input_to_string() helper used elsewhere in this file -- confirm
 * against the macro definition. */
2026 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2027 [EXEC_INPUT_NULL] = "null",
2028 [EXEC_INPUT_TTY] = "tty",
2029 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2030 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2031 [EXEC_INPUT_SOCKET] = "socket"
2034 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum, indexed by enum value and sized by
 * _EXEC_OUTPUT_MAX. The "+console" entries name the *_AND_CONSOLE variants
 * of the syslog, kmsg and journal outputs. The table is handed to
 * DEFINE_STRING_TABLE_LOOKUP under the name "exec_output"; presumably this is
 * what provides the exec_output_to_string() helper used elsewhere in this
 * file -- confirm against the macro definition. */
2036 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2037 [EXEC_OUTPUT_INHERIT] = "inherit",
2038 [EXEC_OUTPUT_NULL] = "null",
2039 [EXEC_OUTPUT_TTY] = "tty",
2040 [EXEC_OUTPUT_SYSLOG] = "syslog",
2041 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2042 [EXEC_OUTPUT_KMSG] = "kmsg",
2043 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2044 [EXEC_OUTPUT_JOURNAL] = "journal",
2045 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2046 [EXEC_OUTPUT_SOCKET] = "socket"
2049 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* String names for the KillMode enum, indexed by enum value and sized by
 * _KILL_MODE_MAX. The table is handed to DEFINE_STRING_TABLE_LOOKUP under the
 * name "kill_mode"; presumably this is what provides the
 * kill_mode_to_string() helper used elsewhere in this file -- confirm against
 * the macro definition. */
2051 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2052 [KILL_CONTROL_GROUP] = "control-group",
2053 [KILL_PROCESS] = "process",
2054 [KILL_NONE] = "none"
2057 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
/* String names for the KillWho enum, indexed by enum value and sized by
 * _KILL_WHO_MAX. The table is handed to DEFINE_STRING_TABLE_LOOKUP under the
 * name "kill_who".
 * NOTE(review): the trailing comma after the "control" entry suggests at
 * least one further entry on a line not shown in this excerpt -- confirm. */
2059 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2060 [KILL_MAIN] = "main",
2061 [KILL_CONTROL] = "control",
2065 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);