1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
43 #include <security/pam_appl.h>
49 #include "capability.h"
53 #include "securebits.h"
55 #include "namespace.h"
57 #include "exit-status.h"
59 #include "utmp-wtmp.h"
61 #include "loopback-setup.h"
62 #include "path-util.h"
64 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
66 /* This assumes there is a 'tty' group */
69 static int shift_fds(int fds[], unsigned n_fds) {
70 int start, restart_from;
75 /* Modifies the fds array! (sorts it) */
85 for (i = start; i < (int) n_fds; i++) {
88 /* Already at right index? */
92 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
95 close_nointr_nofail(fds[i]);
98 /* Hmm, the fd we wanted isn't free? Then
99 * let's remember that and try again from here*/
100 if (nfd != i+3 && restart_from < 0)
104 if (restart_from < 0)
107 start = restart_from;
113 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
122 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
124 for (i = 0; i < n_fds; i++) {
126 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
129 /* We unconditionally drop FD_CLOEXEC from the fds,
130 * since after all we want to pass these fds to our
133 if ((r = fd_cloexec(fds[i], false)) < 0)
140 static const char *tty_path(const ExecContext *context) {
143 if (context->tty_path)
144 return context->tty_path;
146 return "/dev/console";
149 void exec_context_tty_reset(const ExecContext *context) {
152 if (context->tty_vhangup)
153 terminal_vhangup(tty_path(context));
155 if (context->tty_reset)
156 reset_terminal(tty_path(context));
158 if (context->tty_vt_disallocate && context->tty_path)
159 vt_disallocate(context->tty_path);
162 static int open_null_as(int flags, int nfd) {
167 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
171 r = dup2(fd, nfd) < 0 ? -errno : nfd;
172 close_nointr_nofail(fd);
179 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
181 union sockaddr_union sa;
184 assert(output < _EXEC_OUTPUT_MAX);
188 fd = socket(AF_UNIX, SOCK_STREAM, 0);
193 sa.un.sun_family = AF_UNIX;
194 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
196 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
198 close_nointr_nofail(fd);
202 if (shutdown(fd, SHUT_RD) < 0) {
203 close_nointr_nofail(fd);
215 context->syslog_identifier ? context->syslog_identifier : ident,
217 context->syslog_priority,
218 !!context->syslog_level_prefix,
219 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
220 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
221 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
224 r = dup2(fd, nfd) < 0 ? -errno : nfd;
225 close_nointr_nofail(fd);
231 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
237 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
241 r = dup2(fd, nfd) < 0 ? -errno : nfd;
242 close_nointr_nofail(fd);
249 static bool is_terminal_input(ExecInput i) {
251 i == EXEC_INPUT_TTY ||
252 i == EXEC_INPUT_TTY_FORCE ||
253 i == EXEC_INPUT_TTY_FAIL;
256 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
258 if (is_terminal_input(std_input) && !apply_tty_stdin)
259 return EXEC_INPUT_NULL;
261 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
262 return EXEC_INPUT_NULL;
267 static int fixup_output(ExecOutput std_output, int socket_fd) {
269 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
270 return EXEC_OUTPUT_INHERIT;
275 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
280 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
284 case EXEC_INPUT_NULL:
285 return open_null_as(O_RDONLY, STDIN_FILENO);
288 case EXEC_INPUT_TTY_FORCE:
289 case EXEC_INPUT_TTY_FAIL: {
292 if ((fd = acquire_terminal(
294 i == EXEC_INPUT_TTY_FAIL,
295 i == EXEC_INPUT_TTY_FORCE,
300 if (fd != STDIN_FILENO) {
301 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
302 close_nointr_nofail(fd);
309 case EXEC_INPUT_SOCKET:
310 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
313 assert_not_reached("Unknown input type");
317 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
324 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
325 o = fixup_output(context->std_output, socket_fd);
327 /* This expects the input is already set up */
331 case EXEC_OUTPUT_INHERIT:
333 /* If input got downgraded, inherit the original value */
334 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
335 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
337 /* If the input is connected to anything that's not a /dev/null, inherit that... */
338 if (i != EXEC_INPUT_NULL)
339 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
341 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
343 return STDOUT_FILENO;
345 /* We need to open /dev/null here anew, to get the
346 * right access mode. So we fall through */
348 case EXEC_OUTPUT_NULL:
349 return open_null_as(O_WRONLY, STDOUT_FILENO);
351 case EXEC_OUTPUT_TTY:
352 if (is_terminal_input(i))
353 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
355 /* We don't reset the terminal if this is just about output */
356 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
358 case EXEC_OUTPUT_SYSLOG:
359 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
360 case EXEC_OUTPUT_KMSG:
361 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
362 case EXEC_OUTPUT_JOURNAL:
363 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
364 return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
366 case EXEC_OUTPUT_SOCKET:
367 assert(socket_fd >= 0);
368 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
371 assert_not_reached("Unknown output type");
375 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
382 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
383 o = fixup_output(context->std_output, socket_fd);
384 e = fixup_output(context->std_error, socket_fd);
386 /* This expects the input and output are already set up */
388 /* Don't change the stderr file descriptor if we inherit all
389 * the way and are not on a tty */
390 if (e == EXEC_OUTPUT_INHERIT &&
391 o == EXEC_OUTPUT_INHERIT &&
392 i == EXEC_INPUT_NULL &&
393 !is_terminal_input(context->std_input) &&
395 return STDERR_FILENO;
397 /* Duplicate from stdout if possible */
398 if (e == o || e == EXEC_OUTPUT_INHERIT)
399 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
403 case EXEC_OUTPUT_NULL:
404 return open_null_as(O_WRONLY, STDERR_FILENO);
406 case EXEC_OUTPUT_TTY:
407 if (is_terminal_input(i))
408 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
410 /* We don't reset the terminal if this is just about output */
411 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
413 case EXEC_OUTPUT_SYSLOG:
414 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
415 case EXEC_OUTPUT_KMSG:
416 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
417 case EXEC_OUTPUT_JOURNAL:
418 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
419 return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
421 case EXEC_OUTPUT_SOCKET:
422 assert(socket_fd >= 0);
423 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
426 assert_not_reached("Unknown error type");
430 static int chown_terminal(int fd, uid_t uid) {
435 /* This might fail. What matters are the results. */
436 (void) fchown(fd, uid, -1);
437 (void) fchmod(fd, TTY_MODE);
439 if (fstat(fd, &st) < 0)
442 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
448 static int setup_confirm_stdio(int *_saved_stdin,
449 int *_saved_stdout) {
450 int fd = -1, saved_stdin, saved_stdout = -1, r;
452 assert(_saved_stdin);
453 assert(_saved_stdout);
455 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
459 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
460 if (saved_stdout < 0) {
465 fd = acquire_terminal(
470 DEFAULT_CONFIRM_USEC);
476 r = chown_terminal(fd, getuid());
480 if (dup2(fd, STDIN_FILENO) < 0) {
485 if (dup2(fd, STDOUT_FILENO) < 0) {
491 close_nointr_nofail(fd);
493 *_saved_stdin = saved_stdin;
494 *_saved_stdout = saved_stdout;
499 if (saved_stdout >= 0)
500 close_nointr_nofail(saved_stdout);
502 if (saved_stdin >= 0)
503 close_nointr_nofail(saved_stdin);
506 close_nointr_nofail(fd);
511 static int write_confirm_message(const char *format, ...) {
517 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
521 va_start(ap, format);
522 vdprintf(fd, format, ap);
525 close_nointr_nofail(fd);
530 static int restore_confirm_stdio(int *saved_stdin,
536 assert(saved_stdout);
540 if (*saved_stdin >= 0)
541 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
544 if (*saved_stdout >= 0)
545 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
548 if (*saved_stdin >= 0)
549 close_nointr_nofail(*saved_stdin);
551 if (*saved_stdout >= 0)
552 close_nointr_nofail(*saved_stdout);
557 static int ask_for_confirmation(char *response, char **argv) {
558 int saved_stdout = -1, saved_stdin = -1, r;
561 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
565 line = exec_command_line(argv);
569 r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
572 restore_confirm_stdio(&saved_stdin, &saved_stdout);
577 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
578 bool keep_groups = false;
583 /* Lookup and set GID and supplementary group list. Here too
584 * we avoid NSS lookups for gid=0. */
586 if (context->group || username) {
588 if (context->group) {
589 const char *g = context->group;
591 if ((r = get_group_creds(&g, &gid)) < 0)
595 /* First step, initialize groups from /etc/group */
596 if (username && gid != 0) {
597 if (initgroups(username, gid) < 0)
603 /* Second step, set our gids */
604 if (setresgid(gid, gid, gid) < 0)
608 if (context->supplementary_groups) {
613 /* Final step, initialize any manually set supplementary groups */
614 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
616 if (!(gids = new(gid_t, ngroups_max)))
620 if ((k = getgroups(ngroups_max, gids)) < 0) {
627 STRV_FOREACH(i, context->supplementary_groups) {
630 if (k >= ngroups_max) {
636 r = get_group_creds(&g, gids+k);
645 if (setgroups(k, gids) < 0) {
656 static int enforce_user(const ExecContext *context, uid_t uid) {
660 /* Sets (but doesn't look up) the uid and makes sure we keep the
661 * capabilities while doing so. */
663 if (context->capabilities) {
665 static const cap_value_t bits[] = {
666 CAP_SETUID, /* Necessary so that we can run setresuid() below */
667 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
670 /* First step: If we need to keep capabilities but
671 * drop privileges we need to make sure we keep our
672 * caps, while we drop privileges. */
674 int sb = context->secure_bits|SECURE_KEEP_CAPS;
676 if (prctl(PR_GET_SECUREBITS) != sb)
677 if (prctl(PR_SET_SECUREBITS, sb) < 0)
681 /* Second step: set the capabilities. This will reduce
682 * the capabilities to the minimum we need. */
684 if (!(d = cap_dup(context->capabilities)))
687 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
688 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
694 if (cap_set_proc(d) < 0) {
703 /* Third step: actually set the uids */
704 if (setresuid(uid, uid, uid) < 0)
707 /* At this point we should have all necessary capabilities but
708 are otherwise a normal user. However, the caps might have got
709 corrupted due to the setresuid() so we need to clean them up
710 later. This is done outside of this call. */
717 static int null_conv(
719 const struct pam_message **msg,
720 struct pam_response **resp,
723 /* We don't support conversations */
728 static int setup_pam(
734 int fds[], unsigned n_fds) {
736 static const struct pam_conv conv = {
741 pam_handle_t *handle = NULL;
743 int pam_code = PAM_SUCCESS;
746 bool close_session = false;
747 pid_t pam_pid = 0, parent_pid;
753 /* We set up PAM in the parent process, then fork. The child
754 * will then stay around until killed via PR_SET_PDEATHSIG or
755 * systemd via the cgroup logic. It will then remove the PAM
756 * session again. The parent process will exec() the actual
757 * daemon. We do things this way to ensure that the main PID
758 * of the daemon is the one we initially fork()ed. */
760 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
766 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
769 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
772 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
775 close_session = true;
777 if ((!(e = pam_getenvlist(handle)))) {
778 pam_code = PAM_BUF_ERR;
782 /* Block SIGTERM, so that we know that it won't get lost in
784 if (sigemptyset(&ss) < 0 ||
785 sigaddset(&ss, SIGTERM) < 0 ||
786 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
789 parent_pid = getpid();
791 if ((pam_pid = fork()) < 0)
798 /* The child's job is to reset the PAM session on
801 /* This string must fit in 10 chars (i.e. the length
802 * of "/sbin/init"), to look pretty in /bin/ps */
803 rename_process("(sd-pam)");
805 /* Make sure we don't keep open the passed fds in this
806 child. We assume that otherwise only those fds are
807 open here that have been opened by PAM. */
808 close_many(fds, n_fds);
810 /* Drop privileges - we don't need any to pam_close_session
811 * and this will make PR_SET_PDEATHSIG work in most cases.
812 * If this fails, ignore the error - but expect sd-pam threads
813 * to fail to exit normally */
814 if (setresuid(uid, uid, uid) < 0)
815 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
817 /* Wait until our parent died. This will only work if
818 * the above setresuid() succeeds, otherwise the kernel
819 * will not allow unprivileged parents to kill their privileged
820 * children this way. We rely on the control groups kill logic
821 * to do the rest for us. */
822 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
825 /* Check if our parent process might already have
827 if (getppid() == parent_pid) {
829 if (sigwait(&ss, &sig) < 0) {
836 assert(sig == SIGTERM);
841 /* If our parent died we'll end the session */
842 if (getppid() != parent_pid)
843 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
849 pam_end(handle, pam_code | PAM_DATA_SILENT);
853 /* If the child was forked off successfully it will do all the
854 * cleanups, so forget about the handle here. */
857 /* Unblock SIGTERM again in the parent */
858 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
861 /* We close the log explicitly here, since the PAM modules
862 * might have opened it, but we don't want this fd around. */
871 if (pam_code != PAM_SUCCESS)
872 err = -EPERM; /* PAM errors do not map to errno */
878 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
880 pam_end(handle, pam_code | PAM_DATA_SILENT);
888 kill(pam_pid, SIGTERM);
889 kill(pam_pid, SIGCONT);
896 static void rename_process_from_path(const char *path) {
897 char process_name[11];
901 /* This resulting string must fit in 10 chars (i.e. the length
902 * of "/sbin/init") to look pretty in /bin/ps */
904 p = path_get_file_name(path);
906 rename_process("(...)");
912 /* The end of the process name is usually more
913 * interesting, since the first bit might just be
919 process_name[0] = '(';
920 memcpy(process_name+1, p, l);
921 process_name[1+l] = ')';
922 process_name[1+l+1] = 0;
924 rename_process(process_name);
927 int exec_spawn(ExecCommand *command,
929 const ExecContext *context,
930 int fds[], unsigned n_fds,
932 bool apply_permissions,
934 bool apply_tty_stdin,
936 CGroupBonding *cgroup_bondings,
937 CGroupAttribute *cgroup_attributes,
938 const char *cgroup_suffix,
947 char **files_env = NULL;
952 assert(fds || n_fds <= 0);
954 if (context->std_input == EXEC_INPUT_SOCKET ||
955 context->std_output == EXEC_OUTPUT_SOCKET ||
956 context->std_error == EXEC_OUTPUT_SOCKET) {
968 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
969 log_error("Failed to load environment files: %s", strerror(-r));
974 argv = command->argv;
976 line = exec_command_line(argv);
982 log_debug("About to execute: %s", line);
985 r = cgroup_bonding_realize_list(cgroup_bondings);
989 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
991 if ((pid = fork()) < 0) {
999 const char *username = NULL, *home = NULL;
1000 uid_t uid = (uid_t) -1;
1001 gid_t gid = (gid_t) -1;
1002 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1004 bool set_access = false;
1008 rename_process_from_path(command->path);
1010 /* We reset exactly these signals, since they are the
1011 * only ones we set to SIG_IGN in the main daemon. All
1012 * others we leave untouched because we set them to
1013 * SIG_DFL or a valid handler initially, both of which
1014 * will be demoted to SIG_DFL. */
1015 default_signals(SIGNALS_CRASH_HANDLER,
1016 SIGNALS_IGNORE, -1);
1018 if (context->ignore_sigpipe)
1019 ignore_signals(SIGPIPE, -1);
1021 assert_se(sigemptyset(&ss) == 0);
1022 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1024 r = EXIT_SIGNAL_MASK;
1029 if (idle_pipe[1] >= 0)
1030 close_nointr_nofail(idle_pipe[1]);
1031 if (idle_pipe[0] >= 0) {
1032 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1033 close_nointr_nofail(idle_pipe[0]);
1037 /* Close sockets very early to make sure we don't
1038 * block init reexecution because it cannot bind its
1041 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1042 socket_fd >= 0 ? 1 : n_fds);
1048 if (!context->same_pgrp)
1055 if (context->tcpwrap_name) {
1057 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1063 for (i = 0; i < (int) n_fds; i++) {
1064 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1072 exec_context_tty_reset(context);
1074 if (confirm_spawn) {
1077 err = ask_for_confirmation(&response, argv);
1078 if (err == -ETIMEDOUT)
1079 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1081 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1082 else if (response == 's') {
1083 write_confirm_message("Skipping execution.\n");
1087 } else if (response == 'n') {
1088 write_confirm_message("Failing execution.\n");
1094 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1095 * must be sure to drop O_NONBLOCK */
1097 fd_nonblock(socket_fd, false);
1099 err = setup_input(context, socket_fd, apply_tty_stdin);
1105 err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1111 err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
1117 if (cgroup_bondings) {
1118 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1125 if (context->oom_score_adjust_set) {
1128 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1131 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1133 r = EXIT_OOM_ADJUST;
1138 if (context->nice_set)
1139 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1145 if (context->cpu_sched_set) {
1146 struct sched_param param;
1149 param.sched_priority = context->cpu_sched_priority;
1151 if (sched_setscheduler(0, context->cpu_sched_policy |
1152 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1154 r = EXIT_SETSCHEDULER;
1159 if (context->cpuset)
1160 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1162 r = EXIT_CPUAFFINITY;
1166 if (context->ioprio_set)
1167 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1173 if (context->timer_slack_nsec != (nsec_t) -1)
1174 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1176 r = EXIT_TIMERSLACK;
1180 if (context->utmp_id)
1181 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1183 if (context->user) {
1184 username = context->user;
1185 err = get_user_creds(&username, &uid, &gid, &home);
1191 if (is_terminal_input(context->std_input)) {
1192 err = chown_terminal(STDIN_FILENO, uid);
1199 if (cgroup_bondings && context->control_group_modify) {
1200 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1202 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1212 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1213 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1220 if (apply_permissions) {
1221 err = enforce_groups(context, username, gid);
1228 umask(context->umask);
1231 if (context->pam_name && username) {
1232 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1239 if (context->private_network) {
1240 if (unshare(CLONE_NEWNET) < 0) {
1249 if (strv_length(context->read_write_dirs) > 0 ||
1250 strv_length(context->read_only_dirs) > 0 ||
1251 strv_length(context->inaccessible_dirs) > 0 ||
1252 context->mount_flags != MS_SHARED ||
1253 context->private_tmp) {
1254 err = setup_namespace(context->read_write_dirs,
1255 context->read_only_dirs,
1256 context->inaccessible_dirs,
1257 context->private_tmp,
1258 context->mount_flags);
1266 if (context->root_directory)
1267 if (chroot(context->root_directory) < 0) {
1273 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1282 if (asprintf(&d, "%s/%s",
1283 context->root_directory ? context->root_directory : "",
1284 context->working_directory ? context->working_directory : "") < 0) {
1300 /* We repeat the fd closing here, to make sure that
1301 * nothing is leaked from the PAM modules */
1302 err = close_all_fds(fds, n_fds);
1304 err = shift_fds(fds, n_fds);
1306 err = flags_fds(fds, n_fds, context->non_blocking);
1312 if (apply_permissions) {
1314 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1315 if (!context->rlimit[i])
1318 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1325 if (context->capability_bounding_set_drop) {
1326 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1328 r = EXIT_CAPABILITIES;
1333 if (context->user) {
1334 err = enforce_user(context, uid);
1341 /* PR_GET_SECUREBITS is not privileged, while
1342 * PR_SET_SECUREBITS is. So to suppress
1343 * potential EPERMs we'll try not to call
1344 * PR_SET_SECUREBITS unless necessary. */
1345 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1346 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1348 r = EXIT_SECUREBITS;
1352 if (context->capabilities)
1353 if (cap_set_proc(context->capabilities) < 0) {
1355 r = EXIT_CAPABILITIES;
1360 if (!(our_env = new0(char*, 7))) {
1367 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1368 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1375 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1382 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1383 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1389 if (is_terminal_input(context->std_input) ||
1390 context->std_output == EXEC_OUTPUT_TTY ||
1391 context->std_error == EXEC_OUTPUT_TTY)
1392 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1400 if (!(final_env = strv_env_merge(
1404 context->environment,
1413 if (!(final_argv = replace_env_argv(argv, final_env))) {
1419 final_env = strv_env_clean(final_env);
1421 execve(command->path, final_argv, final_env);
1428 log_warning("Failed at step %s spawning %s: %s",
1429 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1430 command->path, strerror(-err));
1434 strv_free(final_env);
1436 strv_free(files_env);
1437 strv_free(final_argv);
1442 strv_free(files_env);
1444 /* We add the new process to the cgroup both in the child (so
1445 * that we can be sure that no user code is ever executed
1446 * outside of the cgroup) and in the parent (so that we can be
1447 * sure that when we kill the cgroup the process will be
1449 if (cgroup_bondings)
1450 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1452 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1454 exec_status_start(&command->exec_status, pid);
1460 strv_free(files_env);
1465 void exec_context_init(ExecContext *c) {
1469 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1470 c->cpu_sched_policy = SCHED_OTHER;
1471 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1472 c->syslog_level_prefix = true;
1473 c->mount_flags = MS_SHARED;
1474 c->kill_signal = SIGTERM;
1475 c->send_sigkill = true;
1476 c->control_group_persistent = -1;
1477 c->ignore_sigpipe = true;
1478 c->timer_slack_nsec = (nsec_t) -1;
1481 void exec_context_done(ExecContext *c) {
1486 strv_free(c->environment);
1487 c->environment = NULL;
1489 strv_free(c->environment_files);
1490 c->environment_files = NULL;
1492 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1494 c->rlimit[l] = NULL;
1497 free(c->working_directory);
1498 c->working_directory = NULL;
1499 free(c->root_directory);
1500 c->root_directory = NULL;
1505 free(c->tcpwrap_name);
1506 c->tcpwrap_name = NULL;
1508 free(c->syslog_identifier);
1509 c->syslog_identifier = NULL;
1517 strv_free(c->supplementary_groups);
1518 c->supplementary_groups = NULL;
1523 if (c->capabilities) {
1524 cap_free(c->capabilities);
1525 c->capabilities = NULL;
1528 strv_free(c->read_only_dirs);
1529 c->read_only_dirs = NULL;
1531 strv_free(c->read_write_dirs);
1532 c->read_write_dirs = NULL;
1534 strv_free(c->inaccessible_dirs);
1535 c->inaccessible_dirs = NULL;
1538 CPU_FREE(c->cpuset);
1544 void exec_command_done(ExecCommand *c) {
1554 void exec_command_done_array(ExecCommand *c, unsigned n) {
1557 for (i = 0; i < n; i++)
1558 exec_command_done(c+i);
1561 void exec_command_free_list(ExecCommand *c) {
1565 LIST_REMOVE(ExecCommand, command, c, i);
1566 exec_command_done(i);
1571 void exec_command_free_array(ExecCommand **c, unsigned n) {
1574 for (i = 0; i < n; i++) {
1575 exec_command_free_list(c[i]);
1580 int exec_context_load_environment(const ExecContext *c, char ***l) {
1581 char **i, **r = NULL;
1586 STRV_FOREACH(i, c->environment_files) {
1589 bool ignore = false;
1599 if (!path_is_absolute(fn)) {
1608 if ((k = load_env_file(fn, &p)) < 0) {
1622 m = strv_env_merge(2, r, p);
1638 static void strv_fprintf(FILE *f, char **l) {
1644 fprintf(f, " %s", *g);
1647 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1659 "%sWorkingDirectory: %s\n"
1660 "%sRootDirectory: %s\n"
1661 "%sNonBlocking: %s\n"
1662 "%sPrivateTmp: %s\n"
1663 "%sControlGroupModify: %s\n"
1664 "%sControlGroupPersistent: %s\n"
1665 "%sPrivateNetwork: %s\n",
1667 prefix, c->working_directory ? c->working_directory : "/",
1668 prefix, c->root_directory ? c->root_directory : "/",
1669 prefix, yes_no(c->non_blocking),
1670 prefix, yes_no(c->private_tmp),
1671 prefix, yes_no(c->control_group_modify),
1672 prefix, yes_no(c->control_group_persistent),
1673 prefix, yes_no(c->private_network));
1675 STRV_FOREACH(e, c->environment)
1676 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1678 STRV_FOREACH(e, c->environment_files)
1679 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1681 if (c->tcpwrap_name)
1683 "%sTCPWrapName: %s\n",
1684 prefix, c->tcpwrap_name);
1691 if (c->oom_score_adjust_set)
1693 "%sOOMScoreAdjust: %i\n",
1694 prefix, c->oom_score_adjust);
1696 for (i = 0; i < RLIM_NLIMITS; i++)
1698 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1702 "%sIOSchedulingClass: %s\n"
1703 "%sIOPriority: %i\n",
1704 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1705 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1707 if (c->cpu_sched_set)
1709 "%sCPUSchedulingPolicy: %s\n"
1710 "%sCPUSchedulingPriority: %i\n"
1711 "%sCPUSchedulingResetOnFork: %s\n",
1712 prefix, sched_policy_to_string(c->cpu_sched_policy),
1713 prefix, c->cpu_sched_priority,
1714 prefix, yes_no(c->cpu_sched_reset_on_fork));
1717 fprintf(f, "%sCPUAffinity:", prefix);
1718 for (i = 0; i < c->cpuset_ncpus; i++)
1719 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1720 fprintf(f, " %i", i);
1724 if (c->timer_slack_nsec != (nsec_t) -1)
1725 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
1728 "%sStandardInput: %s\n"
1729 "%sStandardOutput: %s\n"
1730 "%sStandardError: %s\n",
1731 prefix, exec_input_to_string(c->std_input),
1732 prefix, exec_output_to_string(c->std_output),
1733 prefix, exec_output_to_string(c->std_error));
1739 "%sTTYVHangup: %s\n"
1740 "%sTTYVTDisallocate: %s\n",
1741 prefix, c->tty_path,
1742 prefix, yes_no(c->tty_reset),
1743 prefix, yes_no(c->tty_vhangup),
1744 prefix, yes_no(c->tty_vt_disallocate));
1746 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1747 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1748 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1749 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1751 "%sSyslogFacility: %s\n"
1752 "%sSyslogLevel: %s\n",
1753 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1754 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1756 if (c->capabilities) {
1758 if ((t = cap_to_text(c->capabilities, NULL))) {
1759 fprintf(f, "%sCapabilities: %s\n",
1766 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1768 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1769 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1770 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1771 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1772 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1773 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1775 if (c->capability_bounding_set_drop) {
1777 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1779 for (l = 0; l <= cap_last_cap(); l++)
1780 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1783 if ((t = cap_to_name(l))) {
1784 fprintf(f, " %s", t);
1793 fprintf(f, "%sUser: %s\n", prefix, c->user);
1795 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1797 if (strv_length(c->supplementary_groups) > 0) {
1798 fprintf(f, "%sSupplementaryGroups:", prefix);
1799 strv_fprintf(f, c->supplementary_groups);
1804 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1806 if (strv_length(c->read_write_dirs) > 0) {
1807 fprintf(f, "%sReadWriteDirs:", prefix);
1808 strv_fprintf(f, c->read_write_dirs);
1812 if (strv_length(c->read_only_dirs) > 0) {
1813 fprintf(f, "%sReadOnlyDirs:", prefix);
1814 strv_fprintf(f, c->read_only_dirs);
1818 if (strv_length(c->inaccessible_dirs) > 0) {
1819 fprintf(f, "%sInaccessibleDirs:", prefix);
1820 strv_fprintf(f, c->inaccessible_dirs);
1826 "%sKillSignal: SIG%s\n"
1827 "%sSendSIGKILL: %s\n"
1828 "%sIgnoreSIGPIPE: %s\n",
1829 prefix, kill_mode_to_string(c->kill_mode),
1830 prefix, signal_to_string(c->kill_signal),
1831 prefix, yes_no(c->send_sigkill),
1832 prefix, yes_no(c->ignore_sigpipe));
1836 "%sUtmpIdentifier: %s\n",
1837 prefix, c->utmp_id);
1840 void exec_status_start(ExecStatus *s, pid_t pid) {
1845 dual_timestamp_get(&s->start_timestamp);
1848 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
1851 if (s->pid && s->pid != pid)
1855 dual_timestamp_get(&s->exit_timestamp);
1861 if (context->utmp_id)
1862 utmp_put_dead_process(context->utmp_id, pid, code, status);
1864 exec_context_tty_reset(context);
1868 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1869 char buf[FORMAT_TIMESTAMP_MAX];
1882 prefix, (unsigned long) s->pid);
1884 if (s->start_timestamp.realtime > 0)
1886 "%sStart Timestamp: %s\n",
1887 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
1889 if (s->exit_timestamp.realtime > 0)
1891 "%sExit Timestamp: %s\n"
1893 "%sExit Status: %i\n",
1894 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1895 prefix, sigchld_code_to_string(s->code),
1899 char *exec_command_line(char **argv) {
1907 STRV_FOREACH(a, argv)
1910 if (!(n = new(char, k)))
1914 STRV_FOREACH(a, argv) {
1921 if (strpbrk(*a, WHITESPACE)) {
1932 /* FIXME: this doesn't really handle arguments that have
1933 * spaces and ticks in them */
1938 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1940 const char *prefix2;
1949 p2 = strappend(prefix, "\t");
1950 prefix2 = p2 ? p2 : prefix;
1952 cmd = exec_command_line(c->argv);
1955 "%sCommand Line: %s\n",
1956 prefix, cmd ? cmd : strerror(ENOMEM));
1960 exec_status_dump(&c->exec_status, f, prefix2);
1965 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
1971 LIST_FOREACH(command, c, c)
1972 exec_command_dump(c, f, prefix);
1975 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
1982 /* It's kind of important, that we keep the order here */
1983 LIST_FIND_TAIL(ExecCommand, command, *l, end);
1984 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
1989 int exec_command_set(ExecCommand *c, const char *path, ...) {
1997 l = strv_new_ap(path, ap);
2003 if (!(p = strdup(path))) {
2017 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2018 [EXEC_INPUT_NULL] = "null",
2019 [EXEC_INPUT_TTY] = "tty",
2020 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2021 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2022 [EXEC_INPUT_SOCKET] = "socket"
2025 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
2027 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2028 [EXEC_OUTPUT_INHERIT] = "inherit",
2029 [EXEC_OUTPUT_NULL] = "null",
2030 [EXEC_OUTPUT_TTY] = "tty",
2031 [EXEC_OUTPUT_SYSLOG] = "syslog",
2032 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2033 [EXEC_OUTPUT_KMSG] = "kmsg",
2034 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2035 [EXEC_OUTPUT_JOURNAL] = "journal",
2036 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2037 [EXEC_OUTPUT_SOCKET] = "socket"
2040 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
2042 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2043 [KILL_CONTROL_GROUP] = "control-group",
2044 [KILL_PROCESS] = "process",
2045 [KILL_NONE] = "none"
2048 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
2050 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2051 [KILL_MAIN] = "main",
2052 [KILL_CONTROL] = "control",
2056 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);