1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <security/pam_appl.h>
48 #include "capability.h"
52 #include "securebits.h"
54 #include "namespace.h"
56 #include "exit-status.h"
58 #include "utmp-wtmp.h"
60 #include "loopback-setup.h"
62 /* This assumes there is a 'tty' group */
65 static int shift_fds(int fds[], unsigned n_fds) {
66 int start, restart_from;
71 /* Modifies the fds array! (sorts it) */
81 for (i = start; i < (int) n_fds; i++) {
84 /* Already at right index? */
88 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
91 close_nointr_nofail(fds[i]);
94 /* Hmm, the fd we wanted isn't free? Then
95 * let's remember that and try again from here*/
96 if (nfd != i+3 && restart_from < 0)
100 if (restart_from < 0)
103 start = restart_from;
109 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
118 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
120 for (i = 0; i < n_fds; i++) {
122 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
125 /* We unconditionally drop FD_CLOEXEC from the fds,
126 * since after all we want to pass these fds to our
129 if ((r = fd_cloexec(fds[i], false)) < 0)
136 static const char *tty_path(const ExecContext *context) {
139 if (context->tty_path)
140 return context->tty_path;
142 return "/dev/console";
145 void exec_context_tty_reset(const ExecContext *context) {
148 if (context->tty_vhangup)
149 terminal_vhangup(tty_path(context));
151 if (context->tty_reset)
152 reset_terminal(tty_path(context));
154 if (context->tty_vt_disallocate && context->tty_path)
155 vt_disallocate(context->tty_path);
158 static int open_null_as(int flags, int nfd) {
163 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
167 r = dup2(fd, nfd) < 0 ? -errno : nfd;
168 close_nointr_nofail(fd);
175 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
177 union sockaddr_union sa;
180 assert(output < _EXEC_OUTPUT_MAX);
184 fd = socket(AF_UNIX, SOCK_STREAM, 0);
189 sa.un.sun_family = AF_UNIX;
190 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
192 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
194 close_nointr_nofail(fd);
198 if (shutdown(fd, SHUT_RD) < 0) {
199 close_nointr_nofail(fd);
210 context->syslog_identifier ? context->syslog_identifier : ident,
211 context->syslog_priority,
212 !!context->syslog_level_prefix,
213 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
214 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
215 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
218 r = dup2(fd, nfd) < 0 ? -errno : nfd;
219 close_nointr_nofail(fd);
225 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
231 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
235 r = dup2(fd, nfd) < 0 ? -errno : nfd;
236 close_nointr_nofail(fd);
243 static bool is_terminal_input(ExecInput i) {
245 i == EXEC_INPUT_TTY ||
246 i == EXEC_INPUT_TTY_FORCE ||
247 i == EXEC_INPUT_TTY_FAIL;
250 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
252 if (is_terminal_input(std_input) && !apply_tty_stdin)
253 return EXEC_INPUT_NULL;
255 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
256 return EXEC_INPUT_NULL;
261 static int fixup_output(ExecOutput std_output, int socket_fd) {
263 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
264 return EXEC_OUTPUT_INHERIT;
269 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
274 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
278 case EXEC_INPUT_NULL:
279 return open_null_as(O_RDONLY, STDIN_FILENO);
282 case EXEC_INPUT_TTY_FORCE:
283 case EXEC_INPUT_TTY_FAIL: {
286 if ((fd = acquire_terminal(
288 i == EXEC_INPUT_TTY_FAIL,
289 i == EXEC_INPUT_TTY_FORCE,
293 if (fd != STDIN_FILENO) {
294 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
295 close_nointr_nofail(fd);
302 case EXEC_INPUT_SOCKET:
303 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306 assert_not_reached("Unknown input type");
310 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
317 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
318 o = fixup_output(context->std_output, socket_fd);
320 /* This expects the input is already set up */
324 case EXEC_OUTPUT_INHERIT:
326 /* If input got downgraded, inherit the original value */
327 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
328 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
330 /* If the input is connected to anything that's not a /dev/null, inherit that... */
331 if (i != EXEC_INPUT_NULL)
332 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
334 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
336 return STDOUT_FILENO;
338 /* We need to open /dev/null here anew, to get the
339 * right access mode. So we fall through */
341 case EXEC_OUTPUT_NULL:
342 return open_null_as(O_WRONLY, STDOUT_FILENO);
344 case EXEC_OUTPUT_TTY:
345 if (is_terminal_input(i))
346 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
348 /* We don't reset the terminal if this is just about output */
349 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
351 case EXEC_OUTPUT_SYSLOG:
352 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
353 case EXEC_OUTPUT_KMSG:
354 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
355 case EXEC_OUTPUT_JOURNAL:
356 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
357 return connect_logger_as(context, o, ident, STDOUT_FILENO);
359 case EXEC_OUTPUT_SOCKET:
360 assert(socket_fd >= 0);
361 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
364 assert_not_reached("Unknown output type");
368 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
375 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
376 o = fixup_output(context->std_output, socket_fd);
377 e = fixup_output(context->std_error, socket_fd);
379 /* This expects the input and output are already set up */
381 /* Don't change the stderr file descriptor if we inherit all
382 * the way and are not on a tty */
383 if (e == EXEC_OUTPUT_INHERIT &&
384 o == EXEC_OUTPUT_INHERIT &&
385 i == EXEC_INPUT_NULL &&
386 !is_terminal_input(context->std_input) &&
388 return STDERR_FILENO;
390 /* Duplicate from stdout if possible */
391 if (e == o || e == EXEC_OUTPUT_INHERIT)
392 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
396 case EXEC_OUTPUT_NULL:
397 return open_null_as(O_WRONLY, STDERR_FILENO);
399 case EXEC_OUTPUT_TTY:
400 if (is_terminal_input(i))
401 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
403 /* We don't reset the terminal if this is just about output */
404 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
406 case EXEC_OUTPUT_SYSLOG:
407 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
408 case EXEC_OUTPUT_KMSG:
409 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
410 case EXEC_OUTPUT_JOURNAL:
411 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
412 return connect_logger_as(context, e, ident, STDERR_FILENO);
414 case EXEC_OUTPUT_SOCKET:
415 assert(socket_fd >= 0);
416 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
419 assert_not_reached("Unknown error type");
423 static int chown_terminal(int fd, uid_t uid) {
428 /* This might fail. What matters are the results. */
429 (void) fchown(fd, uid, -1);
430 (void) fchmod(fd, TTY_MODE);
432 if (fstat(fd, &st) < 0)
435 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
441 static int setup_confirm_stdio(const ExecContext *context,
443 int *_saved_stdout) {
444 int fd = -1, saved_stdin, saved_stdout = -1, r;
447 assert(_saved_stdin);
448 assert(_saved_stdout);
450 /* This returns positive EXIT_xxx return values instead of
451 * negative errno style values! */
453 if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
456 if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
461 if ((fd = acquire_terminal(
463 context->std_input == EXEC_INPUT_TTY_FAIL,
464 context->std_input == EXEC_INPUT_TTY_FORCE,
470 if (chown_terminal(fd, getuid()) < 0) {
475 if (dup2(fd, STDIN_FILENO) < 0) {
480 if (dup2(fd, STDOUT_FILENO) < 0) {
486 close_nointr_nofail(fd);
488 *_saved_stdin = saved_stdin;
489 *_saved_stdout = saved_stdout;
494 if (saved_stdout >= 0)
495 close_nointr_nofail(saved_stdout);
497 if (saved_stdin >= 0)
498 close_nointr_nofail(saved_stdin);
501 close_nointr_nofail(fd);
506 static int restore_confirm_stdio(const ExecContext *context,
514 assert(*saved_stdin >= 0);
515 assert(saved_stdout);
516 assert(*saved_stdout >= 0);
518 /* This returns positive EXIT_xxx return values instead of
519 * negative errno style values! */
521 if (is_terminal_input(context->std_input)) {
523 /* The service wants terminal input. */
527 context->std_output == EXEC_OUTPUT_INHERIT ||
528 context->std_output == EXEC_OUTPUT_TTY;
531 /* If the service doesn't want a controlling terminal,
532 * then we need to get rid entirely of what we have
535 if (release_terminal() < 0)
538 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
541 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
544 *keep_stdout = *keep_stdin = false;
550 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
551 bool keep_groups = false;
556 /* Lookup and set GID and supplementary group list. Here too
557 * we avoid NSS lookups for gid=0. */
559 if (context->group || username) {
561 if (context->group) {
562 const char *g = context->group;
564 if ((r = get_group_creds(&g, &gid)) < 0)
568 /* First step, initialize groups from /etc/group */
569 if (username && gid != 0) {
570 if (initgroups(username, gid) < 0)
576 /* Second step, set our gids */
577 if (setresgid(gid, gid, gid) < 0)
581 if (context->supplementary_groups) {
586 /* Final step, initialize any manually set supplementary groups */
587 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
589 if (!(gids = new(gid_t, ngroups_max)))
593 if ((k = getgroups(ngroups_max, gids)) < 0) {
600 STRV_FOREACH(i, context->supplementary_groups) {
603 if (k >= ngroups_max) {
609 r = get_group_creds(&g, gids+k);
618 if (setgroups(k, gids) < 0) {
629 static int enforce_user(const ExecContext *context, uid_t uid) {
633 /* Sets (but doesn't lookup) the uid and make sure we keep the
634 * capabilities while doing so. */
636 if (context->capabilities) {
638 static const cap_value_t bits[] = {
639 CAP_SETUID, /* Necessary so that we can run setresuid() below */
640 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
643 /* First step: If we need to keep capabilities but
644 * drop privileges we need to make sure we keep our
645 * caps, while we drop privileges. */
647 int sb = context->secure_bits|SECURE_KEEP_CAPS;
649 if (prctl(PR_GET_SECUREBITS) != sb)
650 if (prctl(PR_SET_SECUREBITS, sb) < 0)
654 /* Second step: set the capabilities. This will reduce
655 * the capabilities to the minimum we need. */
657 if (!(d = cap_dup(context->capabilities)))
660 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
661 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
667 if (cap_set_proc(d) < 0) {
676 /* Third step: actually set the uids */
677 if (setresuid(uid, uid, uid) < 0)
680 /* At this point we should have all necessary capabilities but
681 are otherwise a normal user. However, the caps might have been
682 corrupted due to the setresuid() so we need to clean them up
683 later. This is done outside of this call. */
690 static int null_conv(
692 const struct pam_message **msg,
693 struct pam_response **resp,
696 /* We don't support conversations */
701 static int setup_pam(
706 int fds[], unsigned n_fds) {
708 static const struct pam_conv conv = {
713 pam_handle_t *handle = NULL;
715 int pam_code = PAM_SUCCESS;
718 bool close_session = false;
719 pid_t pam_pid = 0, parent_pid;
725 /* We set up PAM in the parent process, then fork. The child
726 * will then stay around until killed via PR_SET_PDEATHSIG or
727 * systemd via the cgroup logic. It will then remove the PAM
728 * session again. The parent process will exec() the actual
729 * daemon. We do things this way to ensure that the main PID
730 * of the daemon is the one we initially fork()ed. */
732 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
738 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
741 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
744 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
747 close_session = true;
749 if ((!(e = pam_getenvlist(handle)))) {
750 pam_code = PAM_BUF_ERR;
754 /* Block SIGTERM, so that we know that it won't get lost in
756 if (sigemptyset(&ss) < 0 ||
757 sigaddset(&ss, SIGTERM) < 0 ||
758 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
761 parent_pid = getpid();
763 if ((pam_pid = fork()) < 0)
770 /* The child's job is to reset the PAM session on
773 /* This string must fit in 10 chars (i.e. the length
774 * of "/sbin/init"), to look pretty in /bin/ps */
775 rename_process("(sd-pam)");
777 /* Make sure we don't keep open the passed fds in this
778 child. We assume that otherwise only those fds are
779 open here that have been opened by PAM. */
780 close_many(fds, n_fds);
782 /* Wait until our parent died. This will most likely
783 * not work since the kernel does not allow
784 * unprivileged parents to kill their privileged children
785 * this way. We rely on the control groups kill logic
786 * to do the rest for us. */
787 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
790 /* Check if our parent process might already have
792 if (getppid() == parent_pid) {
794 if (sigwait(&ss, &sig) < 0) {
801 assert(sig == SIGTERM);
806 /* If our parent died we'll end the session */
807 if (getppid() != parent_pid)
808 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
814 pam_end(handle, pam_code | PAM_DATA_SILENT);
818 /* If the child was forked off successfully it will do all the
819 * cleanups, so forget about the handle here. */
822 /* Unblock SIGTERM again in the parent */
823 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
826 /* We close the log explicitly here, since the PAM modules
827 * might have opened it, but we don't want this fd around. */
836 if (pam_code != PAM_SUCCESS)
837 err = -EPERM; /* PAM errors do not map to errno */
843 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
845 pam_end(handle, pam_code | PAM_DATA_SILENT);
853 kill(pam_pid, SIGTERM);
854 kill(pam_pid, SIGCONT);
861 static int do_capability_bounding_set_drop(uint64_t drop) {
863 cap_t old_cap = NULL, new_cap = NULL;
867 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
868 * in the effective set (yes, the kernel drops that when
869 * executing init!), so get it back temporarily so that we can
870 * call PR_CAPBSET_DROP. */
872 old_cap = cap_get_proc();
876 if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
882 static const cap_value_t v = CAP_SETPCAP;
884 new_cap = cap_dup(old_cap);
890 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
895 if (cap_set_proc(new_cap) < 0) {
901 for (i = 0; i <= cap_last_cap(); i++)
902 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
903 if (prctl(PR_CAPBSET_DROP, i) < 0) {
916 cap_set_proc(old_cap);
923 static void rename_process_from_path(const char *path) {
924 char process_name[11];
928 /* This resulting string must fit in 10 chars (i.e. the length
929 * of "/sbin/init") to look pretty in /bin/ps */
931 p = file_name_from_path(path);
933 rename_process("(...)");
939 /* The end of the process name is usually more
940 * interesting, since the first bit might just be
946 process_name[0] = '(';
947 memcpy(process_name+1, p, l);
948 process_name[1+l] = ')';
949 process_name[1+l+1] = 0;
951 rename_process(process_name);
954 int exec_spawn(ExecCommand *command,
956 const ExecContext *context,
957 int fds[], unsigned n_fds,
959 bool apply_permissions,
961 bool apply_tty_stdin,
963 CGroupBonding *cgroup_bondings,
964 CGroupAttribute *cgroup_attributes,
965 const char *cgroup_suffix,
972 char **files_env = NULL;
977 assert(fds || n_fds <= 0);
979 if (context->std_input == EXEC_INPUT_SOCKET ||
980 context->std_output == EXEC_OUTPUT_SOCKET ||
981 context->std_error == EXEC_OUTPUT_SOCKET) {
993 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
994 log_error("Failed to load environment files: %s", strerror(-r));
999 argv = command->argv;
1001 if (!(line = exec_command_line(argv))) {
1006 log_debug("About to execute: %s", line);
1009 r = cgroup_bonding_realize_list(cgroup_bondings);
1013 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1015 if ((pid = fork()) < 0) {
1023 const char *username = NULL, *home = NULL;
1024 uid_t uid = (uid_t) -1;
1025 gid_t gid = (gid_t) -1;
1026 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1028 int saved_stdout = -1, saved_stdin = -1;
1029 bool keep_stdout = false, keep_stdin = false, set_access = false;
1033 rename_process_from_path(command->path);
1035 /* We reset exactly these signals, since they are the
1036 * only ones we set to SIG_IGN in the main daemon. All
1037 * others we leave untouched because we set them to
1038 * SIG_DFL or a valid handler initially, both of which
1039 * will be demoted to SIG_DFL. */
1040 default_signals(SIGNALS_CRASH_HANDLER,
1041 SIGNALS_IGNORE, -1);
1043 if (context->ignore_sigpipe)
1044 ignore_signals(SIGPIPE, -1);
1046 assert_se(sigemptyset(&ss) == 0);
1047 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1049 r = EXIT_SIGNAL_MASK;
1053 /* Close sockets very early to make sure we don't
1054 * block init reexecution because it cannot bind its
1057 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1058 socket_fd >= 0 ? 1 : n_fds);
1064 if (!context->same_pgrp)
1071 if (context->tcpwrap_name) {
1073 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1079 for (i = 0; i < (int) n_fds; i++) {
1080 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1088 exec_context_tty_reset(context);
1090 /* We skip the confirmation step if we shall not apply the TTY */
1091 if (confirm_spawn &&
1092 (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1095 /* Set up terminal for the question */
1096 if ((r = setup_confirm_stdio(context,
1097 &saved_stdin, &saved_stdout))) {
1102 /* Now ask the question. */
1103 if (!(line = exec_command_line(argv))) {
1109 r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1112 if (r < 0 || response == 'n') {
1116 } else if (response == 's') {
1121 /* Release terminal for the question */
1122 if ((r = restore_confirm_stdio(context,
1123 &saved_stdin, &saved_stdout,
1124 &keep_stdin, &keep_stdout))) {
1130 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1131 * must make sure to drop O_NONBLOCK */
1133 fd_nonblock(socket_fd, false);
1136 err = setup_input(context, socket_fd, apply_tty_stdin);
1144 err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1151 err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1157 if (cgroup_bondings) {
1158 err = cgroup_bonding_install_list(cgroup_bondings, 0, cgroup_suffix);
1165 if (context->oom_score_adjust_set) {
1168 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1171 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1172 /* Compatibility with Linux <= 2.6.35 */
1176 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1177 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1179 snprintf(t, sizeof(t), "%i", adj);
1182 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1183 && errno != EACCES) {
1185 r = EXIT_OOM_ADJUST;
1191 if (context->nice_set)
1192 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1198 if (context->cpu_sched_set) {
1199 struct sched_param param;
1202 param.sched_priority = context->cpu_sched_priority;
1204 if (sched_setscheduler(0, context->cpu_sched_policy |
1205 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) {
1207 r = EXIT_SETSCHEDULER;
1212 if (context->cpuset)
1213 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1215 r = EXIT_CPUAFFINITY;
1219 if (context->ioprio_set)
1220 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1226 if (context->timer_slack_nsec_set)
1227 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1229 r = EXIT_TIMERSLACK;
1233 if (context->utmp_id)
1234 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1236 if (context->user) {
1237 username = context->user;
1238 err = get_user_creds(&username, &uid, &gid, &home);
1244 if (is_terminal_input(context->std_input)) {
1245 err = chown_terminal(STDIN_FILENO, uid);
1252 if (cgroup_bondings && context->control_group_modify) {
1253 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1255 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
1265 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1266 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (uid_t) -1, context->control_group_persistent);
1273 if (apply_permissions) {
1274 err = enforce_groups(context, username, gid);
1281 umask(context->umask);
1284 if (context->pam_name && username) {
1285 err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
1292 if (context->private_network) {
1293 if (unshare(CLONE_NEWNET) < 0) {
1302 if (strv_length(context->read_write_dirs) > 0 ||
1303 strv_length(context->read_only_dirs) > 0 ||
1304 strv_length(context->inaccessible_dirs) > 0 ||
1305 context->mount_flags != MS_SHARED ||
1306 context->private_tmp) {
1307 err = setup_namespace(context->read_write_dirs,
1308 context->read_only_dirs,
1309 context->inaccessible_dirs,
1310 context->private_tmp,
1311 context->mount_flags);
1319 if (context->root_directory)
1320 if (chroot(context->root_directory) < 0) {
1326 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1335 if (asprintf(&d, "%s/%s",
1336 context->root_directory ? context->root_directory : "",
1337 context->working_directory ? context->working_directory : "") < 0) {
1353 /* We repeat the fd closing here, to make sure that
1354 * nothing is leaked from the PAM modules */
1355 err = close_all_fds(fds, n_fds);
1357 err = shift_fds(fds, n_fds);
1359 err = flags_fds(fds, n_fds, context->non_blocking);
1365 if (apply_permissions) {
1367 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1368 if (!context->rlimit[i])
1371 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1378 if (context->capability_bounding_set_drop) {
1379 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1381 r = EXIT_CAPABILITIES;
1386 if (context->user) {
1387 err = enforce_user(context, uid);
1394 /* PR_GET_SECUREBITS is not privileged, while
1395 * PR_SET_SECUREBITS is. So to suppress
1396 * potential EPERMs we'll try not to call
1397 * PR_SET_SECUREBITS unless necessary. */
1398 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1399 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1401 r = EXIT_SECUREBITS;
1405 if (context->capabilities)
1406 if (cap_set_proc(context->capabilities) < 0) {
1408 r = EXIT_CAPABILITIES;
1413 if (!(our_env = new0(char*, 7))) {
1420 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1421 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1428 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1435 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1436 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1442 if (is_terminal_input(context->std_input) ||
1443 context->std_output == EXEC_OUTPUT_TTY ||
1444 context->std_error == EXEC_OUTPUT_TTY)
1445 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1453 if (!(final_env = strv_env_merge(
1457 context->environment,
1466 if (!(final_argv = replace_env_argv(argv, final_env))) {
1472 final_env = strv_env_clean(final_env);
1474 execve(command->path, final_argv, final_env);
1481 log_warning("Failed at step %s spawning %s: %s",
1482 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1483 command->path, strerror(-err));
1487 strv_free(final_env);
1489 strv_free(files_env);
1490 strv_free(final_argv);
1492 if (saved_stdin >= 0)
1493 close_nointr_nofail(saved_stdin);
1495 if (saved_stdout >= 0)
1496 close_nointr_nofail(saved_stdout);
1501 strv_free(files_env);
1503 /* We add the new process to the cgroup both in the child (so
1504 * that we can be sure that no user code is ever executed
1505 * outside of the cgroup) and in the parent (so that we can be
1506 * sure that when we kill the cgroup the process will be
1508 if (cgroup_bondings)
1509 cgroup_bonding_install_list(cgroup_bondings, pid, cgroup_suffix);
1511 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1513 exec_status_start(&command->exec_status, pid);
1519 strv_free(files_env);
1524 void exec_context_init(ExecContext *c) {
1528 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1529 c->cpu_sched_policy = SCHED_OTHER;
1530 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1531 c->syslog_level_prefix = true;
1532 c->mount_flags = MS_SHARED;
1533 c->kill_signal = SIGTERM;
1534 c->send_sigkill = true;
1535 c->control_group_persistent = -1;
1536 c->ignore_sigpipe = true;
1539 void exec_context_done(ExecContext *c) {
1544 strv_free(c->environment);
1545 c->environment = NULL;
1547 strv_free(c->environment_files);
1548 c->environment_files = NULL;
1550 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1552 c->rlimit[l] = NULL;
1555 free(c->working_directory);
1556 c->working_directory = NULL;
1557 free(c->root_directory);
1558 c->root_directory = NULL;
1563 free(c->tcpwrap_name);
1564 c->tcpwrap_name = NULL;
1566 free(c->syslog_identifier);
1567 c->syslog_identifier = NULL;
1575 strv_free(c->supplementary_groups);
1576 c->supplementary_groups = NULL;
1581 if (c->capabilities) {
1582 cap_free(c->capabilities);
1583 c->capabilities = NULL;
1586 strv_free(c->read_only_dirs);
1587 c->read_only_dirs = NULL;
1589 strv_free(c->read_write_dirs);
1590 c->read_write_dirs = NULL;
1592 strv_free(c->inaccessible_dirs);
1593 c->inaccessible_dirs = NULL;
1596 CPU_FREE(c->cpuset);
1602 void exec_command_done(ExecCommand *c) {
1612 void exec_command_done_array(ExecCommand *c, unsigned n) {
1615 for (i = 0; i < n; i++)
1616 exec_command_done(c+i);
1619 void exec_command_free_list(ExecCommand *c) {
1623 LIST_REMOVE(ExecCommand, command, c, i);
1624 exec_command_done(i);
1629 void exec_command_free_array(ExecCommand **c, unsigned n) {
1632 for (i = 0; i < n; i++) {
1633 exec_command_free_list(c[i]);
1638 int exec_context_load_environment(const ExecContext *c, char ***l) {
1639 char **i, **r = NULL;
1644 STRV_FOREACH(i, c->environment_files) {
1647 bool ignore = false;
1657 if (!path_is_absolute(fn)) {
1666 if ((k = load_env_file(fn, &p)) < 0) {
1680 m = strv_env_merge(2, r, p);
1696 static void strv_fprintf(FILE *f, char **l) {
1702 fprintf(f, " %s", *g);
1705 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1717 "%sWorkingDirectory: %s\n"
1718 "%sRootDirectory: %s\n"
1719 "%sNonBlocking: %s\n"
1720 "%sPrivateTmp: %s\n"
1721 "%sControlGroupModify: %s\n"
1722 "%sControlGroupPersistent: %s\n"
1723 "%sPrivateNetwork: %s\n",
1725 prefix, c->working_directory ? c->working_directory : "/",
1726 prefix, c->root_directory ? c->root_directory : "/",
1727 prefix, yes_no(c->non_blocking),
1728 prefix, yes_no(c->private_tmp),
1729 prefix, yes_no(c->control_group_modify),
1730 prefix, yes_no(c->control_group_persistent),
1731 prefix, yes_no(c->private_network));
1733 STRV_FOREACH(e, c->environment)
1734 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1736 STRV_FOREACH(e, c->environment_files)
1737 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1739 if (c->tcpwrap_name)
1741 "%sTCPWrapName: %s\n",
1742 prefix, c->tcpwrap_name);
1749 if (c->oom_score_adjust_set)
1751 "%sOOMScoreAdjust: %i\n",
1752 prefix, c->oom_score_adjust);
1754 for (i = 0; i < RLIM_NLIMITS; i++)
1756 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1760 "%sIOSchedulingClass: %s\n"
1761 "%sIOPriority: %i\n",
1762 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1763 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1765 if (c->cpu_sched_set)
1767 "%sCPUSchedulingPolicy: %s\n"
1768 "%sCPUSchedulingPriority: %i\n"
1769 "%sCPUSchedulingResetOnFork: %s\n",
1770 prefix, sched_policy_to_string(c->cpu_sched_policy),
1771 prefix, c->cpu_sched_priority,
1772 prefix, yes_no(c->cpu_sched_reset_on_fork));
1775 fprintf(f, "%sCPUAffinity:", prefix);
1776 for (i = 0; i < c->cpuset_ncpus; i++)
1777 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1778 fprintf(f, " %i", i);
1782 if (c->timer_slack_nsec_set)
1783 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1786 "%sStandardInput: %s\n"
1787 "%sStandardOutput: %s\n"
1788 "%sStandardError: %s\n",
1789 prefix, exec_input_to_string(c->std_input),
1790 prefix, exec_output_to_string(c->std_output),
1791 prefix, exec_output_to_string(c->std_error));
1797 "%sTTYVHangup: %s\n"
1798 "%sTTYVTDisallocate: %s\n",
1799 prefix, c->tty_path,
1800 prefix, yes_no(c->tty_reset),
1801 prefix, yes_no(c->tty_vhangup),
1802 prefix, yes_no(c->tty_vt_disallocate));
1804 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1805 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1806 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1807 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1809 "%sSyslogFacility: %s\n"
1810 "%sSyslogLevel: %s\n",
1811 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1812 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1814 if (c->capabilities) {
1816 if ((t = cap_to_text(c->capabilities, NULL))) {
1817 fprintf(f, "%sCapabilities: %s\n",
1824 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1826 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1827 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1828 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1829 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1830 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1831 (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : "");
1833 if (c->capability_bounding_set_drop) {
1835 fprintf(f, "%sCapabilityBoundingSet:", prefix);
1837 for (l = 0; l <= cap_last_cap(); l++)
1838 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1841 if ((t = cap_to_name(l))) {
1842 fprintf(f, " %s", t);
1851 fprintf(f, "%sUser: %s\n", prefix, c->user);
1853 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1855 if (strv_length(c->supplementary_groups) > 0) {
1856 fprintf(f, "%sSupplementaryGroups:", prefix);
1857 strv_fprintf(f, c->supplementary_groups);
1862 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1864 if (strv_length(c->read_write_dirs) > 0) {
1865 fprintf(f, "%sReadWriteDirs:", prefix);
1866 strv_fprintf(f, c->read_write_dirs);
1870 if (strv_length(c->read_only_dirs) > 0) {
1871 fprintf(f, "%sReadOnlyDirs:", prefix);
1872 strv_fprintf(f, c->read_only_dirs);
1876 if (strv_length(c->inaccessible_dirs) > 0) {
1877 fprintf(f, "%sInaccessibleDirs:", prefix);
1878 strv_fprintf(f, c->inaccessible_dirs);
1884 "%sKillSignal: SIG%s\n"
1885 "%sSendSIGKILL: %s\n"
1886 "%sIgnoreSIGPIPE: %s\n",
1887 prefix, kill_mode_to_string(c->kill_mode),
1888 prefix, signal_to_string(c->kill_signal),
1889 prefix, yes_no(c->send_sigkill),
1890 prefix, yes_no(c->ignore_sigpipe));
1894 "%sUtmpIdentifier: %s\n",
1895 prefix, c->utmp_id);
/* Marks the start of a process in the ExecStatus record *s.
 *
 * The only statement visible in this excerpt fills s->start_timestamp via
 * dual_timestamp_get().
 *
 * NOTE(review): the lines between the signature and that call are not
 * visible here; presumably they validate/reset *s and store pid -- confirm
 * against the full file. */
1898 void exec_status_start(ExecStatus *s, pid_t pid) {
1903 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process pid in the ExecStatus record *s and
 * performs the per-context exit bookkeeping that is visible below.
 *
 * s:       status record to update; s->exit_timestamp is filled in.
 * context: execution context; its utmp_id is read and the context is
 *          passed to exec_context_tty_reset().
 * pid, code, status: forwarded unchanged to utmp_put_dead_process().
 *
 * NOTE(review): several lines of this function are missing from this
 * excerpt (including the body of the first if and any guard around the
 * context accesses) -- confirm details against the full file. */
1906 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record already holds a different, non-zero pid. The statement guarded
 * by this condition is not visible in this excerpt. */
1909 if (s->pid && s->pid != pid)
1913 dual_timestamp_get(&s->exit_timestamp);
/* Only touch utmp when the context carries a utmp identifier. */
1919 if (context->utmp_id)
1920 utmp_put_dead_process(context->utmp_id, pid, code, status);
1922 exec_context_tty_reset(context);
/* Writes a human-readable description of the ExecStatus record *s to f,
 * one "<prefix>Key: value" line per field.
 *
 * Visible output: the pid (cast to unsigned long), a "Start Timestamp" line
 * when s->start_timestamp.realtime > 0, and "Exit Timestamp" / "Exit Status"
 * lines when s->exit_timestamp.realtime > 0.
 *
 * NOTE(review): the fprintf() openers, some format lines and the trailing
 * arguments are missing from this excerpt -- confirm against the full file. */
1926 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer handed to format_timestamp() below; reused for both
 * timestamps. */
1927 char buf[FORMAT_TIMESTAMP_MAX];
1940 prefix, (unsigned long) s->pid);
/* A realtime value of 0 is treated as "no start timestamp recorded". */
1942 if (s->start_timestamp.realtime > 0)
1944 "%sStart Timestamp: %s\n",
1945 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit information is only printed once an exit timestamp is present. */
1947 if (s->exit_timestamp.realtime > 0)
1949 "%sExit Timestamp: %s\n"
1951 "%sExit Status: %i\n",
1952 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1953 prefix, sigchld_code_to_string(s->code),
/* Builds a single string from the argument vector argv.
 *
 * Visible structure: a first pass over argv (body not shown), an allocation
 * of k chars into n, and a second pass that handles arguments containing a
 * WHITESPACE character separately from the others. The FIXME at the end
 * records that arguments containing both spaces and ticks are not handled
 * properly.
 *
 * NOTE(review): the loop bodies, the allocation-failure branch and the
 * return statement are missing from this excerpt; presumably the first pass
 * computes the size k and the function returns the allocated string n (or
 * NULL on allocation failure) -- confirm against the full file. */
1957 char *exec_command_line(char **argv) {
1965 STRV_FOREACH(a, argv)
/* Allocation failed when new() yields NULL; the handling is not visible. */
1968 if (!(n = new(char, k)))
1972 STRV_FOREACH(a, argv) {
/* Arguments that contain whitespace take a separate branch. */
1979 if (strpbrk(*a, WHITESPACE)) {
1990 /* FIXME: this doesn't really handle arguments that have
1991 * spaces and ticks in them */
/* Writes a description of a single ExecCommand to f: its command line,
 * followed by its exec_status dumped with a deeper prefix.
 *
 * NOTE(review): the declarations, the fprintf() opener and any cleanup of
 * p2/cmd are missing from this excerpt -- confirm against the full file. */
1996 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1998 const char *prefix2;
/* Nested prefix is prefix + "\t"; if strappend() returned NULL, fall back
 * to the unmodified prefix rather than failing. */
2007 p2 = strappend(prefix, "\t");
2008 prefix2 = p2 ? p2 : prefix;
2010 cmd = exec_command_line(c->argv);
/* If no command line string could be built, print the ENOMEM error text in
 * its place. */
2013 "%sCommand Line: %s\n",
2014 prefix, cmd ? cmd : strerror(ENOMEM));
2018 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every ExecCommand in the list starting at c to f by calling
 * exec_command_dump() on each element with the same prefix.
 *
 * NOTE(review): lines between the signature and the loop are missing from
 * this excerpt -- confirm against the full file. */
2023 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
/* The parameter c doubles as the iteration cursor over the "command" list
 * links. */
2029 LIST_FOREACH(command, c, c)
2030 exec_command_dump(c, f, prefix);
/* Appends the command e to the end of the command list *l.
 *
 * The visible code looks up the current tail of *l and inserts e after it,
 * so existing elements keep their relative order.
 *
 * NOTE(review): declarations and any surrounding branch structure (for
 * example handling of an empty list) are missing from this excerpt --
 * confirm against the full file. */
2033 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2040 /* It's kind of important that we keep the order here */
2041 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2042 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Sets up the ExecCommand c from path plus a variable argument list.
 *
 * Visible steps: an argument vector l is built with strv_new_ap(path, ap),
 * and path is duplicated with strdup() into p, with a failure branch when
 * strdup() returns NULL.
 *
 * NOTE(review): the va_start/va_end handling, the error paths, the
 * assignment into c and the return value are missing from this excerpt --
 * confirm against the full file. */
2047 int exec_command_set(ExecCommand *c, const char *path, ...) {
2055 l = strv_new_ap(path, ap);
2061 if (!(p = strdup(path))) {
/* String names for the ExecInput enum values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX.
 *
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP presumably generates the
 * exec_input_to_string() helper used elsewhere in this file (and a matching
 * from-string helper) from this table -- confirm against the macro
 * definition. */
2075 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2076 [EXEC_INPUT_NULL] = "null",
2077 [EXEC_INPUT_TTY] = "tty",
2078 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2079 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2080 [EXEC_INPUT_SOCKET] = "socket"
2083 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum values, indexed by enum value and
 * sized by _EXEC_OUTPUT_MAX. The "+console" names belong to the
 * *_AND_CONSOLE variants of the syslog, kmsg and journal outputs.
 *
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP presumably generates the
 * exec_output_to_string() helper used elsewhere in this file (and a matching
 * from-string helper) from this table -- confirm against the macro
 * definition. */
2085 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2086 [EXEC_OUTPUT_INHERIT] = "inherit",
2087 [EXEC_OUTPUT_NULL] = "null",
2088 [EXEC_OUTPUT_TTY] = "tty",
2089 [EXEC_OUTPUT_SYSLOG] = "syslog",
2090 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2091 [EXEC_OUTPUT_KMSG] = "kmsg",
2092 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2093 [EXEC_OUTPUT_JOURNAL] = "journal",
2094 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2095 [EXEC_OUTPUT_SOCKET] = "socket"
2098 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* String names for the KillMode enum values, indexed by enum value and
 * sized by _KILL_MODE_MAX.
 *
 * NOTE(review): DEFINE_STRING_TABLE_LOOKUP presumably generates the
 * kill_mode_to_string() helper used elsewhere in this file (and a matching
 * from-string helper) from this table -- confirm against the macro
 * definition. */
2100 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2101 [KILL_CONTROL_GROUP] = "control-group",
2102 [KILL_PROCESS] = "process",
2103 [KILL_NONE] = "none"
2106 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
/* String names for the KillWho enum values, indexed by enum value and sized
 * by _KILL_WHO_MAX.
 *
 * NOTE(review): the end of this table (any further entries and the closing
 * brace) is missing from this excerpt. DEFINE_STRING_TABLE_LOOKUP presumably
 * generates kill_who to-string/from-string helpers from the table -- confirm
 * against the macro definition. */
2108 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2109 [KILL_MAIN] = "main",
2110 [KILL_CONTROL] = "control",
2114 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);