1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <security/pam_appl.h>
48 #include "capability.h"
52 #include "securebits.h"
54 #include "namespace.h"
56 #include "exit-status.h"
58 #include "utmp-wtmp.h"
60 #include "loopback-setup.h"
62 /* This assumes there is a 'tty' group */
/* Relocates the passed descriptors so that fds[i] ends up at descriptor
 * number i+3, i.e. directly after stdin/stdout/stderr. When a wanted slot
 * is still occupied the index is remembered in restart_from and the pass
 * is repeated from there.
 * NOTE(review): several statements of this function are not visible in
 * this excerpt; the description covers only the lines shown. */
65 static int shift_fds(int fds[], unsigned n_fds) {
66 int start, restart_from;
71 /* Modifies the fds array! (sorts it) */
81 for (i = start; i < (int) n_fds; i++) {
84 /* Already at right index? */
/* F_DUPFD hands out the lowest free descriptor >= i+3, so the
 * result may be larger than i+3 if that slot is still in use. */
88 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
91 close_nointr_nofail(fds[i]);
94 /* Hmm, the fd we wanted isn't free? Then
95 * let's remember that and try again from here*/
96 if (nfd != i+3 && restart_from < 0)
100 if (restart_from < 0)
103 start = restart_from;
/* For every descriptor in fds: applies the requested O_NONBLOCK state
 * through fd_nonblock() and clears FD_CLOEXEC through fd_cloexec().
 * Both helpers signal failure with a negative return value. */
109 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
118 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
120 for (i = 0; i < n_fds; i++) {
122 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
125 /* We unconditionally drop FD_CLOEXEC from the fds,
126 * since after all we want to pass these fds to our
129 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console"
 * when none is set. The returned string is not a copy. */
136 static const char *tty_path(const ExecContext *context) {
139 if (context->tty_path)
140 return context->tty_path;
142 return "/dev/console";
/* Runs the TTY clean-up steps enabled in the context. Hangup and reset
 * operate on tty_path(context), which falls back to /dev/console;
 * VT disallocation is only attempted when an explicit tty_path is set. */
145 void exec_context_tty_reset(const ExecContext *context) {
148 if (context->tty_vhangup)
149 terminal_vhangup(tty_path(context));
151 if (context->tty_reset)
152 reset_terminal(tty_path(context));
154 if (context->tty_vt_disallocate && context->tty_path)
155 vt_disallocate(context->tty_path);
/* Opens /dev/null with the given flags (plus O_NOCTTY) and duplicates it
 * onto descriptor nfd. The visible dup2() line yields nfd on success and
 * -errno on failure; the temporary descriptor is closed after the dup. */
158 static int open_null_as(int flags, int nfd) {
163 if ((fd = open("/dev/null", flags|O_NOCTTY)) < 0)
167 r = dup2(fd, nfd) < 0 ? -errno : nfd;
168 close_nointr_nofail(fd);
/* Connects a stream socket to /run/systemd/journal/stdout, shuts down
 * its read side and duplicates it onto descriptor nfd (nfd on success,
 * -errno if dup2() fails). The argument list near the end passes the
 * identifier (context->syslog_identifier, else ident), the priority, the
 * level-prefix flag and three booleans derived from `output` (syslog
 * variant, kmsg variant, *_AND_CONSOLE variant).
 * NOTE(review): the call consuming those arguments is not visible in
 * this excerpt - presumably the header written to the socket. */
175 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
177 union sockaddr_union sa;
180 assert(output < _EXEC_OUTPUT_MAX);
184 fd = socket(AF_UNIX, SOCK_STREAM, 0);
189 sa.un.sun_family = AF_UNIX;
190 strncpy(sa.un.sun_path, "/run/systemd/journal/stdout", sizeof(sa.un.sun_path));
/* The address length covers only the used part of sun_path. */
192 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
194 close_nointr_nofail(fd);
198 if (shutdown(fd, SHUT_RD) < 0) {
199 close_nointr_nofail(fd);
210 context->syslog_identifier ? context->syslog_identifier : ident,
211 context->syslog_priority,
212 !!context->syslog_level_prefix,
213 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
214 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
215 output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || output == EXEC_OUTPUT_KMSG_AND_CONSOLE || output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE);
218 r = dup2(fd, nfd) < 0 ? -errno : nfd;
219 close_nointr_nofail(fd);
/* Opens the terminal at path with the given mode (plus O_NOCTTY, so it
 * does not become the controlling terminal) and duplicates it onto
 * descriptor nfd: nfd on success, -errno if dup2() fails. */
225 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
231 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
235 r = dup2(fd, nfd) < 0 ? -errno : nfd;
236 close_nointr_nofail(fd);
/* Tests whether i is one of the three terminal-backed input modes
 * (TTY, TTY_FORCE, TTY_FAIL). */
243 static bool is_terminal_input(ExecInput i) {
245 i == EXEC_INPUT_TTY ||
246 i == EXEC_INPUT_TTY_FORCE ||
247 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the configured input mode to EXEC_INPUT_NULL when it cannot
 * be honoured: terminal input while apply_tty_stdin is false, or socket
 * input without a valid socket_fd.
 * NOTE(review): the final return for the unchanged case is not visible
 * in this excerpt. */
250 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
252 if (is_terminal_input(std_input) && !apply_tty_stdin)
253 return EXEC_INPUT_NULL;
255 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
256 return EXEC_INPUT_NULL;
/* Downgrades socket output to EXEC_OUTPUT_INHERIT when no valid
 * socket_fd is available.
 * NOTE(review): the final return for the unchanged case is not visible
 * in this excerpt. */
261 static int fixup_output(ExecOutput std_output, int socket_fd) {
263 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
264 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the fixed-up input mode: /dev/null
 * (read-only) for NULL, an acquired terminal for the TTY modes, or a
 * duplicate of socket_fd for SOCKET. The visible return expressions
 * yield STDIN_FILENO on success and -errno when dup2() fails. */
269 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
274 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
278 case EXEC_INPUT_NULL:
279 return open_null_as(O_RDONLY, STDIN_FILENO);
282 case EXEC_INPUT_TTY_FORCE:
283 case EXEC_INPUT_TTY_FAIL: {
286 if ((fd = acquire_terminal(
288 i == EXEC_INPUT_TTY_FAIL,
289 i == EXEC_INPUT_TTY_FORCE,
293 if (fd != STDIN_FILENO) {
294 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
295 close_nointr_nofail(fd);
302 case EXEC_INPUT_SOCKET:
303 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
306 assert_not_reached("Unknown input type");
/* Sets up STDOUT_FILENO according to the fixed-up output mode. Stdin
 * must already be configured, because several branches duplicate it.
 * The visible return expressions yield STDOUT_FILENO on success and
 * -errno when dup2() fails. */
310 static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
317 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
318 o = fixup_output(context->std_output, socket_fd);
320 /* This expects the input is already set up */
324 case EXEC_OUTPUT_INHERIT:
326 /* If input got downgraded, inherit the original value */
327 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
328 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
330 /* If the input is connected to anything that's not a /dev/null, inherit that... */
331 if (i != EXEC_INPUT_NULL)
332 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
334 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
336 return STDOUT_FILENO;
338 /* We need to open /dev/null here anew, to get the
339 * right access mode. So we fall through */
341 case EXEC_OUTPUT_NULL:
342 return open_null_as(O_WRONLY, STDOUT_FILENO);
344 case EXEC_OUTPUT_TTY:
345 if (is_terminal_input(i))
346 return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
348 /* We don't reset the terminal if this is just about output */
349 return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO);
/* All logger-backed modes share one code path; the mode itself is
 * passed on so the logger connection can be flagged accordingly. */
351 case EXEC_OUTPUT_SYSLOG:
352 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
353 case EXEC_OUTPUT_KMSG:
354 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
355 case EXEC_OUTPUT_JOURNAL:
356 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
357 return connect_logger_as(context, o, ident, STDOUT_FILENO);
359 case EXEC_OUTPUT_SOCKET:
360 assert(socket_fd >= 0);
361 return dup2(socket_fd, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO;
364 assert_not_reached("Unknown output type");
/* Sets up STDERR_FILENO according to the fixed-up stderr mode. Stdin and
 * stdout must already be configured. Stderr is left untouched when
 * everything inherits and no terminal input was requested; it becomes a
 * duplicate of stdout when its mode equals stdout's or is INHERIT;
 * otherwise it is opened per mode like stdout. The visible return
 * expressions yield STDERR_FILENO on success and -errno when dup2()
 * fails.
 * NOTE(review): one operand of the first condition is not visible in
 * this excerpt. */
368 static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
375 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
376 o = fixup_output(context->std_output, socket_fd);
377 e = fixup_output(context->std_error, socket_fd);
379 /* This expects the input and output are already set up */
381 /* Don't change the stderr file descriptor if we inherit all
382 * the way and are not on a tty */
383 if (e == EXEC_OUTPUT_INHERIT &&
384 o == EXEC_OUTPUT_INHERIT &&
385 i == EXEC_INPUT_NULL &&
386 !is_terminal_input(context->std_input) &&
388 return STDERR_FILENO;
390 /* Duplicate from stdout if possible */
391 if (e == o || e == EXEC_OUTPUT_INHERIT)
392 return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
396 case EXEC_OUTPUT_NULL:
397 return open_null_as(O_WRONLY, STDERR_FILENO);
399 case EXEC_OUTPUT_TTY:
400 if (is_terminal_input(i))
401 return dup2(STDIN_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
403 /* We don't reset the terminal if this is just about output */
404 return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO);
406 case EXEC_OUTPUT_SYSLOG:
407 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
408 case EXEC_OUTPUT_KMSG:
409 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
410 case EXEC_OUTPUT_JOURNAL:
411 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
412 return connect_logger_as(context, e, ident, STDERR_FILENO);
414 case EXEC_OUTPUT_SOCKET:
415 assert(socket_fd >= 0);
416 return dup2(socket_fd, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO;
419 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind fd to uid with permissions TTY_MODE.
 * The results of fchown()/fchmod() are deliberately ignored; the
 * fstat() that follows checks whether owner and permission bits
 * actually ended up as wanted. */
423 static int chown_terminal(int fd, uid_t uid) {
428 /* This might fail. What matters are the results. */
429 (void) fchown(fd, uid, -1);
430 (void) fchmod(fd, TTY_MODE);
432 if (fstat(fd, &st) < 0)
435 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Prepares stdin/stdout for the interactive confirmation prompt: saves
 * duplicates of the current STDIN/STDOUT at descriptors >= 3, acquires
 * the terminal, chowns it to the calling uid and dup2()s it onto STDIN
 * and STDOUT. On success the saved descriptors are handed back through
 * _saved_stdin/_saved_stdout.
 * NOTE(review): the trailing close calls are presumably the failure
 * path; its label is not visible in this excerpt. */
441 static int setup_confirm_stdio(const ExecContext *context,
443 int *_saved_stdout) {
444 int fd = -1, saved_stdin, saved_stdout = -1, r;
447 assert(_saved_stdin);
448 assert(_saved_stdout);
450 /* This returns positive EXIT_xxx return values instead of
451 * negative errno style values! */
453 if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
456 if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
461 if ((fd = acquire_terminal(
463 context->std_input == EXEC_INPUT_TTY_FAIL,
464 context->std_input == EXEC_INPUT_TTY_FORCE,
470 if (chown_terminal(fd, getuid()) < 0) {
475 if (dup2(fd, STDIN_FILENO) < 0) {
480 if (dup2(fd, STDOUT_FILENO) < 0) {
486 close_nointr_nofail(fd);
488 *_saved_stdin = saved_stdin;
489 *_saved_stdout = saved_stdout;
494 if (saved_stdout >= 0)
495 close_nointr_nofail(saved_stdout);
497 if (saved_stdin >= 0)
498 close_nointr_nofail(saved_stdin);
501 close_nointr_nofail(fd);
/* Counterpart of setup_confirm_stdio(), run after the confirmation
 * prompt. If the service itself wants terminal input, the terminal is
 * kept; otherwise it is released, the saved descriptors are dup2()ed
 * back onto STDIN/STDOUT and both keep flags are cleared.
 * NOTE(review): parts of the parameter list and of the terminal-input
 * branch are not visible in this excerpt. */
506 static int restore_confirm_stdio(const ExecContext *context,
514 assert(*saved_stdin >= 0);
515 assert(saved_stdout);
516 assert(*saved_stdout >= 0);
518 /* This returns positive EXIT_xxx return values instead of
519 * negative errno style values! */
521 if (is_terminal_input(context->std_input)) {
523 /* The service wants terminal input. */
527 context->std_output == EXEC_OUTPUT_INHERIT ||
528 context->std_output == EXEC_OUTPUT_TTY;
531 /* If the service doesn't want a controlling terminal,
532 * then we need to get rid entirely of what we have
535 if (release_terminal() < 0)
538 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
541 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
544 *keep_stdout = *keep_stdin = false;
/* Resolves and applies the group credentials of the service: looks up
 * context->group when set, runs initgroups() when a username is known
 * and gid != 0, sets real/effective/saved GID, and finally appends the
 * configured supplementary groups to the current group list (bounded by
 * _SC_NGROUPS_MAX) before installing it with setgroups(). */
550 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
551 bool keep_groups = false;
556 /* Lookup and set GID and supplementary group list. Here too
557 * we avoid NSS lookups for gid=0. */
559 if (context->group || username) {
561 if (context->group) {
562 const char *g = context->group;
564 if ((r = get_group_creds(&g, &gid)) < 0)
568 /* First step, initialize groups from /etc/group */
569 if (username && gid != 0) {
570 if (initgroups(username, gid) < 0)
576 /* Second step, set our gids */
577 if (setresgid(gid, gid, gid) < 0)
581 if (context->supplementary_groups) {
586 /* Final step, initialize any manually set supplementary groups */
587 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
589 if (!(gids = new(gid_t, ngroups_max)))
/* Start from the groups already in effect; k counts the used slots. */
593 if ((k = getgroups(ngroups_max, gids)) < 0) {
600 STRV_FOREACH(i, context->supplementary_groups) {
603 if (k >= ngroups_max) {
609 r = get_group_creds(&g, gids+k);
618 if (setgroups(k, gids) < 0) {
/* Switches real/effective/saved UID to uid. If the context carries a
 * capability set, SECURE_KEEP_CAPS is first added to the secure bits and
 * a copy of that set, extended by CAP_SETUID and CAP_SETPCAP in the
 * effective and permitted sets, is installed so that the UID switch and
 * later secure-bits changes remain possible. */
629 static int enforce_user(const ExecContext *context, uid_t uid) {
633 /* Sets (but doesn't lookup) the uid and make sure we keep the
634 * capabilities while doing so. */
636 if (context->capabilities) {
638 static const cap_value_t bits[] = {
639 CAP_SETUID, /* Necessary so that we can run setresuid() below */
640 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
643 /* First step: If we need to keep capabilities but
644 * drop privileges we need to make sure we keep our
645 * caps, while we drop privileges. */
647 int sb = context->secure_bits|SECURE_KEEP_CAPS;
/* Only call the privileged PR_SET_SECUREBITS when the value differs. */
649 if (prctl(PR_GET_SECUREBITS) != sb)
650 if (prctl(PR_SET_SECUREBITS, sb) < 0)
654 /* Second step: set the capabilities. This will reduce
655 * the capabilities to the minimum we need. */
657 if (!(d = cap_dup(context->capabilities)))
660 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
661 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0) {
667 if (cap_set_proc(d) < 0) {
676 /* Third step: actually set the uids */
677 if (setresuid(uid, uid, uid) < 0)
680 /* At this point we should have all necessary capabilities but
681 are otherwise a normal user. However, the caps might have got
682 corrupted due to the setresuid() so we need to clean them up
683 later. This is done outside of this call. */
/* PAM conversation callback that does not support conversations.
 * NOTE(review): presumably installed through the static pam_conv in
 * setup_pam(); neither that initializer nor this function's return
 * statement is visible in this excerpt. */
690 static int null_conv(
692 const struct pam_message **msg,
693 struct pam_response **resp,
696 /* We don't support conversations */
/* Opens a PAM session (pam_start, PAM_TTY item, account check, session
 * open), fetches the PAM environment with pam_getenvlist(), and forks a
 * helper child renamed to "(sd-pam)". The helper closes the passed fds,
 * asks for SIGTERM on parent death, waits for that signal and closes
 * the PAM session once the parent is gone. SIGTERM is blocked around the
 * fork and the previous mask is restored in the parent. PAM failures are
 * reported as -EPERM because PAM codes do not map to errno.
 * NOTE(review): part of the parameter list and the goto/label structure
 * are not visible in this excerpt. */
701 static int setup_pam(
706 int fds[], unsigned n_fds) {
708 static const struct pam_conv conv = {
713 pam_handle_t *handle = NULL;
715 int pam_code = PAM_SUCCESS;
718 bool close_session = false;
719 pid_t pam_pid = 0, parent_pid;
725 /* We set up PAM in the parent process, then fork. The child
726 * will then stay around until killed via PR_SET_PDEATHSIG or
727 * systemd via the cgroup logic. It will then remove the PAM
728 * session again. The parent process will exec() the actual
729 * daemon. We do things this way to ensure that the main PID
730 * of the daemon is the one we initially fork()ed. */
732 if ((pam_code = pam_start(name, user, &conv, &handle)) != PAM_SUCCESS) {
738 if ((pam_code = pam_set_item(handle, PAM_TTY, tty)) != PAM_SUCCESS)
741 if ((pam_code = pam_acct_mgmt(handle, PAM_SILENT)) != PAM_SUCCESS)
744 if ((pam_code = pam_open_session(handle, PAM_SILENT)) != PAM_SUCCESS)
/* From here on a failure has to close the session again. */
747 close_session = true;
749 if ((!(e = pam_getenvlist(handle)))) {
750 pam_code = PAM_BUF_ERR;
754 /* Block SIGTERM, so that we know that it won't get lost in
756 if (sigemptyset(&ss) < 0 ||
757 sigaddset(&ss, SIGTERM) < 0 ||
758 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
761 parent_pid = getpid();
763 if ((pam_pid = fork()) < 0)
770 /* The child's job is to reset the PAM session on
773 /* This string must fit in 10 chars (i.e. the length
774 * of "/sbin/init"), to look pretty in /bin/ps */
775 rename_process("(sd-pam)");
777 /* Make sure we don't keep open the passed fds in this
778 child. We assume that otherwise only those fds are
779 open here that have been opened by PAM. */
780 close_many(fds, n_fds);
782 /* Wait until our parent died. This will most likely
783 * not work since the kernel does not allow
784 * unprivileged parents to kill their privileged children
785 * this way. We rely on the control groups kill logic
786 * to do the rest for us. */
787 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
790 /* Check if our parent process might already have
792 if (getppid() == parent_pid) {
794 if (sigwait(&ss, &sig) < 0) {
801 assert(sig == SIGTERM);
806 /* If our parent died we'll end the session */
807 if (getppid() != parent_pid)
808 if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS)
814 pam_end(handle, pam_code | PAM_DATA_SILENT);
818 /* If the child was forked off successfully it will do all the
819 * cleanups, so forget about the handle here. */
822 /* Unblock SIGTERM again in the parent */
823 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
826 /* We close the log explicitly here, since the PAM modules
827 * might have opened it, but we don't want this fd around. */
836 if (pam_code != PAM_SUCCESS)
837 err = -EPERM; /* PAM errors do not map to errno */
843 pam_code = pam_close_session(handle, PAM_DATA_SILENT);
845 pam_end(handle, pam_code | PAM_DATA_SILENT);
/* SIGCONT follows SIGTERM, presumably so that a stopped helper
 * still gets to process the termination request. */
853 kill(pam_pid, SIGTERM);
854 kill(pam_pid, SIGCONT);
/* Removes every capability whose bit is set in `drop` from the bounding
 * set with PR_CAPBSET_DROP. The current capability state is fetched
 * first; a copy with CAP_SETPCAP raised in the effective set is
 * installed for the duration of the drop, and the original state is put
 * back with cap_set_proc(old_cap) at the end.
 * NOTE(review): the condition guarding the temporary raise is not
 * visible in this excerpt. */
861 static int do_capability_bounding_set_drop(uint64_t drop) {
863 cap_t old_cap = NULL, new_cap = NULL;
867 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
868 * in the effective set (yes, the kernel drops that when
869 * executing init!), so get it back temporarily so that we can
870 * call PR_CAPBSET_DROP. */
872 old_cap = cap_get_proc();
876 if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
882 static const cap_value_t v = CAP_SETPCAP;
884 new_cap = cap_dup(old_cap);
890 if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
895 if (cap_set_proc(new_cap) < 0) {
/* Bit i of `drop` corresponds to capability number i. */
901 for (i = 0; i <= cap_last_cap(); i++)
902 if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
903 if (prctl(PR_CAPBSET_DROP, i) < 0) {
916 cap_set_proc(old_cap);
/* Renames the current process to the file name component of path wrapped
 * in parentheses, with "(...)" as a fallback name. The 11-byte buffer
 * leaves room for '(' + at most 8 name characters + ')' + NUL.
 * NOTE(review): the fallback condition and the computation of p and l
 * are not visible in this excerpt. */
923 static void rename_process_from_path(const char *path) {
924 char process_name[11];
928 /* This resulting string must fit in 10 chars (i.e. the length
929 * of "/sbin/init") to look pretty in /bin/ps */
931 p = file_name_from_path(path);
933 rename_process("(...)");
939 /* The end of the process name is usually more
940 * interesting, since the first bit might just be
946 process_name[0] = '(';
947 memcpy(process_name+1, p, l);
948 process_name[1+l] = ')';
949 process_name[1+l+1] = 0;
951 rename_process(process_name);
/* Forks a child and executes command->path in it after applying the
 * settings of the ExecContext. Visible steps in the child, in order:
 * signal dispositions and mask, closing all fds except the passed ones,
 * optional tcpwrap check, TTY reset, optional interactive confirmation,
 * stdin/stdout/stderr setup, cgroup membership, OOM score, nice level,
 * CPU scheduler, CPU affinity, IO priority, timer slack, utmp record,
 * user lookup, cgroup access rights, groups, umask, PAM session, network
 * and mount namespaces, chroot/chdir, fd shifting and flags, resource
 * limits, capability bounding set, user switch, secure bits and
 * capabilities, environment assembly, and finally execve().
 * On failure the child logs the failing step (r, an EXIT_xxx code) and
 * the error (err, negative errno style).
 * The parent adds the pid to the cgroups as well, logs the fork and
 * records the start in command->exec_status.
 *
 * Changes against the previous text of this block:
 *  - the third sched_setscheduler() argument had been mangled into a
 *    pilcrow sequence; it is the address of the local `param` again.
 *  - the gid argument of the second
 *    cgroup_bonding_set_task_access_list() call is cast to gid_t,
 *    matching the (mode, uid, gid, ...) order of the call before it.
 * NOTE(review): many lines of this function (labels, gotos, closing
 * braces, part of the parameter list) are not visible in this excerpt. */
954 int exec_spawn(ExecCommand *command,
956 const ExecContext *context,
957 int fds[], unsigned n_fds,
959 bool apply_permissions,
961 bool apply_tty_stdin,
963 CGroupBonding *cgroup_bondings,
964 CGroupAttribute *cgroup_attributes,
971 char **files_env = NULL;
976 assert(fds || n_fds <= 0);
978 if (context->std_input == EXEC_INPUT_SOCKET ||
979 context->std_output == EXEC_OUTPUT_SOCKET ||
980 context->std_error == EXEC_OUTPUT_SOCKET) {
992 if ((r = exec_context_load_environment(context, &files_env)) < 0) {
993 log_error("Failed to load environment files: %s", strerror(-r));
998 argv = command->argv;
1000 if (!(line = exec_command_line(argv))) {
1005 log_debug("About to execute: %s", line);
1008 r = cgroup_bonding_realize_list(cgroup_bondings);
1012 cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
1014 if ((pid = fork()) < 0) {
1022 const char *username = NULL, *home = NULL;
1023 uid_t uid = (uid_t) -1;
1024 gid_t gid = (gid_t) -1;
1025 char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1027 int saved_stdout = -1, saved_stdin = -1;
1028 bool keep_stdout = false, keep_stdin = false, set_access = false;
1032 rename_process_from_path(command->path);
1034 /* We reset exactly these signals, since they are the
1035 * only ones we set to SIG_IGN in the main daemon. All
1036 * others we leave untouched because we set them to
1037 * SIG_DFL or a valid handler initially, both of which
1038 * will be demoted to SIG_DFL. */
1039 default_signals(SIGNALS_CRASH_HANDLER,
1040 SIGNALS_IGNORE, -1);
1042 if (context->ignore_sigpipe)
1043 ignore_signals(SIGPIPE, -1);
1045 assert_se(sigemptyset(&ss) == 0);
1046 if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
1048 r = EXIT_SIGNAL_MASK;
1052 /* Close sockets very early to make sure we don't
1053 * block init reexecution because it cannot bind its
1056 err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds,
1057 socket_fd >= 0 ? 1 : n_fds);
1063 if (!context->same_pgrp)
1070 if (context->tcpwrap_name) {
1072 if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) {
1078 for (i = 0; i < (int) n_fds; i++) {
1079 if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) {
1087 exec_context_tty_reset(context);
1089 /* We skip the confirmation step if we shall not apply the TTY */
1090 if (confirm_spawn &&
1091 (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
1094 /* Set up terminal for the question */
1095 if ((r = setup_confirm_stdio(context,
1096 &saved_stdin, &saved_stdout))) {
1101 /* Now ask the question. */
1102 if (!(line = exec_command_line(argv))) {
1108 r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
1111 if (r < 0 || response == 'n') {
1115 } else if (response == 's') {
1120 /* Release terminal for the question */
1121 if ((r = restore_confirm_stdio(context,
1122 &saved_stdin, &saved_stdout,
1123 &keep_stdin, &keep_stdout))) {
1129 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1130 * must make sure to drop O_NONBLOCK */
1132 fd_nonblock(socket_fd, false);
1135 err = setup_input(context, socket_fd, apply_tty_stdin);
1143 err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1150 err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
1156 if (cgroup_bondings) {
1157 err = cgroup_bonding_install_list(cgroup_bondings, 0);
1164 if (context->oom_score_adjust_set) {
1167 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1170 if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
1171 /* Compatibility with Linux <= 2.6.35 */
1175 adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
1176 adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
1178 snprintf(t, sizeof(t), "%i", adj);
1181 if (write_one_line_file("/proc/self/oom_adj", t) < 0
1182 && errno != EACCES) {
1184 r = EXIT_OOM_ADJUST;
1190 if (context->nice_set)
1191 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
/* The reset-on-fork flag is OR-ed into the policy value; the priority
 * is passed through the local sched_param. */
1197 if (context->cpu_sched_set) {
1198 struct sched_param param;
1201 param.sched_priority = context->cpu_sched_priority;
1203 if (sched_setscheduler(0, context->cpu_sched_policy |
1204 (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), &param) < 0) {
1206 r = EXIT_SETSCHEDULER;
1211 if (context->cpuset)
1212 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1214 r = EXIT_CPUAFFINITY;
1218 if (context->ioprio_set)
1219 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1225 if (context->timer_slack_nsec_set)
1226 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1228 r = EXIT_TIMERSLACK;
1232 if (context->utmp_id)
1233 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1235 if (context->user) {
1236 username = context->user;
1237 err = get_user_creds(&username, &uid, &gid, &home);
1243 if (is_terminal_input(context->std_input)) {
1244 err = chown_terminal(STDIN_FILENO, uid);
1251 if (cgroup_bondings && context->control_group_modify) {
1252 err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid);
1254 err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid, context->control_group_persistent);
/* Without an explicit owner change only the persistence setting is
 * applied: mode, uid and gid are all passed as -1. */
1264 if (cgroup_bondings && !set_access && context->control_group_persistent >= 0) {
1265 err = cgroup_bonding_set_task_access_list(cgroup_bondings, (mode_t) -1, (uid_t) -1, (gid_t) -1, context->control_group_persistent);
1272 if (apply_permissions) {
1273 err = enforce_groups(context, username, gid);
1280 umask(context->umask);
1283 if (context->pam_name && username) {
1284 err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
1291 if (context->private_network) {
1292 if (unshare(CLONE_NEWNET) < 0) {
1301 if (strv_length(context->read_write_dirs) > 0 ||
1302 strv_length(context->read_only_dirs) > 0 ||
1303 strv_length(context->inaccessible_dirs) > 0 ||
1304 context->mount_flags != MS_SHARED ||
1305 context->private_tmp) {
1306 err = setup_namespace(context->read_write_dirs,
1307 context->read_only_dirs,
1308 context->inaccessible_dirs,
1309 context->private_tmp,
1310 context->mount_flags);
1318 if (context->root_directory)
1319 if (chroot(context->root_directory) < 0) {
1325 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1334 if (asprintf(&d, "%s/%s",
1335 context->root_directory ? context->root_directory : "",
1336 context->working_directory ? context->working_directory : "") < 0) {
1352 /* We repeat the fd closing here, to make sure that
1353 * nothing is leaked from the PAM modules */
1354 err = close_all_fds(fds, n_fds);
1356 err = shift_fds(fds, n_fds);
1358 err = flags_fds(fds, n_fds, context->non_blocking);
1364 if (apply_permissions) {
1366 for (i = 0; i < RLIMIT_NLIMITS; i++) {
1367 if (!context->rlimit[i])
1370 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1377 if (context->capability_bounding_set_drop) {
1378 err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
1380 r = EXIT_CAPABILITIES;
1385 if (context->user) {
1386 err = enforce_user(context, uid);
1393 /* PR_GET_SECUREBITS is not privileged, while
1394 * PR_SET_SECUREBITS is. So to suppress
1395 * potential EPERMs we'll try not to call
1396 * PR_SET_SECUREBITS unless necessary. */
1397 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1398 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1400 r = EXIT_SECUREBITS;
1404 if (context->capabilities)
1405 if (cap_set_proc(context->capabilities) < 0) {
1407 r = EXIT_CAPABILITIES;
1412 if (!(our_env = new0(char*, 7))) {
1419 if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 ||
1420 asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) {
1427 if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) {
1434 if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 ||
1435 asprintf(our_env + n_env++, "USER=%s", username) < 0) {
1441 if (is_terminal_input(context->std_input) ||
1442 context->std_output == EXEC_OUTPUT_TTY ||
1443 context->std_error == EXEC_OUTPUT_TTY)
1444 if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) {
1452 if (!(final_env = strv_env_merge(
1456 context->environment,
1465 if (!(final_argv = replace_env_argv(argv, final_env))) {
1471 final_env = strv_env_clean(final_env);
1473 execve(command->path, final_argv, final_env);
1480 log_warning("Failed at step %s spawning %s: %s",
1481 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1482 command->path, strerror(-err));
1486 strv_free(final_env);
1488 strv_free(files_env);
1489 strv_free(final_argv);
1491 if (saved_stdin >= 0)
1492 close_nointr_nofail(saved_stdin);
1494 if (saved_stdout >= 0)
1495 close_nointr_nofail(saved_stdout);
1500 strv_free(files_env);
1502 /* We add the new process to the cgroup both in the child (so
1503 * that we can be sure that no user code is ever executed
1504 * outside of the cgroup) and in the parent (so that we can be
1505 * sure that when we kill the cgroup the process will be
1507 if (cgroup_bondings)
1508 cgroup_bonding_install_list(cgroup_bondings, pid);
1510 log_debug("Forked %s as %lu", command->path, (unsigned long) pid);
1512 exec_status_start(&command->exec_status, pid);
1518 strv_free(files_env);
/* Fills in the non-zero defaults of an ExecContext: best-effort IO
 * class at level 0, SCHED_OTHER, syslog priority daemon/info with level
 * prefix parsing on, shared mount propagation, SIGTERM as kill signal
 * followed by SIGKILL, SIGPIPE ignored, and control_group_persistent
 * set to -1 (exec_spawn() only acts on values >= 0). */
1523 void exec_context_init(ExecContext *c) {
1527 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1528 c->cpu_sched_policy = SCHED_OTHER;
1529 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1530 c->syslog_level_prefix = true;
1531 c->mount_flags = MS_SHARED;
1532 c->kill_signal = SIGTERM;
1533 c->send_sigkill = true;
1534 c->control_group_persistent = -1;
1535 c->ignore_sigpipe = true;
/* Releases the dynamically allocated members of an ExecContext and
 * resets the freed pointers to NULL, so that the structure does not
 * keep dangling references.
 * NOTE(review): some members are released on lines not visible in this
 * excerpt. */
1538 void exec_context_done(ExecContext *c) {
1543 strv_free(c->environment);
1544 c->environment = NULL;
1546 strv_free(c->environment_files);
1547 c->environment_files = NULL;
1549 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1551 c->rlimit[l] = NULL;
1554 free(c->working_directory);
1555 c->working_directory = NULL;
1556 free(c->root_directory);
1557 c->root_directory = NULL;
1562 free(c->tcpwrap_name);
1563 c->tcpwrap_name = NULL;
1565 free(c->syslog_identifier);
1566 c->syslog_identifier = NULL;
1574 strv_free(c->supplementary_groups);
1575 c->supplementary_groups = NULL;
/* The capability set comes from libcap and needs cap_free(). */
1580 if (c->capabilities) {
1581 cap_free(c->capabilities);
1582 c->capabilities = NULL;
1585 strv_free(c->read_only_dirs);
1586 c->read_only_dirs = NULL;
1588 strv_free(c->read_write_dirs);
1589 c->read_write_dirs = NULL;
1591 strv_free(c->inaccessible_dirs);
1592 c->inaccessible_dirs = NULL;
1595 CPU_FREE(c->cpuset);
/* Releases the contents of a single ExecCommand; called per element by
 * exec_command_done_array() and exec_command_free_list().
 * NOTE(review): the body is not visible in this excerpt. */
1601 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n commands stored
 * contiguously starting at c. */
1611 void exec_command_done_array(ExecCommand *c, unsigned n) {
1614 for (i = 0; i < n; i++)
1615 exec_command_done(c+i);
/* Unlinks commands from the list headed by c and hands each one to
 * exec_command_done().
 * NOTE(review): the loop head and any final free of the element are not
 * visible in this excerpt. */
1618 void exec_command_free_list(ExecCommand *c) {
1622 LIST_REMOVE(ExecCommand, command, c, i);
1623 exec_command_done(i);
/* Frees each of the n command lists referenced by the array c through
 * exec_command_free_list(). */
1628 void exec_command_free_array(ExecCommand **c, unsigned n) {
1631 for (i = 0; i < n; i++) {
1632 exec_command_free_list(c[i]);
/* Walks c->environment_files, checks that each file name is an absolute
 * path, loads it with load_env_file() and merges the result into the
 * accumulated list r with strv_env_merge().
 * NOTE(review): how `ignore` is derived, the error handling and the
 * hand-over of the result through l are not visible in this excerpt. */
1637 int exec_context_load_environment(const ExecContext *c, char ***l) {
1638 char **i, **r = NULL;
1643 STRV_FOREACH(i, c->environment_files) {
1646 bool ignore = false;
1656 if (!path_is_absolute(fn)) {
1665 if ((k = load_env_file(fn, &p)) < 0) {
1679 m = strv_env_merge(2, r, p);
/* Writes the strings of l to f, each one preceded by a single space.
 * NOTE(review): the loop head is not visible in this excerpt. */
1695 static void strv_fprintf(FILE *f, char **l) {
1701 fprintf(f, " %s", *g);
/* Writes a human-readable dump of the ExecContext to f, one
 * "<prefix>Key: value" line per setting. Optional settings are only
 * printed when set; list-valued settings are printed on one line.
 *
 * Change against the previous text of this block: the "noroot-locked"
 * secure-bits label now starts with a space like the other five labels,
 * so it no longer fuses with the label printed before it.
 *
 * NOTE(review): control_group_persistent is initialised to -1 by
 * exec_context_init() and tested with ">= 0" in exec_spawn(), so it
 * looks like a tristate; yes_no() will print "yes" for the unset value.
 * Left unchanged here - confirm the intended output.
 * NOTE(review): several lines (fprintf heads, conditions, closing
 * braces) are not visible in this excerpt. */
1704 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
1716 "%sWorkingDirectory: %s\n"
1717 "%sRootDirectory: %s\n"
1718 "%sNonBlocking: %s\n"
1719 "%sPrivateTmp: %s\n"
1720 "%sControlGroupModify: %s\n"
1721 "%sControlGroupPersistent: %s\n"
1722 "%sPrivateNetwork: %s\n",
1724 prefix, c->working_directory ? c->working_directory : "/",
1725 prefix, c->root_directory ? c->root_directory : "/",
1726 prefix, yes_no(c->non_blocking),
1727 prefix, yes_no(c->private_tmp),
1728 prefix, yes_no(c->control_group_modify),
1729 prefix, yes_no(c->control_group_persistent),
1730 prefix, yes_no(c->private_network));
1732 STRV_FOREACH(e, c->environment)
1733 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
1735 STRV_FOREACH(e, c->environment_files)
1736 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
1738 if (c->tcpwrap_name)
1740 "%sTCPWrapName: %s\n",
1741 prefix, c->tcpwrap_name);
1748 if (c->oom_score_adjust_set)
1750 "%sOOMScoreAdjust: %i\n",
1751 prefix, c->oom_score_adjust);
1753 for (i = 0; i < RLIM_NLIMITS; i++)
1755 fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
1759 "%sIOSchedulingClass: %s\n"
1760 "%sIOPriority: %i\n",
1761 prefix, ioprio_class_to_string(IOPRIO_PRIO_CLASS(c->ioprio)),
1762 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
1764 if (c->cpu_sched_set)
1766 "%sCPUSchedulingPolicy: %s\n"
1767 "%sCPUSchedulingPriority: %i\n"
1768 "%sCPUSchedulingResetOnFork: %s\n",
1769 prefix, sched_policy_to_string(c->cpu_sched_policy),
1770 prefix, c->cpu_sched_priority,
1771 prefix, yes_no(c->cpu_sched_reset_on_fork));
1774 fprintf(f, "%sCPUAffinity:", prefix);
1775 for (i = 0; i < c->cpuset_ncpus; i++)
1776 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
1777 fprintf(f, " %i", i);
1781 if (c->timer_slack_nsec_set)
1782 fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
1785 "%sStandardInput: %s\n"
1786 "%sStandardOutput: %s\n"
1787 "%sStandardError: %s\n",
1788 prefix, exec_input_to_string(c->std_input),
1789 prefix, exec_output_to_string(c->std_output),
1790 prefix, exec_output_to_string(c->std_error));
1796 "%sTTYVHangup: %s\n"
1797 "%sTTYVTDisallocate: %s\n",
1798 prefix, c->tty_path,
1799 prefix, yes_no(c->tty_reset),
1800 prefix, yes_no(c->tty_vhangup),
1801 prefix, yes_no(c->tty_vt_disallocate));
/* Syslog facility and level are only shown when stdout or stderr goes
 * to one of the logger-backed outputs. */
1803 if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || c->std_output == EXEC_OUTPUT_JOURNAL ||
1804 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
1805 c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || c->std_error == EXEC_OUTPUT_JOURNAL ||
1806 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE)
1808 "%sSyslogFacility: %s\n"
1809 "%sSyslogLevel: %s\n",
1810 prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3),
1811 prefix, log_level_to_string(LOG_PRI(c->syslog_priority)));
1813 if (c->capabilities) {
1815 if ((t = cap_to_text(c->capabilities, NULL))) {
1816 fprintf(f, "%sCapabilities: %s\n",
1823 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
1825 (c->secure_bits & SECURE_KEEP_CAPS) ? " keep-caps" : "",
1826 (c->secure_bits & SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
1827 (c->secure_bits & SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
1828 (c->secure_bits & SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
1829 (c->secure_bits & SECURE_NOROOT) ? " noroot" : "",
1830 (c->secure_bits & SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
1832 if (c->capability_bounding_set_drop) {
1834 fprintf(f, "%sCapabilityBoundingSet:", prefix);
/* The field stores the capabilities to drop, so the bounding set shown
 * consists of those whose bit is NOT set. */
1836 for (l = 0; l <= cap_last_cap(); l++)
1837 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
1840 if ((t = cap_to_name(l))) {
1841 fprintf(f, " %s", t);
1850 fprintf(f, "%sUser: %s\n", prefix, c->user);
1852 fprintf(f, "%sGroup: %s\n", prefix, c->group);
1854 if (strv_length(c->supplementary_groups) > 0) {
1855 fprintf(f, "%sSupplementaryGroups:", prefix);
1856 strv_fprintf(f, c->supplementary_groups);
1861 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
1863 if (strv_length(c->read_write_dirs) > 0) {
1864 fprintf(f, "%sReadWriteDirs:", prefix);
1865 strv_fprintf(f, c->read_write_dirs);
1869 if (strv_length(c->read_only_dirs) > 0) {
1870 fprintf(f, "%sReadOnlyDirs:", prefix);
1871 strv_fprintf(f, c->read_only_dirs);
1875 if (strv_length(c->inaccessible_dirs) > 0) {
1876 fprintf(f, "%sInaccessibleDirs:", prefix);
1877 strv_fprintf(f, c->inaccessible_dirs);
1883 "%sKillSignal: SIG%s\n"
1884 "%sSendSIGKILL: %s\n"
1885 "%sIgnoreSIGPIPE: %s\n",
1886 prefix, kill_mode_to_string(c->kill_mode),
1887 prefix, signal_to_string(c->kill_signal),
1888 prefix, yes_no(c->send_sigkill),
1889 prefix, yes_no(c->ignore_sigpipe));
1893 "%sUtmpIdentifier: %s\n",
1894 prefix, c->utmp_id);
/* Marks the start of a process in the ExecStatus record 's': the start
 * timestamp is taken with dual_timestamp_get().
 *
 * NOTE(review): only part of the body is visible in this excerpt; how
 * 'pid' is stored in 's' and whether 's' is reset first should be
 * confirmed against the full file. */
1897 void exec_status_start(ExecStatus *s, pid_t pid) {
1902 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of process 'pid' in the ExecStatus record 's'.
 *
 * The exit timestamp is taken with dual_timestamp_get(). If the context
 * carries a utmp identifier, the dead process is reported through
 * utmp_put_dead_process() with the given pid, code and status.
 * Afterwards exec_context_tty_reset() is invoked on the context.
 *
 * NOTE(review): the statement executed when 's' already holds a
 * different non-zero PID is not visible in this excerpt, nor is the
 * place where 'code' and 'status' are stored -- confirm in the full
 * file. */
1905 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record already refers to some other (non-zero) PID */
1908 if (s->pid && s->pid != pid)
1912 dual_timestamp_get(&s->exit_timestamp);
/* Only contexts with a utmp identifier get a dead-process record */
1918 if (context->utmp_id)
1919 utmp_put_dead_process(context->utmp_id, pid, code, status);
1921 exec_context_tty_reset(context);
/* Writes a human-readable description of the ExecStatus record 's' to
 * the stream 'f'. Every emitted line starts with 'prefix'.
 *
 * The PID is printed cast to unsigned long. The start timestamp is
 * printed only if its realtime value is > 0. Likewise the exit
 * timestamp, the string form of s->code (sigchld_code_to_string()) and
 * the exit status are printed only if the exit realtime value is > 0.
 *
 * 'buf' is scratch space handed to format_timestamp().
 *
 * NOTE(review): parts of the fprintf() calls are not visible in this
 * excerpt; any early-return conditions should be confirmed in the full
 * file. */
1925 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
1926 char buf[FORMAT_TIMESTAMP_MAX];
1939 prefix, (unsigned long) s->pid);
/* Skip the start timestamp if none has been recorded */
1941 if (s->start_timestamp.realtime > 0)
1943 "%sStart Timestamp: %s\n",
1944 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit information is shown only once an exit timestamp exists */
1946 if (s->exit_timestamp.realtime > 0)
1948 "%sExit Timestamp: %s\n"
1950 "%sExit Status: %i\n",
1951 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
1952 prefix, sigchld_code_to_string(s->code),
/* Builds a single string out of the string vector 'argv' and returns it
 * as char*.
 *
 * The vector is walked twice with STRV_FOREACH(): once before the
 * result buffer of 'k' chars is allocated with new(), and once
 * afterwards. In the second pass, arguments containing any WHITESPACE
 * character (checked with strpbrk()) are handled separately from the
 * others.
 *
 * NOTE(review): the loop bodies, the failure path of the allocation and
 * the return statement are not visible in this excerpt. Presumably the
 * first pass computes the needed size, whitespace-containing arguments
 * get quoted, and the caller owns the returned buffer -- confirm in the
 * full file. */
1956 char *exec_command_line(char **argv) {
/* First pass over the arguments, before allocating */
1964 STRV_FOREACH(a, argv)
1967 if (!(n = new(char, k)))
/* Second pass over the arguments, after allocating */
1971 STRV_FOREACH(a, argv) {
/* Arguments containing whitespace take a separate branch */
1978 if (strpbrk(*a, WHITESPACE)) {
1989 /* FIXME: this doesn't really handle arguments that have
1990 * spaces and ticks in them */
/* Writes a description of the command 'c' to the stream 'f': first a
 * "Command Line:" line built from c->argv via exec_command_line(), then
 * the command's exec status via exec_status_dump().
 *
 * The command line is printed with 'prefix'; the status is printed with
 * 'prefix' plus one extra tab.
 *
 * NOTE(review): declarations and any cleanup of 'p2' and 'cmd' are not
 * visible in this excerpt -- confirm in the full file. */
1995 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
1997 const char *prefix2;
/* Try to build the deeper prefix; if strappend() yields NULL fall back
 * to the plain prefix */
2006 p2 = strappend(prefix, "\t");
2007 prefix2 = p2 ? p2 : prefix;
2009 cmd = exec_command_line(c->argv);
/* If no command line string could be built, the ENOMEM error text is
 * printed in its place */
2012 "%sCommand Line: %s\n",
2013 prefix, cmd ? cmd : strerror(ENOMEM));
2017 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at 'c' to the stream 'f' by
 * calling exec_command_dump() on each entry with the same 'prefix'.
 *
 * The parameter 'c' itself is used as the iteration cursor of
 * LIST_FOREACH() over the 'command' list field.
 *
 * NOTE(review): statements between the signature and the loop are not
 * visible in this excerpt. */
2022 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2028 LIST_FOREACH(command, c, c)
2029 exec_command_dump(c, f, prefix);
/* Appends the command 'e' to the command list whose head pointer is
 * '*l'. The current tail is looked up with LIST_FIND_TAIL() and 'e' is
 * inserted after it with LIST_INSERT_AFTER(), so existing entries keep
 * their order and the new one comes last.
 *
 * NOTE(review): the surrounding control flow is not visible in this
 * excerpt; presumably an empty list is handled separately -- confirm
 * in the full file. */
2032 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2039 /* It's kind of important, that we keep the order here */
2040 LIST_FIND_TAIL(ExecCommand, command, *l, end);
2041 LIST_INSERT_AFTER(ExecCommand, command, *l, end, e);
/* Variadic setter for the command 'c': 'path' followed by the variadic
 * arguments is turned into a string vector with strv_new_ap(), and
 * 'path' itself is duplicated with strdup().
 *
 * NOTE(review): the va_list handling, the error returns and the
 * assignments into 'c' are not visible in this excerpt. Presumably the
 * argument list must be NULL-terminated and a negative errno-style
 * value is returned on allocation failure -- confirm in the full
 * file. */
2046 int exec_command_set(ExecCommand *c, const char *path, ...) {
2054 l = strv_new_ap(path, ap);
/* A failed strdup() is handled in the branch below */
2060 if (!(p = strdup(path))) {
/* String names for the ExecInput enum values, indexed by enum value and
 * sized by _EXEC_INPUT_MAX. DEFINE_STRING_TABLE_LOOKUP() is instantiated
 * on this table for the 'exec_input' name and the ExecInput type;
 * presumably this is what provides exec_input_to_string() and a
 * matching from-string lookup -- confirm against the macro
 * definition. */
2074 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2075 [EXEC_INPUT_NULL] = "null",
2076 [EXEC_INPUT_TTY] = "tty",
2077 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2078 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2079 [EXEC_INPUT_SOCKET] = "socket"
2082 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* String names for the ExecOutput enum values, indexed by enum value
 * and sized by _EXEC_OUTPUT_MAX. The "+console" names belong to the
 * *_AND_CONSOLE variants of the syslog, kmsg and journal outputs.
 * DEFINE_STRING_TABLE_LOOKUP() is instantiated on this table for the
 * 'exec_output' name and the ExecOutput type; presumably this is what
 * provides exec_output_to_string() and a matching from-string lookup --
 * confirm against the macro definition. */
2084 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2085 [EXEC_OUTPUT_INHERIT] = "inherit",
2086 [EXEC_OUTPUT_NULL] = "null",
2087 [EXEC_OUTPUT_TTY] = "tty",
2088 [EXEC_OUTPUT_SYSLOG] = "syslog",
2089 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2090 [EXEC_OUTPUT_KMSG] = "kmsg",
2091 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2092 [EXEC_OUTPUT_JOURNAL] = "journal",
2093 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2094 [EXEC_OUTPUT_SOCKET] = "socket"
2097 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
/* String names for the KillMode enum values, indexed by enum value and
 * sized by _KILL_MODE_MAX. DEFINE_STRING_TABLE_LOOKUP() is instantiated
 * on this table for the 'kill_mode' name and the KillMode type;
 * presumably this is what provides kill_mode_to_string() and a matching
 * from-string lookup -- confirm against the macro definition. */
2099 static const char* const kill_mode_table[_KILL_MODE_MAX] = {
2100 [KILL_CONTROL_GROUP] = "control-group",
2101 [KILL_PROCESS] = "process",
2102 [KILL_NONE] = "none"
2105 DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
/* String names for the KillWho enum values, indexed by enum value and
 * sized by _KILL_WHO_MAX. DEFINE_STRING_TABLE_LOOKUP() is instantiated
 * on this table for the 'kill_who' name and the KillWho type;
 * presumably this generates the to-string/from-string helpers --
 * confirm against the macro definition.
 *
 * NOTE(review): the trailing comma after the "control" entry suggests
 * the table has further entries that are not visible in this
 * excerpt. */
2107 static const char* const kill_who_table[_KILL_WHO_MAX] = {
2108 [KILL_MAIN] = "main",
2109 [KILL_CONTROL] = "control",
2113 DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);