1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
45 #include <security/pam_appl.h>
49 #include <selinux/selinux.h>
57 #include <sys/apparmor.h>
63 #include "capability.h"
66 #include "sd-messages.h"
68 #include "securebits.h"
69 #include "namespace.h"
70 #include "exit-status.h"
72 #include "utmp-wtmp.h"
74 #include "path-util.h"
79 #include "selinux-util.h"
80 #include "errno-list.h"
83 #include "apparmor-util.h"
84 #include "smack-util.h"
85 #include "bus-endpoint.h"
90 #include "seccomp-util.h"
93 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
94 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
96 /* This assumes there is a 'tty' group */
99 #define SNDBUF_SIZE (8*1024*1024)
101 static int shift_fds(int fds[], unsigned n_fds) {
102 int start, restart_from;
107 /* Modifies the fds array! (sorts it) */
117 for (i = start; i < (int) n_fds; i++) {
120 /* Already at right index? */
124 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
130 /* Hmm, the fd we wanted isn't free? Then
131 * let's remember that and try again from here */
132 if (nfd != i+3 && restart_from < 0)
136 if (restart_from < 0)
139 start = restart_from;
145 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
154 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
156 for (i = 0; i < n_fds; i++) {
158 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
161 /* We unconditionally drop FD_CLOEXEC from the fds,
162 * since after all we want to pass these fds to our
165 if ((r = fd_cloexec(fds[i], false)) < 0)
172 _pure_ static const char *tty_path(const ExecContext *context) {
175 if (context->tty_path)
176 return context->tty_path;
178 return "/dev/console";
181 static void exec_context_tty_reset(const ExecContext *context) {
184 if (context->tty_vhangup)
185 terminal_vhangup(tty_path(context));
187 if (context->tty_reset)
188 reset_terminal(tty_path(context));
190 if (context->tty_vt_disallocate && context->tty_path)
191 vt_disallocate(context->tty_path);
194 static bool is_terminal_output(ExecOutput o) {
196 o == EXEC_OUTPUT_TTY ||
197 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
198 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
199 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
202 static int open_null_as(int flags, int nfd) {
207 fd = open("/dev/null", flags|O_NOCTTY);
212 r = dup2(fd, nfd) < 0 ? -errno : nfd;
220 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
221 union sockaddr_union sa = {
222 .un.sun_family = AF_UNIX,
223 .un.sun_path = "/run/systemd/journal/stdout",
225 uid_t olduid = UID_INVALID;
226 gid_t oldgid = GID_INVALID;
229 if (gid != GID_INVALID) {
237 if (uid != UID_INVALID) {
247 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
251 /* If we fail to restore the uid or gid, things will likely
252 fail later on. This should only happen if an LSM interferes. */
254 if (uid != UID_INVALID)
255 (void) seteuid(olduid);
258 if (gid != GID_INVALID)
259 (void) setegid(oldgid);
264 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
268 assert(output < _EXEC_OUTPUT_MAX);
272 fd = socket(AF_UNIX, SOCK_STREAM, 0);
276 r = connect_journal_socket(fd, uid, gid);
280 if (shutdown(fd, SHUT_RD) < 0) {
285 fd_inc_sndbuf(fd, SNDBUF_SIZE);
295 context->syslog_identifier ? context->syslog_identifier : ident,
297 context->syslog_priority,
298 !!context->syslog_level_prefix,
299 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
300 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
301 is_terminal_output(output));
304 r = dup2(fd, nfd) < 0 ? -errno : nfd;
311 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
317 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
321 r = dup2(fd, nfd) < 0 ? -errno : nfd;
329 static bool is_terminal_input(ExecInput i) {
331 i == EXEC_INPUT_TTY ||
332 i == EXEC_INPUT_TTY_FORCE ||
333 i == EXEC_INPUT_TTY_FAIL;
336 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
338 if (is_terminal_input(std_input) && !apply_tty_stdin)
339 return EXEC_INPUT_NULL;
341 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
342 return EXEC_INPUT_NULL;
347 static int fixup_output(ExecOutput std_output, int socket_fd) {
349 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
350 return EXEC_OUTPUT_INHERIT;
355 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
360 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
364 case EXEC_INPUT_NULL:
365 return open_null_as(O_RDONLY, STDIN_FILENO);
368 case EXEC_INPUT_TTY_FORCE:
369 case EXEC_INPUT_TTY_FAIL: {
372 fd = acquire_terminal(tty_path(context),
373 i == EXEC_INPUT_TTY_FAIL,
374 i == EXEC_INPUT_TTY_FORCE,
380 if (fd != STDIN_FILENO) {
381 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
389 case EXEC_INPUT_SOCKET:
390 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
393 assert_not_reached("Unknown input type");
397 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
405 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
406 o = fixup_output(context->std_output, socket_fd);
408 if (fileno == STDERR_FILENO) {
410 e = fixup_output(context->std_error, socket_fd);
412 /* This expects that the input and output are already set up */
414 /* Don't change the stderr file descriptor if we inherit all
415 * the way and are not on a tty */
416 if (e == EXEC_OUTPUT_INHERIT &&
417 o == EXEC_OUTPUT_INHERIT &&
418 i == EXEC_INPUT_NULL &&
419 !is_terminal_input(context->std_input) &&
423 /* Duplicate from stdout if possible */
424 if (e == o || e == EXEC_OUTPUT_INHERIT)
425 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
429 } else if (o == EXEC_OUTPUT_INHERIT) {
430 /* If input got downgraded, inherit the original value */
431 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
432 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
434 /* If the input is connected to anything that's not /dev/null, inherit that... */
435 if (i != EXEC_INPUT_NULL)
436 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
438 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
442 /* We need to open /dev/null here anew, to get the right access mode. */
443 return open_null_as(O_WRONLY, fileno);
448 case EXEC_OUTPUT_NULL:
449 return open_null_as(O_WRONLY, fileno);
451 case EXEC_OUTPUT_TTY:
452 if (is_terminal_input(i))
453 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
455 /* We don't reset the terminal if this is just about output */
456 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
458 case EXEC_OUTPUT_SYSLOG:
459 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
460 case EXEC_OUTPUT_KMSG:
461 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
462 case EXEC_OUTPUT_JOURNAL:
463 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
464 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
466 log_unit_struct(unit_id,
468 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
469 fileno == STDOUT_FILENO ? "stdout" : "stderr",
470 unit_id, strerror(-r)),
473 r = open_null_as(O_WRONLY, fileno);
477 case EXEC_OUTPUT_SOCKET:
478 assert(socket_fd >= 0);
479 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
482 assert_not_reached("Unknown error type");
486 static int chown_terminal(int fd, uid_t uid) {
491 /* This might fail. What matters are the results. */
492 (void) fchown(fd, uid, -1);
493 (void) fchmod(fd, TTY_MODE);
495 if (fstat(fd, &st) < 0)
498 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
504 static int setup_confirm_stdio(int *_saved_stdin,
505 int *_saved_stdout) {
506 int fd = -1, saved_stdin, saved_stdout = -1, r;
508 assert(_saved_stdin);
509 assert(_saved_stdout);
511 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
515 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
516 if (saved_stdout < 0) {
521 fd = acquire_terminal(
526 DEFAULT_CONFIRM_USEC);
532 r = chown_terminal(fd, getuid());
536 if (dup2(fd, STDIN_FILENO) < 0) {
541 if (dup2(fd, STDOUT_FILENO) < 0) {
549 *_saved_stdin = saved_stdin;
550 *_saved_stdout = saved_stdout;
555 safe_close(saved_stdout);
556 safe_close(saved_stdin);
562 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
563 _cleanup_close_ int fd = -1;
568 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
572 va_start(ap, format);
573 vdprintf(fd, format, ap);
579 static int restore_confirm_stdio(int *saved_stdin,
585 assert(saved_stdout);
589 if (*saved_stdin >= 0)
590 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
593 if (*saved_stdout >= 0)
594 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
597 safe_close(*saved_stdin);
598 safe_close(*saved_stdout);
603 static int ask_for_confirmation(char *response, char **argv) {
604 int saved_stdout = -1, saved_stdin = -1, r;
605 _cleanup_free_ char *line = NULL;
607 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
611 line = exec_command_line(argv);
615 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
617 restore_confirm_stdio(&saved_stdin, &saved_stdout);
622 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
623 bool keep_groups = false;
628 /* Look up and set GID and supplementary group list. Here too
629 * we avoid NSS lookups for gid=0. */
631 if (context->group || username) {
633 if (context->group) {
634 const char *g = context->group;
636 if ((r = get_group_creds(&g, &gid)) < 0)
640 /* First step, initialize groups from /etc/group */
641 if (username && gid != 0) {
642 if (initgroups(username, gid) < 0)
648 /* Second step, set our gids */
649 if (setresgid(gid, gid, gid) < 0)
653 if (context->supplementary_groups) {
658 /* Final step, initialize any manually set supplementary groups */
659 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
661 if (!(gids = new(gid_t, ngroups_max)))
665 if ((k = getgroups(ngroups_max, gids)) < 0) {
672 STRV_FOREACH(i, context->supplementary_groups) {
675 if (k >= ngroups_max) {
681 r = get_group_creds(&g, gids+k);
690 if (setgroups(k, gids) < 0) {
701 static int enforce_user(const ExecContext *context, uid_t uid) {
704 /* Sets (but doesn't look up) the uid and makes sure we keep the
705 * capabilities while doing so. */
707 if (context->capabilities) {
708 _cleanup_cap_free_ cap_t d = NULL;
709 static const cap_value_t bits[] = {
710 CAP_SETUID, /* Necessary so that we can run setresuid() below */
711 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
714 /* First step: If we need to keep capabilities but
715 * drop privileges we need to make sure we keep our
716 * caps, while we drop privileges. */
718 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
720 if (prctl(PR_GET_SECUREBITS) != sb)
721 if (prctl(PR_SET_SECUREBITS, sb) < 0)
725 /* Second step: set the capabilities. This will reduce
726 * the capabilities to the minimum we need. */
728 d = cap_dup(context->capabilities);
732 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
733 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
736 if (cap_set_proc(d) < 0)
740 /* Third step: actually set the uids */
741 if (setresuid(uid, uid, uid) < 0)
744 /* At this point we should have all necessary capabilities but
745 are otherwise a normal user. However, the caps might have been
746 corrupted by the setresuid(), so we need to clean them up
747 later. This is done outside of this call. */
754 static int null_conv(
756 const struct pam_message **msg,
757 struct pam_response **resp,
760 /* We don't support conversations */
765 static int setup_pam(
771 int fds[], unsigned n_fds) {
773 static const struct pam_conv conv = {
778 pam_handle_t *handle = NULL;
780 int pam_code = PAM_SUCCESS;
783 bool close_session = false;
784 pid_t pam_pid = 0, parent_pid;
791 /* We set up PAM in the parent process, then fork. The child
792 * will then stay around until killed via PR_SET_PDEATHSIG or
793 * systemd via the cgroup logic. It will then remove the PAM
794 * session again. The parent process will exec() the actual
795 * daemon. We do things this way to ensure that the main PID
796 * of the daemon is the one we initially fork()ed. */
798 if (log_get_max_level() < LOG_DEBUG)
801 pam_code = pam_start(name, user, &conv, &handle);
802 if (pam_code != PAM_SUCCESS) {
808 pam_code = pam_set_item(handle, PAM_TTY, tty);
809 if (pam_code != PAM_SUCCESS)
813 pam_code = pam_acct_mgmt(handle, flags);
814 if (pam_code != PAM_SUCCESS)
817 pam_code = pam_open_session(handle, flags);
818 if (pam_code != PAM_SUCCESS)
821 close_session = true;
823 e = pam_getenvlist(handle);
825 pam_code = PAM_BUF_ERR;
829 /* Block SIGTERM, so that we know that it won't get lost in
831 if (sigemptyset(&ss) < 0 ||
832 sigaddset(&ss, SIGTERM) < 0 ||
833 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
836 parent_pid = getpid();
846 /* The child's job is to reset the PAM session on
849 /* This string must fit in 10 chars (i.e. the length
850 * of "/sbin/init"), to look pretty in /bin/ps */
851 rename_process("(sd-pam)");
853 /* Make sure we don't keep open the passed fds in this
854 child. We assume that otherwise only those fds are
855 open here that have been opened by PAM. */
856 close_many(fds, n_fds);
858 /* Drop privileges - we don't need any to pam_close_session
859 * and this will make PR_SET_PDEATHSIG work in most cases.
860 * If this fails, ignore the error - but expect sd-pam threads
861 * to fail to exit normally */
862 if (setresuid(uid, uid, uid) < 0)
863 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
865 /* Wait until our parent died. This will only work if
866 * the above setresuid() succeeds, otherwise the kernel
867 * will not allow unprivileged parents to kill their privileged
868 * children this way. We rely on the control groups kill logic
869 * to do the rest for us. */
870 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
873 /* Check if our parent process might already have
875 if (getppid() == parent_pid) {
877 if (sigwait(&ss, &sig) < 0) {
884 assert(sig == SIGTERM);
889 /* If our parent died we'll end the session */
890 if (getppid() != parent_pid) {
891 pam_code = pam_close_session(handle, flags);
892 if (pam_code != PAM_SUCCESS)
899 pam_end(handle, pam_code | flags);
903 /* If the child was forked off successfully it will do all the
904 * cleanups, so forget about the handle here. */
907 /* Unblock SIGTERM again in the parent */
908 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
911 /* We close the log explicitly here, since the PAM modules
912 * might have opened it, but we don't want this fd around. */
921 if (pam_code != PAM_SUCCESS) {
922 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
923 err = -EPERM; /* PAM errors do not map to errno */
925 log_error_errno(errno, "PAM failed: %m");
931 pam_code = pam_close_session(handle, flags);
933 pam_end(handle, pam_code | flags);
941 kill(pam_pid, SIGTERM);
942 kill(pam_pid, SIGCONT);
949 static void rename_process_from_path(const char *path) {
950 char process_name[11];
954 /* This resulting string must fit in 10 chars (i.e. the length
955 * of "/sbin/init") to look pretty in /bin/ps */
959 rename_process("(...)");
965 /* The end of the process name is usually more
966 * interesting, since the first bit might just be
972 process_name[0] = '(';
973 memcpy(process_name+1, p, l);
974 process_name[1+l] = ')';
975 process_name[1+l+1] = 0;
977 rename_process(process_name);
982 static int apply_seccomp(const ExecContext *c) {
983 uint32_t negative_action, action;
984 scmp_filter_ctx *seccomp;
991 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
993 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
997 if (c->syscall_archs) {
999 SET_FOREACH(id, c->syscall_archs, i) {
1000 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1008 r = seccomp_add_secondary_archs(seccomp);
1013 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1014 SET_FOREACH(id, c->syscall_filter, i) {
1015 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1020 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1024 r = seccomp_load(seccomp);
1027 seccomp_release(seccomp);
1031 static int apply_address_families(const ExecContext *c) {
1032 scmp_filter_ctx *seccomp;
1038 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1042 r = seccomp_add_secondary_archs(seccomp);
1046 if (c->address_families_whitelist) {
1047 int af, first = 0, last = 0;
1050 /* If this is a whitelist, we first block the address
1051 * families that are out of range and then everything
1052 * that is not in the set. First, we find the lowest
1053 * and highest address family in the set. */
1055 SET_FOREACH(afp, c->address_families, i) {
1056 af = PTR_TO_INT(afp);
1058 if (af <= 0 || af >= af_max())
1061 if (first == 0 || af < first)
1064 if (last == 0 || af > last)
1068 assert((first == 0) == (last == 0));
1072 /* No entries in the valid range, block everything */
1073 r = seccomp_rule_add(
1075 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1083 /* Block everything below the first entry */
1084 r = seccomp_rule_add(
1086 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1089 SCMP_A0(SCMP_CMP_LT, first));
1093 /* Block everything above the last entry */
1094 r = seccomp_rule_add(
1096 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1099 SCMP_A0(SCMP_CMP_GT, last));
1103 /* Block everything between the first and last
1105 for (af = 1; af < af_max(); af++) {
1107 if (set_contains(c->address_families, INT_TO_PTR(af)))
1110 r = seccomp_rule_add(
1112 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1115 SCMP_A0(SCMP_CMP_EQ, af));
1124 /* If this is a blacklist, then generate one rule for
1125 * each address family; these are then combined in OR
1128 SET_FOREACH(af, c->address_families, i) {
1130 r = seccomp_rule_add(
1132 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1135 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1141 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1145 r = seccomp_load(seccomp);
1148 seccomp_release(seccomp);
1154 static void do_idle_pipe_dance(int idle_pipe[4]) {
1158 safe_close(idle_pipe[1]);
1159 safe_close(idle_pipe[2]);
1161 if (idle_pipe[0] >= 0) {
1164 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1166 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1167 /* Signal systemd that we are bored and want to continue. */
1168 write(idle_pipe[3], "x", 1);
1170 /* Wait for systemd to react to the signal above. */
1171 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1174 safe_close(idle_pipe[0]);
1178 safe_close(idle_pipe[3]);
1181 static int build_environment(
1182 const ExecContext *c,
1184 usec_t watchdog_usec,
1186 const char *username,
1190 _cleanup_strv_free_ char **our_env = NULL;
1197 our_env = new0(char*, 10);
1202 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1204 our_env[n_env++] = x;
1206 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1208 our_env[n_env++] = x;
1211 if (watchdog_usec > 0) {
1212 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1214 our_env[n_env++] = x;
1216 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1218 our_env[n_env++] = x;
1222 x = strappend("HOME=", home);
1225 our_env[n_env++] = x;
1229 x = strappend("LOGNAME=", username);
1232 our_env[n_env++] = x;
1234 x = strappend("USER=", username);
1237 our_env[n_env++] = x;
1241 x = strappend("SHELL=", shell);
1244 our_env[n_env++] = x;
1247 if (is_terminal_input(c->std_input) ||
1248 c->std_output == EXEC_OUTPUT_TTY ||
1249 c->std_error == EXEC_OUTPUT_TTY ||
1252 x = strdup(default_term_for_tty(tty_path(c)));
1255 our_env[n_env++] = x;
1258 our_env[n_env++] = NULL;
1259 assert(n_env <= 10);
1267 static int exec_child(
1268 ExecCommand *command,
1269 const ExecContext *context,
1270 const ExecParameters *params,
1271 ExecRuntime *runtime,
1274 int *fds, unsigned n_fds,
1278 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1279 _cleanup_free_ char *mac_selinux_context_net = NULL;
1280 const char *username = NULL, *home = NULL, *shell = NULL;
1281 unsigned n_dont_close = 0;
1282 int dont_close[n_fds + 4];
1283 uid_t uid = UID_INVALID;
1284 gid_t gid = GID_INVALID;
1290 assert(exit_status);
1292 rename_process_from_path(command->path);
1294 /* We reset exactly these signals, since they are the
1295 * only ones we set to SIG_IGN in the main daemon. All
1296 * others we leave untouched because we set them to
1297 * SIG_DFL or a valid handler initially, both of which
1298 * will be demoted to SIG_DFL. */
1299 default_signals(SIGNALS_CRASH_HANDLER,
1300 SIGNALS_IGNORE, -1);
1302 if (context->ignore_sigpipe)
1303 ignore_signals(SIGPIPE, -1);
1305 r = reset_signal_mask();
1307 *exit_status = EXIT_SIGNAL_MASK;
1311 if (params->idle_pipe)
1312 do_idle_pipe_dance(params->idle_pipe);
1314 /* Close sockets very early to make sure we don't
1315 * block init reexecution because it cannot bind its
1321 dont_close[n_dont_close++] = socket_fd;
1323 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1324 n_dont_close += n_fds;
1326 if (params->bus_endpoint_fd >= 0)
1327 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1329 if (runtime->netns_storage_socket[0] >= 0)
1330 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1331 if (runtime->netns_storage_socket[1] >= 0)
1332 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1335 r = close_all_fds(dont_close, n_dont_close);
1337 *exit_status = EXIT_FDS;
1341 if (!context->same_pgrp)
1343 *exit_status = EXIT_SETSID;
1347 exec_context_tty_reset(context);
1349 if (params->confirm_spawn) {
1352 r = ask_for_confirmation(&response, argv);
1353 if (r == -ETIMEDOUT)
1354 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1356 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1357 else if (response == 's') {
1358 write_confirm_message("Skipping execution.\n");
1359 *exit_status = EXIT_CONFIRM;
1361 } else if (response == 'n') {
1362 write_confirm_message("Failing execution.\n");
1368 if (context->user) {
1369 username = context->user;
1370 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1372 *exit_status = EXIT_USER;
1377 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1378 * must be sure to drop O_NONBLOCK */
1380 fd_nonblock(socket_fd, false);
1382 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1384 *exit_status = EXIT_STDIN;
1388 r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1390 *exit_status = EXIT_STDOUT;
1394 r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1396 *exit_status = EXIT_STDERR;
1400 if (params->cgroup_path) {
1401 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1403 *exit_status = EXIT_CGROUP;
1408 if (context->oom_score_adjust_set) {
1409 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1411 /* When we can't make this change due to EPERM, then
1412 * let's silently skip over it. User namespaces
1413 * prohibit write access to this file, and we
1414 * shouldn't trip up over that. */
1416 sprintf(t, "%i", context->oom_score_adjust);
1417 r = write_string_file("/proc/self/oom_score_adj", t);
1418 if (r == -EPERM || r == -EACCES) {
1420 log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1423 *exit_status = EXIT_OOM_ADJUST;
1428 if (context->nice_set)
1429 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1430 *exit_status = EXIT_NICE;
1434 if (context->cpu_sched_set) {
1435 struct sched_param param = {
1436 .sched_priority = context->cpu_sched_priority,
1439 r = sched_setscheduler(0,
1440 context->cpu_sched_policy |
1441 (context->cpu_sched_reset_on_fork ?
1442 SCHED_RESET_ON_FORK : 0),
1445 *exit_status = EXIT_SETSCHEDULER;
1450 if (context->cpuset)
1451 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1452 *exit_status = EXIT_CPUAFFINITY;
1456 if (context->ioprio_set)
1457 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1458 *exit_status = EXIT_IOPRIO;
1462 if (context->timer_slack_nsec != NSEC_INFINITY)
1463 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1464 *exit_status = EXIT_TIMERSLACK;
1468 if (context->personality != 0xffffffffUL)
1469 if (personality(context->personality) < 0) {
1470 *exit_status = EXIT_PERSONALITY;
1474 if (context->utmp_id)
1475 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1477 if (context->user && is_terminal_input(context->std_input)) {
1478 r = chown_terminal(STDIN_FILENO, uid);
1480 *exit_status = EXIT_STDIN;
1486 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1487 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1489 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1491 *exit_status = EXIT_BUS_ENDPOINT;
1497 /* If delegation is enabled we'll pass ownership of the cgroup
1498 * (but only in systemd's own controller hierarchy!) to the
1499 * user of the new process. */
1500 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1501 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1503 *exit_status = EXIT_CGROUP;
1508 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1510 *exit_status = EXIT_CGROUP;
1515 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1518 STRV_FOREACH(rt, context->runtime_directory) {
1519 _cleanup_free_ char *p;
1521 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1523 *exit_status = EXIT_RUNTIME_DIRECTORY;
1527 r = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1529 *exit_status = EXIT_RUNTIME_DIRECTORY;
1535 if (params->apply_permissions) {
1536 r = enforce_groups(context, username, gid);
1538 *exit_status = EXIT_GROUP;
1543 umask(context->umask);
1546 if (params->apply_permissions && context->pam_name && username) {
1547 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1549 *exit_status = EXIT_PAM;
1555 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1556 r = setup_netns(runtime->netns_storage_socket);
1558 *exit_status = EXIT_NETWORK;
1563 if (!strv_isempty(context->read_write_dirs) ||
1564 !strv_isempty(context->read_only_dirs) ||
1565 !strv_isempty(context->inaccessible_dirs) ||
1566 context->mount_flags != 0 ||
1567 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1568 params->bus_endpoint_path ||
1569 context->private_devices ||
1570 context->protect_system != PROTECT_SYSTEM_NO ||
1571 context->protect_home != PROTECT_HOME_NO) {
1573 char *tmp = NULL, *var = NULL;
1575 /* The runtime struct only contains the parent
1576 * of the private /tmp, which is
1577 * non-accessible to world users. Inside of it
1578 * there's a /tmp that is sticky, and that's
1579 * the one we want to use here. */
1581 if (context->private_tmp && runtime) {
1582 if (runtime->tmp_dir)
1583 tmp = strjoina(runtime->tmp_dir, "/tmp");
1584 if (runtime->var_tmp_dir)
1585 var = strjoina(runtime->var_tmp_dir, "/tmp");
1588 r = setup_namespace(
1589 context->read_write_dirs,
1590 context->read_only_dirs,
1591 context->inaccessible_dirs,
1594 params->bus_endpoint_path,
1595 context->private_devices,
1596 context->protect_home,
1597 context->protect_system,
1598 context->mount_flags);
1600 /* If we couldn't set up the namespace this is
1601 * probably due to a missing capability. In this case,
1602 * silently proceed. */
1603 if (r == -EPERM || r == -EACCES) {
1605 log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1608 *exit_status = EXIT_NAMESPACE;
1613 if (params->apply_chroot) {
1614 if (context->root_directory)
1615 if (chroot(context->root_directory) < 0) {
1616 *exit_status = EXIT_CHROOT;
1620 if (chdir(context->working_directory ?: "/") < 0 &&
1621 !context->working_directory_missing_ok) {
1622 *exit_status = EXIT_CHDIR;
1626 _cleanup_free_ char *d = NULL;
1628 if (asprintf(&d, "%s/%s",
1629 context->root_directory ? context->root_directory : "",
1630 context->working_directory ? context->working_directory : "") < 0) {
1631 *exit_status = EXIT_MEMORY;
1636 *exit_status = EXIT_CHDIR;
1642 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1643 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1645 *exit_status = EXIT_SELINUX_CONTEXT;
1651 /* We repeat the fd closing here, to make sure that
1652 * nothing is leaked from the PAM modules. Note that
1653 * we are more aggressive this time since we don't need
1654 * socket_fd and the netns fds anymore. The custom
1655 * endpoint fd was needed to upload the policy and can
1656 * now be closed as well. */
1657 r = close_all_fds(fds, n_fds);
1659 r = shift_fds(fds, n_fds);
1661 r = flags_fds(fds, n_fds, context->non_blocking);
1663 *exit_status = EXIT_FDS;
1667 if (params->apply_permissions) {
1669 for (i = 0; i < _RLIMIT_MAX; i++) {
1670 if (!context->rlimit[i])
1673 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1674 *exit_status = EXIT_LIMITS;
1679 if (context->capability_bounding_set_drop) {
1680 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1682 *exit_status = EXIT_CAPABILITIES;
1688 if (context->smack_process_label) {
1689 r = mac_smack_apply_pid(0, context->smack_process_label);
1691 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1697 if (context->user) {
1698 r = enforce_user(context, uid);
1700 *exit_status = EXIT_USER;
1705 /* PR_GET_SECUREBITS is not privileged, while
1706 * PR_SET_SECUREBITS is. So to suppress
1707 * potential EPERMs we'll try not to call
1708 * PR_SET_SECUREBITS unless necessary. */
1709 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1710 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1711 *exit_status = EXIT_SECUREBITS;
1715 if (context->capabilities)
1716 if (cap_set_proc(context->capabilities) < 0) {
1717 *exit_status = EXIT_CAPABILITIES;
1721 if (context->no_new_privileges)
1722 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1723 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1728 if (context->address_families_whitelist ||
1729 !set_isempty(context->address_families)) {
1730 r = apply_address_families(context);
1732 *exit_status = EXIT_ADDRESS_FAMILIES;
1737 if (context->syscall_whitelist ||
1738 !set_isempty(context->syscall_filter) ||
1739 !set_isempty(context->syscall_archs)) {
1740 r = apply_seccomp(context);
1742 *exit_status = EXIT_SECCOMP;
1749 if (mac_selinux_use()) {
1750 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1753 r = setexeccon(exec_context);
1755 *exit_status = EXIT_SELINUX_CONTEXT;
1762 #ifdef HAVE_APPARMOR
1763 if (context->apparmor_profile && mac_apparmor_use()) {
1764 r = aa_change_onexec(context->apparmor_profile);
1765 if (r < 0 && !context->apparmor_profile_ignore) {
1766 *exit_status = EXIT_APPARMOR_PROFILE;
1773 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1775 *exit_status = EXIT_MEMORY;
1779 final_env = strv_env_merge(5,
1780 params->environment,
1782 context->environment,
1787 *exit_status = EXIT_MEMORY;
1791 final_argv = replace_env_argv(argv, final_env);
1793 *exit_status = EXIT_MEMORY;
1797 final_env = strv_env_clean(final_env);
1799 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1800 _cleanup_free_ char *line;
1802 line = exec_command_line(final_argv);
1805 log_unit_struct(params->unit_id,
1807 "EXECUTABLE=%s", command->path,
1808 LOG_MESSAGE("Executing: %s", line),
1813 execve(command->path, final_argv, final_env);
1814 *exit_status = EXIT_EXEC;
/* Spawns command: the visible statements validate the passed fds, load the
 * environment files of the context, log the command line, run exec_child()
 * and, once the process is forked, attach its pid to the cgroup and record
 * its start in command->exec_status.
 * NOTE(review): this excerpt omits lines of the definition (the remaining
 * parameters, local declarations, the fork call and the return statements),
 * so only the statements shown here are described. */
1818 int exec_spawn(ExecCommand *command,
1819 const ExecContext *context,
1820 const ExecParameters *params,
1821 ExecRuntime *runtime,
1824 _cleanup_strv_free_ char **files_env = NULL;
1825 int *fds = NULL; unsigned n_fds = 0;
1826 _cleanup_free_ char *line = NULL;
/* A non-NULL fd array is required whenever a positive fd count is passed. */
1835 assert(params->fds || params->n_fds <= 0);
/* Socket-connected stdio needs exactly one passed fd, which becomes socket_fd.
 * NOTE(review): the error text below is also logged when n_fds is 0. */
1837 if (context->std_input == EXEC_INPUT_SOCKET ||
1838 context->std_output == EXEC_OUTPUT_SOCKET ||
1839 context->std_error == EXEC_OUTPUT_SOCKET) {
1841 if (params->n_fds != 1) {
1842 log_unit_error(params->unit_id, "Got more than one socket.");
1846 socket_fd = params->fds[0];
1850 n_fds = params->n_fds;
1853 r = exec_context_load_environment(context, params->unit_id, &files_env);
1855 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
/* An argv supplied in params takes precedence over the one stored in command. */
1857 argv = params->argv ?: command->argv;
1858 line = exec_command_line(argv);
1862 log_unit_struct(params->unit_id,
1864 "EXECUTABLE=%s", command->path,
1865 LOG_MESSAGE("About to execute: %s", line),
1869 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1874 r = exec_child(command,
/* Failure report for exec_child(): exit_status names the failed step and r is
 * negated before being turned into an error string. */
1885 log_unit_struct(params->unit_id,
1887 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1888 "EXECUTABLE=%s", command->path,
1889 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1890 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1891 command->path, strerror(-r)),
1899 log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
/* NOTE(review): the closing line of the following comment is not visible in
 * this excerpt. */
1901 /* We add the new process to the cgroup both in the child (so
1902 * that we can be sure that no user code is ever executed
1903 * outside of the cgroup) and in the parent (so that we can be
1904 * sure that when we kill the cgroup the process will be
1906 if (params->cgroup_path)
1907 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1909 exec_status_start(&command->exec_status, pid);
/* Assigns the default values of the ExecContext fields listed below.
 * NOTE(review): the lines between the signature and the first assignment are
 * not visible in this excerpt. */
1915 void exec_context_init(ExecContext *c) {
1919 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1920 c->cpu_sched_policy = SCHED_OTHER;
1921 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1922 c->syslog_level_prefix = true;
1923 c->ignore_sigpipe = true;
/* NSEC_INFINITY and 0xffffffffUL are the values exec_context_dump() treats as
 * not configured: it prints TimerSlackNSec and Personality only when the
 * fields differ from them. */
1924 c->timer_slack_nsec = NSEC_INFINITY;
1925 c->personality = 0xffffffffUL;
1926 c->runtime_directory_mode = 0755;
/* Releases the members owned by an ExecContext and resets each released
 * pointer to NULL. No visible statement frees the ExecContext itself.
 * NOTE(review): some release calls (for example inside the rlimit loop) and
 * some member resets are not visible in this excerpt. */
1929 void exec_context_done(ExecContext *c) {
/* String vectors are released with strv_free(), plain strings with free(). */
1934 strv_free(c->environment);
1935 c->environment = NULL;
1937 strv_free(c->environment_files);
1938 c->environment_files = NULL;
/* One slot per entry of the rlimit array. */
1940 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1942 c->rlimit[l] = NULL;
1945 free(c->working_directory);
1946 c->working_directory = NULL;
1947 free(c->root_directory);
1948 c->root_directory = NULL;
1953 free(c->syslog_identifier);
1954 c->syslog_identifier = NULL;
1962 strv_free(c->supplementary_groups);
1963 c->supplementary_groups = NULL;
/* The capability state has its own release function, cap_free(). */
1968 if (c->capabilities) {
1969 cap_free(c->capabilities);
1970 c->capabilities = NULL;
1973 strv_free(c->read_only_dirs);
1974 c->read_only_dirs = NULL;
1976 strv_free(c->read_write_dirs);
1977 c->read_write_dirs = NULL;
1979 strv_free(c->inaccessible_dirs);
1980 c->inaccessible_dirs = NULL;
/* The CPU set is released with CPU_FREE(). */
1983 CPU_FREE(c->cpuset);
1988 free(c->selinux_context);
1989 c->selinux_context = NULL;
1991 free(c->apparmor_profile);
1992 c->apparmor_profile = NULL;
/* The seccomp and address family settings are sets, released with set_free(). */
1994 set_free(c->syscall_filter);
1995 c->syscall_filter = NULL;
1997 set_free(c->syscall_archs);
1998 c->syscall_archs = NULL;
2000 set_free(c->address_families);
2001 c->address_families = NULL;
2003 strv_free(c->runtime_directory);
2004 c->runtime_directory = NULL;
2006 bus_endpoint_free(c->bus_endpoint);
2007 c->bus_endpoint = NULL;
/* Walks c->runtime_directory; each entry is joined as runtime_prefix/entry
 * and the resulting path is handed to rm_rf().
 * NOTE(review): the statement guarded by the NULL check on runtime_prefix, the
 * check of the strjoin() result and the return value are not visible in this
 * excerpt; the closing line of the in-code comment is not visible either. */
2010 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2015 if (!runtime_prefix)
2018 STRV_FOREACH(i, c->runtime_directory) {
2019 _cleanup_free_ char *p;
2021 p = strjoin(runtime_prefix, "/", *i, NULL);
2025 /* We execute this synchronously, since we need to be
2026 * sure this is gone when we start the service
2028 rm_rf(p, false, true, false);
/* Finalizes one ExecCommand in place.
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * releases the members of c (TODO confirm). It is invoked per element by
 * exec_command_done_array() and per removed entry by exec_command_free_list(). */
2034 void exec_command_done(ExecCommand *c) {
/* Runs exec_command_done() on each of the n commands stored contiguously
 * starting at c. */
2044 void exec_command_done_array(ExecCommand *c, unsigned n) {
2047 for (i = 0; i < n; i++)
2048 exec_command_done(c+i);
/* Tears down a linked list of commands: each entry i is unlinked from the list
 * headed by c with LIST_REMOVE() and then passed to exec_command_done().
 * NOTE(review): the loop construct, any release of the entry itself and the
 * return statement are not visible in this excerpt; exec_command_free_array()
 * stores the returned pointer back into its slot. */
2051 ExecCommand* exec_command_free_list(ExecCommand *c) {
2055 LIST_REMOVE(command, c, i);
2056 exec_command_done(i);
/* Applies exec_command_free_list() to each of the n list heads in c and stores
 * the returned pointer back into the same slot. */
2063 void exec_command_free_array(ExecCommand **c, unsigned n) {
2066 for (i = 0; i < n; i++)
2067 c[i] = exec_command_free_list(c[i]);
/* Context handed to invalid_env() through its userdata pointer.
 * NOTE(review): only unit_id is visible in this excerpt; invalid_env() and
 * exec_context_load_environment() also use a member named path. */
2070 typedef struct InvalidEnvInfo {
2071 const char *unit_id;
/* Callback passed to strv_env_clean_with_callback(): logs, under the unit id
 * taken from userdata, that assignment p is being ignored, followed by the
 * path stored in userdata. */
2075 static void invalid_env(const char *p, void *userdata) {
2076 InvalidEnvInfo *info = userdata;
2078 log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
/* Reads the files listed in c->environment_files and accumulates their
 * assignments in the string vector r. Visible behaviour: each name is checked
 * with path_is_absolute(), expanded with glob(), every match is parsed with
 * load_env_file(), the parsed list is cleaned with
 * strv_env_clean_with_callback() (rejections are logged by invalid_env()), and
 * the result is merged with r through strv_env_merge().
 * NOTE(review): the handling of the ignore flag, most error paths and the
 * hand-over of the result through l are not visible in this excerpt. */
2081 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2082 char **i, **r = NULL;
2087 STRV_FOREACH(i, c->environment_files) {
2090 bool ignore = false;
/* pglob is declared with a cleanup attribute (_cleanup_globfree_). */
2092 _cleanup_globfree_ glob_t pglob = {};
2102 if (!path_is_absolute(fn)) {
2110 /* Filename supports globbing, take all matching files */
2112 if (glob(fn, 0, NULL, &pglob) != 0) {
/* glob() failed: report errno when it is set, otherwise -EINVAL. */
2117 return errno ? -errno : -EINVAL;
2119 count = pglob.gl_pathc;
2127 for (n = 0; n < count; n++) {
2128 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2136 /* Log invalid environment variables with filename */
2138 InvalidEnvInfo info = {
2140 .path = pglob.gl_pathv[n]
2143 p = strv_env_clean_with_callback(p, invalid_env, &info);
/* Combine what was collected so far (r) with the entries of this file (p). */
2151 m = strv_env_merge(2, r, p);
/* Tells whether tty may currently be the device behind /dev/console.
 * Visible logic: a /dev/ prefix is tested for, the name console is treated as
 * a trivial match, otherwise the console is looked up with
 * resolve_dev_console() and compared with tty; a console of tty0 matches any
 * tty for which tty_is_vc() holds.
 * NOTE(review): the statements guarded by the first three checks (including
 * the fallback taken when resolution fails) are not visible in this excerpt. */
2167 static bool tty_may_match_dev_console(const char *tty) {
2168 _cleanup_free_ char *active = NULL;
2171 if (startswith(tty, "/dev/"))
2174 /* trivial identity? */
2175 if (streq(tty, "console"))
2178 console = resolve_dev_console(&active);
2179 /* if we could not resolve, assume it may */
2183 /* "tty0" means the active VC, so it may be the same sometimes */
2184 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when the context does something with a terminal (TTY reset,
 * vhangup or VT disallocation requested, or stdin, stdout or stderr attached
 * to a terminal) and its TTY path may be /dev/console according to
 * tty_may_match_dev_console(). */
2187 bool exec_context_may_touch_console(ExecContext *ec) {
2188 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2189 is_terminal_input(ec->std_input) ||
2190 is_terminal_output(ec->std_output) ||
2191 is_terminal_output(ec->std_error)) &&
2192 tty_may_match_dev_console(tty_path(ec));
/* Prints the strings of l to f, each preceded by a single space.
 * NOTE(review): the loop header that defines g is not visible in this
 * excerpt. */
2195 static void strv_fprintf(FILE *f, char **l) {
2201 fprintf(f, " %s", *g);
/* Writes a human-readable dump of an ExecContext to f. Every output line
 * starts with prefix, which is passed through strempty() first. Settings that
 * have a set flag or a pointer are printed only when that flag is set or the
 * pointer is non-NULL.
 * NOTE(review): this excerpt omits lines of the definition (declarations, the
 * opening lines of several fprintf calls, some guards and closing braces); all
 * visible lines are kept as they are, except for the Secure Bits fix noted
 * below. */
2204 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2211 prefix = strempty(prefix);
2215 "%sWorkingDirectory: %s\n"
2216 "%sRootDirectory: %s\n"
2217 "%sNonBlocking: %s\n"
2218 "%sPrivateTmp: %s\n"
2219 "%sPrivateNetwork: %s\n"
2220 "%sPrivateDevices: %s\n"
2221 "%sProtectHome: %s\n"
2222 "%sProtectSystem: %s\n"
2223 "%sIgnoreSIGPIPE: %s\n",
/* Unset working and root directories are shown as the root path. */
2225 prefix, c->working_directory ? c->working_directory : "/",
2226 prefix, c->root_directory ? c->root_directory : "/",
2227 prefix, yes_no(c->non_blocking),
2228 prefix, yes_no(c->private_tmp),
2229 prefix, yes_no(c->private_network),
2230 prefix, yes_no(c->private_devices),
2231 prefix, protect_home_to_string(c->protect_home),
2232 prefix, protect_system_to_string(c->protect_system),
2233 prefix, yes_no(c->ignore_sigpipe));
2235 STRV_FOREACH(e, c->environment)
2236 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2238 STRV_FOREACH(e, c->environment_files)
2239 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2246 if (c->oom_score_adjust_set)
2248 "%sOOMScoreAdjust: %i\n",
2249 prefix, c->oom_score_adjust);
/* One line per resource limit, labelled with the limit name; the value printed
 * is rlim_max. */
2251 for (i = 0; i < RLIM_NLIMITS; i++)
2253 fprintf(f, "%s%s: "RLIM_FMT"\n",
2254 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2256 if (c->ioprio_set) {
2257 _cleanup_free_ char *class_str = NULL;
2259 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2261 "%sIOSchedulingClass: %s\n"
2262 "%sIOPriority: %i\n",
/* strna() guards the case where the string conversion left class_str NULL. */
2263 prefix, strna(class_str),
2264 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2267 if (c->cpu_sched_set) {
2268 _cleanup_free_ char *policy_str = NULL;
2270 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2272 "%sCPUSchedulingPolicy: %s\n"
2273 "%sCPUSchedulingPriority: %i\n"
2274 "%sCPUSchedulingResetOnFork: %s\n",
2275 prefix, strna(policy_str),
2276 prefix, c->cpu_sched_priority,
2277 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* Lists the index of every CPU that is set in the affinity mask. */
2281 fprintf(f, "%sCPUAffinity:", prefix);
2282 for (i = 0; i < c->cpuset_ncpus; i++)
2283 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2284 fprintf(f, " %u", i);
/* NSEC_INFINITY is the value assigned by exec_context_init(). */
2288 if (c->timer_slack_nsec != NSEC_INFINITY)
2289 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2292 "%sStandardInput: %s\n"
2293 "%sStandardOutput: %s\n"
2294 "%sStandardError: %s\n",
2295 prefix, exec_input_to_string(c->std_input),
2296 prefix, exec_output_to_string(c->std_output),
2297 prefix, exec_output_to_string(c->std_error));
2303 "%sTTYVHangup: %s\n"
2304 "%sTTYVTDisallocate: %s\n",
2305 prefix, c->tty_path,
2306 prefix, yes_no(c->tty_reset),
2307 prefix, yes_no(c->tty_vhangup),
2308 prefix, yes_no(c->tty_vt_disallocate));
/* The syslog facility and level are shown only when stdout or stderr goes to
 * syslog, kmsg or the journal (with or without the console). */
2310 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2311 c->std_output == EXEC_OUTPUT_KMSG ||
2312 c->std_output == EXEC_OUTPUT_JOURNAL ||
2313 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2314 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2315 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2316 c->std_error == EXEC_OUTPUT_SYSLOG ||
2317 c->std_error == EXEC_OUTPUT_KMSG ||
2318 c->std_error == EXEC_OUTPUT_JOURNAL ||
2319 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2320 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2321 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2323 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
/* syslog_priority carries the level in its low three bits and the facility
 * above them. */
2325 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2326 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2329 "%sSyslogFacility: %s\n"
2330 "%sSyslogLevel: %s\n",
2331 prefix, strna(fac_str),
2332 prefix, strna(lvl_str));
2335 if (c->capabilities) {
2336 _cleanup_cap_free_charp_ char *t;
2338 t = cap_to_text(c->capabilities, NULL);
2340 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Every flag name starts with a space so that the names stay separated when
 * several bits are set. The last entry used to lack that space, which fused
 * it with the preceding flag name in the output. */
2344 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2346 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2347 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2348 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2349 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2350 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2351 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* The field stores the capabilities to drop, so the bounding set that is
 * printed consists of the capabilities whose bit is not set. */
2353 if (c->capability_bounding_set_drop) {
2355 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2357 for (l = 0; l <= cap_last_cap(); l++)
2358 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2359 fprintf(f, " %s", strna(capability_to_name(l)));
2365 fprintf(f, "%sUser: %s\n", prefix, c->user);
2367 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2369 if (strv_length(c->supplementary_groups) > 0) {
2370 fprintf(f, "%sSupplementaryGroups:", prefix);
2371 strv_fprintf(f, c->supplementary_groups);
2376 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2378 if (strv_length(c->read_write_dirs) > 0) {
2379 fprintf(f, "%sReadWriteDirs:", prefix);
2380 strv_fprintf(f, c->read_write_dirs);
2384 if (strv_length(c->read_only_dirs) > 0) {
2385 fprintf(f, "%sReadOnlyDirs:", prefix);
2386 strv_fprintf(f, c->read_only_dirs);
2390 if (strv_length(c->inaccessible_dirs) > 0) {
2391 fprintf(f, "%sInaccessibleDirs:", prefix);
2392 strv_fprintf(f, c->inaccessible_dirs);
2398 "%sUtmpIdentifier: %s\n",
2399 prefix, c->utmp_id);
/* A leading dash marks a context whose ignore flag is set. */
2401 if (c->selinux_context)
2403 "%sSELinuxContext: %s%s\n",
2404 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
/* 0xffffffffUL is the value assigned by exec_context_init(). */
2406 if (c->personality != 0xffffffffUL)
2408 "%sPersonality: %s\n",
2409 prefix, strna(personality_to_string(c->personality)));
2411 if (c->syscall_filter) {
2419 "%sSystemCallFilter: ",
2422 if (!c->syscall_whitelist)
2426 SET_FOREACH(id, c->syscall_filter, j) {
2427 _cleanup_free_ char *name = NULL;
/* Set entries are stored offset by one, hence the subtraction. */
2434 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2435 fputs(strna(name), f);
2442 if (c->syscall_archs) {
2449 "%sSystemCallArchitectures:",
2453 SET_FOREACH(id, c->syscall_archs, j)
2454 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2459 if (c->syscall_errno != 0)
2461 "%sSystemCallErrorNumber: %s\n",
2462 prefix, strna(errno_to_name(c->syscall_errno)));
2464 if (c->apparmor_profile)
2466 "%sAppArmorProfile: %s%s\n",
2467 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* NOTE(review): the checks between the comment and the root comparison, and
 * the return statements, are not visible in this excerpt. */
2470 bool exec_context_maintains_privileges(ExecContext *c) {
2473 /* Returns true if the process forked off would run under
2474 * an unchanged UID or as root. */
/* The root user may be configured by name or by numeric id. */
2479 if (streq(c->user, "root") || streq(c->user, "0"))
/* Records the start of a process in s: the visible statement fills
 * s->start_timestamp through dual_timestamp_get().
 * NOTE(review): the lines between the signature and this call (including
 * whatever is done with pid) are not visible in this excerpt. */
2485 void exec_status_start(ExecStatus *s, pid_t pid) {
2490 dual_timestamp_get(&s->start_timestamp);
/* Records the termination of a process in s and runs the follow-up work of
 * the context: fills s->exit_timestamp, calls utmp_put_dead_process() when
 * context->utmp_id is set, and resets the TTY of the context.
 * NOTE(review): the statement guarded by the pid mismatch check and the
 * stores of pid, code and status are not visible in this excerpt. */
2493 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* s already refers to a different, non-zero pid. */
2496 if (s->pid && s->pid != pid)
2500 dual_timestamp_get(&s->exit_timestamp);
2506 if (context->utmp_id)
2507 utmp_put_dead_process(context->utmp_id, pid, code, status);
2509 exec_context_tty_reset(context);
/* Prints an ExecStatus to f, each line starting with prefix (passed through
 * strempty() first): the PID, the start timestamp when its realtime value is
 * positive, and the exit timestamp with exit code and status when its realtime
 * value is positive.
 * NOTE(review): the opening lines of the fprintf calls, some format and
 * argument lines and any early return are not visible in this excerpt. */
2513 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer that format_timestamp() writes into. */
2514 char buf[FORMAT_TIMESTAMP_MAX];
2522 prefix = strempty(prefix);
2525 "%sPID: "PID_FMT"\n",
2528 if (s->start_timestamp.realtime > 0)
2530 "%sStart Timestamp: %s\n",
2531 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2533 if (s->exit_timestamp.realtime > 0)
2535 "%sExit Timestamp: %s\n"
2537 "%sExit Status: %i\n",
2538 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2539 prefix, sigchld_code_to_string(s->code),
/* Builds a single printable string from an argv vector.
 * Visible outline: a first pass over argv (presumably computing the size k,
 * TODO confirm), an allocation of k chars, and a second pass in which
 * arguments containing whitespace take a separate branch.
 * NOTE(review): the size computation, the copy and quoting code and the return
 * statements are not visible in this excerpt. exec_command_dump() prints the
 * ENOMEM error text when this function returns NULL. */
2543 char *exec_command_line(char **argv) {
2551 STRV_FOREACH(a, argv)
2554 if (!(n = new(char, k)))
2558 STRV_FOREACH(a, argv) {
2565 if (strpbrk(*a, WHITESPACE)) {
2576 /* FIXME: this doesn't really handle arguments that have
2577 * spaces and ticks in them */
/* Prints one command to f: first its command line, then its ExecStatus via
 * exec_status_dump(). If exec_command_line() returns NULL, the ENOMEM error
 * text is printed in place of the command line.
 * NOTE(review): the opening line of the fprintf call is not visible in this
 * excerpt. */
2582 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2583 _cleanup_free_ char *cmd = NULL;
2584 const char *prefix2;
2589 prefix = strempty(prefix);
/* The status lines are indented by one additional tab. */
2590 prefix2 = strjoina(prefix, "\t");
2592 cmd = exec_command_line(c->argv);
2594 "%sCommand Line: %s\n",
2595 prefix, cmd ? cmd : strerror(ENOMEM));
2597 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at c with exec_command_dump().
 * The parameter c itself is reused as the list iterator. */
2600 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2603 prefix = strempty(prefix);
2605 LIST_FOREACH(command, c, c)
2606 exec_command_dump(c, f, prefix);
/* Appends e to the end of the command list headed by the pointer l refers to:
 * the current tail is looked up and e is inserted after it.
 * NOTE(review): the lines before the in-code comment are not visible in this
 * excerpt. */
2609 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2616 /* It's kind of important that we keep the order here */
2617 LIST_FIND_TAIL(command, *l, end);
2618 LIST_INSERT_AFTER(command, *l, end, e);
/* Variadic function; the only visible statement builds a string vector from
 * path and the variadic arguments with strv_new_ap().
 * NOTE(review): how the vector and path are stored in c, the error handling
 * and the return value are not visible in this excerpt. */
2623 int exec_command_set(ExecCommand *c, const char *path, ...) {
2631 l = strv_new_ap(path, ap);
/* Builds a string vector from path and the variadic arguments and appends its
 * entries to c->argv with strv_extend_strv(). The temporary vector l is
 * declared with the _cleanup_strv_free_ attribute.
 * NOTE(review): the error checks and the return value are not visible in this
 * excerpt. */
2652 int exec_command_append(ExecCommand *c, const char *path, ...) {
2653 _cleanup_strv_free_ char **l = NULL;
2661 l = strv_new_ap(path, ap);
2667 r = strv_extend_strv(&c->argv, l);
/* Allocates a zero-initialized ExecRuntime, stores it through rt, and sets
 * both ends of netns_storage_socket to -1 (other code in this file tests these
 * fds with a comparison against zero to see whether they are open).
 * NOTE(review): any guard before the allocation, the failure check and the
 * return value are not visible in this excerpt. */
2675 static int exec_runtime_allocate(ExecRuntime **rt) {
2680 *rt = new0(ExecRuntime, 1);
2685 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Sets up the runtime object for a context that has private_network or
 * private_tmp enabled: allocates it through exec_runtime_allocate(), creates
 * the netns storage socket pair when private_network is set and the socket is
 * not open yet, and sets up the temporary directories when private_tmp is set
 * and no tmp_dir is recorded yet.
 * NOTE(review): the statement guarded by the first check, the error paths and
 * the return value are not visible in this excerpt. */
2690 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
/* Neither feature is requested. */
2700 if (!c->private_network && !c->private_tmp)
2703 r = exec_runtime_allocate(rt);
2707 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2708 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2712 if (c->private_tmp && !(*rt)->tmp_dir) {
2713 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Reference-count helper for ExecRuntime. Only the sanity check that n_ref is
 * positive is visible here.
 * NOTE(review): presumably increments n_ref and returns r; those lines are
 * not visible in this excerpt (TODO confirm). */
2721 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2723 assert(r->n_ref > 0);
/* Reference-count helper for ExecRuntime: once n_ref is zero or below, frees
 * var_tmp_dir and closes both netns storage sockets.
 * NOTE(review): the NULL guard, the decrement of n_ref, the remaining frees
 * and the return value are not visible in this excerpt. */
2729 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2734 assert(r->n_ref > 0);
2737 if (r->n_ref <= 0) {
2739 free(r->var_tmp_dir);
2740 safe_close_pair(r->netns_storage_socket);
/* Serializes an ExecRuntime for unit u into f: the tmp-dir and var-tmp-dir
 * paths are written as items, and for each open netns storage socket a
 * duplicate is placed in fds and the number of that duplicate is written under
 * netns-socket-0 or netns-socket-1.
 * NOTE(review): the guards at the top, the failure checks on fdset_put_dup()
 * and the return value are not visible in this excerpt. */
2747 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2756 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2758 if (rt->var_tmp_dir)
2759 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2761 if (rt->netns_storage_socket[0] >= 0) {
2764 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2768 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2771 if (rt->netns_storage_socket[1] >= 0) {
2774 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2778 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Applies one serialized key and value pair to the ExecRuntime behind rt.
 * Handled keys: tmp-dir and var-tmp-dir (the value is duplicated and replaces
 * the stored string) and netns-socket-0 and netns-socket-1 (the value is
 * parsed as an fd number that must be contained in fds; the fd is then taken
 * out of fds and replaces the stored socket, which is closed first).
 * Each branch calls exec_runtime_allocate() before touching the object.
 * NOTE(review): the failure checks, the branch structure around the debug
 * log calls and the return values are not visible in this excerpt. */
2784 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2791 if (streq(key, "tmp-dir")) {
2794 r = exec_runtime_allocate(rt);
2798 copy = strdup(value);
/* Drop the previous string before storing the copy. */
2802 free((*rt)->tmp_dir);
2803 (*rt)->tmp_dir = copy;
2805 } else if (streq(key, "var-tmp-dir")) {
2808 r = exec_runtime_allocate(rt);
2812 copy = strdup(value);
2816 free((*rt)->var_tmp_dir);
2817 (*rt)->var_tmp_dir = copy;
2819 } else if (streq(key, "netns-socket-0")) {
2822 r = exec_runtime_allocate(rt);
/* The value must parse as an integer and name an fd contained in fds. */
2826 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2827 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2829 safe_close((*rt)->netns_storage_socket[0]);
2830 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2832 } else if (streq(key, "netns-socket-1")) {
2835 r = exec_runtime_allocate(rt);
2839 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2840 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2842 safe_close((*rt)->netns_storage_socket[1]);
2843 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job function handed to asynchronous_job() by exec_runtime_destroy(): passes
 * the path received in p to rm_rf_dangerous(). The path is held in a variable
 * declared with the _cleanup_free_ attribute.
 * NOTE(review): the return statement is not visible in this excerpt. */
2851 static void *remove_tmpdir_thread(void *p) {
2852 _cleanup_free_ char *path = p;
2854 rm_rf_dangerous(path, false, true, false);
/* Destroys the resources recorded in rt: for tmp_dir and var_tmp_dir a
 * removal job running remove_tmpdir_thread() is started through
 * asynchronous_job(), a warning is logged with the error code of a failed
 * start, and the netns storage sockets are closed.
 * NOTE(review): the guards at the top, the checks on r, the counterpart of the
 * visible free() for tmp_dir and the reset of tmp_dir are not visible in this
 * excerpt. */
2858 void exec_runtime_destroy(ExecRuntime *rt) {
2864 /* If there are multiple users of this, let's leave the stuff around */
2869 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2871 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2873 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2880 if (rt->var_tmp_dir) {
2881 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2883 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2885 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2886 free(rt->var_tmp_dir);
/* The pointer is cleared once the path has been dealt with. */
2889 rt->var_tmp_dir = NULL;
2892 safe_close_pair(rt->netns_storage_socket);
/* Maps each ExecInput value to its name. The table is passed to
 * DEFINE_STRING_TABLE_LOOKUP(), which presumably generates the lookup
 * functions such as exec_input_to_string() used by exec_context_dump()
 * (TODO confirm against the macro definition). */
2895 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2896 [EXEC_INPUT_NULL] = "null",
2897 [EXEC_INPUT_TTY] = "tty",
2898 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2899 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2900 [EXEC_INPUT_SOCKET] = "socket"
2903 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Maps each ExecOutput value to its name. The table is passed to
 * DEFINE_STRING_TABLE_LOOKUP(), which presumably generates the lookup
 * functions such as exec_output_to_string() used by exec_context_dump()
 * (TODO confirm against the macro definition). */
2905 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2906 [EXEC_OUTPUT_INHERIT] = "inherit",
2907 [EXEC_OUTPUT_NULL] = "null",
2908 [EXEC_OUTPUT_TTY] = "tty",
2909 [EXEC_OUTPUT_SYSLOG] = "syslog",
2910 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2911 [EXEC_OUTPUT_KMSG] = "kmsg",
2912 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2913 [EXEC_OUTPUT_JOURNAL] = "journal",
2914 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2915 [EXEC_OUTPUT_SOCKET] = "socket"
2918 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);