1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
34 #include <sys/personality.h>
37 #include <security/pam_appl.h>
41 #include <selinux/selinux.h>
49 #include <sys/apparmor.h>
55 #include "capability.h"
58 #include "sd-messages.h"
60 #include "securebits.h"
61 #include "namespace.h"
62 #include "exit-status.h"
65 #include "path-util.h"
70 #include "selinux-util.h"
71 #include "errno-list.h"
74 #include "smack-util.h"
75 #include "bus-endpoint.h"
79 #include "apparmor-util.h"
83 #include "seccomp-util.h"
/* Idle pipe timeouts used by do_idle_pipe_dance(): the first bounds the
 * initial wait for a hangup on the idle pipe, the second bounds the wait
 * after systemd has been notified. */
86 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
87 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
89 /* This assumes there is a 'tty' group */
/* Send buffer size requested via fd_inc_sndbuf() for the journal stream socket. */
92 #define SNDBUF_SIZE (8*1024*1024)
/* Moves the passed file descriptors so that fds[i] ends up at fd number i+3.
 * Each fd is duplicated with F_DUPFD; if the kernel hands back a different
 * number than the wanted one, the index is remembered and the pass is
 * restarted from there. The fds array is modified in place. */
94 static int shift_fds(int fds[], unsigned n_fds) {
95 int start, restart_from;
100 /* Modifies the fds array! (sorts it) */
110 for (i = start; i < (int) n_fds; i++) {
113 /* Already at right index? */
117 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
123 /* Hmm, the fd we wanted isn't free? Then
124 * let's remember that and try again from here */
125 if (nfd != i+3 && restart_from < 0)
129 if (restart_from < 0)
132 start = restart_from;
/* Applies the requested O_NONBLOCK state (nonblock) to each of the n_fds
 * descriptors and clears FD_CLOEXEC on every one of them. */
138 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
147 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
149 for (i = 0; i < n_fds; i++) {
151 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
154 /* We unconditionally drop FD_CLOEXEC from the fds,
155 * since after all we want to pass these fds to our
158 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or /dev/console when
 * none is configured. The returned string is not a copy. */
165 _pure_ static const char *tty_path(const ExecContext *context) {
168 if (context->tty_path)
169 return context->tty_path;
171 return "/dev/console";
/* Performs the TTY cleanup steps enabled in the context: vhangup, terminal
 * reset and VT disallocation. The first two operate on tty_path(), i.e. they
 * fall back to /dev/console; disallocation only happens when an explicit
 * TTY path is configured. */
174 static void exec_context_tty_reset(const ExecContext *context) {
177 if (context->tty_vhangup)
178 terminal_vhangup(tty_path(context));
180 if (context->tty_reset)
181 reset_terminal(tty_path(context));
183 if (context->tty_vt_disallocate && context->tty_path)
184 vt_disallocate(context->tty_path);
/* Checks whether the output type involves a terminal: plain TTY output or
 * one of the syslog, kmsg or journal variants that also log to the console. */
187 static bool is_terminal_output(ExecOutput o) {
189 o == EXEC_OUTPUT_TTY ||
190 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
191 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
192 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given flags (plus O_NOCTTY) and duplicates it
 * onto fd number nfd. After dup2(), r is -errno on failure and nfd on
 * success. */
195 static int open_null_as(int flags, int nfd) {
200 fd = open("/dev/null", flags|O_NOCTTY);
205 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Connects fd to the journal stdout stream socket at
 * /run/systemd/journal/stdout. When uid and/or gid are valid, the saved
 * effective IDs (olduid, oldgid) are restored after connect(), ignoring
 * failures; presumably the effective IDs are switched to uid/gid before the
 * connect() call -- TODO confirm. */
213 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
214 union sockaddr_union sa = {
215 .un.sun_family = AF_UNIX,
216 .un.sun_path = "/run/systemd/journal/stdout",
218 uid_t olduid = UID_INVALID;
219 gid_t oldgid = GID_INVALID;
222 if (gid != GID_INVALID) {
230 if (uid != UID_INVALID) {
240 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
244 /* If we fail to restore the uid or gid, things will likely
245 fail later on. This should only happen if an LSM interferes. */
247 if (uid != UID_INVALID)
248 (void) seteuid(olduid);
251 if (gid != GID_INVALID)
252 (void) setegid(oldgid);
/* Creates an AF_UNIX stream socket, connects it to the journal socket (using
 * the passed uid and gid), shuts down its read side, enlarges its send
 * buffer and finally duplicates it onto fd number nfd. The argument list
 * visible below is derived from the syslog settings of the context (with
 * ident as fallback identifier) and from the output type. */
257 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
261 assert(output < _EXEC_OUTPUT_MAX);
265 fd = socket(AF_UNIX, SOCK_STREAM, 0);
269 r = connect_journal_socket(fd, uid, gid);
273 if (shutdown(fd, SHUT_RD) < 0) {
278 fd_inc_sndbuf(fd, SNDBUF_SIZE);
288 context->syslog_identifier ? context->syslog_identifier : ident,
290 context->syslog_priority,
291 !!context->syslog_level_prefix,
292 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
293 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
294 is_terminal_output(output));
297 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Opens the terminal at path with the given mode (plus O_NOCTTY) and
 * duplicates it onto fd number nfd. After dup2(), r is -errno on failure and
 * nfd on success. */
304 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
310 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
314 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Checks whether the input type is one of the TTY variants
 * (EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE, EXEC_INPUT_TTY_FAIL). */
322 static bool is_terminal_input(ExecInput i) {
324 i == EXEC_INPUT_TTY ||
325 i == EXEC_INPUT_TTY_FORCE ||
326 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the configured input type to EXEC_INPUT_NULL in two cases: it
 * is a terminal input but apply_tty_stdin is false, or socket input is
 * requested while socket_fd is negative. */
329 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
331 if (is_terminal_input(std_input) && !apply_tty_stdin)
332 return EXEC_INPUT_NULL;
334 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
335 return EXEC_INPUT_NULL;
/* Downgrades socket output to EXEC_OUTPUT_INHERIT when socket_fd is
 * negative, i.e. no socket was passed. */
340 static int fixup_output(ExecOutput std_output, int socket_fd) {
342 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
343 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the fixed-up input type of the context:
 * /dev/null opened read-only, a terminal acquired at tty_path(), or a
 * duplicate of socket_fd. The dup2() based paths yield -errno on failure and
 * STDIN_FILENO on success. */
348 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
353 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
357 case EXEC_INPUT_NULL:
358 return open_null_as(O_RDONLY, STDIN_FILENO);
361 case EXEC_INPUT_TTY_FORCE:
362 case EXEC_INPUT_TTY_FAIL: {
365 fd = acquire_terminal(tty_path(context),
366 i == EXEC_INPUT_TTY_FAIL,
367 i == EXEC_INPUT_TTY_FORCE,
373 if (fd != STDIN_FILENO) {
374 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
382 case EXEC_INPUT_SOCKET:
383 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
386 assert_not_reached("Unknown input type");
/* Sets up stdout or stderr (selected by fileno) according to the fixed-up
 * output settings of the context. Inherited output may be duplicated from
 * stdout (for stderr) or from stdin, or be opened from the TTY or /dev/null.
 * The syslog, kmsg and journal types connect to the journal socket via
 * connect_logger_as(); the visible error path logs the failure and falls
 * back to /dev/null. Socket output duplicates socket_fd, which must be
 * valid in that case. */
390 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
398 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
399 o = fixup_output(context->std_output, socket_fd);
401 if (fileno == STDERR_FILENO) {
403 e = fixup_output(context->std_error, socket_fd);
405 /* This expects the input and output are already set up */
407 /* Don't change the stderr file descriptor if we inherit all
408 * the way and are not on a tty */
409 if (e == EXEC_OUTPUT_INHERIT &&
410 o == EXEC_OUTPUT_INHERIT &&
411 i == EXEC_INPUT_NULL &&
412 !is_terminal_input(context->std_input) &&
416 /* Duplicate from stdout if possible */
417 if (e == o || e == EXEC_OUTPUT_INHERIT)
418 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
422 } else if (o == EXEC_OUTPUT_INHERIT) {
423 /* If input got downgraded, inherit the original value */
424 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
425 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
427 /* If the input is connected to anything that's not a /dev/null, inherit that... */
428 if (i != EXEC_INPUT_NULL)
429 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
431 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
435 /* We need to open /dev/null here anew, to get the right access mode. */
436 return open_null_as(O_WRONLY, fileno);
441 case EXEC_OUTPUT_NULL:
442 return open_null_as(O_WRONLY, fileno);
444 case EXEC_OUTPUT_TTY:
445 if (is_terminal_input(i))
446 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
448 /* We don't reset the terminal if this is just about output */
449 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
451 case EXEC_OUTPUT_SYSLOG:
452 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
453 case EXEC_OUTPUT_KMSG:
454 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
455 case EXEC_OUTPUT_JOURNAL:
456 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
457 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
459 log_unit_struct(unit_id,
461 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
462 fileno == STDOUT_FILENO ? "stdout" : "stderr",
463 unit_id, strerror(-r)),
466 r = open_null_as(O_WRONLY, fileno);
470 case EXEC_OUTPUT_SOCKET:
471 assert(socket_fd >= 0);
472 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
475 assert_not_reached("Unknown error type");
/* Tries to make uid the owner of the terminal behind fd and to set its
 * access mode to TTY_MODE. The fchown() and fchmod() results are ignored on
 * purpose; the outcome is verified afterwards by comparing the fstat() data
 * with the wanted owner and mode. */
479 static int chown_terminal(int fd, uid_t uid) {
484 /* This might fail. What matters are the results. */
485 (void) fchown(fd, uid, -1);
486 (void) fchmod(fd, TTY_MODE);
488 if (fstat(fd, &st) < 0)
491 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Redirects stdin and stdout to an acquired terminal for the confirmation
 * prompt. Duplicates of the current stdin and stdout are created first (at
 * fd numbers of 3 or above) and handed back through _saved_stdin and
 * _saved_stdout so that they can be restored later. The terminal is chowned
 * to the current uid before it is installed. The trailing safe_close() calls
 * release the saved duplicates -- presumably the failure path, TODO confirm. */
497 static int setup_confirm_stdio(int *_saved_stdin,
498 int *_saved_stdout) {
499 int fd = -1, saved_stdin, saved_stdout = -1, r;
501 assert(_saved_stdin);
502 assert(_saved_stdout);
504 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
508 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
509 if (saved_stdout < 0) {
514 fd = acquire_terminal(
519 DEFAULT_CONFIRM_USEC);
525 r = chown_terminal(fd, getuid());
529 if (dup2(fd, STDIN_FILENO) < 0) {
534 if (dup2(fd, STDOUT_FILENO) < 0) {
542 *_saved_stdin = saved_stdin;
543 *_saved_stdout = saved_stdout;
548 safe_close(saved_stdout);
549 safe_close(saved_stdin);
/* printf-style helper that opens /dev/console write-only and prints the
 * formatted message to it. The console fd is closed automatically when the
 * function returns (_cleanup_close_). */
555 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
556 _cleanup_close_ int fd = -1;
561 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
565 va_start(ap, format);
566 vdprintf(fd, format, ap);
/* Counterpart of setup_confirm_stdio(): duplicates the saved fds back onto
 * STDIN_FILENO and STDOUT_FILENO (each only if the saved fd is valid) and
 * then closes the saved fds. */
572 static int restore_confirm_stdio(int *saved_stdin,
578 assert(saved_stdout);
582 if (*saved_stdin >= 0)
583 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
586 if (*saved_stdout >= 0)
587 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
590 safe_close(*saved_stdin);
591 safe_close(*saved_stdout);
/* Asks on the terminal whether the command line built from argv shall be
 * executed. Stdio is redirected for the prompt and restored afterwards. The
 * accepted answer characters are y, n and s; the chosen one is stored in
 * *response. */
596 static int ask_for_confirmation(char *response, char **argv) {
597 int saved_stdout = -1, saved_stdin = -1, r;
598 _cleanup_free_ char *line = NULL;
600 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
604 line = exec_command_line(argv);
608 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
610 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group credentials for the process to be executed. If a group
 * is configured in the context it is resolved and overrides the passed gid.
 * initgroups() is called for the user (skipped for gid 0), then the real,
 * effective and saved GID are set. Manually configured supplementary groups
 * are resolved and appended to the current group list, bounded by
 * _SC_NGROUPS_MAX, and installed with setgroups(). */
615 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
616 bool keep_groups = false;
621 /* Lookup and set GID and supplementary group list. Here too
622 * we avoid NSS lookups for gid=0. */
624 if (context->group || username) {
626 if (context->group) {
627 const char *g = context->group;
629 if ((r = get_group_creds(&g, &gid)) < 0)
633 /* First step, initialize groups from /etc/group */
634 if (username && gid != 0) {
635 if (initgroups(username, gid) < 0)
641 /* Second step, set our gids */
642 if (setresgid(gid, gid, gid) < 0)
646 if (context->supplementary_groups) {
651 /* Final step, initialize any manually set supplementary groups */
652 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
654 if (!(gids = new(gid_t, ngroups_max)))
658 if ((k = getgroups(ngroups_max, gids)) < 0) {
665 STRV_FOREACH(i, context->supplementary_groups) {
668 if (k >= ngroups_max) {
674 r = get_group_creds(&g, gids+k);
683 if (setgroups(k, gids) < 0) {
/* Switches the real, effective and saved UID to uid. When capabilities are
 * configured in the context, SECURE_KEEP_CAPS is added to the secure bits
 * first, and a copy of the configured capability set, extended by CAP_SETUID
 * and CAP_SETPCAP in the effective and permitted sets, is applied before the
 * UID change. */
694 static int enforce_user(const ExecContext *context, uid_t uid) {
697 /* Sets (but doesn't lookup) the uid and make sure we keep the
698 * capabilities while doing so. */
700 if (context->capabilities) {
701 _cleanup_cap_free_ cap_t d = NULL;
702 static const cap_value_t bits[] = {
703 CAP_SETUID, /* Necessary so that we can run setresuid() below */
704 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
707 /* First step: If we need to keep capabilities but
708 * drop privileges we need to make sure we keep our
709 * caps, while we drop privileges. */
711 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
713 if (prctl(PR_GET_SECUREBITS) != sb)
714 if (prctl(PR_SET_SECUREBITS, sb) < 0)
718 /* Second step: set the capabilities. This will reduce
719 * the capabilities to the minimum we need. */
721 d = cap_dup(context->capabilities);
725 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
726 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
729 if (cap_set_proc(d) < 0)
733 /* Third step: actually set the uids */
734 if (setresuid(uid, uid, uid) < 0)
737 /* At this point we should have all necessary capabilities but
738 are otherwise a normal user. However, the caps might have got
739 corrupted due to the setresuid() so we need clean them up
740 later. This is done outside of this call. */
/* PAM conversation callback. Interactive conversations are not supported by
 * this code. */
747 static int null_conv(
749 const struct pam_message **msg,
750 struct pam_response **resp,
753 /* We don't support conversations */
/* Opens a PAM session: pam_start(), PAM_TTY item, account management, then
 * pam_open_session(), and fetches the PAM environment list. A helper child
 * (renamed to sd-pam) closes the passed fds, drops to uid, arms
 * PR_SET_PDEATHSIG with SIGTERM and waits for SIGTERM while its parent is
 * alive. Once the parent is gone it closes the PAM session and ends the
 * handle. SIGTERM is blocked before parent_pid is recorded and the previous
 * mask is restored in the parent afterwards. PAM failures are reported as
 * -EPERM since PAM codes do not map to errno values.
 *
 * NOTE(review): the log_error_errno() call after the failed setresuid() in
 * the child passes r, which no visible line assigns from that call; errno
 * looks like the intended value -- confirm. */
758 static int setup_pam(
764 int fds[], unsigned n_fds) {
766 static const struct pam_conv conv = {
771 pam_handle_t *handle = NULL;
773 int pam_code = PAM_SUCCESS;
776 bool close_session = false;
777 pid_t pam_pid = 0, parent_pid;
784 /* We set up PAM in the parent process, then fork. The child
785 * will then stay around until killed via PR_SET_PDEATHSIG or
786 * systemd via the cgroup logic. It will then remove the PAM
787 * session again. The parent process will exec() the actual
788 * daemon. We do things this way to ensure that the main PID
789 * of the daemon is the one we initially fork()ed. */
791 if (log_get_max_level() < LOG_DEBUG)
794 pam_code = pam_start(name, user, &conv, &handle);
795 if (pam_code != PAM_SUCCESS) {
801 pam_code = pam_set_item(handle, PAM_TTY, tty);
802 if (pam_code != PAM_SUCCESS)
806 pam_code = pam_acct_mgmt(handle, flags);
807 if (pam_code != PAM_SUCCESS)
810 pam_code = pam_open_session(handle, flags);
811 if (pam_code != PAM_SUCCESS)
814 close_session = true;
816 e = pam_getenvlist(handle);
818 pam_code = PAM_BUF_ERR;
822 /* Block SIGTERM, so that we know that it won't get lost in
824 if (sigemptyset(&ss) < 0 ||
825 sigaddset(&ss, SIGTERM) < 0 ||
826 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
829 parent_pid = getpid();
839 /* The child's job is to reset the PAM session on
842 /* This string must fit in 10 chars (i.e. the length
843 * of "/sbin/init"), to look pretty in /bin/ps */
844 rename_process("(sd-pam)");
846 /* Make sure we don't keep open the passed fds in this
847 child. We assume that otherwise only those fds are
848 open here that have been opened by PAM. */
849 close_many(fds, n_fds);
851 /* Drop privileges - we don't need any to pam_close_session
852 * and this will make PR_SET_PDEATHSIG work in most cases.
853 * If this fails, ignore the error - but expect sd-pam threads
854 * to fail to exit normally */
855 if (setresuid(uid, uid, uid) < 0)
856 log_error_errno(r, "Error: Failed to setresuid() in sd-pam: %m");
858 /* Wait until our parent died. This will only work if
859 * the above setresuid() succeeds, otherwise the kernel
860 * will not allow unprivileged parents kill their privileged
861 * children this way. We rely on the control groups kill logic
862 * to do the rest for us. */
863 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
866 /* Check if our parent process might already have
868 if (getppid() == parent_pid) {
870 if (sigwait(&ss, &sig) < 0) {
877 assert(sig == SIGTERM);
882 /* If our parent died we'll end the session */
883 if (getppid() != parent_pid) {
884 pam_code = pam_close_session(handle, flags);
885 if (pam_code != PAM_SUCCESS)
892 pam_end(handle, pam_code | flags);
896 /* If the child was forked off successfully it will do all the
897 * cleanups, so forget about the handle here. */
900 /* Unblock SIGTERM again in the parent */
901 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
904 /* We close the log explicitly here, since the PAM modules
905 * might have opened it, but we don't want this fd around. */
914 if (pam_code != PAM_SUCCESS) {
915 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
916 err = -EPERM; /* PAM errors do not map to errno */
918 log_error_errno(errno, "PAM failed: %m");
924 pam_code = pam_close_session(handle, flags);
926 pam_end(handle, pam_code | flags);
934 kill(pam_pid, SIGTERM);
935 kill(pam_pid, SIGCONT);
/* Renames the current process to a short, parenthesized name derived from
 * path. The name buffer holds 11 bytes, i.e. at most 10 visible characters
 * plus the terminating NUL. A fixed placeholder name is used in one branch;
 * otherwise l bytes starting at p are copied between the parentheses. */
942 static void rename_process_from_path(const char *path) {
943 char process_name[11];
947 /* This resulting string must fit in 10 chars (i.e. the length
948 * of "/sbin/init") to look pretty in /bin/ps */
952 rename_process("(...)");
958 /* The end of the process name is usually more
959 * interesting, since the first bit might just be
965 process_name[0] = '(';
966 memcpy(process_name+1, p, l);
967 process_name[1+l] = ')';
968 process_name[1+l+1] = 0;
970 rename_process(process_name);
/* Builds and loads a seccomp filter from the system call filter settings of
 * the context. The negative action is SCMP_ACT_KILL, or an errno return if
 * syscall_errno is set. In whitelist mode the negative action is the filter
 * default and the listed system calls are allowed; otherwise the default is
 * to allow and the listed calls get the negative action. Architectures and
 * system call numbers are stored in the sets offset by one, hence the -1
 * when they are read back. SCMP_FLTATR_CTL_NNP is set to 0 before the filter
 * is loaded, and the filter context is released at the end. */
975 static int apply_seccomp(const ExecContext *c) {
976 uint32_t negative_action, action;
977 scmp_filter_ctx *seccomp;
984 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
986 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
990 if (c->syscall_archs) {
992 SET_FOREACH(id, c->syscall_archs, i) {
993 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1001 r = seccomp_add_secondary_archs(seccomp);
1006 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1007 SET_FOREACH(id, c->syscall_filter, i) {
1008 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
1013 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1017 r = seccomp_load(seccomp);
1020 seccomp_release(seccomp);
/* Builds and loads a seccomp filter that restricts address families, with
 * allow as the default action. In whitelist mode the lowest and highest
 * valid family in the set are determined; everything below the first, above
 * the last, and every family in between that is not in the set is rejected
 * with EPROTONOSUPPORT. If the set has no valid entry, everything is
 * rejected. In blacklist mode one rejecting rule per listed family is added.
 * SCMP_FLTATR_CTL_NNP is set to 0 before the filter is loaded. */
1024 static int apply_address_families(const ExecContext *c) {
1025 scmp_filter_ctx *seccomp;
1031 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1035 r = seccomp_add_secondary_archs(seccomp);
1039 if (c->address_families_whitelist) {
1040 int af, first = 0, last = 0;
1043 /* If this is a whitelist, we first block the address
1044 * families that are out of range and then everything
1045 * that is not in the set. First, we find the lowest
1046 * and highest address family in the set. */
1048 SET_FOREACH(afp, c->address_families, i) {
1049 af = PTR_TO_INT(afp);
1051 if (af <= 0 || af >= af_max())
1054 if (first == 0 || af < first)
1057 if (last == 0 || af > last)
1061 assert((first == 0) == (last == 0));
1065 /* No entries in the valid range, block everything */
1066 r = seccomp_rule_add(
1068 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1076 /* Block everything below the first entry */
1077 r = seccomp_rule_add(
1079 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1082 SCMP_A0(SCMP_CMP_LT, first));
1086 /* Block everything above the last entry */
1087 r = seccomp_rule_add(
1089 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1092 SCMP_A0(SCMP_CMP_GT, last));
1096 /* Block everything between the first and last
1098 for (af = 1; af < af_max(); af++) {
1100 if (set_contains(c->address_families, INT_TO_PTR(af)))
1103 r = seccomp_rule_add(
1105 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1108 SCMP_A0(SCMP_CMP_EQ, af));
1117 /* If this is a blacklist, then generate one rule for
1118 * each address family that are then combined in OR
1121 SET_FOREACH(af, c->address_families, i) {
1123 r = seccomp_rule_add(
1125 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1128 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1134 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1138 r = seccomp_load(seccomp);
1141 seccomp_release(seccomp);
/* Delays execution until systemd signals that it is idle, or a timeout
 * passes. The ends idle_pipe[1] and idle_pipe[2] are closed right away. If
 * idle_pipe[0] is valid, the code waits up to IDLE_TIMEOUT_USEC for a hangup
 * on it; on timeout, one byte is written to idle_pipe[3] (if valid) and the
 * wait is repeated with IDLE_TIMEOUT2_USEC. The remaining ends are closed
 * afterwards. */
1147 static void do_idle_pipe_dance(int idle_pipe[4]) {
1151 safe_close(idle_pipe[1]);
1152 safe_close(idle_pipe[2]);
1154 if (idle_pipe[0] >= 0) {
1157 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1159 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1160 /* Signal systemd that we are bored and want to continue. */
1161 r = write(idle_pipe[3], "x", 1);
1163 /* Wait for systemd to react to the signal above. */
1164 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1167 safe_close(idle_pipe[0]);
1171 safe_close(idle_pipe[3]);
/* Assembles the environment variables generated for the executed process:
 * LISTEN_PID and LISTEN_FDS, WATCHDOG_PID and WATCHDOG_USEC (only when
 * watchdog_usec is positive), HOME, LOGNAME, USER, SHELL, and a default
 * terminal type when stdio is connected to a TTY. The array is allocated
 * with room for 10 pointers including the terminating NULL; the assert at
 * the end guards that bound. */
1174 static int build_environment(
1175 const ExecContext *c,
1177 usec_t watchdog_usec,
1179 const char *username,
1183 _cleanup_strv_free_ char **our_env = NULL;
1190 our_env = new0(char*, 10);
1195 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1197 our_env[n_env++] = x;
1199 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1201 our_env[n_env++] = x;
1204 if (watchdog_usec > 0) {
1205 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1207 our_env[n_env++] = x;
1209 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1211 our_env[n_env++] = x;
1215 x = strappend("HOME=", home);
1218 our_env[n_env++] = x;
1222 x = strappend("LOGNAME=", username);
1225 our_env[n_env++] = x;
1227 x = strappend("USER=", username);
1230 our_env[n_env++] = x;
1234 x = strappend("SHELL=", shell);
1237 our_env[n_env++] = x;
1240 if (is_terminal_input(c->std_input) ||
1241 c->std_output == EXEC_OUTPUT_TTY ||
1242 c->std_error == EXEC_OUTPUT_TTY ||
1245 x = strdup(default_term_for_tty(tty_path(c)));
1248 our_env[n_env++] = x;
1251 our_env[n_env++] = NULL;
1252 assert(n_env <= 10);
/* Prepares the current process according to context and params and finally
 * replaces it with the command via execve(). When a step fails, *exit_status
 * is set to the EXIT_* code naming that step. The visible steps, in order:
 * process rename, signal dispositions and mask, idle pipe wait, closing of
 * all fds except the ones still needed, TTY reset, optional confirmation
 * prompt, user lookup, stdin, stdout and stderr setup, cgroup attachment,
 * OOM score, nice level, CPU scheduler, CPU affinity, IO priority, timer
 * slack, personality, utmp record, terminal ownership, bus endpoint policy,
 * cgroup delegation, runtime directories, groups, umask, PAM session,
 * network and mount namespaces, chroot and working directory, SELinux
 * socket label, second fd cleanup and fd shifting, resource limits,
 * capability bounding set, SMACK label, user switch, secure bits,
 * capabilities, no-new-privileges, address family and system call filters,
 * SELinux and AppArmor exec transitions, environment and argv assembly. */
1260 static int exec_child(
1261 ExecCommand *command,
1262 const ExecContext *context,
1263 const ExecParameters *params,
1264 ExecRuntime *runtime,
1267 int *fds, unsigned n_fds,
1271 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1272 _cleanup_free_ char *mac_selinux_context_net = NULL;
1273 const char *username = NULL, *home = NULL, *shell = NULL;
1274 unsigned n_dont_close = 0;
1275 int dont_close[n_fds + 4];
1276 uid_t uid = UID_INVALID;
1277 gid_t gid = GID_INVALID;
1283 assert(exit_status);
1285 rename_process_from_path(command->path);
1287 /* We reset exactly these signals, since they are the
1288 * only ones we set to SIG_IGN in the main daemon. All
1289 * others we leave untouched because we set them to
1290 * SIG_DFL or a valid handler initially, both of which
1291 * will be demoted to SIG_DFL. */
1292 default_signals(SIGNALS_CRASH_HANDLER,
1293 SIGNALS_IGNORE, -1);
1295 if (context->ignore_sigpipe)
1296 ignore_signals(SIGPIPE, -1);
1298 r = reset_signal_mask();
1300 *exit_status = EXIT_SIGNAL_MASK;
1304 if (params->idle_pipe)
1305 do_idle_pipe_dance(params->idle_pipe);
1307 /* Close sockets very early to make sure we don't
1308 * block init reexecution because it cannot bind its
1314 dont_close[n_dont_close++] = socket_fd;
1316 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1317 n_dont_close += n_fds;
1319 if (params->bus_endpoint_fd >= 0)
1320 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1322 if (runtime->netns_storage_socket[0] >= 0)
1323 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1324 if (runtime->netns_storage_socket[1] >= 0)
1325 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1328 r = close_all_fds(dont_close, n_dont_close);
1330 *exit_status = EXIT_FDS;
1334 if (!context->same_pgrp)
1336 *exit_status = EXIT_SETSID;
1340 exec_context_tty_reset(context);
1342 if (params->confirm_spawn) {
1345 r = ask_for_confirmation(&response, argv);
1346 if (r == -ETIMEDOUT)
1347 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1349 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1350 else if (response == 's') {
1351 write_confirm_message("Skipping execution.\n");
1352 *exit_status = EXIT_CONFIRM;
1354 } else if (response == 'n') {
1355 write_confirm_message("Failing execution.\n");
1361 if (context->user) {
1362 username = context->user;
1363 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1365 *exit_status = EXIT_USER;
1370 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1371 * must be sure to drop O_NONBLOCK */
1373 fd_nonblock(socket_fd, false);
1375 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1377 *exit_status = EXIT_STDIN;
1381 r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1383 *exit_status = EXIT_STDOUT;
1387 r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1389 *exit_status = EXIT_STDERR;
1393 if (params->cgroup_path) {
1394 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1396 *exit_status = EXIT_CGROUP;
1401 if (context->oom_score_adjust_set) {
1402 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1404 /* When we can't make this change due to EPERM, then
1405 * let's silently skip over it. User namespaces
1406 * prohibit write access to this file, and we
1407 * shouldn't trip up over that. */
1409 sprintf(t, "%i", context->oom_score_adjust);
1410 r = write_string_file("/proc/self/oom_score_adj", t);
1411 if (r == -EPERM || r == -EACCES) {
1413 log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1416 *exit_status = EXIT_OOM_ADJUST;
1421 if (context->nice_set)
1422 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1423 *exit_status = EXIT_NICE;
1427 if (context->cpu_sched_set) {
1428 struct sched_param param = {
1429 .sched_priority = context->cpu_sched_priority,
1432 r = sched_setscheduler(0,
1433 context->cpu_sched_policy |
1434 (context->cpu_sched_reset_on_fork ?
1435 SCHED_RESET_ON_FORK : 0),
1438 *exit_status = EXIT_SETSCHEDULER;
1443 if (context->cpuset)
1444 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1445 *exit_status = EXIT_CPUAFFINITY;
1449 if (context->ioprio_set)
1450 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1451 *exit_status = EXIT_IOPRIO;
1455 if (context->timer_slack_nsec != NSEC_INFINITY)
1456 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1457 *exit_status = EXIT_TIMERSLACK;
1461 if (context->personality != 0xffffffffUL)
1462 if (personality(context->personality) < 0) {
1463 *exit_status = EXIT_PERSONALITY;
1467 if (context->utmp_id)
1468 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1470 if (context->user && is_terminal_input(context->std_input)) {
1471 r = chown_terminal(STDIN_FILENO, uid);
1473 *exit_status = EXIT_STDIN;
1479 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
1480 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1482 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1484 *exit_status = EXIT_BUS_ENDPOINT;
1490 /* If delegation is enabled we'll pass ownership of the cgroup
1491 * (but only in systemd's own controller hierarchy!) to the
1492 * user of the new process. */
1493 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1494 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1496 *exit_status = EXIT_CGROUP;
1501 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1503 *exit_status = EXIT_CGROUP;
1508 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1511 STRV_FOREACH(rt, context->runtime_directory) {
1512 _cleanup_free_ char *p;
1514 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1516 *exit_status = EXIT_RUNTIME_DIRECTORY;
1520 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1522 *exit_status = EXIT_RUNTIME_DIRECTORY;
1528 if (params->apply_permissions) {
1529 r = enforce_groups(context, username, gid);
1531 *exit_status = EXIT_GROUP;
1536 umask(context->umask);
1539 if (params->apply_permissions && context->pam_name && username) {
1540 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1542 *exit_status = EXIT_PAM;
1548 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1549 r = setup_netns(runtime->netns_storage_socket);
1551 *exit_status = EXIT_NETWORK;
1556 if (!strv_isempty(context->read_write_dirs) ||
1557 !strv_isempty(context->read_only_dirs) ||
1558 !strv_isempty(context->inaccessible_dirs) ||
1559 context->mount_flags != 0 ||
1560 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1561 params->bus_endpoint_path ||
1562 context->private_devices ||
1563 context->protect_system != PROTECT_SYSTEM_NO ||
1564 context->protect_home != PROTECT_HOME_NO) {
1566 char *tmp = NULL, *var = NULL;
1568 /* The runtime struct only contains the parent
1569 * of the private /tmp, which is
1570 * non-accessible to world users. Inside of it
1571 * there's a /tmp that is sticky, and that's
1572 * the one we want to use here. */
1574 if (context->private_tmp && runtime) {
1575 if (runtime->tmp_dir)
1576 tmp = strjoina(runtime->tmp_dir, "/tmp");
1577 if (runtime->var_tmp_dir)
1578 var = strjoina(runtime->var_tmp_dir, "/tmp");
1581 r = setup_namespace(
1582 context->read_write_dirs,
1583 context->read_only_dirs,
1584 context->inaccessible_dirs,
1587 params->bus_endpoint_path,
1588 context->private_devices,
1589 context->protect_home,
1590 context->protect_system,
1591 context->mount_flags);
1593 /* If we couldn't set up the namespace this is
1594 * probably due to a missing capability. In this case,
1595 * silently proceed. */
1596 if (r == -EPERM || r == -EACCES) {
1598 log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1601 *exit_status = EXIT_NAMESPACE;
1606 if (params->apply_chroot) {
1607 if (context->root_directory)
1608 if (chroot(context->root_directory) < 0) {
1609 *exit_status = EXIT_CHROOT;
1613 if (chdir(context->working_directory ?: "/") < 0 &&
1614 !context->working_directory_missing_ok) {
1615 *exit_status = EXIT_CHDIR;
1619 _cleanup_free_ char *d = NULL;
1621 if (asprintf(&d, "%s/%s",
1622 context->root_directory ?: "",
1623 context->working_directory ?: "") < 0) {
1624 *exit_status = EXIT_MEMORY;
1629 !context->working_directory_missing_ok) {
1630 *exit_status = EXIT_CHDIR;
1636 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1637 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1639 *exit_status = EXIT_SELINUX_CONTEXT;
1645 /* We repeat the fd closing here, to make sure that
1646 * nothing is leaked from the PAM modules. Note that
1647 * we are more aggressive this time since socket_fd
1648 * and the netns fds we don't need anymore. The custom
1649 * endpoint fd was needed to upload the policy and can
1650 * now be closed as well. */
1651 r = close_all_fds(fds, n_fds);
1653 r = shift_fds(fds, n_fds);
1655 r = flags_fds(fds, n_fds, context->non_blocking);
1657 *exit_status = EXIT_FDS;
1661 if (params->apply_permissions) {
1663 for (i = 0; i < _RLIMIT_MAX; i++) {
1664 if (!context->rlimit[i])
1667 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1668 *exit_status = EXIT_LIMITS;
1673 if (context->capability_bounding_set_drop) {
1674 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1676 *exit_status = EXIT_CAPABILITIES;
1682 if (context->smack_process_label) {
1683 r = mac_smack_apply_pid(0, context->smack_process_label);
1685 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1691 if (context->user) {
1692 r = enforce_user(context, uid);
1694 *exit_status = EXIT_USER;
1699 /* PR_GET_SECUREBITS is not privileged, while
1700 * PR_SET_SECUREBITS is. So to suppress
1701 * potential EPERMs we'll try not to call
1702 * PR_SET_SECUREBITS unless necessary. */
1703 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1704 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1705 *exit_status = EXIT_SECUREBITS;
1709 if (context->capabilities)
1710 if (cap_set_proc(context->capabilities) < 0) {
1711 *exit_status = EXIT_CAPABILITIES;
1715 if (context->no_new_privileges)
1716 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1717 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1722 if (context->address_families_whitelist ||
1723 !set_isempty(context->address_families)) {
1724 r = apply_address_families(context);
1726 *exit_status = EXIT_ADDRESS_FAMILIES;
1731 if (context->syscall_whitelist ||
1732 !set_isempty(context->syscall_filter) ||
1733 !set_isempty(context->syscall_archs)) {
1734 r = apply_seccomp(context);
1736 *exit_status = EXIT_SECCOMP;
1743 if (mac_selinux_use()) {
1744 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1747 r = setexeccon(exec_context);
1749 *exit_status = EXIT_SELINUX_CONTEXT;
1756 #ifdef HAVE_APPARMOR
1757 if (context->apparmor_profile && mac_apparmor_use()) {
1758 r = aa_change_onexec(context->apparmor_profile);
1759 if (r < 0 && !context->apparmor_profile_ignore) {
1760 *exit_status = EXIT_APPARMOR_PROFILE;
1767 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1769 *exit_status = EXIT_MEMORY;
1773 final_env = strv_env_merge(5,
1774 params->environment,
1776 context->environment,
1781 *exit_status = EXIT_MEMORY;
1785 final_argv = replace_env_argv(argv, final_env);
1787 *exit_status = EXIT_MEMORY;
1791 final_env = strv_env_clean(final_env);
1793 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1794 _cleanup_free_ char *line;
1796 line = exec_command_line(final_argv);
1799 log_unit_struct(params->unit_id,
1801 "EXECUTABLE=%s", command->path,
1802 LOG_MESSAGE("Executing: %s", line),
1807 execve(command->path, final_argv, final_env);
1808 *exit_status = EXIT_EXEC;
/* Spawns the given command for a unit. If stdin, stdout or stderr is
 * configured as a socket, params must carry exactly one fd, which is then
 * used as the socket fd. The environment files of the context are loaded
 * first; a failure there, or a failure to fork, is logged against the unit
 * and returned. The argument vector is params->argv if set, otherwise
 * command->argv. A failure reported by exec_child() is logged with the
 * failing step and the error string. Afterwards the new PID is attached to
 * params->cgroup_path (if set) and recorded via exec_status_start(). */
1812 int exec_spawn(ExecCommand *command,
1813 const ExecContext *context,
1814 const ExecParameters *params,
1815 ExecRuntime *runtime,
1818 _cleanup_strv_free_ char **files_env = NULL;
1819 int *fds = NULL; unsigned n_fds = 0;
1820 _cleanup_free_ char *line = NULL;
1829 assert(params->fds || params->n_fds <= 0);
1831 if (context->std_input == EXEC_INPUT_SOCKET ||
1832 context->std_output == EXEC_OUTPUT_SOCKET ||
1833 context->std_error == EXEC_OUTPUT_SOCKET) {
1835 if (params->n_fds != 1) {
1836 log_unit_error(params->unit_id, "Got more than one socket.");
1840 socket_fd = params->fds[0];
1844 n_fds = params->n_fds;
1847 r = exec_context_load_environment(context, params->unit_id, &files_env);
1849 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
1851 argv = params->argv ?: command->argv;
1852 line = exec_command_line(argv);
1856 log_unit_struct(params->unit_id,
1858 "EXECUTABLE=%s", command->path,
1859 LOG_MESSAGE("About to execute: %s", line),
1863 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1868 r = exec_child(command,
1879 log_unit_struct(params->unit_id,
1881 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1882 "EXECUTABLE=%s", command->path,
1883 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1884 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1885 command->path, strerror(-r)),
1893 log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1895 /* We add the new process to the cgroup both in the child (so
1896 * that we can be sure that no user code is ever executed
1897 * outside of the cgroup) and in the parent (so that we can be
1898 * sure that when we kill the cgroup the process will be
1900 if (params->cgroup_path)
1901 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1903 exec_status_start(&command->exec_status, pid);
/* Fills in the non-zero defaults of an ExecContext: best-effort I/O
 * priority, SCHED_OTHER CPU scheduling, syslog priority daemon/info with the
 * level prefix enabled, ignore_sigpipe on, and runtime directory mode 0755.
 * timer_slack_nsec (NSEC_INFINITY) and personality (0xffffffff) are set to
 * the sentinel values that exec_context_dump() treats as "not configured". */
1909 void exec_context_init(ExecContext *c) {
1913 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1914 c->cpu_sched_policy = SCHED_OTHER;
1915 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1916 c->syslog_level_prefix = true;
1917 c->ignore_sigpipe = true;
1918 c->timer_slack_nsec = NSEC_INFINITY;
1919 c->personality = 0xffffffffUL;
1920 c->runtime_directory_mode = 0755;
/* Releases the members owned by an ExecContext (string vectors, strings,
 * rlimit entries, capabilities, CPU set, syscall/address-family sets and the
 * bus endpoint) and resets the released pointers to NULL. The ExecContext
 * structure itself is not freed here. */
1923 void exec_context_done(ExecContext *c) {
1928 strv_free(c->environment);
1929 c->environment = NULL;
1931 strv_free(c->environment_files);
1932 c->environment_files = NULL;
1934 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1936 c->rlimit[l] = NULL;
1939 free(c->working_directory);
1940 c->working_directory = NULL;
1941 free(c->root_directory);
1942 c->root_directory = NULL;
1947 free(c->syslog_identifier);
1948 c->syslog_identifier = NULL;
1956 strv_free(c->supplementary_groups);
1957 c->supplementary_groups = NULL;
1962 if (c->capabilities) {
1963 cap_free(c->capabilities);
1964 c->capabilities = NULL;
1967 strv_free(c->read_only_dirs);
1968 c->read_only_dirs = NULL;
1970 strv_free(c->read_write_dirs);
1971 c->read_write_dirs = NULL;
1973 strv_free(c->inaccessible_dirs);
1974 c->inaccessible_dirs = NULL;
1977 CPU_FREE(c->cpuset);
1982 free(c->selinux_context);
1983 c->selinux_context = NULL;
1985 free(c->apparmor_profile);
1986 c->apparmor_profile = NULL;
1988 set_free(c->syscall_filter);
1989 c->syscall_filter = NULL;
1991 set_free(c->syscall_archs);
1992 c->syscall_archs = NULL;
1994 set_free(c->address_families);
1995 c->address_families = NULL;
1997 strv_free(c->runtime_directory);
1998 c->runtime_directory = NULL;
2000 bus_endpoint_free(c->bus_endpoint);
2001 c->bus_endpoint = NULL;
/* Removes the runtime directories of a context: every entry of
 * c->runtime_directory is joined onto runtime_prefix ("<prefix>/<entry>")
 * and the resulting path is deleted with rm_rf(). runtime_prefix is checked
 * for NULL before the loop. */
2004 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2009 if (!runtime_prefix)
2012 STRV_FOREACH(i, c->runtime_directory) {
2013 _cleanup_free_ char *p;
2015 p = strjoin(runtime_prefix, "/", *i, NULL);
2019 /* We execute this synchronously, since we need to be
2020 * sure this is gone when we start the service
2022 rm_rf(p, false, true, false);
/* Tears down a single ExecCommand in place. Used on array elements by
 * exec_command_done_array() and on list items by exec_command_free_list().
 * NOTE(review): presumably releases the command's path and argv - confirm. */
2028 void exec_command_done(ExecCommand *c) {
/* Calls exec_command_done() on each of the n ExecCommand elements stored
 * contiguously starting at c. */
2038 void exec_command_done_array(ExecCommand *c, unsigned n) {
2041 for (i = 0; i < n; i++)
2042 exec_command_done(c+i);
/* Unlinks items from the command list headed by c and runs
 * exec_command_done() on each of them.
 * NOTE(review): callers store the return value back into their list head
 * (see exec_command_free_array()), so this presumably returns NULL - confirm. */
2045 ExecCommand* exec_command_free_list(ExecCommand *c) {
2049 LIST_REMOVE(command, c, i);
2050 exec_command_done(i);
/* Frees each of the n command lists in the array c, overwriting every slot
 * with the value returned by exec_command_free_list(). */
2057 void exec_command_free_array(ExecCommand **c, unsigned n) {
2060 for (i = 0; i < n; i++)
2061 c[i] = exec_command_free_list(c[i]);
/* Context handed to invalid_env() as userdata: the unit to log against.
 * invalid_env() also reads a path member, which
 * exec_context_load_environment() sets to the environment file name. */
2064 typedef struct InvalidEnvInfo {
2065 const char *unit_id;
/* Callback passed to strv_env_clean_with_callback(): logs the rejected
 * assignment p, together with info->path, as an error on the unit named in
 * the InvalidEnvInfo passed as userdata. */
2069 static void invalid_env(const char *p, void *userdata) {
2070 InvalidEnvInfo *info = userdata;
2072 log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
/* Loads the environment files listed in c->environment_files. Each entry is
 * checked with path_is_absolute() and expanded with glob(); every match is
 * read with load_env_file(), cleaned with strv_env_clean_with_callback()
 * (invalid assignments are reported through invalid_env() along with the
 * file name), and merged into the list accumulated so far with
 * strv_env_merge(). A glob() failure can be returned as -errno, or as
 * -EINVAL when errno is not set.
 * NOTE(review): how the "ignore" flag is derived and how the result is
 * handed back through l should be confirmed against the full function. */
2075 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
2076 char **i, **r = NULL;
2081 STRV_FOREACH(i, c->environment_files) {
2084 bool ignore = false;
2086 _cleanup_globfree_ glob_t pglob = {};
2096 if (!path_is_absolute(fn)) {
2104 /* Filename supports globbing, take all matching files */
2106 if (glob(fn, 0, NULL, &pglob) != 0) {
2111 return errno ? -errno : -EINVAL;
2113 count = pglob.gl_pathc;
2121 for (n = 0; n < count; n++) {
2122 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2130 /* Log invalid environment variables with filename */
2132 InvalidEnvInfo info = {
2134 .path = pglob.gl_pathv[n]
2137 p = strv_env_clean_with_callback(p, invalid_env, &info);
2145 m = strv_env_merge(2, r, p);
/* Decides whether the given tty could be the device behind /dev/console.
 * "console" trivially matches. Otherwise the console is resolved with
 * resolve_dev_console(); if that fails the answer is conservatively "may
 * match". A resolved console matches when the names are equal, or when the
 * console is "tty0" (the active VC) and tty is a virtual console. */
2161 static bool tty_may_match_dev_console(const char *tty) {
2162 _cleanup_free_ char *active = NULL;
2165 if (startswith(tty, "/dev/"))
2168 /* trivial identity? */
2169 if (streq(tty, "console"))
2172 console = resolve_dev_console(&active);
2173 /* if we could not resolve, assume it may */
2177 /* "tty0" means the active VC, so it may be the same sometimes */
2178 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true if the context uses a terminal at all (TTY reset, vhangup or
 * VT disallocation requested, or stdin/stdout/stderr connected to a
 * terminal) and its tty path may refer to the same device as /dev/console. */
2181 bool exec_context_may_touch_console(ExecContext *ec) {
2182 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2183 is_terminal_input(ec->std_input) ||
2184 is_terminal_output(ec->std_output) ||
2185 is_terminal_output(ec->std_error)) &&
2186 tty_may_match_dev_console(tty_path(ec));
/* Writes the strings of l to f, each one preceded by a single space. No
 * trailing newline is written by the visible code. */
2189 static void strv_fprintf(FILE *f, char **l) {
2195 fprintf(f, " %s", *g);
/* Writes a human-readable dump of the execution context c to f as a series
 * of "Key: value" lines, each starting with prefix (passed through
 * strempty() first). Covers directories, sandboxing switches, environment,
 * resource limits, I/O and CPU scheduling, standard I/O setup, TTY and
 * syslog settings, capabilities and secure bits, user/group, directory
 * lists, SELinux/AppArmor, personality and the system call filter. */
2198 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2205 prefix = strempty(prefix);
2209 "%sWorkingDirectory: %s\n"
2210 "%sRootDirectory: %s\n"
2211 "%sNonBlocking: %s\n"
2212 "%sPrivateTmp: %s\n"
2213 "%sPrivateNetwork: %s\n"
2214 "%sPrivateDevices: %s\n"
2215 "%sProtectHome: %s\n"
2216 "%sProtectSystem: %s\n"
2217 "%sIgnoreSIGPIPE: %s\n",
2219 prefix, c->working_directory ? c->working_directory : "/",
2220 prefix, c->root_directory ? c->root_directory : "/",
2221 prefix, yes_no(c->non_blocking),
2222 prefix, yes_no(c->private_tmp),
2223 prefix, yes_no(c->private_network),
2224 prefix, yes_no(c->private_devices),
2225 prefix, protect_home_to_string(c->protect_home),
2226 prefix, protect_system_to_string(c->protect_system),
2227 prefix, yes_no(c->ignore_sigpipe));
2229 STRV_FOREACH(e, c->environment)
2230 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2232 STRV_FOREACH(e, c->environment_files)
2233 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2240 if (c->oom_score_adjust_set)
2242 "%sOOMScoreAdjust: %i\n",
2243 prefix, c->oom_score_adjust);
2245 for (i = 0; i < RLIM_NLIMITS; i++)
2247 fprintf(f, "%s%s: "RLIM_FMT"\n",
2248 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2250 if (c->ioprio_set) {
2251 _cleanup_free_ char *class_str = NULL;
2253 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2255 "%sIOSchedulingClass: %s\n"
2256 "%sIOPriority: %i\n",
2257 prefix, strna(class_str),
2258 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2261 if (c->cpu_sched_set) {
2262 _cleanup_free_ char *policy_str = NULL;
2264 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2266 "%sCPUSchedulingPolicy: %s\n"
2267 "%sCPUSchedulingPriority: %i\n"
2268 "%sCPUSchedulingResetOnFork: %s\n",
2269 prefix, strna(policy_str),
2270 prefix, c->cpu_sched_priority,
2271 prefix, yes_no(c->cpu_sched_reset_on_fork));
2275 fprintf(f, "%sCPUAffinity:", prefix);
2276 for (i = 0; i < c->cpuset_ncpus; i++)
2277 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2278 fprintf(f, " %u", i);
2282 if (c->timer_slack_nsec != NSEC_INFINITY)
2283 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2286 "%sStandardInput: %s\n"
2287 "%sStandardOutput: %s\n"
2288 "%sStandardError: %s\n",
2289 prefix, exec_input_to_string(c->std_input),
2290 prefix, exec_output_to_string(c->std_output),
2291 prefix, exec_output_to_string(c->std_error));
2297 "%sTTYVHangup: %s\n"
2298 "%sTTYVTDisallocate: %s\n",
2299 prefix, c->tty_path,
2300 prefix, yes_no(c->tty_reset),
2301 prefix, yes_no(c->tty_vhangup),
2302 prefix, yes_no(c->tty_vt_disallocate));
2304 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2305 c->std_output == EXEC_OUTPUT_KMSG ||
2306 c->std_output == EXEC_OUTPUT_JOURNAL ||
2307 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2308 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2309 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2310 c->std_error == EXEC_OUTPUT_SYSLOG ||
2311 c->std_error == EXEC_OUTPUT_KMSG ||
2312 c->std_error == EXEC_OUTPUT_JOURNAL ||
2313 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2314 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2315 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2317 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
2319 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2320 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2323 "%sSyslogFacility: %s\n"
2324 "%sSyslogLevel: %s\n",
2325 prefix, strna(fac_str),
2326 prefix, strna(lvl_str));
2329 if (c->capabilities) {
2330 _cleanup_cap_free_charp_ char *t;
2332 t = cap_to_text(c->capabilities, NULL);
2334 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Each flag string carries its own leading space so that any combination of
 * set bits is printed as a space-separated list after "Secure Bits:". */
2338 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2340 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2341 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2342 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2343 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2344 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2345 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
2347 if (c->capability_bounding_set_drop) {
2349 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2351 for (l = 0; l <= cap_last_cap(); l++)
2352 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2353 fprintf(f, " %s", strna(capability_to_name(l)));
2359 fprintf(f, "%sUser: %s\n", prefix, c->user);
2361 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2363 if (strv_length(c->supplementary_groups) > 0) {
2364 fprintf(f, "%sSupplementaryGroups:", prefix);
2365 strv_fprintf(f, c->supplementary_groups);
2370 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2372 if (strv_length(c->read_write_dirs) > 0) {
2373 fprintf(f, "%sReadWriteDirs:", prefix);
2374 strv_fprintf(f, c->read_write_dirs);
2378 if (strv_length(c->read_only_dirs) > 0) {
2379 fprintf(f, "%sReadOnlyDirs:", prefix);
2380 strv_fprintf(f, c->read_only_dirs);
2384 if (strv_length(c->inaccessible_dirs) > 0) {
2385 fprintf(f, "%sInaccessibleDirs:", prefix);
2386 strv_fprintf(f, c->inaccessible_dirs);
2392 "%sUtmpIdentifier: %s\n",
2393 prefix, c->utmp_id);
2395 if (c->selinux_context)
2397 "%sSELinuxContext: %s%s\n",
2398 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
2400 if (c->personality != 0xffffffffUL)
2402 "%sPersonality: %s\n",
2403 prefix, strna(personality_to_string(c->personality)));
2405 if (c->syscall_filter) {
2413 "%sSystemCallFilter: ",
2416 if (!c->syscall_whitelist)
2420 SET_FOREACH(id, c->syscall_filter, j) {
2421 _cleanup_free_ char *name = NULL;
2428 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2429 fputs(strna(name), f);
2436 if (c->syscall_archs) {
2443 "%sSystemCallArchitectures:",
2447 SET_FOREACH(id, c->syscall_archs, j)
2448 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2453 if (c->syscall_errno != 0)
2455 "%sSystemCallErrorNumber: %s\n",
2456 prefix, strna(errno_to_name(c->syscall_errno)));
2458 if (c->apparmor_profile)
2460 "%sAppArmorProfile: %s%s\n",
2461 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* Checks whether a process spawned with this context keeps the privileges
 * of its parent. A configured user of "root" or "0" counts as root. */
2464 bool exec_context_maintains_privileges(ExecContext *c) {
2467 /* Returns true if the process forked off would run under
2468 * an unchanged UID or as root. */
2473 if (streq(c->user, "root") || streq(c->user, "0"))
/* Marks the start of a process in s by stamping s->start_timestamp with the
 * current time. exec_spawn() calls this with the PID it just forked.
 * NOTE(review): presumably also stores pid in s - confirm. */
2479 void exec_status_start(ExecStatus *s, pid_t pid) {
2484 dual_timestamp_get(&s->start_timestamp);
/* Records the exit of a process in s: compares any previously stored PID
 * against pid, stamps s->exit_timestamp with the current time, writes a
 * dead-process utmp record when the context has a utmp_id, and resets the
 * context's TTY via exec_context_tty_reset(). */
2487 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
2490 if (s->pid && s->pid != pid)
2494 dual_timestamp_get(&s->exit_timestamp);
2500 if (context->utmp_id)
2501 utmp_put_dead_process(context->utmp_id, pid, code, status);
2503 exec_context_tty_reset(context);
/* Prints the execution status s to f, each line starting with prefix
 * (passed through strempty() first): the PID, the start timestamp if its
 * realtime value is non-zero, and, if the exit timestamp's realtime value
 * is non-zero, the exit timestamp together with exit code and status. */
2507 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
2508 char buf[FORMAT_TIMESTAMP_MAX];
2516 prefix = strempty(prefix);
2519 "%sPID: "PID_FMT"\n",
2522 if (s->start_timestamp.realtime > 0)
2524 "%sStart Timestamp: %s\n",
2525 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2527 if (s->exit_timestamp.realtime > 0)
2529 "%sExit Timestamp: %s\n"
2531 "%sExit Status: %i\n",
2532 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2533 prefix, sigchld_code_to_string(s->code),
/* Formats argv as a single newly allocated string for logging and dumping.
 * argv is walked twice: once before the buffer of k bytes is allocated and
 * once to fill it. Arguments containing whitespace are handled specially
 * (see the FIXME below for the known limitation). Callers in this file hold
 * the result in _cleanup_free_ variables and treat NULL as out-of-memory. */
2537 char *exec_command_line(char **argv) {
2545 STRV_FOREACH(a, argv)
2548 if (!(n = new(char, k)))
2552 STRV_FOREACH(a, argv) {
2559 if (strpbrk(*a, WHITESPACE)) {
2570 /* FIXME: this doesn't really handle arguments that have
2571 * spaces and ticks in them */
/* Prints one command to f: its command line (or the ENOMEM error string if
 * the line could not be formatted), followed by its execution status
 * indented by one additional tab after prefix. */
2576 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2577 _cleanup_free_ char *cmd = NULL;
2578 const char *prefix2;
2583 prefix = strempty(prefix);
2584 prefix2 = strjoina(prefix, "\t");
2586 cmd = exec_command_line(c->argv);
2588 "%sCommand Line: %s\n",
2589 prefix, cmd ? cmd : strerror(ENOMEM));
2591 exec_status_dump(&c->exec_status, f, prefix2);
/* Dumps every command of the list starting at c to f by calling
 * exec_command_dump() on each, with prefix passed through strempty(). */
2594 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2597 prefix = strempty(prefix);
2599 LIST_FOREACH(command, c, c)
2600 exec_command_dump(c, f, prefix);
/* Appends the command e to the list *l: the current tail is located and e
 * is inserted after it, so that commands keep their insertion order. */
2603 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2610 /* It's kind of important, that we keep the order here */
2611 LIST_FIND_TAIL(command, *l, end);
2612 LIST_INSERT_AFTER(command, *l, end, e);
/* Variadic helper that builds a string vector from path and the following
 * arguments via strv_new_ap().
 * NOTE(review): presumably the argument list is NULL-terminated and the
 * result replaces the command's path/argv - confirm. */
2617 int exec_command_set(ExecCommand *c, const char *path, ...) {
2625 l = strv_new_ap(path, ap);
/* Variadic helper that builds a temporary string vector from path and the
 * following arguments via strv_new_ap() and appends it to c->argv with
 * strv_extend_strv(). The temporary vector is released automatically
 * (_cleanup_strv_free_).
 * NOTE(review): presumably the argument list is NULL-terminated - confirm. */
2646 int exec_command_append(ExecCommand *c, const char *path, ...) {
2647 _cleanup_strv_free_ char **l = NULL;
2655 l = strv_new_ap(path, ap);
2661 r = strv_extend_strv(&c->argv, l);
/* Allocates a zero-initialized ExecRuntime into *rt and marks both ends of
 * the netns storage socket pair as unset (-1).
 * NOTE(review): the deserializer calls this once per key on the same *rt,
 * so it is presumably a no-op when *rt is already allocated - confirm. */
2669 static int exec_runtime_allocate(ExecRuntime **rt) {
2674 *rt = new0(ExecRuntime, 1);
2679 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepares the runtime state needed by context c. The context is first
 * checked for needing neither a private network nor a private tmp. After
 * allocating the runtime object, an AF_UNIX/SOCK_DGRAM socket pair is
 * created for network namespace storage if a private network is requested
 * and no socket exists yet, and the temporary directories are set up with
 * setup_tmp_dirs() if a private tmp is requested and none exists yet. */
2684 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2694 if (!c->private_network && !c->private_tmp)
2697 r = exec_runtime_allocate(rt);
2701 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2702 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2706 if (c->private_tmp && !(*rt)->tmp_dir) {
2707 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Takes a reference on the runtime object r, asserting that it still has a
 * positive reference count.
 * NOTE(review): presumably increments r->n_ref and returns r - confirm. */
2715 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2717 assert(r->n_ref > 0);
/* Drops a reference on the runtime object r. Once the count is zero or
 * below, the var_tmp_dir string is freed and the netns storage socket pair
 * is closed. This only releases in-memory state; removing the directories
 * on disk is the job of exec_runtime_destroy(). */
2723 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2728 assert(r->n_ref > 0);
2731 if (r->n_ref <= 0) {
2733 free(r->var_tmp_dir);
2734 safe_close_pair(r->netns_storage_socket);
/* Serializes the runtime object for unit u into f. The temporary directory
 * paths are written as "tmp-dir" and "var-tmp-dir". Each open netns storage
 * socket is duplicated into the fd set fds, and the number of the duplicate
 * is written as "netns-socket-0" or "netns-socket-1". These are the keys
 * understood by exec_runtime_deserialize_item(). */
2741 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2750 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2752 if (rt->var_tmp_dir)
2753 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2755 if (rt->netns_storage_socket[0] >= 0) {
2758 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2762 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2765 if (rt->netns_storage_socket[1] >= 0) {
2768 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2772 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Restores one serialized key/value pair into the runtime object *rt, which
 * is allocated through exec_runtime_allocate() for every recognized key.
 * "tmp-dir" and "var-tmp-dir" replace the stored path with a copy of value.
 * "netns-socket-0" and "netns-socket-1" parse value as an fd number that
 * must be contained in fds; a bad value is only logged at debug level,
 * while a good one replaces (and closes) the previously stored socket and
 * is removed from fds. */
2778 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2785 if (streq(key, "tmp-dir")) {
2788 r = exec_runtime_allocate(rt);
2792 copy = strdup(value);
2796 free((*rt)->tmp_dir);
2797 (*rt)->tmp_dir = copy;
2799 } else if (streq(key, "var-tmp-dir")) {
2802 r = exec_runtime_allocate(rt);
2806 copy = strdup(value);
2810 free((*rt)->var_tmp_dir);
2811 (*rt)->var_tmp_dir = copy;
2813 } else if (streq(key, "netns-socket-0")) {
2816 r = exec_runtime_allocate(rt);
2820 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2821 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2823 safe_close((*rt)->netns_storage_socket[0]);
2824 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2826 } else if (streq(key, "netns-socket-1")) {
2829 r = exec_runtime_allocate(rt);
2833 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2834 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2836 safe_close((*rt)->netns_storage_socket[1]);
2837 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job function passed to asynchronous_job() by exec_runtime_destroy(): p is
 * a heap-allocated path that this function takes ownership of (it is freed
 * on return via _cleanup_free_) and removes with rm_rf_dangerous(). */
2845 static void *remove_tmpdir_thread(void *p) {
2846 _cleanup_free_ char *path = p;
2848 rm_rf_dangerous(path, false, true, false);
/* Destroys the on-disk and kernel resources behind a runtime object. The
 * tmp_dir and var_tmp_dir paths are each handed to an asynchronous job
 * (remove_tmpdir_thread(), which frees the string it is given); a failure
 * to start the job is logged as a warning. The netns storage socket pair
 * is closed at the end.
 * NOTE(review): the free() of the path next to the warning presumably runs
 * only when the job could not be started - confirm. */
2852 void exec_runtime_destroy(ExecRuntime *rt) {
2858 /* If there are multiple users of this, let's leave the stuff around */
2863 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2865 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2867 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2874 if (rt->var_tmp_dir) {
2875 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2877 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2879 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2880 free(rt->var_tmp_dir);
2883 rt->var_tmp_dir = NULL;
2886 safe_close_pair(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by enum value. The
 * DEFINE_STRING_TABLE_LOOKUP() invocation below is applied to this table;
 * exec_input_to_string() is what exec_context_dump() uses to print
 * StandardInput. */
2889 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2890 [EXEC_INPUT_NULL] = "null",
2891 [EXEC_INPUT_TTY] = "tty",
2892 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2893 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2894 [EXEC_INPUT_SOCKET] = "socket"
2897 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum value. The
 * DEFINE_STRING_TABLE_LOOKUP() invocation below is applied to this table;
 * exec_output_to_string() is what exec_context_dump() uses to print
 * StandardOutput and StandardError. */
2899 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2900 [EXEC_OUTPUT_INHERIT] = "inherit",
2901 [EXEC_OUTPUT_NULL] = "null",
2902 [EXEC_OUTPUT_TTY] = "tty",
2903 [EXEC_OUTPUT_SYSLOG] = "syslog",
2904 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2905 [EXEC_OUTPUT_KMSG] = "kmsg",
2906 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2907 [EXEC_OUTPUT_JOURNAL] = "journal",
2908 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2909 [EXEC_OUTPUT_SOCKET] = "socket"
2912 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);