1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
27 #include <sys/socket.h>
29 #include <sys/prctl.h>
34 #include <sys/personality.h>
37 #include <security/pam_appl.h>
41 #include <selinux/selinux.h>
49 #include <sys/apparmor.h>
55 #include "capability.h"
58 #include "sd-messages.h"
60 #include "securebits.h"
61 #include "namespace.h"
62 #include "exit-status.h"
64 #include "utmp-wtmp.h"
66 #include "path-util.h"
71 #include "selinux-util.h"
72 #include "errno-list.h"
75 #include "smack-util.h"
76 #include "bus-endpoint.h"
80 #include "apparmor-util.h"
84 #include "seccomp-util.h"
/* Timeouts handed to fd_wait_for_event() by do_idle_pipe_dance(): the first
 * wait for POLLHUP on the idle pipe, and the shorter second wait after the
 * "bored" byte has been written. */
87 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
88 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
/* NOTE(review): the definition the next comment annotates is not visible in
 * this listing. */
90 /* This assumes there is a 'tty' group */
/* Size passed to fd_inc_sndbuf() for the logger socket in connect_logger_as(). */
93 #define SNDBUF_SIZE (8*1024*1024)
/* Tries to move every fds[i] to descriptor number i+3. Each entry is
 * duplicated with fcntl(F_DUPFD, i+3); when the descriptor handed back is not
 * exactly i+3 the wanted slot was busy, so the first such index is remembered
 * in restart_from and the pass is started again from there.
 * The fds array is modified in place.
 * NOTE(review): parts of the body (declarations, the outer loop, the error
 * and return paths) are not visible in this listing. */
95 static int shift_fds(int fds[], unsigned n_fds) {
96 int start, restart_from;
101 /* Modifies the fds array! (sorts it) */
111 for (i = start; i < (int) n_fds; i++) {
114 /* Already at right index? */
118 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
124 /* Hmm, the fd we wanted isn't free? Then
125 * let's remember that and try again from here */
126 if (nfd != i+3 && restart_from < 0)
/* A negative restart_from after a full pass means nothing had to be retried. */
130 if (restart_from < 0)
133 start = restart_from;
/* For each of the n_fds descriptors: sets or clears O_NONBLOCK according to
 * `nonblock` via fd_nonblock(), then clears FD_CLOEXEC via fd_cloexec(.., false).
 * A negative result from either helper is captured in r.
 * NOTE(review): what is done with r on failure, and the final return, are not
 * visible in this listing. */
139 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
148 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
150 for (i = 0; i < n_fds; i++) {
152 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
155 /* We unconditionally drop FD_CLOEXEC from the fds,
156 * since after all we want to pass these fds to our
159 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, or "/dev/console" when the
 * context has none. The returned pointer is never NULL. */
166 _pure_ static const char *tty_path(const ExecContext *context) {
169 if (context->tty_path)
170 return context->tty_path;
172 return "/dev/console";
/* Applies the context's TTY clean-up options, each one independently:
 * tty_vhangup -> terminal_vhangup(), tty_reset -> reset_terminal(),
 * tty_vt_disallocate -> vt_disallocate(). */
175 static void exec_context_tty_reset(const ExecContext *context) {
178 if (context->tty_vhangup)
179 terminal_vhangup(tty_path(context));
181 if (context->tty_reset)
182 reset_terminal(tty_path(context));
/* Unlike the two steps above this one uses the configured path directly, so
 * it is skipped when no tty_path is set instead of falling back to
 * /dev/console. */
184 if (context->tty_vt_disallocate && context->tty_path)
185 vt_disallocate(context->tty_path);
/* True when the output mode writes to the terminal: EXEC_OUTPUT_TTY or any of
 * the *_AND_CONSOLE modes.
 * NOTE(review): the line opening the expression is not visible in this listing. */
188 static bool is_terminal_output(ExecOutput o) {
190 o == EXEC_OUTPUT_TTY ||
191 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
192 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
193 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given open(2) flags (O_NOCTTY is always added) and
 * duplicates it onto descriptor nfd.
 * r becomes nfd on success or -errno when dup2() fails.
 * NOTE(review): the check of open()'s result, the close of the temporary fd
 * and the return are not visible in this listing. */
196 static int open_null_as(int flags, int nfd) {
201 fd = open("/dev/null", flags|O_NOCTTY);
206 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Connects fd to the AF_UNIX socket at /run/systemd/journal/stdout.
 * When gid and/or uid are valid (not GID_INVALID/UID_INVALID) extra work is
 * done before the connect() - presumably saving the old IDs and switching
 * the effective IDs, but those lines are not visible here - and afterwards
 * the saved olduid/oldgid are put back with seteuid()/setegid(), ignoring
 * failures of the restore.
 * NOTE(review): the handling of connect()'s result and the return are not
 * visible in this listing. */
214 static int connect_journal_socket(int fd, uid_t uid, gid_t gid) {
215 union sockaddr_union sa = {
216 .un.sun_family = AF_UNIX,
217 .un.sun_path = "/run/systemd/journal/stdout",
219 uid_t olduid = UID_INVALID;
220 gid_t oldgid = GID_INVALID;
223 if (gid != GID_INVALID) {
231 if (uid != UID_INVALID) {
/* The address length covers the header plus only the used bytes of sun_path,
 * without the trailing NUL. */
241 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
245 /* If we fail to restore the uid or gid, things will likely
246 fail later on. This should only happen if an LSM interferes. */
248 if (uid != UID_INVALID)
249 (void) seteuid(olduid);
252 if (gid != GID_INVALID)
253 (void) setegid(oldgid);
/* Creates an AF_UNIX stream socket, connects it to the journal through
 * connect_journal_socket() (passing uid/gid along), shuts down its read side,
 * asks for a larger send buffer and finally duplicates it onto nfd.
 * The argument list in the middle belongs to a call whose opening line is not
 * visible; it passes the syslog identifier (falling back to `ident`), the
 * syslog priority, the level-prefix flag, and three booleans derived from
 * `output` (syslog forwarding, kmsg forwarding, terminal output).
 * r becomes nfd on success or -errno when dup2() fails.
 * NOTE(review): error paths and the return are not visible in this listing. */
258 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd, uid_t uid, gid_t gid) {
262 assert(output < _EXEC_OUTPUT_MAX);
266 fd = socket(AF_UNIX, SOCK_STREAM, 0);
270 r = connect_journal_socket(fd, uid, gid);
/* Close the receive direction of the socket. */
274 if (shutdown(fd, SHUT_RD) < 0) {
279 fd_inc_sndbuf(fd, SNDBUF_SIZE);
289 context->syslog_identifier ? context->syslog_identifier : ident,
291 context->syslog_priority,
292 !!context->syslog_level_prefix,
293 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
294 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
295 is_terminal_output(output));
298 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Opens the terminal at `path` through open_terminal() with `mode` plus
 * O_NOCTTY and duplicates it onto descriptor nfd.
 * r becomes nfd on success or -errno when dup2() fails.
 * NOTE(review): the failure branch of the open and the return are not visible
 * in this listing. */
305 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
311 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
315 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* True for the three TTY input modes: EXEC_INPUT_TTY, EXEC_INPUT_TTY_FORCE
 * and EXEC_INPUT_TTY_FAIL.
 * NOTE(review): the line opening the expression is not visible in this listing. */
323 static bool is_terminal_input(ExecInput i) {
325 i == EXEC_INPUT_TTY ||
326 i == EXEC_INPUT_TTY_FORCE ||
327 i == EXEC_INPUT_TTY_FAIL;
/* Downgrades the configured input mode to EXEC_INPUT_NULL when it cannot be
 * honoured: a TTY mode while apply_tty_stdin is false, or socket input
 * without a valid socket_fd.
 * NOTE(review): the fall-through return is not visible in this listing;
 * presumably std_input is returned unchanged. */
330 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
332 if (is_terminal_input(std_input) && !apply_tty_stdin)
333 return EXEC_INPUT_NULL;
335 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
336 return EXEC_INPUT_NULL;
/* Turns socket output into EXEC_OUTPUT_INHERIT when no valid socket_fd is
 * available.
 * NOTE(review): the fall-through return is not visible in this listing;
 * presumably std_output is returned unchanged. */
341 static int fixup_output(ExecOutput std_output, int socket_fd) {
343 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
344 return EXEC_OUTPUT_INHERIT;
/* Installs standard input according to the (fixed-up) input mode:
 *   NULL    -> /dev/null opened read-only
 *   TTY_*   -> the terminal from tty_path(), obtained with acquire_terminal()
 *              and moved onto STDIN_FILENO if it landed elsewhere
 *   SOCKET  -> socket_fd duplicated onto STDIN_FILENO
 * The directly visible returns yield STDIN_FILENO on success or -errno.
 * NOTE(review): the switch header, the plain EXEC_INPUT_TTY label and parts
 * of the terminal branch are not visible in this listing. */
349 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
354 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
358 case EXEC_INPUT_NULL:
359 return open_null_as(O_RDONLY, STDIN_FILENO);
362 case EXEC_INPUT_TTY_FORCE:
363 case EXEC_INPUT_TTY_FAIL: {
366 fd = acquire_terminal(tty_path(context),
367 i == EXEC_INPUT_TTY_FAIL,
368 i == EXEC_INPUT_TTY_FORCE,
374 if (fd != STDIN_FILENO) {
375 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
383 case EXEC_INPUT_SOCKET:
384 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
387 assert_not_reached("Unknown input type");
/* Installs descriptor `fileno` (STDOUT_FILENO or STDERR_FILENO) according to
 * the context's output configuration. i, o and e are the fixed-up input,
 * stdout and stderr modes.
 * For stderr: it is duplicated from stdout when its mode equals stdout's or
 * is INHERIT. For stdout with INHERIT: the terminal is reopened if TTY input
 * was downgraded to NULL, stdin is duplicated if input is not NULL, otherwise
 * /dev/null is opened write-only. The remaining modes are handled in the
 * switch: NULL, TTY, the syslog/kmsg/journal family (connect_logger_as(),
 * with /dev/null as fallback after logging the failure) and SOCKET.
 * The directly visible returns yield `fileno` on success or -errno.
 * NOTE(review): declarations, several branch bodies and the switch header
 * are not visible in this listing. */
391 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin, uid_t uid, gid_t gid) {
399 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
400 o = fixup_output(context->std_output, socket_fd);
402 if (fileno == STDERR_FILENO) {
404 e = fixup_output(context->std_error, socket_fd);
406 /* This expects the input and output are already set up */
408 /* Don't change the stderr file descriptor if we inherit all
409 * the way and are not on a tty */
410 if (e == EXEC_OUTPUT_INHERIT &&
411 o == EXEC_OUTPUT_INHERIT &&
412 i == EXEC_INPUT_NULL &&
413 !is_terminal_input(context->std_input) &&
417 /* Duplicate from stdout if possible */
418 if (e == o || e == EXEC_OUTPUT_INHERIT)
419 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
423 } else if (o == EXEC_OUTPUT_INHERIT) {
424 /* If input got downgraded, inherit the original value */
425 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
426 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
428 /* If the input is connected to anything that's not a /dev/null, inherit that... */
429 if (i != EXEC_INPUT_NULL)
430 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
432 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
436 /* We need to open /dev/null here anew, to get the right access mode. */
437 return open_null_as(O_WRONLY, fileno);
442 case EXEC_OUTPUT_NULL:
443 return open_null_as(O_WRONLY, fileno);
445 case EXEC_OUTPUT_TTY:
/* With TTY input already on stdin, share that descriptor instead of opening
 * the terminal a second time. */
446 if (is_terminal_input(i))
447 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
449 /* We don't reset the terminal if this is just about output */
450 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
452 case EXEC_OUTPUT_SYSLOG:
453 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
454 case EXEC_OUTPUT_KMSG:
455 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
456 case EXEC_OUTPUT_JOURNAL:
457 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
458 r = connect_logger_as(context, o, ident, unit_id, fileno, uid, gid);
460 log_unit_struct(unit_id,
462 LOG_MESSAGE("Failed to connect %s of %s to the journal socket: %s",
463 fileno == STDOUT_FILENO ? "stdout" : "stderr",
464 unit_id, strerror(-r)),
/* Fallback used together with the log message above: point the stream at
 * /dev/null. */
467 r = open_null_as(O_WRONLY, fileno);
471 case EXEC_OUTPUT_SOCKET:
472 assert(socket_fd >= 0);
473 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
476 assert_not_reached("Unknown error type");
/* Tries to hand the terminal open on fd to `uid` with access mode TTY_MODE.
 * The fchown()/fchmod() calls are best effort; what counts is the state read
 * back with fstat(): owner equal to uid and permission bits equal to TTY_MODE.
 * NOTE(review): the values returned for the fstat() failure and for the
 * mismatch case are not visible in this listing. */
480 static int chown_terminal(int fd, uid_t uid) {
485 /* This might fail. What matters are the results. */
486 (void) fchown(fd, uid, -1);
487 (void) fchmod(fd, TTY_MODE);
489 if (fstat(fd, &st) < 0)
492 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily points stdin and stdout at a terminal obtained with
 * acquire_terminal() so that a confirmation question can be asked.
 * The previous stdin/stdout are first duplicated to descriptors >= 3 and, on
 * success, handed back through *_saved_stdin / *_saved_stdout so that
 * restore_confirm_stdio() can undo the change.
 * NOTE(review): the arguments of acquire_terminal() other than the timeout,
 * the error branches and the returns are not visible in this listing; the two
 * trailing safe_close() calls presumably belong to the failure path. */
498 static int setup_confirm_stdio(int *_saved_stdin,
499 int *_saved_stdout) {
500 int fd = -1, saved_stdin, saved_stdout = -1, r;
502 assert(_saved_stdin);
503 assert(_saved_stdout);
/* Park copies of the current stdin/stdout at descriptors 3 or above. */
505 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
509 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
510 if (saved_stdout < 0) {
515 fd = acquire_terminal(
520 DEFAULT_CONFIRM_USEC);
/* Make the terminal belong to the current real user. */
526 r = chown_terminal(fd, getuid());
530 if (dup2(fd, STDIN_FILENO) < 0) {
535 if (dup2(fd, STDOUT_FILENO) < 0) {
543 *_saved_stdin = saved_stdin;
544 *_saved_stdout = saved_stdout;
549 safe_close(saved_stdout);
550 safe_close(saved_stdin);
/* printf-style helper: opens /dev/console write-only (O_NOCTTY|O_CLOEXEC) and
 * formats the message straight into it with vdprintf().
 * fd is declared with _cleanup_close_, so presumably it is closed
 * automatically when the function returns.
 * NOTE(review): the check of the open, va_end() and the return are not
 * visible in this listing. */
556 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
557 _cleanup_close_ int fd = -1;
562 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
566 va_start(ap, format);
567 vdprintf(fd, format, ap);
/* Undoes setup_confirm_stdio(): duplicates the saved descriptors back onto
 * STDIN_FILENO / STDOUT_FILENO (each only if it is >= 0) and then closes the
 * saved copies.
 * NOTE(review): the second parameter line, the error bookkeeping for failed
 * dup2() calls and the return are not visible in this listing. */
573 static int restore_confirm_stdio(int *saved_stdin,
579 assert(saved_stdout);
583 if (*saved_stdin >= 0)
584 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
587 if (*saved_stdout >= 0)
588 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
591 safe_close(*saved_stdin);
592 safe_close(*saved_stdout);
/* Asks on the terminal whether the command in argv should be executed.
 * stdin/stdout are redirected with setup_confirm_stdio(), the command line is
 * rendered with exec_command_line(), ask_char() collects one of 'y', 'n' or
 * 's' into *response, and stdio is restored afterwards.
 * NOTE(review): the error checks and the return are not visible in this
 * listing. */
597 static int ask_for_confirmation(char *response, char **argv) {
598 int saved_stdout = -1, saved_stdin = -1, r;
599 _cleanup_free_ char *line = NULL;
601 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
605 line = exec_command_line(argv);
609 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
611 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Sets the group identity of the calling process.
 * If the context names a group, it is resolved with get_group_creds() and
 * replaces the gid passed in. With a username and a non-zero gid the
 * supplementary groups are initialised with initgroups(); the real,
 * effective and saved gid are then set with setresgid(). Finally, any
 * supplementary groups listed in the context are resolved and appended to
 * the current group list, which is installed with setgroups().
 * NOTE(review): declarations, error branches, the use of keep_groups and the
 * return are not visible in this listing. */
616 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
617 bool keep_groups = false;
622 /* Lookup and set GID and supplementary group list. Here too
623 * we avoid NSS lookups for gid=0. */
625 if (context->group || username) {
627 if (context->group) {
628 const char *g = context->group;
630 if ((r = get_group_creds(&g, &gid)) < 0)
634 /* First step, initialize groups from /etc/group */
635 if (username && gid != 0) {
636 if (initgroups(username, gid) < 0)
642 /* Second step, set our gids */
643 if (setresgid(gid, gid, gid) < 0)
647 if (context->supplementary_groups) {
652 /* Final step, initialize any manually set supplementary groups */
653 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
655 if (!(gids = new(gid_t, ngroups_max)))
/* Start from the list currently in effect; k is the number of entries in use. */
659 if ((k = getgroups(ngroups_max, gids)) < 0) {
666 STRV_FOREACH(i, context->supplementary_groups) {
/* No room left in the array for another group. */
669 if (k >= ngroups_max) {
675 r = get_group_creds(&g, gids+k);
684 if (setgroups(k, gids) < 0) {
/* Switches the real, effective and saved uid to `uid` with setresuid().
 * When the context carries a capability set, the switch is prepared so the
 * capabilities survive it: the keep-caps secure bit is added to the context's
 * secure bits, and a copy of the configured capabilities extended by
 * CAP_SETUID and CAP_SETPCAP (effective and permitted) is installed first.
 * NOTE(review): error branches and the return are not visible in this
 * listing. */
695 static int enforce_user(const ExecContext *context, uid_t uid) {
698 /* Sets (but doesn't lookup) the uid and makes sure we keep the
699 * capabilities while doing so. */
701 if (context->capabilities) {
702 _cleanup_cap_free_ cap_t d = NULL;
703 static const cap_value_t bits[] = {
704 CAP_SETUID, /* Necessary so that we can run setresuid() below */
705 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
708 /* First step: If we need to keep capabilities but
709 * drop privileges we need to make sure we keep our
710 * caps, while we drop privileges. */
712 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
/* Only call PR_SET_SECUREBITS when the current value actually differs. */
714 if (prctl(PR_GET_SECUREBITS) != sb)
715 if (prctl(PR_SET_SECUREBITS, sb) < 0)
719 /* Second step: set the capabilities. This will reduce
720 * the capabilities to the minimum we need. */
722 d = cap_dup(context->capabilities);
726 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
727 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
730 if (cap_set_proc(d) < 0)
734 /* Third step: actually set the uids */
735 if (setresuid(uid, uid, uid) < 0)
738 /* At this point we should have all necessary capabilities but
739 are otherwise a normal user. However, the caps might have been
740 corrupted due to the setresuid() so we need to clean them up
741 later. This is done outside of this call. */
/* PAM conversation callback; as the comment in the body says, conversations
 * are not supported.
 * NOTE(review): the first and last parameters and the return statement are
 * not visible in this listing. */
748 static int null_conv(
750 const struct pam_message **msg,
751 struct pam_response **resp,
754 /* We don't support conversations */
/* Opens a PAM session and arranges for a helper process, renamed "(sd-pam)",
 * to close it again once the original process has gone away.
 * Visible sequence: pam_start() -> pam_set_item(PAM_TTY) -> pam_acct_mgmt()
 * -> pam_open_session() -> pam_getenvlist(). SIGTERM is then blocked. The
 * helper (the child, per the comments below) closes the passed fds, drops to
 * `uid`, arms PR_SET_PDEATHSIG with SIGTERM and waits for that signal with
 * sigwait(); when its parent is gone it calls pam_close_session() and
 * pam_end(). The parent unblocks SIGTERM again.
 * On the failure path a PAM error is logged and mapped to -EPERM, the session
 * is closed, and the helper (if any) is sent SIGTERM and SIGCONT.
 * NOTE(review): most parameters, the fork itself, the cleanup labels and the
 * returns are not visible in this listing. */
759 static int setup_pam(
765 int fds[], unsigned n_fds) {
767 static const struct pam_conv conv = {
772 pam_handle_t *handle = NULL;
774 int pam_code = PAM_SUCCESS;
777 bool close_session = false;
778 pid_t pam_pid = 0, parent_pid;
785 /* We set up PAM in the parent process, then fork. The child
786 * will then stay around until killed via PR_SET_PDEATHSIG or
787 * systemd via the cgroup logic. It will then remove the PAM
788 * session again. The parent process will exec() the actual
789 * daemon. We do things this way to ensure that the main PID
790 * of the daemon is the one we initially fork()ed. */
792 if (log_get_max_level() < LOG_DEBUG)
795 pam_code = pam_start(name, user, &conv, &handle);
796 if (pam_code != PAM_SUCCESS) {
802 pam_code = pam_set_item(handle, PAM_TTY, tty);
803 if (pam_code != PAM_SUCCESS)
807 pam_code = pam_acct_mgmt(handle, flags);
808 if (pam_code != PAM_SUCCESS)
811 pam_code = pam_open_session(handle, flags);
812 if (pam_code != PAM_SUCCESS)
/* From here on the session has to be closed again on failure. */
815 close_session = true;
817 e = pam_getenvlist(handle);
819 pam_code = PAM_BUF_ERR;
823 /* Block SIGTERM, so that we know that it won't get lost in
825 if (sigemptyset(&ss) < 0 ||
826 sigaddset(&ss, SIGTERM) < 0 ||
827 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
830 parent_pid = getpid();
840 /* The child's job is to reset the PAM session on
843 /* This string must fit in 10 chars (i.e. the length
844 * of "/sbin/init"), to look pretty in /bin/ps */
845 rename_process("(sd-pam)");
847 /* Make sure we don't keep open the passed fds in this
848 child. We assume that otherwise only those fds are
849 open here that have been opened by PAM. */
850 close_many(fds, n_fds);
852 /* Drop privileges - we don't need any to pam_close_session
853 * and this will make PR_SET_PDEATHSIG work in most cases.
854 * If this fails, ignore the error - but expect sd-pam threads
855 * to fail to exit normally */
/* setresuid() reports its failure through errno and its result is not stored
 * anywhere, so errno is what has to be handed to the logger for %m. */
856 if (setresuid(uid, uid, uid) < 0)
857 log_error_errno(errno, "Error: Failed to setresuid() in sd-pam: %m");
859 /* Wait until our parent died. This will only work if
860 * the above setresuid() succeeds, otherwise the kernel
861 * will not allow unprivileged parents kill their privileged
862 * children this way. We rely on the control groups kill logic
863 * to do the rest for us. */
864 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
867 /* Check if our parent process might already have
869 if (getppid() == parent_pid) {
871 if (sigwait(&ss, &sig) < 0) {
878 assert(sig == SIGTERM);
883 /* If our parent died we'll end the session */
884 if (getppid() != parent_pid) {
885 pam_code = pam_close_session(handle, flags);
886 if (pam_code != PAM_SUCCESS)
893 pam_end(handle, pam_code | flags);
897 /* If the child was forked off successfully it will do all the
898 * cleanups, so forget about the handle here. */
901 /* Unblock SIGTERM again in the parent */
902 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
905 /* We close the log explicitly here, since the PAM modules
906 * might have opened it, but we don't want this fd around. */
915 if (pam_code != PAM_SUCCESS) {
916 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
917 err = -EPERM; /* PAM errors do not map to errno */
919 log_error_errno(errno, "PAM failed: %m");
925 pam_code = pam_close_session(handle, flags);
927 pam_end(handle, pam_code | flags);
/* Ask the helper to terminate; SIGCONT is sent as well so that a stopped
 * helper gets to act on the SIGTERM. */
935 kill(pam_pid, SIGTERM);
936 kill(pam_pid, SIGCONT);
/* Renames the current process after the program at `path`.
 * The new name is built in an 11-byte buffer as '(' + l bytes taken from p +
 * ')' + NUL, so l can be at most 8; the code that derives p and l (and
 * presumably enforces that bound) is not visible in this listing. In a
 * fallback case whose condition is likewise not visible, the literal "(...)"
 * is used instead. */
943 static void rename_process_from_path(const char *path) {
944 char process_name[11];
948 /* This resulting string must fit in 10 chars (i.e. the length
949 * of "/sbin/init") to look pretty in /bin/ps */
953 rename_process("(...)");
959 /* The end of the process name is usually more
960 * interesting, since the first bit might just be
966 process_name[0] = '(';
967 memcpy(process_name+1, p, l);
968 process_name[1+l] = ')';
969 process_name[1+l+1] = 0;
971 rename_process(process_name);
/* Builds and loads a seccomp system-call filter from the context.
 * In whitelist mode the filter's default action is the negative action and
 * the listed calls are allowed; otherwise the default is allow and the listed
 * calls get the negative action. Architectures come from c->syscall_archs
 * when that set is present; seccomp_add_secondary_archs() is also called,
 * presumably as the alternative when it is not.
 * The filter context is released at the end.
 * NOTE(review): the error checks between the steps, the exact branch
 * structure and the return are not visible in this listing. */
976 static int apply_seccomp(const ExecContext *c) {
977 uint32_t negative_action, action;
978 scmp_filter_ctx *seccomp;
/* What happens to a filtered-out call: kill the process when no errno is
 * configured, otherwise fail the call with that errno. */
985 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
987 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
991 if (c->syscall_archs) {
/* Set entries are stored offset by one (presumably so that the value 0 does
 * not end up as a NULL pointer); the - 1 undoes that. */
993 SET_FOREACH(id, c->syscall_archs, i) {
994 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
1002 r = seccomp_add_secondary_archs(seccomp);
1007 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
1008 SET_FOREACH(id, c->syscall_filter, i) {
1009 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
/* Set the filter's SCMP_FLTATR_CTL_NNP attribute to 0 before loading. */
1014 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1018 r = seccomp_load(seccomp);
1021 seccomp_release(seccomp);
/* Builds and loads a seccomp filter that restricts address families.
 * The filter's default action is allow; every rule added here fails the
 * matched call with EPROTONOSUPPORT and matches on the first system-call
 * argument (SCMP_A0). Which system call the rules are attached to is not
 * visible in this listing - presumably socket().
 * Whitelist mode: the lowest and highest valid family in the set are
 * determined; if there is none everything is blocked, otherwise everything
 * below the first, above the last, and every family in between that is not
 * in the set. Blacklist mode: one equality rule per family in the set.
 * The filter context is released at the end.
 * NOTE(review): declarations, error checks, several rule arguments and the
 * return are not visible in this listing. */
1025 static int apply_address_families(const ExecContext *c) {
1026 scmp_filter_ctx *seccomp;
1032 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1036 r = seccomp_add_secondary_archs(seccomp);
1040 if (c->address_families_whitelist) {
1041 int af, first = 0, last = 0;
1044 /* If this is a whitelist, we first block the address
1045 * families that are out of range and then everything
1046 * that is not in the set. First, we find the lowest
1047 * and highest address family in the set. */
1049 SET_FOREACH(afp, c->address_families, i) {
1050 af = PTR_TO_INT(afp);
/* Entries outside 1 .. af_max()-1 do not take part in the range search. */
1052 if (af <= 0 || af >= af_max())
1055 if (first == 0 || af < first)
1058 if (last == 0 || af > last)
/* Either both bounds were found or neither was. */
1062 assert((first == 0) == (last == 0));
1066 /* No entries in the valid range, block everything */
1067 r = seccomp_rule_add(
1069 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1077 /* Block everything below the first entry */
1078 r = seccomp_rule_add(
1080 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1083 SCMP_A0(SCMP_CMP_LT, first));
1087 /* Block everything above the last entry */
1088 r = seccomp_rule_add(
1090 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1093 SCMP_A0(SCMP_CMP_GT, last));
1097 /* Block everything between the first and last
1099 for (af = 1; af < af_max(); af++) {
1101 if (set_contains(c->address_families, INT_TO_PTR(af)))
1104 r = seccomp_rule_add(
1106 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1109 SCMP_A0(SCMP_CMP_EQ, af));
1118 /* If this is a blacklist, then generate one rule for
1119 * each address family that are then combined in OR
1122 SET_FOREACH(af, c->address_families, i) {
1124 r = seccomp_rule_add(
1126 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1129 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1135 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1139 r = seccomp_load(seccomp);
1142 seccomp_release(seccomp);
/* Performs the idle-pipe handshake on the four descriptors in idle_pipe.
 * Entries 1 and 2 are closed right away. If entry 0 is valid, the function
 * waits up to IDLE_TIMEOUT_USEC for POLLHUP on it; on timeout, and if entry 3
 * is valid, a single byte "x" is written to entry 3 and POLLHUP is awaited
 * once more for up to IDLE_TIMEOUT2_USEC. Entries 0 and 3 are closed
 * afterwards. */
1148 static void do_idle_pipe_dance(int idle_pipe[4]) {
1152 safe_close(idle_pipe[1]);
1153 safe_close(idle_pipe[2]);
1155 if (idle_pipe[0] >= 0) {
1158 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1160 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1161 /* Signal systemd that we are bored and want to continue. */
1162 r = write(idle_pipe[3], "x", 1);
1164 /* Wait for systemd to react to the signal above. */
1165 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1168 safe_close(idle_pipe[0]);
1172 safe_close(idle_pipe[3]);
/* Builds the environment block that is generated for the executed process
 * (as opposed to the configured one): LISTEN_PID and LISTEN_FDS,
 * WATCHDOG_PID and WATCHDOG_USEC when watchdog_usec > 0, HOME, LOGNAME, USER,
 * SHELL, and for TTY-attached processes a copy of the string returned by
 * default_term_for_tty().
 * NOTE(review): several parameters, the conditions guarding most entries,
 * the out-of-memory branches and the hand-over of the array to the caller are
 * not visible in this listing. */
1175 static int build_environment(
1176 const ExecContext *c,
1178 usec_t watchdog_usec,
1180 const char *username,
1184 _cleanup_strv_free_ char **our_env = NULL;
/* Ten slots: at most nine entries (see the list above) plus the terminating
 * NULL; the assert at the end checks that this was not exceeded. */
1191 our_env = new0(char*, 10);
1196 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1198 our_env[n_env++] = x;
1200 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1202 our_env[n_env++] = x;
1205 if (watchdog_usec > 0) {
1206 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1208 our_env[n_env++] = x;
1210 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1212 our_env[n_env++] = x;
1216 x = strappend("HOME=", home);
1219 our_env[n_env++] = x;
1223 x = strappend("LOGNAME=", username);
1226 our_env[n_env++] = x;
1228 x = strappend("USER=", username);
1231 our_env[n_env++] = x;
1235 x = strappend("SHELL=", shell);
1238 our_env[n_env++] = x;
1241 if (is_terminal_input(c->std_input) ||
1242 c->std_output == EXEC_OUTPUT_TTY ||
1243 c->std_error == EXEC_OUTPUT_TTY ||
1246 x = strdup(default_term_for_tty(tty_path(c)));
1249 our_env[n_env++] = x;
1252 our_env[n_env++] = NULL;
1253 assert(n_env <= 10);
/* Prepares the calling process according to `context`/`params` and finally
 * replaces it with command->path via execve().
 * Visible order of operations: rename the process, reset signal handling,
 * idle-pipe handshake, close all descriptors that are not needed, TTY reset,
 * optional interactive confirmation, user lookup, stdin/stdout/stderr setup,
 * cgroup attachment, OOM score, nice level, CPU scheduler and affinity, I/O
 * priority, timer slack, personality, utmp record, terminal ownership, bus
 * endpoint policy, cgroup delegation, runtime directories, groups, umask,
 * PAM, network namespace, mount namespace, chroot/chdir, SELinux label
 * lookup, a second descriptor clean-up (with shifting and flag fix-up),
 * resource limits, capability bounding set, SMACK label, uid switch, secure
 * bits, capabilities, no_new_privs, address-family and system-call filters,
 * SELinux/AppArmor exec labels, environment and argv assembly, execve().
 * Each failing step stores a dedicated EXIT_* code in *exit_status.
 * NOTE(review): several parameters, the error checks guarding most of the
 * exit_status assignments, and the returns are not visible in this listing. */
1261 static int exec_child(
1262 ExecCommand *command,
1263 const ExecContext *context,
1264 const ExecParameters *params,
1265 ExecRuntime *runtime,
1268 int *fds, unsigned n_fds,
1272 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1273 _cleanup_free_ char *mac_selinux_context_net = NULL;
1274 const char *username = NULL, *home = NULL, *shell = NULL;
1275 unsigned n_dont_close = 0;
/* Room for the n_fds passed descriptors plus up to four more: socket_fd, the
 * bus endpoint fd and the two netns storage sockets. */
1276 int dont_close[n_fds + 4];
1277 uid_t uid = UID_INVALID;
1278 gid_t gid = GID_INVALID;
1284 assert(exit_status);
1286 rename_process_from_path(command->path);
1288 /* We reset exactly these signals, since they are the
1289 * only ones we set to SIG_IGN in the main daemon. All
1290 * others we leave untouched because we set them to
1291 * SIG_DFL or a valid handler initially, both of which
1292 * will be demoted to SIG_DFL. */
1293 default_signals(SIGNALS_CRASH_HANDLER,
1294 SIGNALS_IGNORE, -1);
1296 if (context->ignore_sigpipe)
1297 ignore_signals(SIGPIPE, -1);
1299 r = reset_signal_mask();
1301 *exit_status = EXIT_SIGNAL_MASK;
1305 if (params->idle_pipe)
1306 do_idle_pipe_dance(params->idle_pipe);
1308 /* Close sockets very early to make sure we don't
1309 * block init reexecution because it cannot bind its
1315 dont_close[n_dont_close++] = socket_fd;
1317 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1318 n_dont_close += n_fds;
1320 if (params->bus_endpoint_fd >= 0)
1321 dont_close[n_dont_close++] = params->bus_endpoint_fd;
1323 if (runtime->netns_storage_socket[0] >= 0)
1324 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1325 if (runtime->netns_storage_socket[1] >= 0)
1326 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1329 r = close_all_fds(dont_close, n_dont_close);
1331 *exit_status = EXIT_FDS;
1335 if (!context->same_pgrp)
1337 *exit_status = EXIT_SETSID;
1341 exec_context_tty_reset(context);
1343 if (params->confirm_spawn) {
1346 r = ask_for_confirmation(&response, argv);
1347 if (r == -ETIMEDOUT)
1348 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1350 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-r));
1351 else if (response == 's') {
1352 write_confirm_message("Skipping execution.\n");
1353 *exit_status = EXIT_CONFIRM;
1355 } else if (response == 'n') {
1356 write_confirm_message("Failing execution.\n");
1362 if (context->user) {
1363 username = context->user;
1364 r = get_user_creds(&username, &uid, &gid, &home, &shell);
1366 *exit_status = EXIT_USER;
1371 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1372 * must be sure to drop O_NONBLOCK */
1374 fd_nonblock(socket_fd, false);
/* Wire up stdin, stdout and stderr, in that order. */
1376 r = setup_input(context, socket_fd, params->apply_tty_stdin);
1378 *exit_status = EXIT_STDIN;
1382 r = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1384 *exit_status = EXIT_STDOUT;
1388 r = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin, uid, gid);
1390 *exit_status = EXIT_STDERR;
/* Attach to the unit's control group. */
1394 if (params->cgroup_path) {
1395 r = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0, NULL, NULL);
1397 *exit_status = EXIT_CGROUP;
1402 if (context->oom_score_adjust_set) {
1403 char t[DECIMAL_STR_MAX(context->oom_score_adjust)];
1405 /* When we can't make this change due to EPERM, then
1406 * let's silently skip over it. User namespaces
1407 * prohibit write access to this file, and we
1408 * shouldn't trip up over that. */
1410 sprintf(t, "%i", context->oom_score_adjust);
1411 r = write_string_file("/proc/self/oom_score_adj", t);
1412 if (r == -EPERM || r == -EACCES) {
1414 log_unit_debug_errno(params->unit_id, r, "Failed to adjust OOM setting, assuming containerized execution, ignoring: %m");
1417 *exit_status = EXIT_OOM_ADJUST;
/* Scheduling and resource knobs; each one is applied only when configured. */
1422 if (context->nice_set)
1423 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1424 *exit_status = EXIT_NICE;
1428 if (context->cpu_sched_set) {
1429 struct sched_param param = {
1430 .sched_priority = context->cpu_sched_priority,
1433 r = sched_setscheduler(0,
1434 context->cpu_sched_policy |
1435 (context->cpu_sched_reset_on_fork ?
1436 SCHED_RESET_ON_FORK : 0),
1439 *exit_status = EXIT_SETSCHEDULER;
1444 if (context->cpuset)
1445 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1446 *exit_status = EXIT_CPUAFFINITY;
1450 if (context->ioprio_set)
1451 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1452 *exit_status = EXIT_IOPRIO;
1456 if (context->timer_slack_nsec != NSEC_INFINITY)
1457 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1458 *exit_status = EXIT_TIMERSLACK;
/* 0xffffffff marks "no personality configured". */
1462 if (context->personality != 0xffffffffUL)
1463 if (personality(context->personality) < 0) {
1464 *exit_status = EXIT_PERSONALITY;
1468 if (context->utmp_id)
1469 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
/* With a configured user and TTY input, hand the terminal on stdin to that
 * user. */
1471 if (context->user && is_terminal_input(context->std_input)) {
1472 r = chown_terminal(STDIN_FILENO, uid);
1474 *exit_status = EXIT_STDIN;
1480 if (params->bus_endpoint_fd >= 0 && context->bus_endpoint) {
/* Without a configured user the policy is installed for uid 0. */
1481 uid_t ep_uid = (uid == UID_INVALID) ? 0 : uid;
1483 r = bus_kernel_set_endpoint_policy(params->bus_endpoint_fd, ep_uid, context->bus_endpoint);
1485 *exit_status = EXIT_BUS_ENDPOINT;
1491 /* If delegation is enabled we'll pass ownership of the cgroup
1492 * (but only in systemd's own controller hierarchy!) to the
1493 * user of the new process. */
1494 if (params->cgroup_path && context->user && params->cgroup_delegate) {
1495 r = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1497 *exit_status = EXIT_CGROUP;
1502 r = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1504 *exit_status = EXIT_CGROUP;
/* Create each configured runtime directory below the runtime prefix, owned
 * by uid/gid. */
1509 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1512 STRV_FOREACH(rt, context->runtime_directory) {
1513 _cleanup_free_ char *p;
1515 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1517 *exit_status = EXIT_RUNTIME_DIRECTORY;
1521 r = mkdir_safe_label(p, context->runtime_directory_mode, uid, gid);
1523 *exit_status = EXIT_RUNTIME_DIRECTORY;
1529 if (params->apply_permissions) {
1530 r = enforce_groups(context, username, gid);
1532 *exit_status = EXIT_GROUP;
1537 umask(context->umask);
/* PAM session; the PAM-provided environment ends up in pam_env. */
1540 if (params->apply_permissions && context->pam_name && username) {
1541 r = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1543 *exit_status = EXIT_PAM;
1549 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1550 r = setup_netns(runtime->netns_storage_socket);
1552 *exit_status = EXIT_NETWORK;
/* A mount namespace is only set up when at least one of the options below
 * asks for it. */
1557 if (!strv_isempty(context->read_write_dirs) ||
1558 !strv_isempty(context->read_only_dirs) ||
1559 !strv_isempty(context->inaccessible_dirs) ||
1560 context->mount_flags != 0 ||
1561 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1562 params->bus_endpoint_path ||
1563 context->private_devices ||
1564 context->protect_system != PROTECT_SYSTEM_NO ||
1565 context->protect_home != PROTECT_HOME_NO) {
1567 char *tmp = NULL, *var = NULL;
1569 /* The runtime struct only contains the parent
1570 * of the private /tmp, which is
1571 * non-accessible to world users. Inside of it
1572 * there's a /tmp that is sticky, and that's
1573 * the one we want to use here. */
1575 if (context->private_tmp && runtime) {
1576 if (runtime->tmp_dir)
1577 tmp = strjoina(runtime->tmp_dir, "/tmp");
1578 if (runtime->var_tmp_dir)
1579 var = strjoina(runtime->var_tmp_dir, "/tmp");
1582 r = setup_namespace(
1583 context->read_write_dirs,
1584 context->read_only_dirs,
1585 context->inaccessible_dirs,
1588 params->bus_endpoint_path,
1589 context->private_devices,
1590 context->protect_home,
1591 context->protect_system,
1592 context->mount_flags);
1594 /* If we couldn't set up the namespace this is
1595 * probably due to a missing capability. In this case,
1596 * silently proceed. */
1597 if (r == -EPERM || r == -EACCES) {
1599 log_unit_debug_errno(params->unit_id, r, "Failed to set up namespace, assuming containerized execution, ignoring: %m");
1602 *exit_status = EXIT_NAMESPACE;
/* Root and working directory. When apply_chroot is set the process really
 * chroot()s; the asprintf() further down builds a combined root + working
 * directory path, presumably for the case where it is not set. */
1607 if (params->apply_chroot) {
1608 if (context->root_directory)
1609 if (chroot(context->root_directory) < 0) {
1610 *exit_status = EXIT_CHROOT;
1614 if (chdir(context->working_directory ?: "/") < 0 &&
1615 !context->working_directory_missing_ok) {
1616 *exit_status = EXIT_CHDIR;
1620 _cleanup_free_ char *d = NULL;
1622 if (asprintf(&d, "%s/%s",
1623 context->root_directory ?: "",
1624 context->working_directory ?: "") < 0) {
1625 *exit_status = EXIT_MEMORY;
1630 !context->working_directory_missing_ok) {
1631 *exit_status = EXIT_CHDIR;
1637 if (params->apply_permissions && mac_selinux_use() && params->selinux_context_net && socket_fd >= 0) {
1638 r = mac_selinux_get_child_mls_label(socket_fd, command->path, context->selinux_context, &mac_selinux_context_net);
1640 *exit_status = EXIT_SELINUX_CONTEXT;
1646 /* We repeat the fd closing here, to make sure that
1647 * nothing is leaked from the PAM modules. Note that
1648 * we are more aggressive this time since socket_fd
1649 * and the netns fds we don't need anymore. The custom
1650 * endpoint fd was needed to upload the policy and can
1651 * now be closed as well. */
1652 r = close_all_fds(fds, n_fds);
1654 r = shift_fds(fds, n_fds);
1656 r = flags_fds(fds, n_fds, context->non_blocking);
1658 *exit_status = EXIT_FDS;
/* Privilege-related settings; the limits are applied only when
 * apply_permissions is set. */
1662 if (params->apply_permissions) {
1664 for (i = 0; i < _RLIMIT_MAX; i++) {
1665 if (!context->rlimit[i])
1668 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1669 *exit_status = EXIT_LIMITS;
1674 if (context->capability_bounding_set_drop) {
1675 r = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1677 *exit_status = EXIT_CAPABILITIES;
1683 if (context->smack_process_label) {
1684 r = mac_smack_apply_pid(0, context->smack_process_label);
1686 *exit_status = EXIT_SMACK_PROCESS_LABEL;
1692 if (context->user) {
1693 r = enforce_user(context, uid);
1695 *exit_status = EXIT_USER;
1700 /* PR_GET_SECUREBITS is not privileged, while
1701 * PR_SET_SECUREBITS is. So to suppress
1702 * potential EPERMs we'll try not to call
1703 * PR_SET_SECUREBITS unless necessary. */
1704 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1705 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1706 *exit_status = EXIT_SECUREBITS;
1710 if (context->capabilities)
1711 if (cap_set_proc(context->capabilities) < 0) {
1712 *exit_status = EXIT_CAPABILITIES;
1716 if (context->no_new_privileges)
1717 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1718 *exit_status = EXIT_NO_NEW_PRIVILEGES;
1723 if (context->address_families_whitelist ||
1724 !set_isempty(context->address_families)) {
1725 r = apply_address_families(context);
1727 *exit_status = EXIT_ADDRESS_FAMILIES;
1732 if (context->syscall_whitelist ||
1733 !set_isempty(context->syscall_filter) ||
1734 !set_isempty(context->syscall_archs)) {
1735 r = apply_seccomp(context);
1737 *exit_status = EXIT_SECCOMP;
/* SELinux exec context: the label derived from the socket (if one was
 * computed above) takes precedence over the configured one. */
1744 if (mac_selinux_use()) {
1745 char *exec_context = mac_selinux_context_net ?: context->selinux_context;
1748 r = setexeccon(exec_context);
1750 *exit_status = EXIT_SELINUX_CONTEXT;
1757 #ifdef HAVE_APPARMOR
1758 if (context->apparmor_profile && mac_apparmor_use()) {
1759 r = aa_change_onexec(context->apparmor_profile);
1760 if (r < 0 && !context->apparmor_profile_ignore) {
1761 *exit_status = EXIT_APPARMOR_PROFILE;
/* Assemble the final environment and argument vector. */
1768 r = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1770 *exit_status = EXIT_MEMORY;
1774 final_env = strv_env_merge(5,
1775 params->environment,
1777 context->environment,
1782 *exit_status = EXIT_MEMORY;
1786 final_argv = replace_env_argv(argv, final_env);
1788 *exit_status = EXIT_MEMORY;
1792 final_env = strv_env_clean(final_env);
1794 if (_unlikely_(log_get_max_level() >= LOG_DEBUG)) {
1795 _cleanup_free_ char *line;
1797 line = exec_command_line(final_argv);
1800 log_unit_struct(params->unit_id,
1802 "EXECUTABLE=%s", command->path,
1803 LOG_MESSAGE("Executing: %s", line),
/* execve() only returns on failure, so the line after it is the error path. */
1808 execve(command->path, final_argv, final_env);
1809 *exit_status = EXIT_EXEC;
/* exec_spawn(): prepare and launch one command.
 *
 * Visible steps: validate the passed fds, load the context's environment
 * files, log the command line that is about to run, call exec_child(), and
 * afterwards attach the new PID to params->cgroup_path (when set) and record
 * the start in command->exec_status.
 *
 * NOTE(review): this listing is abridged (remaining parameters, local
 * declarations, the fork call, braces and returns are not visible), so only
 * what the visible lines establish is described here. */
1813 int exec_spawn(ExecCommand *command,
1814 const ExecContext *context,
1815 const ExecParameters *params,
1816 ExecRuntime *runtime,
1819 _cleanup_strv_free_ char **files_env = NULL;
1820 int *fds = NULL; unsigned n_fds = 0;
1821 _cleanup_free_ char *line = NULL;
/* A positive fd count requires an fd array. */
1830 assert(params->fds || params->n_fds <= 0);
/* If any of stdin/stdout/stderr is wired to a socket, exactly one fd must
 * have been passed; it becomes socket_fd. */
1832 if (context->std_input == EXEC_INPUT_SOCKET ||
1833 context->std_output == EXEC_OUTPUT_SOCKET ||
1834 context->std_error == EXEC_OUTPUT_SOCKET) {
/* NOTE(review): the condition also triggers for n_fds == 0, although the
 * message only mentions "more than one". */
1836 if (params->n_fds != 1) {
1837 log_unit_error(params->unit_id, "Got more than one socket.");
1841 socket_fd = params->fds[0];
1845 n_fds = params->n_fds;
/* Variables from the EnvironmentFile entries end up in files_env, which is
 * freed automatically on scope exit (_cleanup_strv_free_). */
1848 r = exec_context_load_environment(context, params->unit_id, &files_env);
1850 return log_unit_error_errno(params->unit_id, r, "Failed to load environment files: %m");
/* params->argv, when set, overrides the command's own argv. */
1852 argv = params->argv ?: command->argv;
1853 line = exec_command_line(argv);
1857 log_unit_struct(params->unit_id,
1859 "EXECUTABLE=%s", command->path,
1860 LOG_MESSAGE("About to execute: %s", line),
1864 return log_unit_error_errno(params->unit_id, r, "Failed to fork: %m");
1869 r = exec_child(command,
/* Report which step failed, using exit_status and the negative errno r
 * (presumably both produced by exec_child() - its full call is not visible). */
1880 log_unit_struct(params->unit_id,
1882 LOG_MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1883 "EXECUTABLE=%s", command->path,
1884 LOG_MESSAGE("Failed at step %s spawning %s: %s",
1885 exit_status_to_string(exit_status, EXIT_STATUS_SYSTEMD),
1886 command->path, strerror(-r)),
1894 log_unit_debug(params->unit_id, "Forked %s as "PID_FMT, command->path, pid);
1896 /* We add the new process to the cgroup both in the child (so
1897 * that we can be sure that no user code is ever executed
1898 * outside of the cgroup) and in the parent (so that we can be
1899 * sure that when we kill the cgroup the process will be
1901 if (params->cgroup_path)
1902 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1904 exec_status_start(&command->exec_status, pid);
/* exec_context_init(): assign the non-zero defaults of an ExecContext.
 * Only the assignments below are visible in this abridged listing. */
1910 void exec_context_init(ExecContext *c) {
/* I/O priority default: best-effort class, data value 0. */
1914 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1915 c->cpu_sched_policy = SCHED_OTHER;
/* Default syslog priority: daemon facility, info level. */
1916 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1917 c->syslog_level_prefix = true;
1918 c->ignore_sigpipe = true;
/* NSEC_INFINITY and 0xffffffffUL act as "not configured" markers:
 * exec_context_dump() skips TimerSlackNSec and Personality at these values. */
1919 c->timer_slack_nsec = NSEC_INFINITY;
1920 c->personality = 0xffffffffUL;
1921 c->runtime_directory_mode = 0755;
/* exec_context_done(): release the heap-allocated members of an ExecContext
 * and reset each released pointer to NULL.
 * NOTE(review): abridged listing - some members released by the full
 * function are not visible here. */
1924 void exec_context_done(ExecContext *c) {
/* String vectors are released with strv_free(), plain strings with free(). */
1929 strv_free(c->environment);
1930 c->environment = NULL;
1932 strv_free(c->environment_files);
1933 c->environment_files = NULL;
/* Per-resource limits: every slot of the rlimit array is cleared. */
1935 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1937 c->rlimit[l] = NULL;
1940 free(c->working_directory);
1941 c->working_directory = NULL;
1942 free(c->root_directory);
1943 c->root_directory = NULL;
1948 free(c->syslog_identifier);
1949 c->syslog_identifier = NULL;
1957 strv_free(c->supplementary_groups);
1958 c->supplementary_groups = NULL;
/* Capability state is released with cap_free(), and only when set. */
1963 if (c->capabilities) {
1964 cap_free(c->capabilities);
1965 c->capabilities = NULL;
1968 strv_free(c->read_only_dirs);
1969 c->read_only_dirs = NULL;
1971 strv_free(c->read_write_dirs);
1972 c->read_write_dirs = NULL;
1974 strv_free(c->inaccessible_dirs);
1975 c->inaccessible_dirs = NULL;
/* The CPU set is released with CPU_FREE(). */
1978 CPU_FREE(c->cpuset);
1983 free(c->selinux_context);
1984 c->selinux_context = NULL;
1986 free(c->apparmor_profile);
1987 c->apparmor_profile = NULL;
/* The seccomp and address-family sets are released with set_free(). */
1989 set_free(c->syscall_filter);
1990 c->syscall_filter = NULL;
1992 set_free(c->syscall_archs);
1993 c->syscall_archs = NULL;
1995 set_free(c->address_families);
1996 c->address_families = NULL;
1998 strv_free(c->runtime_directory);
1999 c->runtime_directory = NULL;
2001 bus_endpoint_free(c->bus_endpoint);
2002 c->bus_endpoint = NULL;
/* exec_context_destroy_runtime_directory(): for every entry of
 * c->runtime_directory, build "<runtime_prefix>/<entry>" and remove it
 * with rm_rf().
 * NOTE(review): abridged listing - what is returned for a NULL prefix or a
 * failed strjoin() is not visible. */
2005 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
2010 if (!runtime_prefix)
2013 STRV_FOREACH(i, c->runtime_directory) {
/* p is assigned right after its declaration and freed automatically at the
 * end of each iteration (_cleanup_free_). */
2014 _cleanup_free_ char *p;
2016 p = strjoin(runtime_prefix, "/", *i, NULL);
2020 /* We execute this synchronously, since we need to be
2021 * sure this is gone when we start the service
2023 rm_rf(p, false, true, false);
/* exec_command_done(): per-command cleanup, invoked for each element by
 * exec_command_done_array() and exec_command_free_list().
 * NOTE(review): the body is not visible in this abridged listing. */
2029 void exec_command_done(ExecCommand *c) {
/* exec_command_done_array(): call exec_command_done() on each of the n
 * ExecCommand elements stored contiguously starting at c. */
2039 void exec_command_done_array(ExecCommand *c, unsigned n) {
2042 for (i = 0; i < n; i++)
2043 exec_command_done(c+i);
/* exec_command_free_list(): unlink entries from the command list headed by c
 * and run exec_command_done() on each unlinked entry.
 * NOTE(review): the loop header, the release of the entry itself and the
 * return value are not visible here; exec_command_free_array() stores the
 * result back into its slot. */
2046 ExecCommand* exec_command_free_list(ExecCommand *c) {
2050 LIST_REMOVE(command, c, i);
2051 exec_command_done(i);
/* exec_command_free_array(): free each of the n command lists in c[] and
 * overwrite every slot with the value exec_command_free_list() returns. */
2058 void exec_command_free_array(ExecCommand **c, unsigned n) {
2061 for (i = 0; i < n; i++)
2062 c[i] = exec_command_free_list(c[i]);
/* Context handed to invalid_env() as its userdata: identifies the unit and
 * (via a 'path' member used by invalid_env(), not visible in this abridged
 * listing) the environment file an invalid assignment came from. */
2065 typedef struct InvalidEnvInfo {
/* Unit id used as the log target. */
2066 const char *unit_id;
/* invalid_env(): callback passed to strv_env_clean_with_callback(); logs the
 * rejected assignment p together with the file path held in the
 * InvalidEnvInfo passed as userdata. */
2070 static void invalid_env(const char *p, void *userdata) {
2071 InvalidEnvInfo *info = userdata;
2073 log_unit_error(info->unit_id, "Ignoring invalid environment assignment '%s': %s", p, info->path);
/* exec_context_load_environment(): read every file named in
 * c->environment_files (glob patterns allowed) and merge the resulting
 * variables into one string vector.
 * NOTE(review): abridged listing - how the result reaches *l, how the
 * 'ignore' flag is derived and used, and the cleanup on error paths are not
 * visible here. */
2076 int exec_context_load_environment(const ExecContext *c, const char *unit_id, char ***l) {
/* r accumulates the merged environment across all files. */
2077 char **i, **r = NULL;
2082 STRV_FOREACH(i, c->environment_files) {
2085 bool ignore = false;
/* glob results are released automatically at the end of each iteration. */
2087 _cleanup_globfree_ glob_t pglob = {};
/* Non-absolute file names take a separate path (handling not visible). */
2097 if (!path_is_absolute(fn)) {
2105 /* Filename supports globbing, take all matching files */
2107 if (glob(fn, 0, NULL, &pglob) != 0) {
/* Fall back to -EINVAL when errno is 0 at this point. */
2112 return errno ? -errno : -EINVAL;
2114 count = pglob.gl_pathc;
/* Load each matched file, then drop invalid assignments (logging them
 * through invalid_env()) before merging into r. */
2122 for (n = 0; n < count; n++) {
2123 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2131 /* Log invalid environment variables with filename */
2133 InvalidEnvInfo info = {
2135 .path = pglob.gl_pathv[n]
2138 p = strv_env_clean_with_callback(p, invalid_env, &info);
2146 m = strv_env_merge(2, r, p);
/* tty_may_match_dev_console(): tell whether the given tty name could be the
 * device currently behind /dev/console.
 * NOTE(review): abridged listing - the statements executed under several of
 * the conditions below are not visible. */
2162 static bool tty_may_match_dev_console(const char *tty) {
/* Storage filled in by resolve_dev_console(); freed on scope exit. */
2163 _cleanup_free_ char *active = NULL;
/* A "/dev/" prefix gets special treatment (presumably it is stripped -
 * the statement is not visible). */
2166 if (startswith(tty, "/dev/"))
2169 /* trivial identity? */
2170 if (streq(tty, "console"))
2173 console = resolve_dev_console(&active);
2174 /* if we could not resolve, assume it may */
2178 /* "tty0" means the active VC, so it may be the same sometimes */
2179 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* exec_context_may_touch_console(): true when the context uses a terminal in
 * any way (TTY reset/vhangup/VT-disallocate requested, or stdin/stdout/stderr
 * attached to a terminal) AND its tty path may refer to /dev/console. */
2182 bool exec_context_may_touch_console(ExecContext *ec) {
2183 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2184 is_terminal_input(ec->std_input) ||
2185 is_terminal_output(ec->std_output) ||
2186 is_terminal_output(ec->std_error)) &&
2187 tty_may_match_dev_console(tty_path(ec));
/* strv_fprintf(): write the entries of string vector l to f, each preceded
 * by a single space (the loop header is not visible in this abridged
 * listing). No trailing newline is written by the visible code. */
2190 static void strv_fprintf(FILE *f, char **l) {
2196 fprintf(f, " %s", *g);
/* exec_context_dump(): write a human-readable "Key: value" description of
 * the execution context c to f, one setting per line, every line starting
 * with prefix (a NULL prefix is treated as the empty string).
 *
 * Optional settings are only printed when configured (see the individual
 * conditions below).
 *
 * NOTE(review): this listing is abridged - several fprintf() openers,
 * conditions and braces of the full function are not visible. */
2199 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
2206 prefix = strempty(prefix);
/* Settings that are always printed; unset directories are shown as "/". */
2210 "%sWorkingDirectory: %s\n"
2211 "%sRootDirectory: %s\n"
2212 "%sNonBlocking: %s\n"
2213 "%sPrivateTmp: %s\n"
2214 "%sPrivateNetwork: %s\n"
2215 "%sPrivateDevices: %s\n"
2216 "%sProtectHome: %s\n"
2217 "%sProtectSystem: %s\n"
2218 "%sIgnoreSIGPIPE: %s\n",
2220 prefix, c->working_directory ? c->working_directory : "/",
2221 prefix, c->root_directory ? c->root_directory : "/",
2222 prefix, yes_no(c->non_blocking),
2223 prefix, yes_no(c->private_tmp),
2224 prefix, yes_no(c->private_network),
2225 prefix, yes_no(c->private_devices),
2226 prefix, protect_home_to_string(c->protect_home),
2227 prefix, protect_system_to_string(c->protect_system),
2228 prefix, yes_no(c->ignore_sigpipe));
/* One line per environment assignment and per environment file. */
2230 STRV_FOREACH(e, c->environment)
2231 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2233 STRV_FOREACH(e, c->environment_files)
2234 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2241 if (c->oom_score_adjust_set)
2243 "%sOOMScoreAdjust: %i\n",
2244 prefix, c->oom_score_adjust);
/* Resource limits: only the hard limit (rlim_max) is printed. */
2246 for (i = 0; i < RLIM_NLIMITS; i++)
2248 fprintf(f, "%s%s: "RLIM_FMT"\n",
2249 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
2251 if (c->ioprio_set) {
2252 _cleanup_free_ char *class_str = NULL;
/* strna() below substitutes a placeholder if the conversion left NULL. */
2254 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2256 "%sIOSchedulingClass: %s\n"
2257 "%sIOPriority: %i\n",
2258 prefix, strna(class_str),
2259 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
2262 if (c->cpu_sched_set) {
2263 _cleanup_free_ char *policy_str = NULL;
2265 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2267 "%sCPUSchedulingPolicy: %s\n"
2268 "%sCPUSchedulingPriority: %i\n"
2269 "%sCPUSchedulingResetOnFork: %s\n",
2270 prefix, strna(policy_str),
2271 prefix, c->cpu_sched_priority,
2272 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* CPU affinity: list the index of every CPU set in the mask. */
2276 fprintf(f, "%sCPUAffinity:", prefix);
2277 for (i = 0; i < c->cpuset_ncpus; i++)
2278 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2279 fprintf(f, " %u", i);
/* NSEC_INFINITY is the "not configured" value set by exec_context_init(). */
2283 if (c->timer_slack_nsec != NSEC_INFINITY)
2284 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2287 "%sStandardInput: %s\n"
2288 "%sStandardOutput: %s\n"
2289 "%sStandardError: %s\n",
2290 prefix, exec_input_to_string(c->std_input),
2291 prefix, exec_output_to_string(c->std_output),
2292 prefix, exec_output_to_string(c->std_error));
2298 "%sTTYVHangup: %s\n"
2299 "%sTTYVTDisallocate: %s\n",
2300 prefix, c->tty_path,
2301 prefix, yes_no(c->tty_reset),
2302 prefix, yes_no(c->tty_vhangup),
2303 prefix, yes_no(c->tty_vt_disallocate));
/* Syslog facility/level only matter when stdout or stderr goes to syslog,
 * kmsg or the journal (with or without console). */
2305 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2306 c->std_output == EXEC_OUTPUT_KMSG ||
2307 c->std_output == EXEC_OUTPUT_JOURNAL ||
2308 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2309 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2310 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2311 c->std_error == EXEC_OUTPUT_SYSLOG ||
2312 c->std_error == EXEC_OUTPUT_KMSG ||
2313 c->std_error == EXEC_OUTPUT_JOURNAL ||
2314 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2315 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2316 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2318 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
/* The facility is stored shifted left by 3 bits; LOG_PRI() extracts the level. */
2320 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2321 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2324 "%sSyslogFacility: %s\n"
2325 "%sSyslogLevel: %s\n",
2326 prefix, strna(fac_str),
2327 prefix, strna(lvl_str));
2330 if (c->capabilities) {
/* t is assigned right below and released via the cleanup attribute. */
2331 _cleanup_cap_free_charp_ char *t;
2333 t = cap_to_text(c->capabilities, NULL);
2335 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Secure bits: every flag string carries its own leading space so that the
 * flags are separated from the colon and from each other. */
2339 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2341 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2342 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2343 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2344 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2345 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2346 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* The context stores the capabilities to DROP from the bounding set; the
 * dump lists the ones that are kept, i.e. whose bit is not set. */
2348 if (c->capability_bounding_set_drop) {
2350 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2352 for (l = 0; l <= cap_last_cap(); l++)
2353 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l)))
2354 fprintf(f, " %s", strna(capability_to_name(l)));
2360 fprintf(f, "%sUser: %s\n", prefix, c->user);
2362 fprintf(f, "%sGroup: %s\n", prefix, c->group);
/* List-valued settings are printed on one line via strv_fprintf(). */
2364 if (strv_length(c->supplementary_groups) > 0) {
2365 fprintf(f, "%sSupplementaryGroups:", prefix);
2366 strv_fprintf(f, c->supplementary_groups);
2371 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
2373 if (strv_length(c->read_write_dirs) > 0) {
2374 fprintf(f, "%sReadWriteDirs:", prefix);
2375 strv_fprintf(f, c->read_write_dirs);
2379 if (strv_length(c->read_only_dirs) > 0) {
2380 fprintf(f, "%sReadOnlyDirs:", prefix);
2381 strv_fprintf(f, c->read_only_dirs);
2385 if (strv_length(c->inaccessible_dirs) > 0) {
2386 fprintf(f, "%sInaccessibleDirs:", prefix);
2387 strv_fprintf(f, c->inaccessible_dirs);
2393 "%sUtmpIdentifier: %s\n",
2394 prefix, c->utmp_id);
/* A "-" is printed before the context when selinux_context_ignore is set. */
2396 if (c->selinux_context)
2398 "%sSELinuxContext: %s%s\n",
2399 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
/* 0xffffffffUL is the "not configured" value set by exec_context_init(). */
2401 if (c->personality != 0xffffffffUL)
2403 "%sPersonality: %s\n",
2404 prefix, strna(personality_to_string(c->personality)));
2406 if (c->syscall_filter) {
2414 "%sSystemCallFilter: ",
2417 if (!c->syscall_whitelist)
2421 SET_FOREACH(id, c->syscall_filter, j) {
2422 _cleanup_free_ char *name = NULL;
/* Set entries hold the syscall number plus one, hence the "- 1". */
2429 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2430 fputs(strna(name), f);
2437 if (c->syscall_archs) {
2444 "%sSystemCallArchitectures:",
/* Architectures are stored with the same plus-one offset. */
2448 SET_FOREACH(id, c->syscall_archs, j)
2449 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2454 if (c->syscall_errno != 0)
2456 "%sSystemCallErrorNumber: %s\n",
2457 prefix, strna(errno_to_name(c->syscall_errno)));
/* A "-" is printed before the profile when apparmor_profile_ignore is set. */
2459 if (c->apparmor_profile)
2461 "%sAppArmorProfile: %s%s\n",
2462 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
2465 bool exec_context_maintains_privileges(ExecContext *c) {
2468 /* Returns true if the process forked off would run under
2469 * an unchanged UID or as root. */
/* "root" and the numeric name "0" both designate the root user.
 * NOTE(review): the handling of an unset c->user and the return
 * statements are not visible in this abridged listing. */
2474 if (streq(c->user, "root") || streq(c->user, "0"))
/* exec_status_start(): record the start of process pid in s by taking the
 * start timestamp now.
 * NOTE(review): the remaining statements (e.g. how pid is stored in s) are
 * not visible in this abridged listing. */
2480 void exec_status_start(ExecStatus *s, pid_t pid) {
2485 dual_timestamp_get(&s->start_timestamp);
/* exec_status_exit(): record the termination of process pid in s (exit
 * timestamp taken now), write a utmp "dead process" record when the context
 * has a utmp id, and reset the context's TTY.
 * NOTE(review): abridged listing - the storing of code/status and any guard
 * around the context accesses are not visible. */
2488 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* s already tracks a different PID (the reaction is not visible here). */
2491 if (s->pid && s->pid != pid)
2495 dual_timestamp_get(&s->exit_timestamp);
2501 if (context->utmp_id)
2502 utmp_put_dead_process(context->utmp_id, pid, code, status);
2504 exec_context_tty_reset(context);
/* exec_status_dump(): write the PID and, when recorded, the start and exit
 * timestamps plus exit code/status of s to f; every line starts with prefix
 * (NULL is treated as the empty string).
 * NOTE(review): abridged listing - the fprintf() openers and some arguments
 * are not visible. */
2508 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer reused for each formatted timestamp. */
2509 char buf[FORMAT_TIMESTAMP_MAX];
2517 prefix = strempty(prefix);
2520 "%sPID: "PID_FMT"\n",
/* A realtime value of 0 means the timestamp was never taken. */
2523 if (s->start_timestamp.realtime > 0)
2525 "%sStart Timestamp: %s\n",
2526 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
2528 if (s->exit_timestamp.realtime > 0)
2530 "%sExit Timestamp: %s\n"
2532 "%sExit Status: %i\n",
2533 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2534 prefix, sigchld_code_to_string(s->code),
/* exec_command_line(): build a single newly allocated string from the
 * argument vector argv. Callers in this file release the result with free()
 * (via _cleanup_free_) and check it for NULL.
 * NOTE(review): abridged listing - the size computation, the copying and the
 * treatment of arguments containing whitespace are only partly visible. */
2538 char *exec_command_line(char **argv) {
/* First pass over argv happens before the buffer of k chars is allocated
 * (presumably it computes k - the loop body is not visible). */
2546 STRV_FOREACH(a, argv)
2549 if (!(n = new(char, k)))
/* Second pass over argv; arguments containing whitespace are special-cased. */
2553 STRV_FOREACH(a, argv) {
2560 if (strpbrk(*a, WHITESPACE)) {
2571 /* FIXME: this doesn't really handle arguments that have
2572 * spaces and ticks in them */
/* exec_command_dump(): write the command line of c to f, followed by its
 * exec status; status lines are indented one tab deeper than prefix. */
2577 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2578 _cleanup_free_ char *cmd = NULL;
2579 const char *prefix2;
2584 prefix = strempty(prefix);
/* prefix2 = prefix + "\t", used for the nested status dump. */
2585 prefix2 = strjoina(prefix, "\t");
2587 cmd = exec_command_line(c->argv);
/* If building the command line failed (NULL), print the ENOMEM error text
 * in its place. */
2589 "%sCommand Line: %s\n",
2590 prefix, cmd ? cmd : strerror(ENOMEM));
2592 exec_status_dump(&c->exec_status, f, prefix2);
/* exec_command_dump_list(): dump every command of the list starting at c to
 * f via exec_command_dump(), using prefix (NULL is treated as empty). */
2595 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2598 prefix = strempty(prefix);
/* The parameter c doubles as the iteration cursor. */
2600 LIST_FOREACH(command, c, c)
2601 exec_command_dump(c, f, prefix);
/* exec_command_append_list(): append command e at the end of list *l.
 * NOTE(review): handling of an empty list is not visible in this abridged
 * listing. */
2604 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2611 /* It's kind of important, that we keep the order here */
/* Locate the current tail, then link e right behind it. */
2612 LIST_FIND_TAIL(command, *l, end);
2613 LIST_INSERT_AFTER(command, *l, end, e);
/* exec_command_set(): variadic; builds a string vector from path and the
 * following arguments with strv_new_ap().
 * NOTE(review): what is then stored in c, the va_list handling and the
 * return values are not visible in this abridged listing. */
2618 int exec_command_set(ExecCommand *c, const char *path, ...) {
2626 l = strv_new_ap(path, ap);
/* exec_command_append(): variadic; builds a string vector from path and the
 * following arguments and appends its entries to c->argv.
 * NOTE(review): va_list handling, error checks and return values are not
 * visible in this abridged listing. */
2647 int exec_command_append(ExecCommand *c, const char *path, ...) {
/* Temporary vector, freed automatically on scope exit. */
2648 _cleanup_strv_free_ char **l = NULL;
2656 l = strv_new_ap(path, ap);
2662 r = strv_extend_strv(&c->argv, l);
/* exec_runtime_allocate(): allocate a zero-initialized ExecRuntime into *rt
 * and mark both netns storage socket fds as unset (-1).
 * NOTE(review): abridged listing - any early exit for an already allocated
 * *rt, the allocation check and the initial reference count are not visible. */
2670 static int exec_runtime_allocate(ExecRuntime **rt) {
2675 *rt = new0(ExecRuntime, 1);
2680 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* exec_runtime_make(): set up the runtime objects required by context c:
 * a socket pair for storing the network namespace when private_network is
 * set, and temporary directories when private_tmp is set.
 * NOTE(review): abridged listing - return values and error checks are not
 * visible. */
2685 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
/* Neither feature requested (the action taken is not visible here). */
2695 if (!c->private_network && !c->private_tmp)
2698 r = exec_runtime_allocate(rt);
/* Create each resource only if it does not exist yet. */
2702 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2703 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
2707 if (c->private_tmp && !(*rt)->tmp_dir) {
2708 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* exec_runtime_ref(): reference-count helper for ExecRuntime; requires a
 * positive reference count on entry.
 * NOTE(review): the counter update and the return statement are not visible
 * in this abridged listing. */
2716 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2718 assert(r->n_ref > 0);
/* exec_runtime_unref(): reference-count helper for ExecRuntime; once the
 * count has reached zero the object's resources are released.
 * NOTE(review): abridged listing - the NULL check, the counter decrement,
 * the remaining frees and the return value are not visible. */
2724 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2729 assert(r->n_ref > 0);
2732 if (r->n_ref <= 0) {
2734 free(r->var_tmp_dir);
/* Close both ends of the netns storage socket pair. */
2735 safe_close_pair(r->netns_storage_socket);
/* exec_runtime_serialize(): write the state of rt for unit u to f under the
 * keys "tmp-dir", "var-tmp-dir", "netns-socket-0" and "netns-socket-1".
 * The keys match those parsed by exec_runtime_deserialize_item().
 * NOTE(review): abridged listing - guards and error checks are only partly
 * visible. */
2742 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2751 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2753 if (rt->var_tmp_dir)
2754 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
/* Sockets are serialized by duplicating the fd into the fd set and writing
 * the number of the duplicate. */
2756 if (rt->netns_storage_socket[0] >= 0) {
2759 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2763 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2766 if (rt->netns_storage_socket[1] >= 0) {
2769 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2773 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* exec_runtime_deserialize_item(): apply one serialized key/value pair to
 * *rt, allocating the runtime via exec_runtime_allocate() for every
 * recognized key. Recognized keys: "tmp-dir", "var-tmp-dir",
 * "netns-socket-0", "netns-socket-1".
 * NOTE(review): abridged listing - error checks, the else branches and the
 * return values are not visible. */
2779 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2786 if (streq(key, "tmp-dir")) {
2789 r = exec_runtime_allocate(rt);
/* Duplicate the value first, then replace any previous string. */
2793 copy = strdup(value);
2797 free((*rt)->tmp_dir);
2798 (*rt)->tmp_dir = copy;
2800 } else if (streq(key, "var-tmp-dir")) {
2803 r = exec_runtime_allocate(rt);
2807 copy = strdup(value);
2811 free((*rt)->var_tmp_dir);
2812 (*rt)->var_tmp_dir = copy;
2814 } else if (streq(key, "netns-socket-0")) {
2817 r = exec_runtime_allocate(rt);
/* The value must parse as an int naming an fd contained in the fd set;
 * otherwise a debug message is logged. */
2821 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2822 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
/* Close any previous socket and take the fd out of the set. */
2824 safe_close((*rt)->netns_storage_socket[0]);
2825 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2827 } else if (streq(key, "netns-socket-1")) {
2830 r = exec_runtime_allocate(rt);
2834 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2835 log_unit_debug(u->id, "Failed to parse netns socket value %s", value);
2837 safe_close((*rt)->netns_storage_socket[1]);
2838 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* remove_tmpdir_thread(): job body handed to asynchronous_job(); removes the
 * directory tree named by p. The path string is freed when the function
 * returns (_cleanup_free_), i.e. the function consumes p. */
2846 static void *remove_tmpdir_thread(void *p) {
2847 _cleanup_free_ char *path = p;
2849 rm_rf_dangerous(path, false, true, false);
/* exec_runtime_destroy(): remove the temporary directories of rt through
 * asynchronous jobs and close its netns storage socket pair.
 * NOTE(review): abridged listing - the guards, the failure checks and the
 * handling of rt->tmp_dir are only partly visible. */
2853 void exec_runtime_destroy(ExecRuntime *rt) {
2859 /* If there are multiple users of this, let's leave the stuff around */
2864 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
/* The job function remove_tmpdir_thread() frees the path string it is given. */
2866 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2868 log_warning_errno(r, "Failed to nuke %s: %m", rt->tmp_dir);
2875 if (rt->var_tmp_dir) {
2876 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2878 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
/* Presumably the failure path: the job did not start, so the string is
 * freed here instead - the condition itself is not visible. */
2880 log_warning_errno(r, "Failed to nuke %s: %m", rt->var_tmp_dir);
2881 free(rt->var_tmp_dir);
/* Clear the pointer in either case. */
2884 rt->var_tmp_dir = NULL;
2887 safe_close_pair(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by enum value.
 * DEFINE_STRING_TABLE_LOOKUP presumably generates the string conversion
 * helpers for this table (exec_input_to_string() is used by
 * exec_context_dump()); the macro definition is not part of this file chunk. */
2890 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2891 [EXEC_INPUT_NULL] = "null",
2892 [EXEC_INPUT_TTY] = "tty",
2893 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2894 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2895 [EXEC_INPUT_SOCKET] = "socket"
2898 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by enum value; the "+console"
 * suffix marks the *_AND_CONSOLE variants.
 * DEFINE_STRING_TABLE_LOOKUP presumably generates the string conversion
 * helpers for this table (exec_output_to_string() is used by
 * exec_context_dump()); the macro definition is not part of this file chunk. */
2900 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2901 [EXEC_OUTPUT_INHERIT] = "inherit",
2902 [EXEC_OUTPUT_NULL] = "null",
2903 [EXEC_OUTPUT_TTY] = "tty",
2904 [EXEC_OUTPUT_SYSLOG] = "syslog",
2905 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2906 [EXEC_OUTPUT_KMSG] = "kmsg",
2907 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2908 [EXEC_OUTPUT_JOURNAL] = "journal",
2909 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2910 [EXEC_OUTPUT_SOCKET] = "socket"
2913 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);