1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
29 #include <sys/socket.h>
31 #include <sys/prctl.h>
32 #include <linux/sched.h>
33 #include <sys/types.h>
37 #include <sys/mount.h>
39 #include <linux/oom.h>
42 #include <sys/personality.h>
47 #include <security/pam_appl.h>
51 #include <selinux/selinux.h>
59 #include <sys/apparmor.h>
65 #include "capability.h"
68 #include "sd-messages.h"
70 #include "securebits.h"
71 #include "namespace.h"
72 #include "exit-status.h"
74 #include "utmp-wtmp.h"
76 #include "path-util.h"
81 #include "selinux-util.h"
82 #include "errno-list.h"
85 #include "apparmor-util.h"
88 #include "seccomp-util.h"
91 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
92 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
94 /* This assumes there is a 'tty' group */
97 #define SNDBUF_SIZE (8*1024*1024)
/* Rearranges the passed file descriptors so that fds[i] ends up as fd number
 * i+3. Each entry is duplicated with fcntl(F_DUPFD, i+3); when the duplicate
 * did not land on the wanted number, the first such index is remembered and
 * the pass is started again from there. The array is modified in place. */
99 static int shift_fds(int fds[], unsigned n_fds) {
100 int start, restart_from;
105 /* Modifies the fds array! (sorts it) */
115 for (i = start; i < (int) n_fds; i++) {
118 /* Already at right index? */
122 if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
128 /* Hmm, the fd we wanted isn't free? Then
129 * let's remember that and try again from here */
130 if (nfd != i+3 && restart_from < 0)
134 if (restart_from < 0)
137 start = restart_from;
/* Prepares the n_fds descriptors in fds[] for being handed to the executed
 * program: O_NONBLOCK is set or cleared on each one according to `nonblock`
 * (fd_nonblock()), and FD_CLOEXEC is always cleared (fd_cloexec(..., false)). */
143 static int flags_fds(const int fds[], unsigned n_fds, bool nonblock) {
152 /* Drops/Sets O_NONBLOCK and FD_CLOEXEC from the file flags */
154 for (i = 0; i < n_fds; i++) {
156 if ((r = fd_nonblock(fds[i], nonblock)) < 0)
159 /* We unconditionally drop FD_CLOEXEC from the fds,
160 * since after all we want to pass these fds to our
163 if ((r = fd_cloexec(fds[i], false)) < 0)
/* Returns the TTY path configured in the context, falling back to
 * "/dev/console" when none is set. */
170 _pure_ static const char *tty_path(const ExecContext *context) {
173 if (context->tty_path)
174 return context->tty_path;
176 return "/dev/console";
/* Runs the TTY clean-up steps that are enabled in the context: vhangup and
 * terminal reset operate on tty_path() (i.e. may fall back to /dev/console),
 * while VT disallocation is only done when an explicit tty_path is set. */
179 static void exec_context_tty_reset(const ExecContext *context) {
182 if (context->tty_vhangup)
183 terminal_vhangup(tty_path(context));
185 if (context->tty_reset)
186 reset_terminal(tty_path(context));
188 if (context->tty_vt_disallocate && context->tty_path)
189 vt_disallocate(context->tty_path);
/* Tests whether an output mode involves the terminal: plain TTY output or
 * one of the syslog/kmsg/journal modes that additionally write to the console. */
192 static bool is_terminal_output(ExecOutput o) {
194 o == EXEC_OUTPUT_TTY ||
195 o == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
196 o == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
197 o == EXEC_OUTPUT_JOURNAL_AND_CONSOLE;
/* Opens /dev/null with the given open(2) flags (O_NOCTTY is always added)
 * and duplicates it onto fd number nfd. The computed result is nfd on
 * success or -errno when dup2() fails. */
200 static int open_null_as(int flags, int nfd) {
205 fd = open("/dev/null", flags|O_NOCTTY);
210 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Connects fd number nfd to the journal's stdout stream socket
 * (/run/systemd/journal/stdout): a SOCK_STREAM AF_UNIX socket is created and
 * connected, its read side is shut down, its send buffer is enlarged to
 * SNDBUF_SIZE, and it is finally dup2()ed onto nfd.
 *
 * The identifier (context->syslog_identifier, or `ident` if unset), the
 * syslog priority, the level-prefix flag and the syslog/kmsg/console
 * forwarding flags derived from `output` are passed along as well.
 * NOTE(review): the call that consumes these arguments is not visible here;
 * presumably they form the stream header written to the socket - confirm. */
218 static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
220 union sockaddr_union sa = {
221 .un.sun_family = AF_UNIX,
222 .un.sun_path = "/run/systemd/journal/stdout",
226 assert(output < _EXEC_OUTPUT_MAX);
230 fd = socket(AF_UNIX, SOCK_STREAM, 0);
234 r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
240 if (shutdown(fd, SHUT_RD) < 0) {
245 fd_inc_sndbuf(fd, SNDBUF_SIZE);
255 context->syslog_identifier ? context->syslog_identifier : ident,
257 context->syslog_priority,
258 !!context->syslog_level_prefix,
259 output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
260 output == EXEC_OUTPUT_KMSG || output == EXEC_OUTPUT_KMSG_AND_CONSOLE,
261 is_terminal_output(output));
264 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Opens the terminal at `path` with the given mode (O_NOCTTY is always
 * added) and duplicates it onto fd number nfd. The computed result is nfd on
 * success or -errno when dup2() fails. */
271 static int open_terminal_as(const char *path, mode_t mode, int nfd) {
277 if ((fd = open_terminal(path, mode | O_NOCTTY)) < 0)
281 r = dup2(fd, nfd) < 0 ? -errno : nfd;
/* Tests whether an input mode reads from a terminal (any of the three
 * EXEC_INPUT_TTY variants). */
289 static bool is_terminal_input(ExecInput i) {
291 i == EXEC_INPUT_TTY ||
292 i == EXEC_INPUT_TTY_FORCE ||
293 i == EXEC_INPUT_TTY_FAIL;
/* Maps the configured input mode to the one that can actually be used:
 * terminal input is downgraded to EXEC_INPUT_NULL when apply_tty_stdin is
 * false, and socket input is downgraded to EXEC_INPUT_NULL when no socket fd
 * is available (socket_fd < 0). */
296 static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) {
298 if (is_terminal_input(std_input) && !apply_tty_stdin)
299 return EXEC_INPUT_NULL;
301 if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0)
302 return EXEC_INPUT_NULL;
/* Maps the configured output mode to the one that can actually be used:
 * socket output without a socket fd (socket_fd < 0) becomes
 * EXEC_OUTPUT_INHERIT. */
307 static int fixup_output(ExecOutput std_output, int socket_fd) {
309 if (std_output == EXEC_OUTPUT_SOCKET && socket_fd < 0)
310 return EXEC_OUTPUT_INHERIT;
/* Sets up STDIN_FILENO according to the (fixed-up) input mode of the context:
 *   - EXEC_INPUT_NULL:   /dev/null opened read-only
 *   - TTY modes:         the terminal from tty_path() is acquired (the FAIL
 *                        and FORCE variants select the matching
 *                        acquire_terminal() flags) and moved to stdin if it
 *                        is not already there
 *   - EXEC_INPUT_SOCKET: socket_fd is duplicated onto stdin
 * The dup2() based paths yield STDIN_FILENO on success and -errno on failure. */
315 static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) {
320 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
324 case EXEC_INPUT_NULL:
325 return open_null_as(O_RDONLY, STDIN_FILENO);
328 case EXEC_INPUT_TTY_FORCE:
329 case EXEC_INPUT_TTY_FAIL: {
332 fd = acquire_terminal(tty_path(context),
333 i == EXEC_INPUT_TTY_FAIL,
334 i == EXEC_INPUT_TTY_FORCE,
340 if (fd != STDIN_FILENO) {
341 r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
349 case EXEC_INPUT_SOCKET:
350 return dup2(socket_fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
353 assert_not_reached("Unknown input type");
/* Sets up stdout or stderr (selected by `fileno`) according to the context.
 * stdin - and stdout, when stderr is being configured - must already be set
 * up, since several paths simply duplicate those descriptors.
 *
 * For stderr: it is duplicated from stdout when it uses the same mode as
 * stdout or is set to inherit. For an inheriting stdout: the terminal is
 * reopened if terminal input was downgraded to null, stdin is duplicated if
 * it is connected to something other than /dev/null, and otherwise /dev/null
 * is opened write-only. The remaining modes open /dev/null, the terminal
 * (or duplicate stdin when stdin is a terminal), the journal stream via
 * connect_logger_as(), or duplicate socket_fd.
 *
 * NOTE(review): in the logging case the LOG_CRIT message and the
 * open_null_as() fallback presumably only run when connect_logger_as()
 * failed; the guarding condition is not visible here - confirm. */
357 static int setup_output(const ExecContext *context, int fileno, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
365 i = fixup_input(context->std_input, socket_fd, apply_tty_stdin);
366 o = fixup_output(context->std_output, socket_fd);
368 if (fileno == STDERR_FILENO) {
370 e = fixup_output(context->std_error, socket_fd);
372 /* This expects the input and output are already set up */
374 /* Don't change the stderr file descriptor if we inherit all
375 * the way and are not on a tty */
376 if (e == EXEC_OUTPUT_INHERIT &&
377 o == EXEC_OUTPUT_INHERIT &&
378 i == EXEC_INPUT_NULL &&
379 !is_terminal_input(context->std_input) &&
383 /* Duplicate from stdout if possible */
384 if (e == o || e == EXEC_OUTPUT_INHERIT)
385 return dup2(STDOUT_FILENO, fileno) < 0 ? -errno : fileno;
389 } else if (o == EXEC_OUTPUT_INHERIT) {
390 /* If input got downgraded, inherit the original value */
391 if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input))
392 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
394 /* If the input is connected to anything that's not a /dev/null, inherit that... */
395 if (i != EXEC_INPUT_NULL)
396 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
398 /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */
402 /* We need to open /dev/null here anew, to get the right access mode. */
403 return open_null_as(O_WRONLY, fileno);
408 case EXEC_OUTPUT_NULL:
409 return open_null_as(O_WRONLY, fileno);
411 case EXEC_OUTPUT_TTY:
412 if (is_terminal_input(i))
413 return dup2(STDIN_FILENO, fileno) < 0 ? -errno : fileno;
415 /* We don't reset the terminal if this is just about output */
416 return open_terminal_as(tty_path(context), O_WRONLY, fileno);
418 case EXEC_OUTPUT_SYSLOG:
419 case EXEC_OUTPUT_SYSLOG_AND_CONSOLE:
420 case EXEC_OUTPUT_KMSG:
421 case EXEC_OUTPUT_KMSG_AND_CONSOLE:
422 case EXEC_OUTPUT_JOURNAL:
423 case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
424 r = connect_logger_as(context, o, ident, unit_id, fileno);
426 log_struct_unit(LOG_CRIT, unit_id,
427 "MESSAGE=Failed to connect std%s of %s to the journal socket: %s",
428 fileno == STDOUT_FILENO ? "out" : "err",
429 unit_id, strerror(-r),
432 r = open_null_as(O_WRONLY, fileno);
436 case EXEC_OUTPUT_SOCKET:
437 assert(socket_fd >= 0);
438 return dup2(socket_fd, fileno) < 0 ? -errno : fileno;
441 assert_not_reached("Unknown error type");
/* Tries to hand the terminal behind `fd` over to `uid` with mode TTY_MODE.
 * The fchown()/fchmod() results are deliberately ignored; instead the file is
 * fstat()ed afterwards and the resulting owner and permission bits are
 * compared against what was requested. */
445 static int chown_terminal(int fd, uid_t uid) {
450 /* This might fail. What matters are the results. */
451 (void) fchown(fd, uid, -1);
452 (void) fchmod(fd, TTY_MODE);
454 if (fstat(fd, &st) < 0)
457 if (st.st_uid != uid || (st.st_mode & 0777) != TTY_MODE)
/* Temporarily redirects stdin and stdout to an acquired terminal so that a
 * confirmation question can be asked. The current stdin/stdout are first
 * duplicated to descriptors >= 3; the terminal is acquired (waiting up to
 * DEFAULT_CONFIRM_USEC), chowned to the current user and dup2()ed onto
 * STDIN_FILENO and STDOUT_FILENO. The saved duplicates are handed back through
 * *_saved_stdin / *_saved_stdout; the trailing safe_close() calls release
 * them again (presumably on the failure path - confirm). */
463 static int setup_confirm_stdio(int *_saved_stdin,
464 int *_saved_stdout) {
465 int fd = -1, saved_stdin, saved_stdout = -1, r;
467 assert(_saved_stdin);
468 assert(_saved_stdout);
470 saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
474 saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
475 if (saved_stdout < 0) {
480 fd = acquire_terminal(
485 DEFAULT_CONFIRM_USEC);
491 r = chown_terminal(fd, getuid());
495 if (dup2(fd, STDIN_FILENO) < 0) {
500 if (dup2(fd, STDOUT_FILENO) < 0) {
508 *_saved_stdin = saved_stdin;
509 *_saved_stdout = saved_stdout;
514 safe_close(saved_stdout);
515 safe_close(saved_stdin);
/* printf()-style helper that writes a formatted message to /dev/console.
 * The console is opened write-only with O_NOCTTY|O_CLOEXEC and closed
 * automatically (_cleanup_close_) when the function returns. */
521 _printf_(1, 2) static int write_confirm_message(const char *format, ...) {
522 _cleanup_close_ int fd = -1;
527 fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC);
531 va_start(ap, format);
532 vdprintf(fd, format, ap);
/* Counterpart of setup_confirm_stdio(): puts the saved descriptors back onto
 * STDIN_FILENO / STDOUT_FILENO (each only if it is valid, i.e. >= 0) and then
 * closes the saved duplicates. */
538 static int restore_confirm_stdio(int *saved_stdin,
544 assert(saved_stdout);
548 if (*saved_stdin >= 0)
549 if (dup2(*saved_stdin, STDIN_FILENO) < 0)
552 if (*saved_stdout >= 0)
553 if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
556 safe_close(*saved_stdin);
557 safe_close(*saved_stdout);
/* Asks interactively whether the command in argv should be executed. stdio
 * is switched to the terminal with setup_confirm_stdio(), the command line is
 * rendered with exec_command_line(), and ask_char() stores one of the
 * characters 'y', 'n' or 's' in *response. stdio is restored afterwards. */
562 static int ask_for_confirmation(char *response, char **argv) {
563 int saved_stdout = -1, saved_stdin = -1, r;
564 _cleanup_free_ char *line = NULL;
566 r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
570 line = exec_command_line(argv);
574 r = ask_char(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
576 restore_confirm_stdio(&saved_stdin, &saved_stdout);
/* Applies the group configuration of the context to the current process.
 * If a group is configured it is resolved with get_group_creds() (overriding
 * the passed gid); for a named user with a non-zero gid the supplementary
 * groups are initialized with initgroups(); then the real, effective and
 * saved gid are set with setresgid(). Finally, any groups listed in
 * context->supplementary_groups are resolved and appended to the current
 * group list (bounded by _SC_NGROUPS_MAX) and installed with setgroups(). */
581 static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
582 bool keep_groups = false;
587 /* Lookup and set GID and supplementary group list. Here too
588 * we avoid NSS lookups for gid=0. */
590 if (context->group || username) {
592 if (context->group) {
593 const char *g = context->group;
595 if ((r = get_group_creds(&g, &gid)) < 0)
599 /* First step, initialize groups from /etc/group */
600 if (username && gid != 0) {
601 if (initgroups(username, gid) < 0)
607 /* Second step, set our gids */
608 if (setresgid(gid, gid, gid) < 0)
612 if (context->supplementary_groups) {
617 /* Final step, initialize any manually set supplementary groups */
618 assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0);
620 if (!(gids = new(gid_t, ngroups_max)))
624 if ((k = getgroups(ngroups_max, gids)) < 0) {
631 STRV_FOREACH(i, context->supplementary_groups) {
634 if (k >= ngroups_max) {
640 r = get_group_creds(&g, gids+k);
649 if (setgroups(k, gids) < 0) {
/* Switches the process to `uid` with setresuid(). When the context carries a
 * capability set, the switch is prepared so that capabilities survive it:
 * the keep-caps secure bit is turned on (only calling PR_SET_SECUREBITS when
 * the current bits differ), and a copy of the configured capabilities -
 * extended by CAP_SETUID and CAP_SETPCAP in the effective and permitted
 * sets - is installed with cap_set_proc() before the uid change. */
660 static int enforce_user(const ExecContext *context, uid_t uid) {
663 /* Sets (but doesn't lookup) the uid and make sure we keep the
664 * capabilities while doing so. */
666 if (context->capabilities) {
667 _cleanup_cap_free_ cap_t d = NULL;
668 static const cap_value_t bits[] = {
669 CAP_SETUID, /* Necessary so that we can run setresuid() below */
670 CAP_SETPCAP /* Necessary so that we can set PR_SET_SECUREBITS later on */
673 /* First step: If we need to keep capabilities but
674 * drop privileges we need to make sure we keep our
675 * caps, while we drop privileges. */
677 int sb = context->secure_bits | 1<<SECURE_KEEP_CAPS;
679 if (prctl(PR_GET_SECUREBITS) != sb)
680 if (prctl(PR_SET_SECUREBITS, sb) < 0)
684 /* Second step: set the capabilities. This will reduce
685 * the capabilities to the minimum we need. */
687 d = cap_dup(context->capabilities);
691 if (cap_set_flag(d, CAP_EFFECTIVE, ELEMENTSOF(bits), bits, CAP_SET) < 0 ||
692 cap_set_flag(d, CAP_PERMITTED, ELEMENTSOF(bits), bits, CAP_SET) < 0)
695 if (cap_set_proc(d) < 0)
699 /* Third step: actually set the uids */
700 if (setresuid(uid, uid, uid) < 0)
703 /* At this point we should have all necessary capabilities but
704 are otherwise a normal user. However, the caps might have gotten
705 corrupted due to the setresuid() so we need clean them up
706 later. This is done outside of this call. */
/* PAM conversation callback used by setup_pam(). Interactive conversations
 * are not supported, so the messages in `msg` are not answered. */
713 static int null_conv(
715 const struct pam_message **msg,
716 struct pam_response **resp,
719 /* We don't support conversations */
/* Opens a PAM session for the service being started and forks a helper
 * process that closes the session again once the service's main process
 * has died.
 *
 * In the calling process: pam_start(), PAM_TTY is set, pam_acct_mgmt() and
 * pam_open_session() are run, and the PAM environment list is fetched with
 * pam_getenvlist(). SIGTERM is blocked around the fork so the signal cannot
 * be lost in the helper, and is unblocked again in the parent afterwards.
 *
 * In the helper ("(sd-pam)"): the passed fds are closed, privileges are
 * dropped to `uid`, PR_SET_PDEATHSIG is armed with SIGTERM, and the process
 * waits in sigwait() as long as its parent is still `parent_pid`. Once the
 * parent is gone the session is closed and the PAM handle is ended.
 *
 * The visible failure path logs "PAM failed", closes the session, ends the
 * PAM handle and sends SIGTERM + SIGCONT to the helper. */
724 static int setup_pam(
730 int fds[], unsigned n_fds) {
732 static const struct pam_conv conv = {
737 pam_handle_t *handle = NULL;
739 int pam_code = PAM_SUCCESS;
742 bool close_session = false;
743 pid_t pam_pid = 0, parent_pid;
750 /* We set up PAM in the parent process, then fork. The child
751 * will then stay around until killed via PR_GET_PDEATHSIG or
752 * systemd via the cgroup logic. It will then remove the PAM
753 * session again. The parent process will exec() the actual
754 * daemon. We do things this way to ensure that the main PID
755 * of the daemon is the one we initially fork()ed. */
757 if (log_get_max_level() < LOG_PRI(LOG_DEBUG))
760 pam_code = pam_start(name, user, &conv, &handle);
761 if (pam_code != PAM_SUCCESS) {
767 pam_code = pam_set_item(handle, PAM_TTY, tty);
768 if (pam_code != PAM_SUCCESS)
772 pam_code = pam_acct_mgmt(handle, flags);
773 if (pam_code != PAM_SUCCESS)
776 pam_code = pam_open_session(handle, flags);
777 if (pam_code != PAM_SUCCESS)
780 close_session = true;
782 e = pam_getenvlist(handle);
784 pam_code = PAM_BUF_ERR;
788 /* Block SIGTERM, so that we know that it won't get lost in
790 if (sigemptyset(&ss) < 0 ||
791 sigaddset(&ss, SIGTERM) < 0 ||
792 sigprocmask(SIG_BLOCK, &ss, &old_ss) < 0)
795 parent_pid = getpid();
805 /* The child's job is to reset the PAM session on
808 /* This string must fit in 10 chars (i.e. the length
809 * of "/sbin/init"), to look pretty in /bin/ps */
810 rename_process("(sd-pam)");
812 /* Make sure we don't keep open the passed fds in this
813 child. We assume that otherwise only those fds are
814 open here that have been opened by PAM. */
815 close_many(fds, n_fds);
817 /* Drop privileges - we don't need any to pam_close_session
818 * and this will make PR_SET_PDEATHSIG work in most cases.
819 * If this fails, ignore the error - but expect sd-pam threads
820 * to fail to exit normally */
/* NOTE(review): the message below formats strerror(-r), but the failing
 * call is setresuid(), which reports its error through errno; no visible
 * line stores that error in r - confirm whether errno/%m was intended. */
821 if (setresuid(uid, uid, uid) < 0)
822 log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(-r));
824 /* Wait until our parent died. This will only work if
825 * the above setresuid() succeeds, otherwise the kernel
826 * will not allow unprivileged parents kill their privileged
827 * children this way. We rely on the control groups kill logic
828 * to do the rest for us. */
829 if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
832 /* Check if our parent process might already have
834 if (getppid() == parent_pid) {
836 if (sigwait(&ss, &sig) < 0) {
843 assert(sig == SIGTERM);
848 /* If our parent died we'll end the session */
849 if (getppid() != parent_pid) {
850 pam_code = pam_close_session(handle, flags);
851 if (pam_code != PAM_SUCCESS)
858 pam_end(handle, pam_code | flags);
862 /* If the child was forked off successfully it will do all the
863 * cleanups, so forget about the handle here. */
866 /* Unblock SIGTERM again in the parent */
867 if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0)
870 /* We close the log explicitly here, since the PAM modules
871 * might have opened it, but we don't want this fd around. */
880 if (pam_code != PAM_SUCCESS) {
881 log_error("PAM failed: %s", pam_strerror(handle, pam_code));
882 err = -EPERM; /* PAM errors do not map to errno */
884 log_error("PAM failed: %m");
890 pam_code = pam_close_session(handle, flags);
892 pam_end(handle, pam_code | flags);
900 kill(pam_pid, SIGTERM);
901 kill(pam_pid, SIGCONT);
/* Renames the current process to a short, parenthesized name derived from
 * `path`, e.g. "(name)". The name is assembled in an 11 byte buffer as
 * '(' + l bytes taken from p + ')' + NUL, so l must not exceed 8; the
 * fallback name "(...)" is used in one visible branch.
 * NOTE(review): how p and l are derived from `path` is not visible here. */
908 static void rename_process_from_path(const char *path) {
909 char process_name[11];
913 /* This resulting string must fit in 10 chars (i.e. the length
914 * of "/sbin/init") to look pretty in /bin/ps */
918 rename_process("(...)");
924 /* The end of the process name is usually more
925 * interesting, since the first bit might just be
931 process_name[0] = '(';
932 memcpy(process_name+1, p, l);
933 process_name[1+l] = ')';
934 process_name[1+l+1] = 0;
936 rename_process(process_name);
/* Builds and loads the system call filter configured in the context.
 *
 * The "negative" action is SCMP_ACT_KILL, or SCMP_ACT_ERRNO(syscall_errno)
 * when an errno is configured. In whitelist mode it is the filter's default
 * action and the listed syscalls are allowed; otherwise the default is
 * SCMP_ACT_ALLOW and the listed syscalls get the negative action.
 *
 * Entries of syscall_archs and syscall_filter are stored with an offset of
 * one (hence the "- 1" when decoding them). Configured architectures are
 * added with seccomp_arch_add(); seccomp_add_secondary_archs() is also
 * called (presumably only when no architectures are configured - confirm).
 * SCMP_FLTATR_CTL_NNP is set to 0 before the filter is loaded, and the
 * filter context is released at the end. */
941 static int apply_seccomp(ExecContext *c) {
942 uint32_t negative_action, action;
943 scmp_filter_ctx *seccomp;
950 negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
952 seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
956 if (c->syscall_archs) {
958 SET_FOREACH(id, c->syscall_archs, i) {
959 r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
967 r = seccomp_add_secondary_archs(seccomp);
972 action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
973 SET_FOREACH(id, c->syscall_filter, i) {
974 r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
979 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
983 r = seccomp_load(seccomp);
986 seccomp_release(seccomp);
/* Builds and loads a seccomp filter that restricts which address families
 * may be used; blocked requests fail with EPROTONOSUPPORT. The filter
 * defaults to SCMP_ACT_ALLOW and matches on the first syscall argument
 * (SCMP_A0). NOTE(review): the filtered syscall itself is not visible here;
 * presumably socket() - confirm.
 *
 * Whitelist mode: the lowest and highest valid family in the set are
 * determined (entries outside 1..af_max()-1 are ignored). With no valid
 * entry everything is blocked; otherwise everything below the first entry,
 * everything above the last entry, and every family in between that is not
 * in the set is blocked.
 *
 * Blacklist mode: one blocking rule is added per family in the set.
 *
 * SCMP_FLTATR_CTL_NNP is set to 0 before the filter is loaded, and the
 * filter context is released at the end. */
990 static int apply_address_families(ExecContext *c) {
991 scmp_filter_ctx *seccomp;
997 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1001 r = seccomp_add_secondary_archs(seccomp);
1005 if (c->address_families_whitelist) {
1006 int af, first = 0, last = 0;
1009 /* If this is a whitelist, we first block the address
1010 * families that are out of range and then everything
1011 * that is not in the set. First, we find the lowest
1012 * and highest address family in the set. */
1014 SET_FOREACH(afp, c->address_families, i) {
1015 af = PTR_TO_INT(afp);
1017 if (af <= 0 || af >= af_max())
1020 if (first == 0 || af < first)
1023 if (last == 0 || af > last)
1027 assert((first == 0) == (last == 0));
1031 /* No entries in the valid range, block everything */
1032 r = seccomp_rule_add(
1034 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1042 /* Block everything below the first entry */
1043 r = seccomp_rule_add(
1045 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1048 SCMP_A0(SCMP_CMP_LT, first));
1052 /* Block everything above the last entry */
1053 r = seccomp_rule_add(
1055 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1058 SCMP_A0(SCMP_CMP_GT, last));
1062 /* Block everything between the first and last
1064 for (af = 1; af < af_max(); af++) {
1066 if (set_contains(c->address_families, INT_TO_PTR(af)))
1069 r = seccomp_rule_add(
1071 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1074 SCMP_A0(SCMP_CMP_EQ, af));
1083 /* If this is a blacklist, then generate one rule for
1084 * each address family that are then combined in OR
1087 SET_FOREACH(af, c->address_families, i) {
1089 r = seccomp_rule_add(
1091 SCMP_ACT_ERRNO(EPROTONOSUPPORT),
1094 SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
1100 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1104 r = seccomp_load(seccomp);
1107 seccomp_release(seccomp);
/* Delays the start of the command until the manager signals that it is idle,
 * or a timeout expires. idle_pipe[1] and idle_pipe[2] are closed right away.
 * The function then waits up to IDLE_TIMEOUT_USEC for POLLHUP on
 * idle_pipe[0]; on timeout a byte is written to idle_pipe[3] to tell the
 * manager we want to continue, followed by a second, shorter wait
 * (IDLE_TIMEOUT2_USEC). All remaining pipe ends are closed at the end. */
1113 static void do_idle_pipe_dance(int idle_pipe[4]) {
1117 safe_close(idle_pipe[1]);
1118 safe_close(idle_pipe[2]);
1120 if (idle_pipe[0] >= 0) {
1123 r = fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
1125 if (idle_pipe[3] >= 0 && r == 0 /* timeout */) {
1126 /* Signal systemd that we are bored and want to continue. */
1127 write(idle_pipe[3], "x", 1);
1129 /* Wait for systemd to react to the signal above. */
1130 fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
1133 safe_close(idle_pipe[0]);
1137 safe_close(idle_pipe[3]);
/* Assembles the environment variables that the manager itself contributes to
 * the executed process, as a NULL-terminated string vector of at most 10
 * slots (including the terminator):
 *   LISTEN_PID / LISTEN_FDS      - socket activation, using getpid() and n_fds
 *   WATCHDOG_PID / WATCHDOG_USEC - only when watchdog_usec > 0
 *   HOME, LOGNAME, USER, SHELL   - from the passed home, username and shell
 *   a default terminal type      - from default_term_for_tty(), when the
 *                                  context uses a TTY for input, output or
 *                                  error
 * NOTE(review): the conditions guarding the LISTEN_*, HOME, LOGNAME/USER and
 * SHELL entries are not visible here. */
1140 static int build_environment(
1141 const ExecContext *c,
1143 usec_t watchdog_usec,
1145 const char *username,
1149 _cleanup_strv_free_ char **our_env = NULL;
1156 our_env = new0(char*, 10);
1161 if (asprintf(&x, "LISTEN_PID="PID_FMT, getpid()) < 0)
1163 our_env[n_env++] = x;
1165 if (asprintf(&x, "LISTEN_FDS=%u", n_fds) < 0)
1167 our_env[n_env++] = x;
1170 if (watchdog_usec > 0) {
1171 if (asprintf(&x, "WATCHDOG_PID="PID_FMT, getpid()) < 0)
1173 our_env[n_env++] = x;
1175 if (asprintf(&x, "WATCHDOG_USEC="USEC_FMT, watchdog_usec) < 0)
1177 our_env[n_env++] = x;
1181 x = strappend("HOME=", home);
1184 our_env[n_env++] = x;
1188 x = strappend("LOGNAME=", username);
1191 our_env[n_env++] = x;
1193 x = strappend("USER=", username);
1196 our_env[n_env++] = x;
1200 x = strappend("SHELL=", shell);
1203 our_env[n_env++] = x;
1206 if (is_terminal_input(c->std_input) ||
1207 c->std_output == EXEC_OUTPUT_TTY ||
1208 c->std_error == EXEC_OUTPUT_TTY ||
1211 x = strdup(default_term_for_tty(tty_path(c)));
1214 our_env[n_env++] = x;
1217 our_env[n_env++] = NULL;
1218 assert(n_env <= 10);
/* Turns the current process into the command described by `command`,
 * `context` and `params`, and finally execve()s it. Whenever a step fails,
 * *error is set to the EXIT_* code identifying that step.
 *
 * Visible steps, in order:
 *   1. Rename the process after the command path, reset signal dispositions
 *      (optionally ignoring SIGPIPE) and the signal mask.
 *   2. Perform the idle pipe handshake, if an idle pipe was passed.
 *   3. Close all fds except socket_fd, the passed fds and the netns storage
 *      sockets of the runtime object.
 *   4. Session handling (unless same_pgrp), TTY reset, and the optional
 *      interactive confirmation ('s' skips, 'n' fails the execution).
 *   5. Set up stdin, stdout and stderr.
 *   6. Attach to the cgroup and apply OOM score adjustment, nice level, CPU
 *      scheduler, CPU affinity, I/O priority, timer slack, personality and
 *      the utmp record.
 *   7. Resolve the user, chown the terminal, grant cgroup access for PAM
 *      sessions and create the runtime directories.
 *   8. Apply groups, umask, PAM, network namespace, mount namespace,
 *      chroot and working directory.
 *   9. Close fds once more (now keeping only the passed fds), shift them
 *      into place and fix up their flags.
 *  10. Apply resource limits, capability bounding set, user switch, secure
 *      bits, capabilities, no_new_privs, address family and syscall
 *      filters, SELinux context and AppArmor profile.
 *  11. Build and merge the environment, expand it in argv, log the command
 *      line at debug level and call execve(). */
1226 static int exec_child(ExecCommand *command,
1227 const ExecContext *context,
1228 const ExecParameters *params,
1229 ExecRuntime *runtime,
1232 int *fds, unsigned n_fds,
1236 _cleanup_strv_free_ char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
1237 const char *username = NULL, *home = NULL, *shell = NULL;
1238 unsigned n_dont_close = 0;
1239 int dont_close[n_fds + 3];
1240 uid_t uid = (uid_t) -1;
1241 gid_t gid = (gid_t) -1;
1249 rename_process_from_path(command->path);
1251 /* We reset exactly these signals, since they are the
1252 * only ones we set to SIG_IGN in the main daemon. All
1253 * others we leave untouched because we set them to
1254 * SIG_DFL or a valid handler initially, both of which
1255 * will be demoted to SIG_DFL. */
1256 default_signals(SIGNALS_CRASH_HANDLER,
1257 SIGNALS_IGNORE, -1);
1259 if (context->ignore_sigpipe)
1260 ignore_signals(SIGPIPE, -1);
1262 err = reset_signal_mask();
1264 *error = EXIT_SIGNAL_MASK;
1268 if (params->idle_pipe)
1269 do_idle_pipe_dance(params->idle_pipe);
1271 /* Close sockets very early to make sure we don't
1272 * block init reexecution because it cannot bind its
1277 dont_close[n_dont_close++] = socket_fd;
1279 memcpy(dont_close + n_dont_close, fds, sizeof(int) * n_fds);
1280 n_dont_close += n_fds;
1283 if (runtime->netns_storage_socket[0] >= 0)
1284 dont_close[n_dont_close++] = runtime->netns_storage_socket[0];
1285 if (runtime->netns_storage_socket[1] >= 0)
1286 dont_close[n_dont_close++] = runtime->netns_storage_socket[1];
1289 err = close_all_fds(dont_close, n_dont_close);
1295 if (!context->same_pgrp)
1297 *error = EXIT_SETSID;
1301 exec_context_tty_reset(context);
1303 if (params->confirm_spawn) {
1306 err = ask_for_confirmation(&response, argv);
1307 if (err == -ETIMEDOUT)
1308 write_confirm_message("Confirmation question timed out, assuming positive response.\n");
1310 write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
1311 else if (response == 's') {
1312 write_confirm_message("Skipping execution.\n");
1313 *error = EXIT_CONFIRM;
1315 } else if (response == 'n') {
1316 write_confirm_message("Failing execution.\n");
1322 /* If a socket is connected to STDIN/STDOUT/STDERR, we
1323 * must be sure to drop O_NONBLOCK */
1325 fd_nonblock(socket_fd, false);
1327 err = setup_input(context, socket_fd, params->apply_tty_stdin);
1329 *error = EXIT_STDIN;
1333 err = setup_output(context, STDOUT_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1335 *error = EXIT_STDOUT;
1339 err = setup_output(context, STDERR_FILENO, socket_fd, basename(command->path), params->unit_id, params->apply_tty_stdin);
1341 *error = EXIT_STDERR;
1345 if (params->cgroup_path) {
1346 err = cg_attach_everywhere(params->cgroup_supported, params->cgroup_path, 0);
1348 *error = EXIT_CGROUP;
1353 if (context->oom_score_adjust_set) {
1356 snprintf(t, sizeof(t), "%i", context->oom_score_adjust);
1359 if (write_string_file("/proc/self/oom_score_adj", t) < 0) {
1360 *error = EXIT_OOM_ADJUST;
1365 if (context->nice_set)
1366 if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) {
1371 if (context->cpu_sched_set) {
1372 struct sched_param param = {
1373 .sched_priority = context->cpu_sched_priority,
1376 err = sched_setscheduler(0,
1377 context->cpu_sched_policy |
1378 (context->cpu_sched_reset_on_fork ?
1379 SCHED_RESET_ON_FORK : 0),
1382 *error = EXIT_SETSCHEDULER;
1387 if (context->cpuset)
1388 if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) {
1389 *error = EXIT_CPUAFFINITY;
1393 if (context->ioprio_set)
1394 if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) {
1395 *error = EXIT_IOPRIO;
1399 if (context->timer_slack_nsec != NSEC_INFINITY)
1400 if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
1401 *error = EXIT_TIMERSLACK;
1405 if (context->personality != 0xffffffffUL)
1406 if (personality(context->personality) < 0) {
1407 *error = EXIT_PERSONALITY;
1411 if (context->utmp_id)
1412 utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
1414 if (context->user) {
1415 username = context->user;
1416 err = get_user_creds(&username, &uid, &gid, &home, &shell);
1422 if (is_terminal_input(context->std_input)) {
1423 err = chown_terminal(STDIN_FILENO, uid);
1425 *error = EXIT_STDIN;
1432 if (params->cgroup_path && context->user && context->pam_name) {
1433 err = cg_set_task_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0644, uid, gid);
1435 *error = EXIT_CGROUP;
1440 err = cg_set_group_access(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, 0755, uid, gid);
1442 *error = EXIT_CGROUP;
1448 if (!strv_isempty(context->runtime_directory) && params->runtime_prefix) {
1451 STRV_FOREACH(rt, context->runtime_directory) {
1452 _cleanup_free_ char *p;
1454 p = strjoin(params->runtime_prefix, "/", *rt, NULL);
1456 *error = EXIT_RUNTIME_DIRECTORY;
1460 err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
1462 *error = EXIT_RUNTIME_DIRECTORY;
1468 if (params->apply_permissions) {
1469 err = enforce_groups(context, username, gid);
1471 *error = EXIT_GROUP;
1476 umask(context->umask);
1479 if (params->apply_permissions && context->pam_name && username) {
1480 err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
1488 if (context->private_network && runtime && runtime->netns_storage_socket[0] >= 0) {
1489 err = setup_netns(runtime->netns_storage_socket);
1491 *error = EXIT_NETWORK;
1496 if (!strv_isempty(context->read_write_dirs) ||
1497 !strv_isempty(context->read_only_dirs) ||
1498 !strv_isempty(context->inaccessible_dirs) ||
1499 context->mount_flags != 0 ||
1500 (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
1501 context->private_devices ||
1502 context->protect_system != PROTECT_SYSTEM_NO ||
1503 context->protect_home != PROTECT_HOME_NO) {
1505 char *tmp = NULL, *var = NULL;
1507 /* The runtime struct only contains the parent
1508 * of the private /tmp, which is
1509 * non-accessible to world users. Inside of it
1510 * there's a /tmp that is sticky, and that's
1511 * the one we want to use here. */
1513 if (context->private_tmp && runtime) {
1514 if (runtime->tmp_dir)
1515 tmp = strappenda(runtime->tmp_dir, "/tmp");
1516 if (runtime->var_tmp_dir)
1517 var = strappenda(runtime->var_tmp_dir, "/tmp");
1520 err = setup_namespace(
1521 context->read_write_dirs,
1522 context->read_only_dirs,
1523 context->inaccessible_dirs,
1527 context->private_devices,
1528 context->protect_home,
1529 context->protect_system,
1530 context->mount_flags);
1532 *error = EXIT_NAMESPACE;
1537 if (params->apply_chroot) {
1538 if (context->root_directory)
1539 if (chroot(context->root_directory) < 0) {
1540 *error = EXIT_CHROOT;
1544 if (chdir(context->working_directory ? context->working_directory : "/") < 0) {
1545 *error = EXIT_CHDIR;
1549 _cleanup_free_ char *d = NULL;
1551 if (asprintf(&d, "%s/%s",
1552 context->root_directory ? context->root_directory : "",
1553 context->working_directory ? context->working_directory : "") < 0) {
1554 *error = EXIT_MEMORY;
1559 *error = EXIT_CHDIR;
1564 /* We repeat the fd closing here, to make sure that
1565 * nothing is leaked from the PAM modules. Note that
1566 * we are more aggressive this time since socket_fd
1567 * and the netns fds we don't need anymore. */
1568 err = close_all_fds(fds, n_fds);
1570 err = shift_fds(fds, n_fds);
1572 err = flags_fds(fds, n_fds, context->non_blocking);
1578 if (params->apply_permissions) {
1580 for (i = 0; i < _RLIMIT_MAX; i++) {
1581 if (!context->rlimit[i])
1584 if (setrlimit_closest(i, context->rlimit[i]) < 0) {
1585 *error = EXIT_LIMITS;
1590 if (context->capability_bounding_set_drop) {
1591 err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
1593 *error = EXIT_CAPABILITIES;
1598 if (context->user) {
1599 err = enforce_user(context, uid);
1606 /* PR_GET_SECUREBITS is not privileged, while
1607 * PR_SET_SECUREBITS is. So to suppress
1608 * potential EPERMs we'll try not to call
1609 * PR_SET_SECUREBITS unless necessary. */
1610 if (prctl(PR_GET_SECUREBITS) != context->secure_bits)
1611 if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) {
1612 *error = EXIT_SECUREBITS;
1616 if (context->capabilities)
1617 if (cap_set_proc(context->capabilities) < 0) {
1618 *error = EXIT_CAPABILITIES;
1622 if (context->no_new_privileges)
1623 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
1624 *error = EXIT_NO_NEW_PRIVILEGES;
1629 if (context->address_families_whitelist ||
1630 !set_isempty(context->address_families)) {
1631 err = apply_address_families(context);
1633 *error = EXIT_ADDRESS_FAMILIES;
1638 if (context->syscall_whitelist ||
1639 !set_isempty(context->syscall_filter) ||
1640 !set_isempty(context->syscall_archs)) {
1641 err = apply_seccomp(context);
1643 *error = EXIT_SECCOMP;
1650 if (context->selinux_context && use_selinux()) {
1651 err = setexeccon(context->selinux_context);
1652 if (err < 0 && !context->selinux_context_ignore) {
1653 *error = EXIT_SELINUX_CONTEXT;
1659 #ifdef HAVE_APPARMOR
1660 if (context->apparmor_profile && use_apparmor()) {
1661 err = aa_change_onexec(context->apparmor_profile);
1662 if (err < 0 && !context->apparmor_profile_ignore) {
1663 *error = EXIT_APPARMOR_PROFILE;
1670 err = build_environment(context, n_fds, params->watchdog_usec, home, username, shell, &our_env);
1672 *error = EXIT_MEMORY;
1676 final_env = strv_env_merge(5,
1677 params->environment,
1679 context->environment,
1684 *error = EXIT_MEMORY;
1688 final_argv = replace_env_argv(argv, final_env);
1690 *error = EXIT_MEMORY;
1694 final_env = strv_env_clean(final_env);
1696 if (_unlikely_(log_get_max_level() >= LOG_PRI(LOG_DEBUG))) {
1697 _cleanup_free_ char *line;
1699 line = exec_command_line(final_argv);
1702 log_struct_unit(LOG_DEBUG,
1704 "EXECUTABLE=%s", command->path,
1705 "MESSAGE=Executing: %s", line,
1710 execve(command->path, final_argv, final_env);
/* Starts `command` as a new process, configured by `context` and `params`.
 *
 * If stdin, stdout or stderr is configured for socket mode, params->fds[0]
 * is used as that socket and params->n_fds is checked against 1; otherwise
 * the passed fds are handed on to the child as they are. Environment files
 * are loaded, the command line (params->argv if set, else command->argv) is
 * logged at debug level, and exec_child() does the actual set-up and exec.
 * If exec_child() fails, the failing step and error are logged with
 * SD_MESSAGE_SPAWN_FAILED.
 *
 * Afterwards the new PID is logged, attached to the cgroup given in
 * params->cgroup_path (if any), and recorded as started in
 * command->exec_status.
 * NOTE(review): the fork() separating child and parent code paths is not
 * visible here - confirm where exec_child() runs. */
1715 int exec_spawn(ExecCommand *command,
1716 const ExecContext *context,
1717 const ExecParameters *params,
1718 ExecRuntime *runtime,
1721 _cleanup_strv_free_ char **files_env = NULL;
1722 int *fds = NULL; unsigned n_fds = 0;
1732 assert(params->fds || params->n_fds <= 0);
1734 if (context->std_input == EXEC_INPUT_SOCKET ||
1735 context->std_output == EXEC_OUTPUT_SOCKET ||
1736 context->std_error == EXEC_OUTPUT_SOCKET) {
1738 if (params->n_fds != 1)
1741 socket_fd = params->fds[0];
1745 n_fds = params->n_fds;
1748 err = exec_context_load_environment(context, &files_env);
1750 log_struct_unit(LOG_ERR,
1752 "MESSAGE=Failed to load environment files: %s", strerror(-err),
1758 argv = params->argv ?: command->argv;
1760 line = exec_command_line(argv);
1764 log_struct_unit(LOG_DEBUG,
1766 "EXECUTABLE=%s", command->path,
1767 "MESSAGE=About to execute: %s", line,
1778 err = exec_child(command,
1789 log_struct(LOG_ERR, MESSAGE_ID(SD_MESSAGE_SPAWN_FAILED),
1790 "EXECUTABLE=%s", command->path,
1791 "MESSAGE=Failed at step %s spawning %s: %s",
1792 exit_status_to_string(r, EXIT_STATUS_SYSTEMD),
1793 command->path, strerror(-err),
1802 log_struct_unit(LOG_DEBUG,
1804 "MESSAGE=Forked %s as "PID_FMT,
1808 /* We add the new process to the cgroup both in the child (so
1809 * that we can be sure that no user code is ever executed
1810 * outside of the cgroup) and in the parent (so that we can be
1811 * sure that when we kill the cgroup the process will be
1813 if (params->cgroup_path)
1814 cg_attach(SYSTEMD_CGROUP_CONTROLLER, params->cgroup_path, pid);
1816 exec_status_start(&command->exec_status, pid);
/* Fills in the non-zero defaults of an execution context: best-effort I/O
 * priority, SCHED_OTHER scheduling, LOG_DAEMON|LOG_INFO syslog priority with
 * level prefix parsing enabled, SIGPIPE ignored, and runtime directory mode
 * 0755. timer_slack_nsec (NSEC_INFINITY) and personality (0xffffffffUL) are
 * set to the sentinel values that exec_child() treats as "not configured". */
1822 void exec_context_init(ExecContext *c) {
1826 c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0);
1827 c->cpu_sched_policy = SCHED_OTHER;
1828 c->syslog_priority = LOG_DAEMON|LOG_INFO;
1829 c->syslog_level_prefix = true;
1830 c->ignore_sigpipe = true;
1831 c->timer_slack_nsec = NSEC_INFINITY;
1832 c->personality = 0xffffffffUL;
1833 c->runtime_directory_mode = 0755;
/* Release the members of the execution context `c`. Every pointer that is
 * freed here is reset to NULL afterwards. */
1836 void exec_context_done(ExecContext *c) {
/* Environment variables and environment file lists */
1841 strv_free(c->environment);
1842 c->environment = NULL;
1844 strv_free(c->environment_files);
1845 c->environment_files = NULL;
/* Per-resource limit entries */
1847 for (l = 0; l < ELEMENTSOF(c->rlimit); l++) {
1849 c->rlimit[l] = NULL;
1852 free(c->working_directory);
1853 c->working_directory = NULL;
1854 free(c->root_directory);
1855 c->root_directory = NULL;
1860 free(c->syslog_identifier);
1861 c->syslog_identifier = NULL;
1869 strv_free(c->supplementary_groups);
1870 c->supplementary_groups = NULL;
/* cap_free() is only invoked on a non-NULL capability handle */
1875 if (c->capabilities) {
1876 cap_free(c->capabilities);
1877 c->capabilities = NULL;
/* Directory access lists */
1880 strv_free(c->read_only_dirs);
1881 c->read_only_dirs = NULL;
1883 strv_free(c->read_write_dirs);
1884 c->read_write_dirs = NULL;
1886 strv_free(c->inaccessible_dirs);
1887 c->inaccessible_dirs = NULL;
1890 CPU_FREE(c->cpuset);
/* Security module settings */
1895 free(c->selinux_context);
1896 c->selinux_context = NULL;
1898 free(c->apparmor_profile);
1899 c->apparmor_profile = NULL;
/* System call filter, architecture and address family sets */
1901 set_free(c->syscall_filter);
1902 c->syscall_filter = NULL;
1904 set_free(c->syscall_archs);
1905 c->syscall_archs = NULL;
1907 set_free(c->address_families);
1908 c->address_families = NULL;
1910 strv_free(c->runtime_directory);
1911 c->runtime_directory = NULL;
1913 bus_endpoint_free(c->bus_endpoint);
1914 c->bus_endpoint = NULL;
/* Remove every directory listed in c->runtime_directory, each one resolved
 * below `runtime_prefix`. The removal is done in the calling thread through
 * rm_rf_dangerous(). */
1917 int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
1922 if (!runtime_prefix)
1925 STRV_FOREACH(i, c->runtime_directory) {
1926 _cleanup_free_ char *p;
/* Full path of this entry: <runtime_prefix>/<entry> */
1928 p = strjoin(runtime_prefix, "/", *i, NULL);
1932 /* We execute this synchronously, since we need to be
1933 * sure this is gone when we start the service
1935 rm_rf_dangerous(p, false, true, false);
/* Finalize a single command `c`. It is the per-entry teardown used by
 * exec_command_done_array() and exec_command_free_list().
 * NOTE(review): presumably releases the members of `c` - confirm against the
 * function body. */
1941 void exec_command_done(ExecCommand *c) {
/* Run exec_command_done() on each of the `n` commands stored contiguously
 * in the array starting at `c`. */
1951 void exec_command_done_array(ExecCommand *c, unsigned n) {
1954 for (i = 0; i < n; i++)
1955 exec_command_done(c+i);
/* Tear down the linked list of commands headed by `c`: an entry `i` is
 * unlinked from the list and then passed to exec_command_done(). */
1958 void exec_command_free_list(ExecCommand *c) {
1962 LIST_REMOVE(command, c, i);
1963 exec_command_done(i);
/* Free each of the `n` command lists held in the array `c` by calling
 * exec_command_free_list() on every slot. */
1968 void exec_command_free_array(ExecCommand **c, unsigned n) {
1971 for (i = 0; i < n; i++) {
1972 exec_command_free_list(c[i]);
/* Load the environment files listed in c->environment_files. Each entry is
 * expanded with glob(); every matching file is read with load_env_file(),
 * cleaned with strv_env_clean_log() and merged into the list accumulated
 * in `r`.
 * NOTE(review): presumably the accumulated list is handed back through `l`
 * and 0 or a negative errno is returned - confirm against the full body. */
1977 int exec_context_load_environment(const ExecContext *c, char ***l) {
1978 char **i, **r = NULL;
1983 STRV_FOREACH(i, c->environment_files) {
1986 bool ignore = false;
/* The glob buffer is released automatically at the end of each iteration */
1988 _cleanup_globfree_ glob_t pglob = {};
1998 if (!path_is_absolute(fn)) {
2006 /* Filename supports globbing, take all matching files */
2008 if (glob(fn, 0, NULL, &pglob) != 0) {
/* Report errno when it is set, otherwise fall back to -EINVAL */
2013 return errno ? -errno : -EINVAL;
2015 count = pglob.gl_pathc;
2023 for (n = 0; n < count; n++) {
2024 k = load_env_file(NULL, pglob.gl_pathv[n], NULL, &p);
2032 /* Log invalid environment variables with filename */
2034 p = strv_env_clean_log(p, pglob.gl_pathv[n]);
/* Combine what was loaded so far with the entries of this file */
2041 m = strv_env_merge(2, r, p);
/* Decide whether the terminal named `tty` may be the same device as the
 * system console. The name is compared against the literal console name and
 * against the device the console currently resolves to. */
2057 static bool tty_may_match_dev_console(const char *tty) {
2058 _cleanup_free_ char *active = NULL;
/* Names may be given with or without the /dev/ prefix */
2061 if (startswith(tty, "/dev/"))
2064 /* trivial identity? */
2065 if (streq(tty, "console"))
2068 console = resolve_dev_console(&active);
2069 /* if we could not resolve, assume it may */
2073 /* "tty0" means the active VC, so it may be the same sometimes */
2074 return streq(console, tty) || (streq(console, "tty0") && tty_is_vc(tty));
/* Returns true when the context `ec` both uses a terminal in some way (TTY
 * reset, vhangup or VT disallocation requested, or a standard stream that
 * is connected to a terminal) and the TTY path of the context may be the
 * system console. */
2077 bool exec_context_may_touch_console(ExecContext *ec) {
2078 return (ec->tty_reset || ec->tty_vhangup || ec->tty_vt_disallocate ||
2079 is_terminal_input(ec->std_input) ||
2080 is_terminal_output(ec->std_output) ||
2081 is_terminal_output(ec->std_error)) &&
2082 tty_may_match_dev_console(tty_path(ec));
/* Print the strings of the string vector `l` to `f`, each one preceded by a
 * single space. No trailing newline is written by the visible code. */
2085 static void strv_fprintf(FILE *f, char **l) {
2091 fprintf(f, " %s", *g);
/* Write a human-readable dump of the execution context `c` to the stream
 * `f`. Every setting is printed on its own line in the form Key: value and
 * every line starts with `prefix`. Optional settings (OOM score, I/O and CPU
 * scheduling, timer slack, capabilities, user/group, security contexts,
 * system call filters, ...) are only printed when they are set. */
2094 void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
/* Normalize the prefix so it can be printed unconditionally
 * (NOTE(review): assumes strempty() maps NULL to an empty string) */
2101 prefix = strempty(prefix);
/* Settings that are always printed; unset directories are shown as / */
2105 "%sWorkingDirectory: %s\n"
2106 "%sRootDirectory: %s\n"
2107 "%sNonBlocking: %s\n"
2108 "%sPrivateTmp: %s\n"
2109 "%sPrivateNetwork: %s\n"
2110 "%sPrivateDevices: %s\n"
2111 "%sProtectHome: %s\n"
2112 "%sProtectSystem: %s\n"
2113 "%sIgnoreSIGPIPE: %s\n",
2115 prefix, c->working_directory ? c->working_directory : "/",
2116 prefix, c->root_directory ? c->root_directory : "/",
2117 prefix, yes_no(c->non_blocking),
2118 prefix, yes_no(c->private_tmp),
2119 prefix, yes_no(c->private_network),
2120 prefix, yes_no(c->private_devices),
2121 prefix, protect_home_to_string(c->protect_home),
2122 prefix, protect_system_to_string(c->protect_system),
2123 prefix, yes_no(c->ignore_sigpipe));
/* One line per environment assignment and per environment file */
2125 STRV_FOREACH(e, c->environment)
2126 fprintf(f, "%sEnvironment: %s\n", prefix, *e);
2128 STRV_FOREACH(e, c->environment_files)
2129 fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e);
2136 if (c->oom_score_adjust_set)
2138 "%sOOMScoreAdjust: %i\n",
2139 prefix, c->oom_score_adjust);
/* Resource limits: the rlim_max value of an entry is what gets printed */
2141 for (i = 0; i < RLIM_NLIMITS; i++)
2143 fprintf(f, "%s%s: "RLIM_FMT"\n",
2144 prefix, rlimit_to_string(i), c->rlimit[i]->rlim_max);
/* I/O scheduling: class name and priority data are both packed in c->ioprio */
2146 if (c->ioprio_set) {
2147 _cleanup_free_ char *class_str = NULL;
2149 ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
2151 "%sIOSchedulingClass: %s\n"
2152 "%sIOPriority: %i\n",
2153 prefix, strna(class_str),
2154 prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
/* CPU scheduling policy, priority and reset-on-fork flag */
2157 if (c->cpu_sched_set) {
2158 _cleanup_free_ char *policy_str = NULL;
2160 sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
2162 "%sCPUSchedulingPolicy: %s\n"
2163 "%sCPUSchedulingPriority: %i\n"
2164 "%sCPUSchedulingResetOnFork: %s\n",
2165 prefix, strna(policy_str),
2166 prefix, c->cpu_sched_priority,
2167 prefix, yes_no(c->cpu_sched_reset_on_fork));
/* CPU affinity: one number per CPU that is contained in the mask */
2171 fprintf(f, "%sCPUAffinity:", prefix);
2172 for (i = 0; i < c->cpuset_ncpus; i++)
2173 if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset))
2174 fprintf(f, " %u", i);
/* NSEC_INFINITY marks the timer slack as not configured */
2178 if (c->timer_slack_nsec != NSEC_INFINITY)
2179 fprintf(f, "%sTimerSlackNSec: "NSEC_FMT "\n", prefix, c->timer_slack_nsec);
2182 "%sStandardInput: %s\n"
2183 "%sStandardOutput: %s\n"
2184 "%sStandardError: %s\n",
2185 prefix, exec_input_to_string(c->std_input),
2186 prefix, exec_output_to_string(c->std_output),
2187 prefix, exec_output_to_string(c->std_error));
2193 "%sTTYVHangup: %s\n"
2194 "%sTTYVTDisallocate: %s\n",
2195 prefix, c->tty_path,
2196 prefix, yes_no(c->tty_reset),
2197 prefix, yes_no(c->tty_vhangup),
2198 prefix, yes_no(c->tty_vt_disallocate));
/* Syslog facility and level are only shown when stdout or stderr is
 * connected to syslog, kmsg or the journal (with or without console) */
2200 if (c->std_output == EXEC_OUTPUT_SYSLOG ||
2201 c->std_output == EXEC_OUTPUT_KMSG ||
2202 c->std_output == EXEC_OUTPUT_JOURNAL ||
2203 c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2204 c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2205 c->std_output == EXEC_OUTPUT_JOURNAL_AND_CONSOLE ||
2206 c->std_error == EXEC_OUTPUT_SYSLOG ||
2207 c->std_error == EXEC_OUTPUT_KMSG ||
2208 c->std_error == EXEC_OUTPUT_JOURNAL ||
2209 c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE ||
2210 c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE ||
2211 c->std_error == EXEC_OUTPUT_JOURNAL_AND_CONSOLE) {
2213 _cleanup_free_ char *fac_str = NULL, *lvl_str = NULL;
/* The priority value carries the facility above its low three level bits */
2215 log_facility_unshifted_to_string_alloc(c->syslog_priority >> 3, &fac_str);
2216 log_level_to_string_alloc(LOG_PRI(c->syslog_priority), &lvl_str);
2219 "%sSyslogFacility: %s\n"
2220 "%sSyslogLevel: %s\n",
2221 prefix, strna(fac_str),
2222 prefix, strna(lvl_str));
2225 if (c->capabilities) {
2226 _cleanup_cap_free_charp_ char *t;
2228 t = cap_to_text(c->capabilities, NULL);
2230 fprintf(f, "%sCapabilities: %s\n", prefix, t);
/* Each secure bit that is set is printed as a flag name. The names are
 * concatenated directly, so every name carries its own leading space. */
2234 fprintf(f, "%sSecure Bits:%s%s%s%s%s%s\n",
2236 (c->secure_bits & 1<<SECURE_KEEP_CAPS) ? " keep-caps" : "",
2237 (c->secure_bits & 1<<SECURE_KEEP_CAPS_LOCKED) ? " keep-caps-locked" : "",
2238 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP) ? " no-setuid-fixup" : "",
2239 (c->secure_bits & 1<<SECURE_NO_SETUID_FIXUP_LOCKED) ? " no-setuid-fixup-locked" : "",
2240 (c->secure_bits & 1<<SECURE_NOROOT) ? " noroot" : "",
2241 (c->secure_bits & 1<<SECURE_NOROOT_LOCKED) ? " noroot-locked" : "");
/* The bounding set is stored as a mask of capabilities to drop; the dump
 * lists the capabilities that are NOT in that mask */
2243 if (c->capability_bounding_set_drop) {
2245 fprintf(f, "%sCapabilityBoundingSet:", prefix);
2247 for (l = 0; l <= cap_last_cap(); l++)
2248 if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) {
2249 _cleanup_cap_free_charp_ char *t;
2253 fprintf(f, " %s", t);
2260 fprintf(f, "%sUser: %s\n", prefix, c->user);
2262 fprintf(f, "%sGroup: %s\n", prefix, c->group);
2264 if (strv_length(c->supplementary_groups) > 0) {
2265 fprintf(f, "%sSupplementaryGroups:", prefix);
2266 strv_fprintf(f, c->supplementary_groups);
2271 fprintf(f, "%sPAMName: %s\n", prefix, c->pam_name);
/* Directory access lists are only printed when they are non-empty */
2273 if (strv_length(c->read_write_dirs) > 0) {
2274 fprintf(f, "%sReadWriteDirs:", prefix);
2275 strv_fprintf(f, c->read_write_dirs);
2279 if (strv_length(c->read_only_dirs) > 0) {
2280 fprintf(f, "%sReadOnlyDirs:", prefix);
2281 strv_fprintf(f, c->read_only_dirs);
2285 if (strv_length(c->inaccessible_dirs) > 0) {
2286 fprintf(f, "%sInaccessibleDirs:", prefix);
2287 strv_fprintf(f, c->inaccessible_dirs);
2293 "%sUtmpIdentifier: %s\n",
2294 prefix, c->utmp_id);
/* A leading - in front of the context marks the ignore flag as set */
2296 if (c->selinux_context)
2298 "%sSELinuxContext: %s%s\n",
2299 prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
/* 0xffffffffUL marks the personality as not configured */
2301 if (c->personality != 0xffffffffUL)
2303 "%sPersonality: %s\n",
2304 prefix, strna(personality_to_string(c->personality)));
2306 if (c->syscall_filter) {
2314 "%sSystemCallFilter: ",
2317 if (!c->syscall_whitelist)
2321 SET_FOREACH(id, c->syscall_filter, j) {
2322 _cleanup_free_ char *name = NULL;
/* The set stores system call numbers offset by one, hence the - 1 */
2329 name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
2330 fputs(strna(name), f);
2337 if (c->syscall_archs) {
2344 "%sSystemCallArchitectures:",
/* Architecture identifiers are stored offset by one as well */
2348 SET_FOREACH(id, c->syscall_archs, j)
2349 fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
2354 if (c->syscall_errno != 0)
2356 "%sSystemCallErrorNumber: %s\n",
2357 prefix, strna(errno_to_name(c->syscall_errno)));
/* Same convention as for the SELinux context: - marks the ignore flag */
2359 if (c->apparmor_profile)
2361 "%sAppArmorProfile: %s%s\n",
2362 prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
/* Mark the start of process `pid` in the status record `s`; the start
 * timestamp of the record is filled in by dual_timestamp_get(). */
2365 void exec_status_start(ExecStatus *s, pid_t pid) {
2370 dual_timestamp_get(&s->start_timestamp);
/* Record the exit of process `pid` (with SIGCHLD `code` and `status`) in
 * the status record `s`. The exit timestamp is filled in, a dead-process
 * utmp record is written when the context has a utmp identifier, and the
 * TTY of the context is reset. */
2373 void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) {
/* The record already tracks a different, non-zero PID */
2376 if (s->pid && s->pid != pid)
2380 dual_timestamp_get(&s->exit_timestamp);
2386 if (context->utmp_id)
2387 utmp_put_dead_process(context->utmp_id, pid, code, status);
2389 exec_context_tty_reset(context);
/* Write a human-readable dump of the status record `s` to `f`, every line
 * starting with `prefix`. Start and exit information is only printed when
 * the corresponding realtime timestamp is non-zero. */
2393 void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) {
/* Scratch buffer for format_timestamp() */
2394 char buf[FORMAT_TIMESTAMP_MAX];
2402 prefix = strempty(prefix);
2405 "%sPID: "PID_FMT"\n",
2408 if (s->start_timestamp.realtime > 0)
2410 "%sStart Timestamp: %s\n",
2411 prefix, format_timestamp(buf, sizeof(buf), s->start_timestamp.realtime));
/* Exit timestamp, exit code and exit status are printed together */
2413 if (s->exit_timestamp.realtime > 0)
2415 "%sExit Timestamp: %s\n"
2417 "%sExit Status: %i\n",
2418 prefix, format_timestamp(buf, sizeof(buf), s->exit_timestamp.realtime),
2419 prefix, sigchld_code_to_string(s->code),
/* Build one printable command line string from the argument vector `argv`.
 * The result is newly allocated; exec_command_dump() releases it through
 * _cleanup_free_ and treats a NULL result as an out-of-memory condition. */
2423 char *exec_command_line(char **argv) {
/* First pass over the arguments, ahead of allocating the `k` byte buffer */
2431 STRV_FOREACH(a, argv)
2434 if (!(n = new(char, k)))
/* Second pass: arguments that contain whitespace are handled separately */
2438 STRV_FOREACH(a, argv) {
2445 if (strpbrk(*a, WHITESPACE)) {
2456 /* FIXME: this doesn't really handle arguments that have
2457 * spaces and ticks in them */
/* Write a human-readable dump of the command `c` to `f`: first its command
 * line, prefixed with `prefix`, then its execution status. */
2462 void exec_command_dump(ExecCommand *c, FILE *f, const char *prefix) {
2463 _cleanup_free_ char *cmd = NULL;
2464 const char *prefix2;
2469 prefix = strempty(prefix);
/* The status lines get the prefix extended by a tab */
2470 prefix2 = strappenda(prefix, "\t");
2472 cmd = exec_command_line(c->argv);
/* A NULL command line is shown as the ENOMEM error text */
2474 "%sCommand Line: %s\n",
2475 prefix, cmd ? cmd : strerror(ENOMEM));
2477 exec_status_dump(&c->exec_status, f, prefix2);
/* Dump every command of the linked list headed by `c` to `f` by calling
 * exec_command_dump() on each entry with the same `prefix`. */
2480 void exec_command_dump_list(ExecCommand *c, FILE *f, const char *prefix) {
2483 prefix = strempty(prefix);
/* The parameter `c` doubles as the iteration cursor */
2485 LIST_FOREACH(command, c, c)
2486 exec_command_dump(c, f, prefix);
/* Append the command `e` to the linked list `*l`. The current tail of the
 * list is looked up and `e` is inserted after it, so existing entries keep
 * their order. */
2489 void exec_command_append_list(ExecCommand **l, ExecCommand *e) {
2496 /* It's kind of important, that we keep the order here */
2497 LIST_FIND_TAIL(command, *l, end);
2498 LIST_INSERT_AFTER(command, *l, end, e);
/* Configure the command `c` from `path` followed by a variable list of
 * further arguments. The arguments are collected into a string vector with
 * strv_new_ap().
 * NOTE(review): presumably the vector becomes the argv of `c` and the
 * variable argument list is NULL-terminated - confirm against the body. */
2503 int exec_command_set(ExecCommand *c, const char *path, ...) {
2511 l = strv_new_ap(path, ap);
/* Allocate a zero-initialized ExecRuntime and store it in `*rt`. Both ends
 * of the netns storage socket pair start out as -1.
 * NOTE(review): exec_runtime_deserialize_item() calls this once per key, so
 * presumably an already allocated `*rt` is left untouched - confirm. */
2532 static int exec_runtime_allocate(ExecRuntime **rt) {
2537 *rt = new0(ExecRuntime, 1);
2542 (*rt)->netns_storage_socket[0] = (*rt)->netns_storage_socket[1] = -1;
/* Prepare the runtime object `*rt` for the context `c`. A runtime object is
 * only relevant when private networking or private temporary directories
 * are enabled. `id` is passed on to setup_tmp_dirs(). */
2547 int exec_runtime_make(ExecRuntime **rt, ExecContext *c, const char *id) {
2557 if (!c->private_network && !c->private_tmp)
2560 r = exec_runtime_allocate(rt);
/* Create the netns storage socket pair only if none exists yet */
2564 if (c->private_network && (*rt)->netns_storage_socket[0] < 0) {
2565 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, (*rt)->netns_storage_socket) < 0)
/* Create the temporary directories only if none were set up yet */
2569 if (c->private_tmp && !(*rt)->tmp_dir) {
2570 r = setup_tmp_dirs(id, &(*rt)->tmp_dir, &(*rt)->var_tmp_dir);
/* Reference-taking counterpart of exec_runtime_unref(). The object must
 * already hold at least one reference.
 * NOTE(review): presumably increments r->n_ref and returns `r` - confirm
 * against the body. */
2578 ExecRuntime *exec_runtime_ref(ExecRuntime *r) {
2580 assert(r->n_ref > 0);
/* Release one reference on the runtime object `r`. Once the reference count
 * is no longer positive, the resources of the object (such as var_tmp_dir
 * and the netns storage socket pair) are released. */
2586 ExecRuntime *exec_runtime_unref(ExecRuntime *r) {
2591 assert(r->n_ref > 0);
2594 if (r->n_ref <= 0) {
2596 free(r->var_tmp_dir);
2597 safe_close_pair(r->netns_storage_socket);
/* Serialize the runtime object `rt` of unit `u` to `f`. The temporary
 * directory paths are written as the items tmp-dir and var-tmp-dir. Each
 * open netns storage socket is duplicated into the fd set `fds` and the
 * number of the duplicate is written as netns-socket-0 or netns-socket-1. */
2604 int exec_runtime_serialize(ExecRuntime *rt, Unit *u, FILE *f, FDSet *fds) {
2613 unit_serialize_item(u, f, "tmp-dir", rt->tmp_dir);
2615 if (rt->var_tmp_dir)
2616 unit_serialize_item(u, f, "var-tmp-dir", rt->var_tmp_dir);
2618 if (rt->netns_storage_socket[0] >= 0) {
/* The fd set receives its own duplicate; the original fd stays with rt */
2621 copy = fdset_put_dup(fds, rt->netns_storage_socket[0]);
2625 unit_serialize_item_format(u, f, "netns-socket-0", "%i", copy);
2628 if (rt->netns_storage_socket[1] >= 0) {
2631 copy = fdset_put_dup(fds, rt->netns_storage_socket[1]);
2635 unit_serialize_item_format(u, f, "netns-socket-1", "%i", copy);
/* Restore one serialized runtime field from a key/value pair. The keys
 * tmp-dir, var-tmp-dir, netns-socket-0 and netns-socket-1 are handled; for
 * each of them the runtime object is obtained through
 * exec_runtime_allocate() before the value is stored. Socket values are fd
 * numbers that have to be contained in the fd set `fds`. */
2641 int exec_runtime_deserialize_item(ExecRuntime **rt, Unit *u, const char *key, const char *value, FDSet *fds) {
2648 if (streq(key, "tmp-dir")) {
2651 r = exec_runtime_allocate(rt);
/* Store a private copy of the path, replacing any previous value */
2655 copy = strdup(value);
2659 free((*rt)->tmp_dir);
2660 (*rt)->tmp_dir = copy;
2662 } else if (streq(key, "var-tmp-dir")) {
2665 r = exec_runtime_allocate(rt);
2669 copy = strdup(value);
2673 free((*rt)->var_tmp_dir);
2674 (*rt)->var_tmp_dir = copy;
2676 } else if (streq(key, "netns-socket-0")) {
2679 r = exec_runtime_allocate(rt);
/* The value must parse as an fd that is part of the fd set; a bad value is
 * logged at debug level */
2683 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2684 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
/* Close any previous socket and take the fd out of the set */
2686 safe_close((*rt)->netns_storage_socket[0]);
2687 (*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
2689 } else if (streq(key, "netns-socket-1")) {
2692 r = exec_runtime_allocate(rt);
2696 if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
2697 log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
2699 safe_close((*rt)->netns_storage_socket[1]);
2700 (*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
/* Job function handed to asynchronous_job() by exec_runtime_destroy(). `p`
 * is the path string of the directory to remove; the string is owned by this
 * function and released automatically through _cleanup_free_. */
2708 static void *remove_tmpdir_thread(void *p) {
2709 _cleanup_free_ char *path = p;
2711 rm_rf_dangerous(path, false, true, false);
/* Destroy the resources tracked by the runtime object `rt`: the temporary
 * directories are removed in the background through asynchronous_job() and
 * the netns storage socket pair is closed. The path strings are handed over
 * to remove_tmpdir_thread(); as visible for var_tmp_dir, the string is freed
 * here after a failure was logged and the pointer is reset to NULL. */
2715 void exec_runtime_destroy(ExecRuntime *rt) {
2721 /* If there are multiple users of this, let's leave the stuff around */
2726 log_debug("Spawning thread to nuke %s", rt->tmp_dir);
2728 r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
2730 log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
2737 if (rt->var_tmp_dir) {
2738 log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
2740 r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
2742 log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
2743 free(rt->var_tmp_dir);
/* The pointer is cleared here in either case */
2746 rt->var_tmp_dir = NULL;
2749 safe_close_pair(rt->netns_storage_socket);
/* Names of the ExecInput values, indexed by the enum constant. */
2752 static const char* const exec_input_table[_EXEC_INPUT_MAX] = {
2753 [EXEC_INPUT_NULL] = "null",
2754 [EXEC_INPUT_TTY] = "tty",
2755 [EXEC_INPUT_TTY_FORCE] = "tty-force",
2756 [EXEC_INPUT_TTY_FAIL] = "tty-fail",
2757 [EXEC_INPUT_SOCKET] = "socket"
/* Generates the lookup helpers for this table, such as
 * exec_input_to_string() which is used by exec_context_dump(). */
2760 DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput);
/* Names of the ExecOutput values, indexed by the enum constant. The
 * combined targets use a + between the two destination names. */
2762 static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = {
2763 [EXEC_OUTPUT_INHERIT] = "inherit",
2764 [EXEC_OUTPUT_NULL] = "null",
2765 [EXEC_OUTPUT_TTY] = "tty",
2766 [EXEC_OUTPUT_SYSLOG] = "syslog",
2767 [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console",
2768 [EXEC_OUTPUT_KMSG] = "kmsg",
2769 [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console",
2770 [EXEC_OUTPUT_JOURNAL] = "journal",
2771 [EXEC_OUTPUT_JOURNAL_AND_CONSOLE] = "journal+console",
2772 [EXEC_OUTPUT_SOCKET] = "socket"
/* Generates the lookup helpers for this table, such as
 * exec_output_to_string() which is used by exec_context_dump(). */
2775 DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);