X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fexecute.c;h=2039861782f21c91ef95fc35800f9f58e2459395;hp=cf71f232b421635c7705d019c2b3b788bef47f70;hb=9ba353983adc026b75a503c1381f6e5c8062f3e0;hpb=6398320759ce4ed84922bb28f715d3c6c66166c4 diff --git a/src/execute.c b/src/execute.c index cf71f232b..203986178 100644 --- a/src/execute.c +++ b/src/execute.c @@ -1,4 +1,4 @@ -/*-*- Mode: C; c-basic-offset: 8 -*-*/ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ /*** This file is part of systemd. @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef HAVE_PAM #include @@ -51,6 +52,11 @@ #include "cgroup.h" #include "namespace.h" #include "tcpwrap.h" +#include "exit-status.h" +#include "missing.h" +#include "utmp-wtmp.h" +#include "def.h" +#include "loopback-setup.h" /* This assumes there is a 'tty' group */ #define TTY_MODE 0620 @@ -135,6 +141,19 @@ static const char *tty_path(const ExecContext *context) { return "/dev/console"; } +void exec_context_tty_reset(const ExecContext *context) { + assert(context); + + if (context->tty_vhangup) + terminal_vhangup(tty_path(context)); + + if (context->tty_reset) + reset_terminal(tty_path(context)); + + if (context->tty_vt_disallocate && context->tty_path) + vt_disallocate(context->tty_path); +} + static int open_null_as(int flags, int nfd) { int fd, r; @@ -169,9 +188,9 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons zero(sa); sa.sa.sa_family = AF_UNIX; - strncpy(sa.un.sun_path+1, LOGGER_SOCKET, sizeof(sa.un.sun_path)-1); + strncpy(sa.un.sun_path, STDOUT_SYSLOG_BRIDGE_SOCKET, sizeof(sa.un.sun_path)); - if (connect(fd, &sa.sa, sizeof(sa)) < 0) { + if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + sizeof(STDOUT_SYSLOG_BRIDGE_SOCKET) - 1) < 0) { close_nointr_nofail(fd); return -errno; } @@ -194,10 +213,13 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons "%i\n" "%s\n" "%i\n", - output == EXEC_OUTPUT_KMSG ? "kmsg" : "syslog", + output == EXEC_OUTPUT_KMSG ? "kmsg" : + output == EXEC_OUTPUT_KMSG_AND_CONSOLE ? "kmsg+console" : + output == EXEC_OUTPUT_SYSLOG ? "syslog" : + "syslog+console", context->syslog_priority, context->syslog_identifier ? context->syslog_identifier : ident, - !context->syslog_no_prefix); + context->syslog_level_prefix); if (fd != nfd) { r = dup2(fd, nfd) < 0 ? -errno : nfd; @@ -232,7 +254,10 @@ static bool is_terminal_input(ExecInput i) { i == EXEC_INPUT_TTY_FAIL; } -static int fixup_input(ExecInput std_input, int socket_fd) { +static int fixup_input(ExecInput std_input, int socket_fd, bool apply_tty_stdin) { + + if (is_terminal_input(std_input) && !apply_tty_stdin) + return EXEC_INPUT_NULL; if (std_input == EXEC_INPUT_SOCKET && socket_fd < 0) return EXEC_INPUT_NULL; @@ -248,12 +273,12 @@ static int fixup_output(ExecOutput std_output, int socket_fd) { return std_output; } -static int setup_input(const ExecContext *context, int socket_fd) { +static int setup_input(const ExecContext *context, int socket_fd, bool apply_tty_stdin) { ExecInput i; assert(context); - i = fixup_input(context->std_input, socket_fd); + i = fixup_input(context->std_input, socket_fd, apply_tty_stdin); switch (i) { @@ -289,14 +314,14 @@ static int setup_input(const ExecContext *context, int socket_fd) { } } -static int setup_output(const ExecContext *context, int socket_fd, const char *ident) { +static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) { ExecOutput o; ExecInput i; assert(context); assert(ident); - i = fixup_input(context->std_input, socket_fd); + i = fixup_input(context->std_input, socket_fd, apply_tty_stdin); o = fixup_output(context->std_output, socket_fd); /* This expects the input is already set up */ @@ -305,16 +330,20 @@ static int setup_output(const ExecContext *context, int socket_fd, const char *i case EXEC_OUTPUT_INHERIT: - /* If the input is connected to a terminal, inherit that... */ + /* If input got downgraded, inherit the original value */ + if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input)) + return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO); + + /* If the input is connected to anything that's not a /dev/null, inherit that... */ if (i != EXEC_INPUT_NULL) return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO; - /* For PID 1 stdout is always connected to /dev/null, - * hence reopen the console if out parent is PID1. */ - if (getppid() == 1) - return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO); + /* If we are not started from PID 1 we just inherit STDOUT from our parent process. */ + if (getppid() != 1) + return STDOUT_FILENO; - return STDOUT_FILENO; + /* We need to open /dev/null here anew, to get the + * right access mode. So we fall through */ case EXEC_OUTPUT_NULL: return open_null_as(O_WRONLY, STDOUT_FILENO); @@ -327,7 +356,9 @@ static int setup_output(const ExecContext *context, int socket_fd, const char *i return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO); case EXEC_OUTPUT_SYSLOG: + case EXEC_OUTPUT_SYSLOG_AND_CONSOLE: case EXEC_OUTPUT_KMSG: + case EXEC_OUTPUT_KMSG_AND_CONSOLE: return connect_logger_as(context, o, ident, STDOUT_FILENO); case EXEC_OUTPUT_SOCKET: @@ -339,14 +370,14 @@ static int setup_output(const ExecContext *context, int socket_fd, const char *i } } -static int setup_error(const ExecContext *context, int socket_fd, const char *ident) { +static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) { ExecOutput o, e; ExecInput i; assert(context); assert(ident); - i = fixup_input(context->std_input, socket_fd); + i = fixup_input(context->std_input, socket_fd, apply_tty_stdin); o = fixup_output(context->std_output, socket_fd); e = fixup_output(context->std_error, socket_fd); @@ -356,11 +387,12 @@ static int setup_error(const ExecContext *context, int socket_fd, const char *id * the way and are not on a tty */ if (e == EXEC_OUTPUT_INHERIT && o == EXEC_OUTPUT_INHERIT && - i != EXEC_INPUT_NULL && + i == EXEC_INPUT_NULL && + !is_terminal_input(context->std_input) && getppid () != 1) return STDERR_FILENO; - /* Duplicate form stdout if possible */ + /* Duplicate from stdout if possible */ if (e == o || e == EXEC_OUTPUT_INHERIT) return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO; @@ -377,7 +409,9 @@ static int setup_error(const ExecContext *context, int socket_fd, const char *id return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO); case EXEC_OUTPUT_SYSLOG: + case EXEC_OUTPUT_SYSLOG_AND_CONSOLE: case EXEC_OUTPUT_KMSG: + case EXEC_OUTPUT_KMSG_AND_CONSOLE: return connect_logger_as(context, e, ident, STDERR_FILENO); case EXEC_OUTPUT_SOCKET: @@ -516,96 +550,23 @@ static int restore_confirm_stdio(const ExecContext *context, return 0; } -static int get_group_creds(const char *groupname, gid_t *gid) { - struct group *g; - unsigned long lu; - - assert(groupname); - assert(gid); - - /* We enforce some special rules for gid=0: in order to avoid - * NSS lookups for root we hardcode its data. */ - - if (streq(groupname, "root") || streq(groupname, "0")) { - *gid = 0; - return 0; - } - - if (safe_atolu(groupname, &lu) >= 0) { - errno = 0; - g = getgrgid((gid_t) lu); - } else { - errno = 0; - g = getgrnam(groupname); - } - - if (!g) - return errno != 0 ? -errno : -ESRCH; - - *gid = g->gr_gid; - return 0; -} - -static int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home) { - struct passwd *p; - unsigned long lu; - - assert(username); - assert(*username); - assert(uid); - assert(gid); - assert(home); - - /* We enforce some special rules for uid=0: in order to avoid - * NSS lookups for root we hardcode its data. */ - - if (streq(*username, "root") || streq(*username, "0")) { - *username = "root"; - *uid = 0; - *gid = 0; - *home = "/root"; - return 0; - } - - if (safe_atolu(*username, &lu) >= 0) { - errno = 0; - p = getpwuid((uid_t) lu); - - /* If there are multiple users with the same id, make - * sure to leave $USER to the configured value instead - * of the first occurence in the database. However if - * the uid was configured by a numeric uid, then let's - * pick the real username from /etc/passwd. */ - if (*username && p) - *username = p->pw_name; - } else { - errno = 0; - p = getpwnam(*username); - } - - if (!p) - return errno != 0 ? -errno : -ESRCH; - - *uid = p->pw_uid; - *gid = p->pw_gid; - *home = p->pw_dir; - return 0; -} - static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) { bool keep_groups = false; int r; assert(context); - /* Lookup and ser GID and supplementary group list. Here too + /* Lookup and set GID and supplementary group list. Here too * we avoid NSS lookups for gid=0. */ if (context->group || username) { - if (context->group) - if ((r = get_group_creds(context->group, &gid)) < 0) + if (context->group) { + const char *g = context->group; + + if ((r = get_group_creds(&g, &gid)) < 0) return r; + } /* First step, initialize groups from /etc/groups */ if (username && gid != 0) { @@ -626,7 +587,7 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ char **i; /* Final step, initialize any manually set supplementary groups */ - ngroups_max = (int) sysconf(_SC_NGROUPS_MAX); + assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0); if (!(gids = new(gid_t, ngroups_max))) return -ENOMEM; @@ -640,13 +601,16 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ k = 0; STRV_FOREACH(i, context->supplementary_groups) { + const char *g; if (k >= ngroups_max) { free(gids); return -E2BIG; } - if ((r = get_group_creds(*i, gids+k)) < 0) { + g = *i; + r = get_group_creds(&g, gids+k); + if (r < 0) { free(gids); return r; } @@ -681,7 +645,7 @@ static int enforce_user(const ExecContext *context, uid_t uid) { /* First step: If we need to keep capabilities but * drop privileges we need to make sure we keep our - * caps, whiel we drop priviliges. */ + * caps, whiel we drop privileges. */ if (uid != 0) { int sb = context->secure_bits|SECURE_KEEP_CAPS; @@ -690,7 +654,7 @@ static int enforce_user(const ExecContext *context, uid_t uid) { return -errno; } - /* Second step: set the capabilites. This will reduce + /* Second step: set the capabilities. This will reduce * the capabilities to the minimum we need. */ if (!(d = cap_dup(context->capabilities))) @@ -752,6 +716,7 @@ static int setup_pam( pam_handle_t *handle = NULL; sigset_t ss, old_ss; int pam_code = PAM_SUCCESS; + int err; char **e = NULL; bool close_session = false; pid_t pam_pid = 0, parent_pid; @@ -761,7 +726,7 @@ static int setup_pam( assert(pam_env); /* We set up PAM in the parent process, then fork. The child - * will then stay around untill killed via PR_GET_PDEATHSIG or + * will then stay around until killed via PR_GET_PDEATHSIG or * systemd via the cgroup logic. It will then remove the PAM * session again. The parent process will exec() the actual * daemon. We do things this way to ensure that the main PID @@ -784,9 +749,6 @@ static int setup_pam( close_session = true; - if ((pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | PAM_SILENT)) != PAM_SUCCESS) - goto fail; - if ((!(e = pam_getenvlist(handle)))) { pam_code = PAM_BUF_ERR; goto fail; @@ -813,7 +775,7 @@ static int setup_pam( /* This string must fit in 10 chars (i.e. the length * of "/sbin/init") */ - rename_process("sd:pam"); + rename_process("sd(PAM)"); /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds are @@ -822,7 +784,7 @@ static int setup_pam( /* Wait until our parent died. This will most likely * not work since the kernel does not allow - * unpriviliged paretns kill their priviliged children + * unprivileged parents kill their privileged children * this way. We rely on the control groups kill logic * to do the rest for us. */ if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) @@ -831,13 +793,20 @@ static int setup_pam( /* Check if our parent process might already have * died? */ if (getppid() == parent_pid) { - if (sigwait(&ss, &sig) < 0) - goto child_finish; + for (;;) { + if (sigwait(&ss, &sig) < 0) { + if (errno == EINTR) + continue; + + goto child_finish; + } - assert(sig == SIGTERM); + assert(sig == SIGTERM); + break; + } } - /* Only if our parent died we'll end the session */ + /* If our parent died we'll end the session */ if (getppid() != parent_pid) if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS) goto child_finish; @@ -853,7 +822,7 @@ static int setup_pam( * cleanups, so forget about the handle here. */ handle = NULL; - /* Unblock SIGSUR1 again in the parent */ + /* Unblock SIGTERM again in the parent */ if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0) goto fail; @@ -861,9 +830,17 @@ static int setup_pam( * might have opened it, but we don't want this fd around. */ closelog(); + *pam_env = e; + e = NULL; + return 0; fail: + if (pam_code != PAM_SUCCESS) + err = -EPERM; /* PAM errors do not map to errno */ + else + err = -errno; + if (handle) { if (close_session) pam_code = pam_close_session(handle, PAM_DATA_SILENT); @@ -875,13 +852,77 @@ fail: closelog(); - if (pam_pid > 1) + if (pam_pid > 1) { kill(pam_pid, SIGTERM); + kill(pam_pid, SIGCONT); + } - return EXIT_PAM; + return err; } #endif +static int do_capability_bounding_set_drop(uint64_t drop) { + unsigned long i; + cap_t old_cap = NULL, new_cap = NULL; + cap_flag_value_t fv; + int r; + + /* If we are run as PID 1 we will lack CAP_SETPCAP by default + * in the effective set (yes, the kernel drops that when + * executing init!), so get it back temporarily so that we can + * call PR_CAPBSET_DROP. */ + + old_cap = cap_get_proc(); + if (!old_cap) + return -errno; + + if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) { + r = -errno; + goto finish; + } + + if (fv != CAP_SET) { + static const cap_value_t v = CAP_SETPCAP; + + new_cap = cap_dup(old_cap); + if (!new_cap) { + r = -errno; + goto finish; + } + + if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) { + r = -errno; + goto finish; + } + + if (cap_set_proc(new_cap) < 0) { + r = -errno; + goto finish; + } + } + + for (i = 0; i <= cap_last_cap(); i++) + if (drop & ((uint64_t) 1ULL << (uint64_t) i)) { + if (prctl(PR_CAPBSET_DROP, i) < 0) { + r = -errno; + goto finish; + } + } + + r = 0; + +finish: + if (new_cap) + cap_free(new_cap); + + if (old_cap) { + cap_set_proc(old_cap); + cap_free(old_cap); + } + + return r; +} + int exec_spawn(ExecCommand *command, char **argv, const ExecContext *context, @@ -889,14 +930,17 @@ int exec_spawn(ExecCommand *command, char **environment, bool apply_permissions, bool apply_chroot, + bool apply_tty_stdin, bool confirm_spawn, CGroupBonding *cgroup_bondings, + CGroupAttribute *cgroup_attributes, pid_t *ret) { pid_t pid; int r; char *line; int socket_fd; + char **files_env = NULL; assert(command); assert(context); @@ -917,21 +961,32 @@ int exec_spawn(ExecCommand *command, } else socket_fd = -1; + if ((r = exec_context_load_environment(context, &files_env)) < 0) { + log_error("Failed to load environment files: %s", strerror(-r)); + return r; + } + if (!argv) argv = command->argv; - if (!(line = exec_command_line(argv))) - return -ENOMEM; + if (!(line = exec_command_line(argv))) { + r = -ENOMEM; + goto fail_parent; + } log_debug("About to execute: %s", line); free(line); - if (cgroup_bondings) - if ((r = cgroup_bonding_realize_list(cgroup_bondings))) - return r; + r = cgroup_bonding_realize_list(cgroup_bondings); + if (r < 0) + goto fail_parent; - if ((pid = fork()) < 0) - return -errno; + cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings); + + if ((pid = fork()) < 0) { + r = -errno; + goto fail_parent; + } if (pid == 0) { int i; @@ -939,7 +994,7 @@ int exec_spawn(ExecCommand *command, const char *username = NULL, *home = NULL; uid_t uid = (uid_t) -1; gid_t gid = (gid_t) -1; - char **our_env = NULL, **pam_env = NULL, **final_env = NULL; + char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL; unsigned n_env = 0; int saved_stdout = -1, saved_stdin = -1; bool keep_stdout = false, keep_stdin = false; @@ -948,7 +1003,7 @@ int exec_spawn(ExecCommand *command, /* This string must fit in 10 chars (i.e. the length * of "/sbin/init") */ - rename_process("sd:exec"); + rename_process("sd(EXEC)"); /* We reset exactly these signals, since they are the * only ones we set to SIG_IGN in the main daemon. All @@ -961,33 +1016,56 @@ int exec_spawn(ExecCommand *command, if (sigemptyset(&ss) < 0 || sigprocmask(SIG_SETMASK, &ss, NULL) < 0) { r = EXIT_SIGNAL_MASK; - goto fail; + goto fail_child; } - if (!context->no_setsid) + /* Close sockets very early to make sure we don't + * block init reexecution because it cannot bind its + * sockets */ + log_forget_fds(); + if (close_all_fds(socket_fd >= 0 ? &socket_fd : fds, + socket_fd >= 0 ? 1 : n_fds) < 0) { + r = EXIT_FDS; + goto fail_child; + } + + if (!context->same_pgrp) if (setsid() < 0) { r = EXIT_SETSID; - goto fail; + goto fail_child; } - if (socket_fd >= 0 && context->tcpwrap_name) - if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) { - r = EXIT_TCPWRAP; - goto fail; + if (context->tcpwrap_name) { + if (socket_fd >= 0) + if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) { + r = EXIT_TCPWRAP; + goto fail_child; + } + + for (i = 0; i < (int) n_fds; i++) { + if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) { + r = EXIT_TCPWRAP; + goto fail_child; + } } + } - if (confirm_spawn) { + exec_context_tty_reset(context); + + /* We skip the confirmation step if we shall not apply the TTY */ + if (confirm_spawn && + (!is_terminal_input(context->std_input) || apply_tty_stdin)) { char response; /* Set up terminal for the question */ if ((r = setup_confirm_stdio(context, &saved_stdin, &saved_stdout))) - goto fail; + goto fail_child; /* Now ask the question. */ if (!(line = exec_command_line(argv))) { r = EXIT_MEMORY; - goto fail; + goto fail_child; } r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line); @@ -995,58 +1073,76 @@ int exec_spawn(ExecCommand *command, if (r < 0 || response == 'n') { r = EXIT_CONFIRM; - goto fail; + goto fail_child; } else if (response == 's') { r = 0; - goto fail; + goto fail_child; } /* Release terminal for the question */ if ((r = restore_confirm_stdio(context, &saved_stdin, &saved_stdout, &keep_stdin, &keep_stdout))) - goto fail; + goto fail_child; } + /* If a socket is connected to STDIN/STDOUT/STDERR, we + * must sure to drop O_NONBLOCK */ + if (socket_fd >= 0) + fd_nonblock(socket_fd, false); + if (!keep_stdin) - if (setup_input(context, socket_fd) < 0) { + if (setup_input(context, socket_fd, apply_tty_stdin) < 0) { r = EXIT_STDIN; - goto fail; + goto fail_child; } if (!keep_stdout) - if (setup_output(context, socket_fd, file_name_from_path(command->path)) < 0) { + if (setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin) < 0) { r = EXIT_STDOUT; - goto fail; + goto fail_child; } - if (setup_error(context, socket_fd, file_name_from_path(command->path)) < 0) { + if (setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin) < 0) { r = EXIT_STDERR; - goto fail; + goto fail_child; } if (cgroup_bondings) - if ((r = cgroup_bonding_install_list(cgroup_bondings, 0)) < 0) { + if (cgroup_bonding_install_list(cgroup_bondings, 0) < 0) { r = EXIT_CGROUP; - goto fail; + goto fail_child; } - if (context->oom_adjust_set) { + if (context->oom_score_adjust_set) { char t[16]; - snprintf(t, sizeof(t), "%i", context->oom_adjust); + snprintf(t, sizeof(t), "%i", context->oom_score_adjust); char_array_0(t); - if (write_one_line_file("/proc/self/oom_adj", t) < 0) { - r = EXIT_OOM_ADJUST; - goto fail; + if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) { + /* Compatibility with Linux <= 2.6.35 */ + + int adj; + + adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX; + adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX); + + snprintf(t, sizeof(t), "%i", adj); + char_array_0(t); + + if (write_one_line_file("/proc/self/oom_adj", t) < 0 + && errno != EACCES) { + r = EXIT_OOM_ADJUST; + goto fail_child; + } } } if (context->nice_set) if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) { r = EXIT_NICE; - goto fail; + goto fail_child; } if (context->cpu_sched_set) { @@ -1058,68 +1154,76 @@ int exec_spawn(ExecCommand *command, if (sched_setscheduler(0, context->cpu_sched_policy | (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) { r = EXIT_SETSCHEDULER; - goto fail; + goto fail_child; } } - if (context->cpu_affinity_set) - if (sched_setaffinity(0, sizeof(context->cpu_affinity), &context->cpu_affinity) < 0) { + if (context->cpuset) + if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) { r = EXIT_CPUAFFINITY; - goto fail; + goto fail_child; } if (context->ioprio_set) if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) { r = EXIT_IOPRIO; - goto fail; + goto fail_child; } - if (context->timer_slack_ns_set) - if (prctl(PR_SET_TIMERSLACK, context->timer_slack_ns_set) < 0) { + if (context->timer_slack_nsec_set) + if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) { r = EXIT_TIMERSLACK; - goto fail; + goto fail_child; } + if (context->utmp_id) + utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path); + if (context->user) { username = context->user; if (get_user_creds(&username, &uid, &gid, &home) < 0) { r = EXIT_USER; - goto fail; + goto fail_child; } if (is_terminal_input(context->std_input)) if (chown_terminal(STDIN_FILENO, uid) < 0) { r = EXIT_STDIN; - goto fail; + goto fail_child; + } + + if (cgroup_bondings && context->control_group_modify) + if (cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid) < 0 || + cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid) < 0) { + r = EXIT_CGROUP; + goto fail_child; } } -#ifdef HAVE_PAM - if (context->pam_name && username) { - /* Make sure no fds leak into the PAM - * supervisor process. We will call this later - * on again to make sure that any fds leaked - * by the PAM modules get closed before our - * exec(). */ - if (close_all_fds(fds, n_fds) < 0) { - r = EXIT_FDS; - goto fail; + if (apply_permissions) + if (enforce_groups(context, username, gid) < 0) { + r = EXIT_GROUP; + goto fail_child; } - if (setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds) < 0) { + umask(context->umask); + +#ifdef HAVE_PAM + if (context->pam_name && username) { + if (setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds) != 0) { r = EXIT_PAM; - goto fail; + goto fail_child; } } #endif - - if (apply_permissions) - if (enforce_groups(context, username, uid) < 0) { - r = EXIT_GROUP; - goto fail; + if (context->private_network) { + if (unshare(CLONE_NEWNET) < 0) { + r = EXIT_NETWORK; + goto fail_child; } - umask(context->umask); + loopback_setup(); + } if (strv_length(context->read_write_dirs) > 0 || strv_length(context->read_only_dirs) > 0 || @@ -1132,18 +1236,18 @@ int exec_spawn(ExecCommand *command, context->inaccessible_dirs, context->private_tmp, context->mount_flags)) < 0) - goto fail; + goto fail_child; if (apply_chroot) { if (context->root_directory) if (chroot(context->root_directory) < 0) { r = EXIT_CHROOT; - goto fail; + goto fail_child; } if (chdir(context->working_directory ? context->working_directory : "/") < 0) { r = EXIT_CHDIR; - goto fail; + goto fail_child; } } else { @@ -1153,23 +1257,25 @@ int exec_spawn(ExecCommand *command, context->root_directory ? context->root_directory : "", context->working_directory ? context->working_directory : "") < 0) { r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (chdir(d) < 0) { free(d); r = EXIT_CHDIR; - goto fail; + goto fail_child; } free(d); } + /* We repeat the fd closing here, to make sure that + * nothing is leaked from the PAM modules */ if (close_all_fds(fds, n_fds) < 0 || shift_fds(fds, n_fds) < 0 || flags_fds(fds, n_fds, context->non_blocking) < 0) { r = EXIT_FDS; - goto fail; + goto fail_child; } if (apply_permissions) { @@ -1180,78 +1286,102 @@ int exec_spawn(ExecCommand *command, if (setrlimit(i, context->rlimit[i]) < 0) { r = EXIT_LIMITS; - goto fail; + goto fail_child; } } + if (context->capability_bounding_set_drop) + if (do_capability_bounding_set_drop(context->capability_bounding_set_drop) < 0) { + r = EXIT_CAPABILITIES; + goto fail_child; + } + if (context->user) if (enforce_user(context, uid) < 0) { r = EXIT_USER; - goto fail; + goto fail_child; } - /* PR_GET_SECUREBITS is not priviliged, while + /* PR_GET_SECUREBITS is not privileged, while * PR_SET_SECUREBITS is. So to suppress * potential EPERMs we'll try not to call * PR_SET_SECUREBITS unless necessary. */ if (prctl(PR_GET_SECUREBITS) != context->secure_bits) if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) { r = EXIT_SECUREBITS; - goto fail; + goto fail_child; } if (context->capabilities) if (cap_set_proc(context->capabilities) < 0) { r = EXIT_CAPABILITIES; - goto fail; + goto fail_child; } } - if (!(our_env = new0(char*, 6))) { + if (!(our_env = new0(char*, 7))) { r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (n_fds > 0) if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 || asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) { r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (home) if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) { r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (username) if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 || asprintf(our_env + n_env++, "USER=%s", username) < 0) { r = EXIT_MEMORY; - goto fail; + goto fail_child; } - assert(n_env <= 6); + if (is_terminal_input(context->std_input) || + context->std_output == EXEC_OUTPUT_TTY || + context->std_error == EXEC_OUTPUT_TTY) + if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) { + r = EXIT_MEMORY; + goto fail_child; + } + + assert(n_env <= 7); if (!(final_env = strv_env_merge( - 4, + 5, environment, our_env, context->environment, + files_env, pam_env, NULL))) { r = EXIT_MEMORY; - goto fail; + goto fail_child; + } + + if (!(final_argv = replace_env_argv(argv, final_env))) { + r = EXIT_MEMORY; + goto fail_child; } - execve(command->path, argv, final_env); + final_env = strv_env_clean(final_env); + + execve(command->path, final_argv, final_env); r = EXIT_EXEC; - fail: + fail_child: strv_free(our_env); strv_free(final_env); strv_free(pam_env); + strv_free(files_env); + strv_free(final_argv); if (saved_stdin >= 0) close_nointr_nofail(saved_stdin); @@ -1262,6 +1392,8 @@ int exec_spawn(ExecCommand *command, _exit(r); } + strv_free(files_env); + /* We add the new process to the cgroup both in the child (so * that we can be sure that no user code is ever executed * outside of the cgroup) and in the parent (so that we can be @@ -1272,21 +1404,28 @@ int exec_spawn(ExecCommand *command, log_debug("Forked %s as %lu", command->path, (unsigned long) pid); - command->exec_status.pid = pid; - dual_timestamp_get(&command->exec_status.start_timestamp); + exec_status_start(&command->exec_status, pid); *ret = pid; return 0; + +fail_parent: + strv_free(files_env); + + return r; } void exec_context_init(ExecContext *c) { assert(c); - c->umask = 0002; + c->umask = 0022; c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0); c->cpu_sched_policy = SCHED_OTHER; c->syslog_priority = LOG_DAEMON|LOG_INFO; + c->syslog_level_prefix = true; c->mount_flags = MS_SHARED; + c->kill_signal = SIGTERM; + c->send_sigkill = true; } void exec_context_done(ExecContext *c) { @@ -1297,6 +1436,9 @@ void exec_context_done(ExecContext *c) { strv_free(c->environment); c->environment = NULL; + strv_free(c->environment_files); + c->environment_files = NULL; + for (l = 0; l < ELEMENTSOF(c->rlimit); l++) { free(c->rlimit[l]); c->rlimit[l] = NULL; @@ -1341,6 +1483,12 @@ void exec_context_done(ExecContext *c) { strv_free(c->inaccessible_dirs); c->inaccessible_dirs = NULL; + + if (c->cpuset) + CPU_FREE(c->cpuset); + + free(c->utmp_id); + c->utmp_id = NULL; } void exec_command_done(ExecCommand *c) { @@ -1379,6 +1527,64 @@ void exec_command_free_array(ExecCommand **c, unsigned n) { } } +int exec_context_load_environment(const ExecContext *c, char ***l) { + char **i, **r = NULL; + + assert(c); + assert(l); + + STRV_FOREACH(i, c->environment_files) { + char *fn; + int k; + bool ignore = false; + char **p; + + fn = *i; + + if (fn[0] == '-') { + ignore = true; + fn ++; + } + + if (!path_is_absolute(fn)) { + + if (ignore) + continue; + + strv_free(r); + return -EINVAL; + } + + if ((k = load_env_file(fn, &p)) < 0) { + + if (ignore) + continue; + + strv_free(r); + return k; + } + + if (r == NULL) + r = p; + else { + char **m; + + m = strv_env_merge(2, r, p); + strv_free(r); + strv_free(p); + + if (!m) + return -ENOMEM; + + r = m; + } + } + + *l = r; + + return 0; +} + static void strv_fprintf(FILE *f, char **l) { char **g; @@ -1403,16 +1609,22 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sWorkingDirectory: %s\n" "%sRootDirectory: %s\n" "%sNonBlocking: %s\n" - "%sPrivateTmp: %s\n", + "%sPrivateTmp: %s\n" + "%sControlGroupModify: %s\n" + "%sPrivateNetwork: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", prefix, c->root_directory ? c->root_directory : "/", prefix, yes_no(c->non_blocking), - prefix, yes_no(c->private_tmp)); + prefix, yes_no(c->private_tmp), + prefix, yes_no(c->control_group_modify), + prefix, yes_no(c->private_network)); - if (c->environment) - for (e = c->environment; *e; e++) - fprintf(f, "%sEnvironment: %s\n", prefix, *e); + STRV_FOREACH(e, c->environment) + fprintf(f, "%sEnvironment: %s\n", prefix, *e); + + STRV_FOREACH(e, c->environment_files) + fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e); if (c->tcpwrap_name) fprintf(f, @@ -1424,10 +1636,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sNice: %i\n", prefix, c->nice); - if (c->oom_adjust_set) + if (c->oom_score_adjust_set) fprintf(f, - "%sOOMAdjust: %i\n", - prefix, c->oom_adjust); + "%sOOMScoreAdjust: %i\n", + prefix, c->oom_score_adjust); for (i = 0; i < RLIM_NLIMITS; i++) if (c->rlimit[i]) @@ -1449,16 +1661,16 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, c->cpu_sched_priority, prefix, yes_no(c->cpu_sched_reset_on_fork)); - if (c->cpu_affinity_set) { + if (c->cpuset) { fprintf(f, "%sCPUAffinity:", prefix); - for (i = 0; i < CPU_SETSIZE; i++) - if (CPU_ISSET(i, &c->cpu_affinity)) + for (i = 0; i < c->cpuset_ncpus; i++) + if (CPU_ISSET_S(i, CPU_ALLOC_SIZE(c->cpuset_ncpus), c->cpuset)) fprintf(f, " %i", i); fputs("\n", f); } - if (c->timer_slack_ns_set) - fprintf(f, "%sTimerSlackNS: %lu\n", prefix, c->timer_slack_ns); + if (c->timer_slack_nsec_set) + fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec); fprintf(f, "%sStandardInput: %s\n" @@ -1470,15 +1682,23 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->tty_path) fprintf(f, - "%sTTYPath: %s\n", - prefix, c->tty_path); + "%sTTYPath: %s\n" + "%sTTYReset: %s\n" + "%sTTYVHangup: %s\n" + "%sTTYVTDisallocate: %s\n", + prefix, c->tty_path, + prefix, yes_no(c->tty_reset), + prefix, yes_no(c->tty_vhangup), + prefix, yes_no(c->tty_vt_disallocate)); if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || - c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG) + c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || + c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || + c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE) fprintf(f, "%sSyslogFacility: %s\n" "%sSyslogLevel: %s\n", - prefix, log_facility_to_string(LOG_FAC(c->syslog_priority)), + prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3), prefix, log_level_to_string(LOG_PRI(c->syslog_priority))); if (c->capabilities) { @@ -1501,15 +1721,16 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : ""); if (c->capability_bounding_set_drop) { - fprintf(f, "%sCapabilityBoundingSetDrop:", prefix); + unsigned long l; + fprintf(f, "%sCapabilityBoundingSet:", prefix); - for (i = 0; i <= CAP_LAST_CAP; i++) - if (c->capability_bounding_set_drop & (1 << i)) { + for (l = 0; l <= cap_last_cap(); l++) + if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) { char *t; - if ((t = cap_to_name(i))) { + if ((t = cap_to_name(l))) { fprintf(f, " %s", t); - free(t); + cap_free(t); } } @@ -1547,16 +1768,48 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { strv_fprintf(f, c->inaccessible_dirs); fputs("\n", f); } + + fprintf(f, + "%sKillMode: %s\n" + "%sKillSignal: SIG%s\n" + "%sSendSIGKILL: %s\n", + prefix, kill_mode_to_string(c->kill_mode), + prefix, signal_to_string(c->kill_signal), + prefix, yes_no(c->send_sigkill)); + + if (c->utmp_id) + fprintf(f, + "%sUtmpIdentifier: %s\n", + prefix, c->utmp_id); +} + +void exec_status_start(ExecStatus *s, pid_t pid) { + assert(s); + + zero(*s); + s->pid = pid; + dual_timestamp_get(&s->start_timestamp); } -void exec_status_fill(ExecStatus *s, pid_t pid, int code, int status) { +void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) { assert(s); + if ((s->pid && s->pid != pid) || + !s->start_timestamp.realtime <= 0) + zero(*s); + s->pid = pid; dual_timestamp_get(&s->exit_timestamp); s->code = code; s->status = status; + + if (context) { + if (context->utmp_id) + utmp_put_dead_process(context->utmp_id, pid, code, status); + + exec_context_tty_reset(context); + } } void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) { @@ -1673,7 +1926,7 @@ void exec_command_append_list(ExecCommand **l, ExecCommand *e) { assert(e); if (*l) { - /* It's kinda important that we keep the order here */ + /* It's kind of important, that we keep the order here */ LIST_FIND_TAIL(ExecCommand, command, *l, end); LIST_INSERT_AFTER(ExecCommand, command, *l, end, e); } else @@ -1708,117 +1961,6 @@ int exec_command_set(ExecCommand *c, const char *path, ...) { return 0; } -const char* exit_status_to_string(ExitStatus status) { - - /* We cast to int here, so that -Wenum doesn't complain that - * EXIT_SUCCESS/EXIT_FAILURE aren't in the enum */ - - switch ((int) status) { - - case EXIT_SUCCESS: - return "SUCCESS"; - - case EXIT_FAILURE: - return "FAILURE"; - - case EXIT_INVALIDARGUMENT: - return "INVALIDARGUMENT"; - - case EXIT_NOTIMPLEMENTED: - return "NOTIMPLEMENTED"; - - case EXIT_NOPERMISSION: - return "NOPERMISSION"; - - case EXIT_NOTINSTALLED: - return "NOTINSSTALLED"; - - case EXIT_NOTCONFIGURED: - return "NOTCONFIGURED"; - - case EXIT_NOTRUNNING: - return "NOTRUNNING"; - - case EXIT_CHDIR: - return "CHDIR"; - - case EXIT_NICE: - return "NICE"; - - case EXIT_FDS: - return "FDS"; - - case EXIT_EXEC: - return "EXEC"; - - case EXIT_MEMORY: - return "MEMORY"; - - case EXIT_LIMITS: - return "LIMITS"; - - case EXIT_OOM_ADJUST: - return "OOM_ADJUST"; - - case EXIT_SIGNAL_MASK: - return "SIGNAL_MASK"; - - case EXIT_STDIN: - return "STDIN"; - - case EXIT_STDOUT: - return "STDOUT"; - - case EXIT_CHROOT: - return "CHROOT"; - - case EXIT_IOPRIO: - return "IOPRIO"; - - case EXIT_TIMERSLACK: - return "TIMERSLACK"; - - case EXIT_SECUREBITS: - return "SECUREBITS"; - - case EXIT_SETSCHEDULER: - return "SETSCHEDULER"; - - case EXIT_CPUAFFINITY: - return "CPUAFFINITY"; - - case EXIT_GROUP: - return "GROUP"; - - case EXIT_USER: - return "USER"; - - case EXIT_CAPABILITIES: - return "CAPABILITIES"; - - case EXIT_CGROUP: - return "CGROUP"; - - case EXIT_SETSID: - return "SETSID"; - - case EXIT_CONFIRM: - return "CONFIRM"; - - case EXIT_STDERR: - return "STDERR"; - - case EXIT_TCPWRAP: - return "TCPWRAP"; - - case EXIT_PAM: - return "PAM"; - - default: - return NULL; - } -} - static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_NULL] = "null", [EXEC_INPUT_TTY] = "tty", @@ -1827,15 +1969,33 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_SOCKET] = "socket" }; +DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput); + static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = { [EXEC_OUTPUT_INHERIT] = "inherit", [EXEC_OUTPUT_NULL] = "null", [EXEC_OUTPUT_TTY] = "tty", [EXEC_OUTPUT_SYSLOG] = "syslog", + [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console", [EXEC_OUTPUT_KMSG] = "kmsg", + [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console", [EXEC_OUTPUT_SOCKET] = "socket" }; DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput); -DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput); +static const char* const kill_mode_table[_KILL_MODE_MAX] = { + [KILL_CONTROL_GROUP] = "control-group", + [KILL_PROCESS] = "process", + [KILL_NONE] = "none" +}; + +DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode); + +static const char* const kill_who_table[_KILL_WHO_MAX] = { + [KILL_MAIN] = "main", + [KILL_CONTROL] = "control", + [KILL_ALL] = "all" +}; + +DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);