X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fexecute.c;h=481725d63d2a0acf76ab419e24d4170a96f35689;hp=138d38817fef92945040b64ad66217344c8dd463;hb=4c2630ebf23b6348174f0bdf1110e90efe45259c;hpb=1e3ad081efda42dd1cc737ce7e98be8889c78340 diff --git a/src/execute.c b/src/execute.c index 138d38817..481725d63 100644 --- a/src/execute.c +++ b/src/execute.c @@ -1,4 +1,4 @@ -/*-*- Mode: C; c-basic-offset: 8 -*-*/ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ /*** This file is part of systemd. @@ -36,6 +36,7 @@ #include #include #include +#include #ifdef HAVE_PAM #include @@ -51,6 +52,11 @@ #include "cgroup.h" #include "namespace.h" #include "tcpwrap.h" +#include "exit-status.h" +#include "missing.h" +#include "utmp-wtmp.h" +#include "def.h" +#include "loopback-setup.h" /* This assumes there is a 'tty' group */ #define TTY_MODE 0620 @@ -135,6 +141,19 @@ static const char *tty_path(const ExecContext *context) { return "/dev/console"; } +void exec_context_tty_reset(const ExecContext *context) { + assert(context); + + if (context->tty_vhangup) + terminal_vhangup(tty_path(context)); + + if (context->tty_reset) + reset_terminal(tty_path(context)); + + if (context->tty_vt_disallocate && context->tty_path) + vt_disallocate(context->tty_path); +} + static int open_null_as(int flags, int nfd) { int fd, r; @@ -169,9 +188,9 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons zero(sa); sa.sa.sa_family = AF_UNIX; - strncpy(sa.un.sun_path+1, LOGGER_SOCKET, sizeof(sa.un.sun_path)-1); + strncpy(sa.un.sun_path, STDOUT_SYSLOG_BRIDGE_SOCKET, sizeof(sa.un.sun_path)); - if (connect(fd, &sa.sa, sizeof(sa)) < 0) { + if (connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + sizeof(STDOUT_SYSLOG_BRIDGE_SOCKET) - 1) < 0) { close_nointr_nofail(fd); return -errno; } @@ -194,7 +213,10 @@ static int connect_logger_as(const ExecContext *context, ExecOutput output, cons "%i\n" "%s\n" "%i\n", - output == EXEC_OUTPUT_KMSG ? "kmsg" : "syslog", + output == EXEC_OUTPUT_KMSG ? "kmsg" : + output == EXEC_OUTPUT_KMSG_AND_CONSOLE ? "kmsg+console" : + output == EXEC_OUTPUT_SYSLOG ? "syslog" : + "syslog+console", context->syslog_priority, context->syslog_identifier ? context->syslog_identifier : ident, context->syslog_level_prefix); @@ -308,6 +330,10 @@ static int setup_output(const ExecContext *context, int socket_fd, const char *i case EXEC_OUTPUT_INHERIT: + /* If input got downgraded, inherit the original value */ + if (i == EXEC_INPUT_NULL && is_terminal_input(context->std_input)) + return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO); + /* If the input is connected to anything that's not a /dev/null, inherit that... */ if (i != EXEC_INPUT_NULL) return dup2(STDIN_FILENO, STDOUT_FILENO) < 0 ? -errno : STDOUT_FILENO; @@ -330,7 +356,9 @@ static int setup_output(const ExecContext *context, int socket_fd, const char *i return open_terminal_as(tty_path(context), O_WRONLY, STDOUT_FILENO); case EXEC_OUTPUT_SYSLOG: + case EXEC_OUTPUT_SYSLOG_AND_CONSOLE: case EXEC_OUTPUT_KMSG: + case EXEC_OUTPUT_KMSG_AND_CONSOLE: return connect_logger_as(context, o, ident, STDOUT_FILENO); case EXEC_OUTPUT_SOCKET: @@ -360,10 +388,11 @@ static int setup_error(const ExecContext *context, int socket_fd, const char *id if (e == EXEC_OUTPUT_INHERIT && o == EXEC_OUTPUT_INHERIT && i == EXEC_INPUT_NULL && + !is_terminal_input(context->std_input) && getppid () != 1) return STDERR_FILENO; - /* Duplicate form stdout if possible */ + /* Duplicate from stdout if possible */ if (e == o || e == EXEC_OUTPUT_INHERIT) return dup2(STDOUT_FILENO, STDERR_FILENO) < 0 ? -errno : STDERR_FILENO; @@ -380,7 +409,9 @@ static int setup_error(const ExecContext *context, int socket_fd, const char *id return open_terminal_as(tty_path(context), O_WRONLY, STDERR_FILENO); case EXEC_OUTPUT_SYSLOG: + case EXEC_OUTPUT_SYSLOG_AND_CONSOLE: case EXEC_OUTPUT_KMSG: + case EXEC_OUTPUT_KMSG_AND_CONSOLE: return connect_logger_as(context, e, ident, STDERR_FILENO); case EXEC_OUTPUT_SOCKET: @@ -519,96 +550,23 @@ static int restore_confirm_stdio(const ExecContext *context, return 0; } -static int get_group_creds(const char *groupname, gid_t *gid) { - struct group *g; - unsigned long lu; - - assert(groupname); - assert(gid); - - /* We enforce some special rules for gid=0: in order to avoid - * NSS lookups for root we hardcode its data. */ - - if (streq(groupname, "root") || streq(groupname, "0")) { - *gid = 0; - return 0; - } - - if (safe_atolu(groupname, &lu) >= 0) { - errno = 0; - g = getgrgid((gid_t) lu); - } else { - errno = 0; - g = getgrnam(groupname); - } - - if (!g) - return errno != 0 ? -errno : -ESRCH; - - *gid = g->gr_gid; - return 0; -} - -static int get_user_creds(const char **username, uid_t *uid, gid_t *gid, const char **home) { - struct passwd *p; - unsigned long lu; - - assert(username); - assert(*username); - assert(uid); - assert(gid); - assert(home); - - /* We enforce some special rules for uid=0: in order to avoid - * NSS lookups for root we hardcode its data. */ - - if (streq(*username, "root") || streq(*username, "0")) { - *username = "root"; - *uid = 0; - *gid = 0; - *home = "/root"; - return 0; - } - - if (safe_atolu(*username, &lu) >= 0) { - errno = 0; - p = getpwuid((uid_t) lu); - - /* If there are multiple users with the same id, make - * sure to leave $USER to the configured value instead - * of the first occurence in the database. However if - * the uid was configured by a numeric uid, then let's - * pick the real username from /etc/passwd. */ - if (*username && p) - *username = p->pw_name; - } else { - errno = 0; - p = getpwnam(*username); - } - - if (!p) - return errno != 0 ? -errno : -ESRCH; - - *uid = p->pw_uid; - *gid = p->pw_gid; - *home = p->pw_dir; - return 0; -} - static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) { bool keep_groups = false; int r; assert(context); - /* Lookup and ser GID and supplementary group list. Here too + /* Lookup and set GID and supplementary group list. Here too * we avoid NSS lookups for gid=0. */ if (context->group || username) { - if (context->group) - if ((r = get_group_creds(context->group, &gid)) < 0) + if (context->group) { + const char *g = context->group; + + if ((r = get_group_creds(&g, &gid)) < 0) return r; + } /* First step, initialize groups from /etc/groups */ if (username && gid != 0) { @@ -629,7 +587,7 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ char **i; /* Final step, initialize any manually set supplementary groups */ - ngroups_max = (int) sysconf(_SC_NGROUPS_MAX); + assert_se((ngroups_max = (int) sysconf(_SC_NGROUPS_MAX)) > 0); if (!(gids = new(gid_t, ngroups_max))) return -ENOMEM; @@ -643,13 +601,16 @@ static int enforce_groups(const ExecContext *context, const char *username, gid_ k = 0; STRV_FOREACH(i, context->supplementary_groups) { + const char *g; if (k >= ngroups_max) { free(gids); return -E2BIG; } - if ((r = get_group_creds(*i, gids+k)) < 0) { + g = *i; + r = get_group_creds(&g, gids+k); + if (r < 0) { free(gids); return r; } @@ -684,7 +645,7 @@ static int enforce_user(const ExecContext *context, uid_t uid) { /* First step: If we need to keep capabilities but * drop privileges we need to make sure we keep our - * caps, whiel we drop priviliges. */ + * caps, whiel we drop privileges. */ if (uid != 0) { int sb = context->secure_bits|SECURE_KEEP_CAPS; @@ -693,7 +654,7 @@ static int enforce_user(const ExecContext *context, uid_t uid) { return -errno; } - /* Second step: set the capabilites. This will reduce + /* Second step: set the capabilities. This will reduce * the capabilities to the minimum we need. */ if (!(d = cap_dup(context->capabilities))) @@ -755,6 +716,7 @@ static int setup_pam( pam_handle_t *handle = NULL; sigset_t ss, old_ss; int pam_code = PAM_SUCCESS; + int err; char **e = NULL; bool close_session = false; pid_t pam_pid = 0, parent_pid; @@ -764,7 +726,7 @@ static int setup_pam( assert(pam_env); /* We set up PAM in the parent process, then fork. The child - * will then stay around untill killed via PR_GET_PDEATHSIG or + * will then stay around until killed via PR_GET_PDEATHSIG or * systemd via the cgroup logic. It will then remove the PAM * session again. The parent process will exec() the actual * daemon. We do things this way to ensure that the main PID @@ -787,9 +749,6 @@ static int setup_pam( close_session = true; - if ((pam_code = pam_setcred(handle, PAM_ESTABLISH_CRED | PAM_SILENT)) != PAM_SUCCESS) - goto fail; - if ((!(e = pam_getenvlist(handle)))) { pam_code = PAM_BUF_ERR; goto fail; @@ -816,7 +775,7 @@ static int setup_pam( /* This string must fit in 10 chars (i.e. the length * of "/sbin/init") */ - rename_process("sd:pam"); + rename_process("sd(PAM)"); /* Make sure we don't keep open the passed fds in this child. We assume that otherwise only those fds are @@ -825,7 +784,7 @@ static int setup_pam( /* Wait until our parent died. This will most likely * not work since the kernel does not allow - * unpriviliged paretns kill their priviliged children + * unprivileged parents kill their privileged children * this way. We rely on the control groups kill logic * to do the rest for us. */ if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0) @@ -834,13 +793,20 @@ static int setup_pam( /* Check if our parent process might already have * died? */ if (getppid() == parent_pid) { - if (sigwait(&ss, &sig) < 0) - goto child_finish; + for (;;) { + if (sigwait(&ss, &sig) < 0) { + if (errno == EINTR) + continue; + + goto child_finish; + } - assert(sig == SIGTERM); + assert(sig == SIGTERM); + break; + } } - /* Only if our parent died we'll end the session */ + /* If our parent died we'll end the session */ if (getppid() != parent_pid) if ((pam_code = pam_close_session(handle, PAM_DATA_SILENT)) != PAM_SUCCESS) goto child_finish; @@ -856,7 +822,7 @@ static int setup_pam( * cleanups, so forget about the handle here. */ handle = NULL; - /* Unblock SIGSUR1 again in the parent */ + /* Unblock SIGTERM again in the parent */ if (sigprocmask(SIG_SETMASK, &old_ss, NULL) < 0) goto fail; @@ -864,9 +830,17 @@ static int setup_pam( * might have opened it, but we don't want this fd around. */ closelog(); + *pam_env = e; + e = NULL; + return 0; fail: + if (pam_code != PAM_SUCCESS) + err = -EPERM; /* PAM errors do not map to errno */ + else + err = -errno; + if (handle) { if (close_session) pam_code = pam_close_session(handle, PAM_DATA_SILENT); @@ -878,13 +852,77 @@ fail: closelog(); - if (pam_pid > 1) + if (pam_pid > 1) { kill(pam_pid, SIGTERM); + kill(pam_pid, SIGCONT); + } - return EXIT_PAM; + return err; } #endif +static int do_capability_bounding_set_drop(uint64_t drop) { + unsigned long i; + cap_t old_cap = NULL, new_cap = NULL; + cap_flag_value_t fv; + int r; + + /* If we are run as PID 1 we will lack CAP_SETPCAP by default + * in the effective set (yes, the kernel drops that when + * executing init!), so get it back temporarily so that we can + * call PR_CAPBSET_DROP. */ + + old_cap = cap_get_proc(); + if (!old_cap) + return -errno; + + if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) { + r = -errno; + goto finish; + } + + if (fv != CAP_SET) { + static const cap_value_t v = CAP_SETPCAP; + + new_cap = cap_dup(old_cap); + if (!new_cap) { + r = -errno; + goto finish; + } + + if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) { + r = -errno; + goto finish; + } + + if (cap_set_proc(new_cap) < 0) { + r = -errno; + goto finish; + } + } + + for (i = 0; i <= cap_last_cap(); i++) + if (drop & ((uint64_t) 1ULL << (uint64_t) i)) { + if (prctl(PR_CAPBSET_DROP, i) < 0) { + r = -errno; + goto finish; + } + } + + r = 0; + +finish: + if (new_cap) + cap_free(new_cap); + + if (old_cap) { + cap_set_proc(old_cap); + cap_free(old_cap); + } + + return r; +} + int exec_spawn(ExecCommand *command, char **argv, const ExecContext *context, @@ -895,12 +933,14 @@ int exec_spawn(ExecCommand *command, bool apply_tty_stdin, bool confirm_spawn, CGroupBonding *cgroup_bondings, + CGroupAttribute *cgroup_attributes, pid_t *ret) { pid_t pid; int r; char *line; int socket_fd; + char **files_env = NULL; assert(command); assert(context); @@ -921,29 +961,40 @@ int exec_spawn(ExecCommand *command, } else socket_fd = -1; + if ((r = exec_context_load_environment(context, &files_env)) < 0) { + log_error("Failed to load environment files: %s", strerror(-r)); + return r; + } + if (!argv) argv = command->argv; - if (!(line = exec_command_line(argv))) - return -ENOMEM; + if (!(line = exec_command_line(argv))) { + r = -ENOMEM; + goto fail_parent; + } log_debug("About to execute: %s", line); free(line); - if (cgroup_bondings) - if ((r = cgroup_bonding_realize_list(cgroup_bondings))) - return r; + r = cgroup_bonding_realize_list(cgroup_bondings); + if (r < 0) + goto fail_parent; - if ((pid = fork()) < 0) - return -errno; + cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings); + + if ((pid = fork()) < 0) { + r = -errno; + goto fail_parent; + } if (pid == 0) { - int i; + int i, err; sigset_t ss; const char *username = NULL, *home = NULL; uid_t uid = (uid_t) -1; gid_t gid = (gid_t) -1; - char **our_env = NULL, **pam_env = NULL, **final_env = NULL; + char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL; unsigned n_env = 0; int saved_stdout = -1, saved_stdin = -1; bool keep_stdout = false, keep_stdin = false; @@ -952,7 +1003,7 @@ int exec_spawn(ExecCommand *command, /* This string must fit in 10 chars (i.e. the length * of "/sbin/init") */ - rename_process("sd:exec"); + rename_process("sd(EXEC)"); /* We reset exactly these signals, since they are the * only ones we set to SIG_IGN in the main daemon. All @@ -964,31 +1015,48 @@ int exec_spawn(ExecCommand *command, if (sigemptyset(&ss) < 0 || sigprocmask(SIG_SETMASK, &ss, NULL) < 0) { + err = -errno; r = EXIT_SIGNAL_MASK; - goto fail; + goto fail_child; + } + + /* Close sockets very early to make sure we don't + * block init reexecution because it cannot bind its + * sockets */ + log_forget_fds(); + err = close_all_fds(socket_fd >= 0 ? &socket_fd : fds, + socket_fd >= 0 ? 1 : n_fds); + if (err < 0) { + r = EXIT_FDS; + goto fail_child; } if (!context->same_pgrp) if (setsid() < 0) { + err = -errno; r = EXIT_SETSID; - goto fail; + goto fail_child; } if (context->tcpwrap_name) { if (socket_fd >= 0) if (!socket_tcpwrap(socket_fd, context->tcpwrap_name)) { + err = -EACCES; r = EXIT_TCPWRAP; - goto fail; + goto fail_child; } for (i = 0; i < (int) n_fds; i++) { if (!socket_tcpwrap(fds[i], context->tcpwrap_name)) { + err = -EACCES; r = EXIT_TCPWRAP; - goto fail; + goto fail_child; } } } + exec_context_tty_reset(context); + /* We skip the confirmation step if we shall not apply the TTY */ if (confirm_spawn && (!is_terminal_input(context->std_input) || apply_tty_stdin)) { @@ -996,72 +1064,105 @@ int exec_spawn(ExecCommand *command, /* Set up terminal for the question */ if ((r = setup_confirm_stdio(context, - &saved_stdin, &saved_stdout))) - goto fail; + &saved_stdin, &saved_stdout))) { + err = -errno; + goto fail_child; + } /* Now ask the question. */ if (!(line = exec_command_line(argv))) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line); free(line); if (r < 0 || response == 'n') { + err = -ECANCELED; r = EXIT_CONFIRM; - goto fail; + goto fail_child; } else if (response == 's') { - r = 0; - goto fail; + err = r = 0; + goto fail_child; } /* Release terminal for the question */ if ((r = restore_confirm_stdio(context, &saved_stdin, &saved_stdout, - &keep_stdin, &keep_stdout))) - goto fail; + &keep_stdin, &keep_stdout))) { + err = -errno; + goto fail_child; + } } - if (!keep_stdin) - if (setup_input(context, socket_fd, apply_tty_stdin) < 0) { + /* If a socket is connected to STDIN/STDOUT/STDERR, we + * must sure to drop O_NONBLOCK */ + if (socket_fd >= 0) + fd_nonblock(socket_fd, false); + + if (!keep_stdin) { + err = setup_input(context, socket_fd, apply_tty_stdin); + if (err < 0) { r = EXIT_STDIN; - goto fail; + goto fail_child; } + } - if (!keep_stdout) - if (setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin) < 0) { + if (!keep_stdout) { + err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin); + if (err < 0) { r = EXIT_STDOUT; - goto fail; + goto fail_child; } + } - if (setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin) < 0) { + err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin); + if (err < 0) { r = EXIT_STDERR; - goto fail; + goto fail_child; } - if (cgroup_bondings) - if ((r = cgroup_bonding_install_list(cgroup_bondings, 0)) < 0) { + if (cgroup_bondings) { + err = cgroup_bonding_install_list(cgroup_bondings, 0); + if (err < 0) { r = EXIT_CGROUP; - goto fail; + goto fail_child; } + } - if (context->oom_adjust_set) { + if (context->oom_score_adjust_set) { char t[16]; - snprintf(t, sizeof(t), "%i", context->oom_adjust); + snprintf(t, sizeof(t), "%i", context->oom_score_adjust); char_array_0(t); - if (write_one_line_file("/proc/self/oom_adj", t) < 0) { - r = EXIT_OOM_ADJUST; - goto fail; + if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) { + /* Compatibility with Linux <= 2.6.35 */ + + int adj; + + adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX; + adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX); + + snprintf(t, sizeof(t), "%i", adj); + char_array_0(t); + + if (write_one_line_file("/proc/self/oom_adj", t) < 0 + && errno != EACCES) { + err = -errno; + r = EXIT_OOM_ADJUST; + goto fail_child; + } } } if (context->nice_set) if (setpriority(PRIO_PROCESS, 0, context->nice) < 0) { + err = -errno; r = EXIT_NICE; - goto fail; + goto fail_child; } if (context->cpu_sched_set) { @@ -1072,93 +1173,120 @@ int exec_spawn(ExecCommand *command, if (sched_setscheduler(0, context->cpu_sched_policy | (context->cpu_sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0), ¶m) < 0) { + err = -errno; r = EXIT_SETSCHEDULER; - goto fail; + goto fail_child; } } if (context->cpuset) if (sched_setaffinity(0, CPU_ALLOC_SIZE(context->cpuset_ncpus), context->cpuset) < 0) { + err = -errno; r = EXIT_CPUAFFINITY; - goto fail; + goto fail_child; } if (context->ioprio_set) if (ioprio_set(IOPRIO_WHO_PROCESS, 0, context->ioprio) < 0) { + err = -errno; r = EXIT_IOPRIO; - goto fail; + goto fail_child; } if (context->timer_slack_nsec_set) if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) { + err = -errno; r = EXIT_TIMERSLACK; - goto fail; + goto fail_child; } + if (context->utmp_id) + utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path); + if (context->user) { username = context->user; - if (get_user_creds(&username, &uid, &gid, &home) < 0) { + err = get_user_creds(&username, &uid, &gid, &home); + if (err < 0) { r = EXIT_USER; - goto fail; + goto fail_child; } - if (is_terminal_input(context->std_input)) - if (chown_terminal(STDIN_FILENO, uid) < 0) { + if (is_terminal_input(context->std_input)) { + err = chown_terminal(STDIN_FILENO, uid); + if (err < 0) { r = EXIT_STDIN; - goto fail; + goto fail_child; + } + } + + if (cgroup_bondings && context->control_group_modify) { + err = cgroup_bonding_set_group_access_list(cgroup_bondings, 0755, uid, gid); + if (err >= 0) + err = cgroup_bonding_set_task_access_list(cgroup_bondings, 0644, uid, gid); + if (err < 0) { + r = EXIT_CGROUP; + goto fail_child; } + } } -#ifdef HAVE_PAM - if (context->pam_name && username) { - /* Make sure no fds leak into the PAM - * supervisor process. We will call this later - * on again to make sure that any fds leaked - * by the PAM modules get closed before our - * exec(). */ - if (close_all_fds(fds, n_fds) < 0) { - r = EXIT_FDS; - goto fail; + if (apply_permissions) { + err = enforce_groups(context, username, gid); + if (err < 0) { + r = EXIT_GROUP; + goto fail_child; } + } - if (setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds) < 0) { + umask(context->umask); + +#ifdef HAVE_PAM + if (context->pam_name && username) { + err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds); + if (err < 0) { r = EXIT_PAM; - goto fail; + goto fail_child; } } #endif - - if (apply_permissions) - if (enforce_groups(context, username, uid) < 0) { - r = EXIT_GROUP; - goto fail; + if (context->private_network) { + if (unshare(CLONE_NEWNET) < 0) { + err = -errno; + r = EXIT_NETWORK; + goto fail_child; } - umask(context->umask); + loopback_setup(); + } if (strv_length(context->read_write_dirs) > 0 || strv_length(context->read_only_dirs) > 0 || strv_length(context->inaccessible_dirs) > 0 || context->mount_flags != MS_SHARED || - context->private_tmp) - if ((r = setup_namespace( - context->read_write_dirs, - context->read_only_dirs, - context->inaccessible_dirs, - context->private_tmp, - context->mount_flags)) < 0) - goto fail; + context->private_tmp) { + err = setup_namespace(context->read_write_dirs, + context->read_only_dirs, + context->inaccessible_dirs, + context->private_tmp, + context->mount_flags); + if (err < 0) { + r = EXIT_NAMESPACE; + goto fail_child; + } + } if (apply_chroot) { if (context->root_directory) if (chroot(context->root_directory) < 0) { + err = -errno; r = EXIT_CHROOT; - goto fail; + goto fail_child; } if (chdir(context->working_directory ? context->working_directory : "/") < 0) { + err = -errno; r = EXIT_CHDIR; - goto fail; + goto fail_child; } } else { @@ -1167,24 +1295,31 @@ int exec_spawn(ExecCommand *command, if (asprintf(&d, "%s/%s", context->root_directory ? context->root_directory : "", context->working_directory ? context->working_directory : "") < 0) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (chdir(d) < 0) { + err = -errno; free(d); r = EXIT_CHDIR; - goto fail; + goto fail_child; } free(d); } - if (close_all_fds(fds, n_fds) < 0 || - shift_fds(fds, n_fds) < 0 || - flags_fds(fds, n_fds, context->non_blocking) < 0) { + /* We repeat the fd closing here, to make sure that + * nothing is leaked from the PAM modules */ + err = close_all_fds(fds, n_fds); + if (err >= 0) + err = shift_fds(fds, n_fds); + if (err >= 0) + err = flags_fds(fds, n_fds, context->non_blocking); + if (err < 0) { r = EXIT_FDS; - goto fail; + goto fail_child; } if (apply_permissions) { @@ -1194,79 +1329,125 @@ int exec_spawn(ExecCommand *command, continue; if (setrlimit(i, context->rlimit[i]) < 0) { + err = -errno; r = EXIT_LIMITS; - goto fail; + goto fail_child; } } - if (context->user) - if (enforce_user(context, uid) < 0) { + if (context->capability_bounding_set_drop) { + err = do_capability_bounding_set_drop(context->capability_bounding_set_drop); + if (err < 0) { + r = EXIT_CAPABILITIES; + goto fail_child; + } + } + + if (context->user) { + err = enforce_user(context, uid); + if (err < 0) { r = EXIT_USER; - goto fail; + goto fail_child; } + } - /* PR_GET_SECUREBITS is not priviliged, while + /* PR_GET_SECUREBITS is not privileged, while * PR_SET_SECUREBITS is. So to suppress * potential EPERMs we'll try not to call * PR_SET_SECUREBITS unless necessary. */ if (prctl(PR_GET_SECUREBITS) != context->secure_bits) if (prctl(PR_SET_SECUREBITS, context->secure_bits) < 0) { + err = -errno; r = EXIT_SECUREBITS; - goto fail; + goto fail_child; } if (context->capabilities) if (cap_set_proc(context->capabilities) < 0) { + err = -errno; r = EXIT_CAPABILITIES; - goto fail; + goto fail_child; } } - if (!(our_env = new0(char*, 6))) { + if (!(our_env = new0(char*, 7))) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (n_fds > 0) if (asprintf(our_env + n_env++, "LISTEN_PID=%lu", (unsigned long) getpid()) < 0 || asprintf(our_env + n_env++, "LISTEN_FDS=%u", n_fds) < 0) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (home) if (asprintf(our_env + n_env++, "HOME=%s", home) < 0) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } if (username) if (asprintf(our_env + n_env++, "LOGNAME=%s", username) < 0 || asprintf(our_env + n_env++, "USER=%s", username) < 0) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } - assert(n_env <= 6); + if (is_terminal_input(context->std_input) || + context->std_output == EXEC_OUTPUT_TTY || + context->std_error == EXEC_OUTPUT_TTY) + if (!(our_env[n_env++] = strdup(default_term_for_tty(tty_path(context))))) { + err = -ENOMEM; + r = EXIT_MEMORY; + goto fail_child; + } + + assert(n_env <= 7); if (!(final_env = strv_env_merge( - 4, + 5, environment, our_env, context->environment, + files_env, pam_env, NULL))) { + err = -ENOMEM; r = EXIT_MEMORY; - goto fail; + goto fail_child; } - execve(command->path, argv, final_env); + if (!(final_argv = replace_env_argv(argv, final_env))) { + err = -ENOMEM; + r = EXIT_MEMORY; + goto fail_child; + } + + final_env = strv_env_clean(final_env); + + execve(command->path, final_argv, final_env); + err = -errno; r = EXIT_EXEC; - fail: + fail_child: + if (r != 0) { + log_open(); + log_warning("Failed at step %s spawning %s: %s", + exit_status_to_string(r, EXIT_STATUS_SYSTEMD), + command->path, strerror(-err)); + } + strv_free(our_env); strv_free(final_env); strv_free(pam_env); + strv_free(files_env); + strv_free(final_argv); if (saved_stdin >= 0) close_nointr_nofail(saved_stdin); @@ -1277,6 +1458,8 @@ int exec_spawn(ExecCommand *command, _exit(r); } + strv_free(files_env); + /* We add the new process to the cgroup both in the child (so * that we can be sure that no user code is ever executed * outside of the cgroup) and in the parent (so that we can be @@ -1291,17 +1474,24 @@ int exec_spawn(ExecCommand *command, *ret = pid; return 0; + +fail_parent: + strv_free(files_env); + + return r; } void exec_context_init(ExecContext *c) { assert(c); - c->umask = 0002; + c->umask = 0022; c->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0); c->cpu_sched_policy = SCHED_OTHER; c->syslog_priority = LOG_DAEMON|LOG_INFO; c->syslog_level_prefix = true; c->mount_flags = MS_SHARED; + c->kill_signal = SIGTERM; + c->send_sigkill = true; } void exec_context_done(ExecContext *c) { @@ -1312,6 +1502,9 @@ void exec_context_done(ExecContext *c) { strv_free(c->environment); c->environment = NULL; + strv_free(c->environment_files); + c->environment_files = NULL; + for (l = 0; l < ELEMENTSOF(c->rlimit); l++) { free(c->rlimit[l]); c->rlimit[l] = NULL; @@ -1359,6 +1552,9 @@ void exec_context_done(ExecContext *c) { if (c->cpuset) CPU_FREE(c->cpuset); + + free(c->utmp_id); + c->utmp_id = NULL; } void exec_command_done(ExecCommand *c) { @@ -1397,6 +1593,64 @@ void exec_command_free_array(ExecCommand **c, unsigned n) { } } +int exec_context_load_environment(const ExecContext *c, char ***l) { + char **i, **r = NULL; + + assert(c); + assert(l); + + STRV_FOREACH(i, c->environment_files) { + char *fn; + int k; + bool ignore = false; + char **p; + + fn = *i; + + if (fn[0] == '-') { + ignore = true; + fn ++; + } + + if (!path_is_absolute(fn)) { + + if (ignore) + continue; + + strv_free(r); + return -EINVAL; + } + + if ((k = load_env_file(fn, &p)) < 0) { + + if (ignore) + continue; + + strv_free(r); + return k; + } + + if (r == NULL) + r = p; + else { + char **m; + + m = strv_env_merge(2, r, p); + strv_free(r); + strv_free(p); + + if (!m) + return -ENOMEM; + + r = m; + } + } + + *l = r; + + return 0; +} + static void strv_fprintf(FILE *f, char **l) { char **g; @@ -1421,16 +1675,22 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sWorkingDirectory: %s\n" "%sRootDirectory: %s\n" "%sNonBlocking: %s\n" - "%sPrivateTmp: %s\n", + "%sPrivateTmp: %s\n" + "%sControlGroupModify: %s\n" + "%sPrivateNetwork: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", prefix, c->root_directory ? c->root_directory : "/", prefix, yes_no(c->non_blocking), - prefix, yes_no(c->private_tmp)); + prefix, yes_no(c->private_tmp), + prefix, yes_no(c->control_group_modify), + prefix, yes_no(c->private_network)); + + STRV_FOREACH(e, c->environment) + fprintf(f, "%sEnvironment: %s\n", prefix, *e); - if (c->environment) - for (e = c->environment; *e; e++) - fprintf(f, "%sEnvironment: %s\n", prefix, *e); + STRV_FOREACH(e, c->environment_files) + fprintf(f, "%sEnvironmentFile: %s\n", prefix, *e); if (c->tcpwrap_name) fprintf(f, @@ -1442,10 +1702,10 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sNice: %i\n", prefix, c->nice); - if (c->oom_adjust_set) + if (c->oom_score_adjust_set) fprintf(f, - "%sOOMAdjust: %i\n", - prefix, c->oom_adjust); + "%sOOMScoreAdjust: %i\n", + prefix, c->oom_score_adjust); for (i = 0; i < RLIM_NLIMITS; i++) if (c->rlimit[i]) @@ -1488,15 +1748,23 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { if (c->tty_path) fprintf(f, - "%sTTYPath: %s\n", - prefix, c->tty_path); + "%sTTYPath: %s\n" + "%sTTYReset: %s\n" + "%sTTYVHangup: %s\n" + "%sTTYVTDisallocate: %s\n", + prefix, c->tty_path, + prefix, yes_no(c->tty_reset), + prefix, yes_no(c->tty_vhangup), + prefix, yes_no(c->tty_vt_disallocate)); if (c->std_output == EXEC_OUTPUT_SYSLOG || c->std_output == EXEC_OUTPUT_KMSG || - c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG) + c->std_output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_output == EXEC_OUTPUT_KMSG_AND_CONSOLE || + c->std_error == EXEC_OUTPUT_SYSLOG || c->std_error == EXEC_OUTPUT_KMSG || + c->std_error == EXEC_OUTPUT_SYSLOG_AND_CONSOLE || c->std_error == EXEC_OUTPUT_KMSG_AND_CONSOLE) fprintf(f, "%sSyslogFacility: %s\n" "%sSyslogLevel: %s\n", - prefix, log_facility_to_string(LOG_FAC(c->syslog_priority)), + prefix, log_facility_unshifted_to_string(c->syslog_priority >> 3), prefix, log_level_to_string(LOG_PRI(c->syslog_priority))); if (c->capabilities) { @@ -1519,15 +1787,16 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { (c->secure_bits & SECURE_NOROOT_LOCKED) ? "noroot-locked" : ""); if (c->capability_bounding_set_drop) { - fprintf(f, "%sCapabilityBoundingSetDrop:", prefix); + unsigned long l; + fprintf(f, "%sCapabilityBoundingSet:", prefix); - for (i = 0; i <= CAP_LAST_CAP; i++) - if (c->capability_bounding_set_drop & (1 << i)) { + for (l = 0; l <= cap_last_cap(); l++) + if (!(c->capability_bounding_set_drop & ((uint64_t) 1ULL << (uint64_t) l))) { char *t; - if ((t = cap_to_name(i))) { + if ((t = cap_to_name(l))) { fprintf(f, " %s", t); - free(t); + cap_free(t); } } @@ -1565,6 +1834,19 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { strv_fprintf(f, c->inaccessible_dirs); fputs("\n", f); } + + fprintf(f, + "%sKillMode: %s\n" + "%sKillSignal: SIG%s\n" + "%sSendSIGKILL: %s\n", + prefix, kill_mode_to_string(c->kill_mode), + prefix, signal_to_string(c->kill_signal), + prefix, yes_no(c->send_sigkill)); + + if (c->utmp_id) + fprintf(f, + "%sUtmpIdentifier: %s\n", + prefix, c->utmp_id); } void exec_status_start(ExecStatus *s, pid_t pid) { @@ -1575,7 +1857,7 @@ void exec_status_start(ExecStatus *s, pid_t pid) { dual_timestamp_get(&s->start_timestamp); } -void exec_status_exit(ExecStatus *s, pid_t pid, int code, int status) { +void exec_status_exit(ExecStatus *s, ExecContext *context, pid_t pid, int code, int status) { assert(s); if ((s->pid && s->pid != pid) || @@ -1587,6 +1869,13 @@ void exec_status_exit(ExecStatus *s, pid_t pid, int code, int status) { s->code = code; s->status = status; + + if (context) { + if (context->utmp_id) + utmp_put_dead_process(context->utmp_id, pid, code, status); + + exec_context_tty_reset(context); + } } void exec_status_dump(ExecStatus *s, FILE *f, const char *prefix) { @@ -1703,7 +1992,7 @@ void exec_command_append_list(ExecCommand **l, ExecCommand *e) { assert(e); if (*l) { - /* It's kinda important that we keep the order here */ + /* It's kind of important, that we keep the order here */ LIST_FIND_TAIL(ExecCommand, command, *l, end); LIST_INSERT_AFTER(ExecCommand, command, *l, end, e); } else @@ -1738,117 +2027,6 @@ int exec_command_set(ExecCommand *c, const char *path, ...) { return 0; } -const char* exit_status_to_string(ExitStatus status) { - - /* We cast to int here, so that -Wenum doesn't complain that - * EXIT_SUCCESS/EXIT_FAILURE aren't in the enum */ - - switch ((int) status) { - - case EXIT_SUCCESS: - return "SUCCESS"; - - case EXIT_FAILURE: - return "FAILURE"; - - case EXIT_INVALIDARGUMENT: - return "INVALIDARGUMENT"; - - case EXIT_NOTIMPLEMENTED: - return "NOTIMPLEMENTED"; - - case EXIT_NOPERMISSION: - return "NOPERMISSION"; - - case EXIT_NOTINSTALLED: - return "NOTINSSTALLED"; - - case EXIT_NOTCONFIGURED: - return "NOTCONFIGURED"; - - case EXIT_NOTRUNNING: - return "NOTRUNNING"; - - case EXIT_CHDIR: - return "CHDIR"; - - case EXIT_NICE: - return "NICE"; - - case EXIT_FDS: - return "FDS"; - - case EXIT_EXEC: - return "EXEC"; - - case EXIT_MEMORY: - return "MEMORY"; - - case EXIT_LIMITS: - return "LIMITS"; - - case EXIT_OOM_ADJUST: - return "OOM_ADJUST"; - - case EXIT_SIGNAL_MASK: - return "SIGNAL_MASK"; - - case EXIT_STDIN: - return "STDIN"; - - case EXIT_STDOUT: - return "STDOUT"; - - case EXIT_CHROOT: - return "CHROOT"; - - case EXIT_IOPRIO: - return "IOPRIO"; - - case EXIT_TIMERSLACK: - return "TIMERSLACK"; - - case EXIT_SECUREBITS: - return "SECUREBITS"; - - case EXIT_SETSCHEDULER: - return "SETSCHEDULER"; - - case EXIT_CPUAFFINITY: - return "CPUAFFINITY"; - - case EXIT_GROUP: - return "GROUP"; - - case EXIT_USER: - return "USER"; - - case EXIT_CAPABILITIES: - return "CAPABILITIES"; - - case EXIT_CGROUP: - return "CGROUP"; - - case EXIT_SETSID: - return "SETSID"; - - case EXIT_CONFIRM: - return "CONFIRM"; - - case EXIT_STDERR: - return "STDERR"; - - case EXIT_TCPWRAP: - return "TCPWRAP"; - - case EXIT_PAM: - return "PAM"; - - default: - return NULL; - } -} - static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_NULL] = "null", [EXEC_INPUT_TTY] = "tty", @@ -1857,15 +2035,33 @@ static const char* const exec_input_table[_EXEC_INPUT_MAX] = { [EXEC_INPUT_SOCKET] = "socket" }; +DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput); + static const char* const exec_output_table[_EXEC_OUTPUT_MAX] = { [EXEC_OUTPUT_INHERIT] = "inherit", [EXEC_OUTPUT_NULL] = "null", [EXEC_OUTPUT_TTY] = "tty", [EXEC_OUTPUT_SYSLOG] = "syslog", + [EXEC_OUTPUT_SYSLOG_AND_CONSOLE] = "syslog+console", [EXEC_OUTPUT_KMSG] = "kmsg", + [EXEC_OUTPUT_KMSG_AND_CONSOLE] = "kmsg+console", [EXEC_OUTPUT_SOCKET] = "socket" }; DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput); -DEFINE_STRING_TABLE_LOOKUP(exec_input, ExecInput); +static const char* const kill_mode_table[_KILL_MODE_MAX] = { + [KILL_CONTROL_GROUP] = "control-group", + [KILL_PROCESS] = "process", + [KILL_NONE] = "none" +}; + +DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode); + +static const char* const kill_who_table[_KILL_WHO_MAX] = { + [KILL_MAIN] = "main", + [KILL_CONTROL] = "control", + [KILL_ALL] = "all" +}; + +DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);