#include <sys/mount.h>
#include <linux/fs.h>
#include <linux/oom.h>
+#include <sys/poll.h>
+#include <linux/seccomp-bpf.h>
#ifdef HAVE_PAM
#include <security/pam_appl.h>
#include "utmp-wtmp.h"
#include "def.h"
#include "loopback-setup.h"
+#include "path-util.h"
+#include "syscall-list.h"
+
+#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
/* This assumes there is a 'tty' group */
#define TTY_MODE 0620
return r;
}
-static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, int nfd) {
+static int connect_logger_as(const ExecContext *context, ExecOutput output, const char *ident, const char *unit_id, int nfd) {
int fd, r;
union sockaddr_union sa;
}
dprintf(fd,
+ "%s\n"
"%s\n"
"%i\n"
"%i\n"
"%i\n"
"%i\n",
context->syslog_identifier ? context->syslog_identifier : ident,
+ unit_id,
context->syslog_priority,
!!context->syslog_level_prefix,
output == EXEC_OUTPUT_SYSLOG || output == EXEC_OUTPUT_SYSLOG_AND_CONSOLE,
tty_path(context),
i == EXEC_INPUT_TTY_FAIL,
i == EXEC_INPUT_TTY_FORCE,
- false)) < 0)
+ false,
+ (usec_t) -1)) < 0)
return fd;
if (fd != STDIN_FILENO) {
}
}
-static int setup_output(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
+static int setup_output(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
ExecOutput o;
ExecInput i;
case EXEC_OUTPUT_KMSG_AND_CONSOLE:
case EXEC_OUTPUT_JOURNAL:
case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
- return connect_logger_as(context, o, ident, STDOUT_FILENO);
+ return connect_logger_as(context, o, ident, unit_id, STDOUT_FILENO);
case EXEC_OUTPUT_SOCKET:
assert(socket_fd >= 0);
}
}
-static int setup_error(const ExecContext *context, int socket_fd, const char *ident, bool apply_tty_stdin) {
+static int setup_error(const ExecContext *context, int socket_fd, const char *ident, const char *unit_id, bool apply_tty_stdin) {
ExecOutput o, e;
ExecInput i;
case EXEC_OUTPUT_KMSG_AND_CONSOLE:
case EXEC_OUTPUT_JOURNAL:
case EXEC_OUTPUT_JOURNAL_AND_CONSOLE:
- return connect_logger_as(context, e, ident, STDERR_FILENO);
+ return connect_logger_as(context, e, ident, unit_id, STDERR_FILENO);
case EXEC_OUTPUT_SOCKET:
assert(socket_fd >= 0);
return 0;
}
-static int setup_confirm_stdio(const ExecContext *context,
- int *_saved_stdin,
+/* Duplicates the current stdin and stdout to descriptors >= 3, then
+ * acquires /dev/console and installs it as stdin and stdout. Every
+ * failure is reported as a negative errno-style value; the caller
+ * tests for r < 0. */
+static int setup_confirm_stdio(int *_saved_stdin,
int *_saved_stdout) {
int fd = -1, saved_stdin, saved_stdout = -1, r;
- assert(context);
assert(_saved_stdin);
assert(_saved_stdout);
- /* This returns positive EXIT_xxx return values instead of
- * negative errno style values! */
-
- if ((saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3)) < 0)
- return EXIT_STDIN;
+ saved_stdin = fcntl(STDIN_FILENO, F_DUPFD, 3);
+ if (saved_stdin < 0)
+ return -errno;
- if ((saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3)) < 0) {
- r = EXIT_STDOUT;
+ saved_stdout = fcntl(STDOUT_FILENO, F_DUPFD, 3);
+ if (saved_stdout < 0) {
+ /* Must be negative like every other error path here,
+ * otherwise the failure looks like success to the caller. */
+ r = -errno;
goto fail;
}
- if ((fd = acquire_terminal(
- tty_path(context),
- context->std_input == EXEC_INPUT_TTY_FAIL,
- context->std_input == EXEC_INPUT_TTY_FORCE,
- false)) < 0) {
- r = EXIT_STDIN;
+ fd = acquire_terminal(
+ "/dev/console",
+ false,
+ false,
+ false,
+ DEFAULT_CONFIRM_USEC);
+ if (fd < 0) {
+ r = fd;
goto fail;
}
- if (chown_terminal(fd, getuid()) < 0) {
- r = EXIT_STDIN;
+ r = chown_terminal(fd, getuid());
+ if (r < 0)
goto fail;
- }
if (dup2(fd, STDIN_FILENO) < 0) {
- r = EXIT_STDIN;
+ r = -errno;
goto fail;
}
if (dup2(fd, STDOUT_FILENO) < 0) {
- r = EXIT_STDOUT;
+ r = -errno;
goto fail;
}
return r;
}
-static int restore_confirm_stdio(const ExecContext *context,
- int *saved_stdin,
- int *saved_stdout,
- bool *keep_stdin,
- bool *keep_stdout) {
+static int write_confirm_message(const char *format, ...) { /* printf-style: formats a message and writes it to /dev/console */
+ int fd;
+ va_list ap;
- assert(context);
- assert(saved_stdin);
- assert(*saved_stdin >= 0);
- assert(saved_stdout);
- assert(*saved_stdout >= 0);
+ assert(format);
- /* This returns positive EXIT_xxx return values instead of
- * negative errno style values! */
+ fd = open_terminal("/dev/console", O_WRONLY|O_NOCTTY|O_CLOEXEC); /* opens its own write-only descriptor instead of using stdout */
+ if (fd < 0)
+ return fd; /* the negative value from open_terminal() is passed through */
- if (is_terminal_input(context->std_input)) {
+ va_start(ap, format);
+ vdprintf(fd, format, ap); /* the result of the write is not checked */
+ va_end(ap);
- /* The service wants terminal input. */
+ close_nointr_nofail(fd);
- *keep_stdin = true;
- *keep_stdout =
- context->std_output == EXEC_OUTPUT_INHERIT ||
- context->std_output == EXEC_OUTPUT_TTY;
+ return 0;
+}
- } else {
- /* If the service doesn't want a controlling terminal,
- * then we need to get rid entirely of what we have
- * already. */
+static int restore_confirm_stdio(int *saved_stdin, /* puts the saved descriptors back on stdin/stdout, then closes the saved copies */
+ int *saved_stdout) {
- if (release_terminal() < 0)
- return EXIT_STDIN;
+ int r = 0;
+ assert(saved_stdin);
+ assert(saved_stdout);
+
+ release_terminal(); /* return value is ignored here */
+
+ if (*saved_stdin >= 0) /* a negative value means nothing was saved, so there is nothing to restore */
if (dup2(*saved_stdin, STDIN_FILENO) < 0)
- return EXIT_STDIN;
+ r = -errno; /* remember the failure but keep going */
+ if (*saved_stdout >= 0)
if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
- return EXIT_STDOUT;
+ r = -errno; /* a failure here overwrites one recorded for stdin */
- *keep_stdout = *keep_stdin = false;
- }
+ if (*saved_stdin >= 0)
+ close_nointr_nofail(*saved_stdin);
- return 0;
+ if (*saved_stdout >= 0)
+ close_nointr_nofail(*saved_stdout);
+
+ return r; /* 0, or the negative errno of the last dup2() that failed */
+}
+
+/* Asks on the console whether the command in argv should be executed.
+ *
+ * Standard input/output are switched over with setup_confirm_stdio(),
+ * the question is put through ask() with the accepted answers "yns",
+ * and the previous stdin/stdout are put back before returning.
+ *
+ * response: receives the answer character chosen via ask().
+ * argv:     command line rendered into the question text.
+ *
+ * Returns the result of ask(), the negative error from
+ * setup_confirm_stdio(), or -ENOMEM if the command line cannot be
+ * rendered. */
+static int ask_for_confirmation(char *response, char **argv) {
+        int saved_stdout = -1, saved_stdin = -1, r;
+        char *line;
+
+        r = setup_confirm_stdio(&saved_stdin, &saved_stdout);
+        if (r < 0)
+                return r;
+
+        line = exec_command_line(argv);
+        if (!line) {
+                /* stdio has already been switched over at this point,
+                 * so it has to be restored on this path as well. */
+                r = -ENOMEM;
+                goto finish;
+        }
+
+        r = ask(response, "yns", "Execute %s? [Yes, No, Skip] ", line);
+        free(line);
+
+finish:
+        restore_confirm_stdio(&saved_stdin, &saved_stdout);
+
+        return r;
}
static int enforce_groups(const ExecContext *context, const char *username, gid_t gid) {
static int setup_pam(
const char *name,
const char *user,
+ uid_t uid,
const char *tty,
char ***pam_env,
int fds[], unsigned n_fds) {
open here that have been opened by PAM. */
close_many(fds, n_fds);
- /* Wait until our parent died. This will most likely
- * not work since the kernel does not allow
- * unprivileged parents kill their privileged children
- * this way. We rely on the control groups kill logic
+ /* Drop privileges - we don't need any to pam_close_session
+ * and this will make PR_SET_PDEATHSIG work in most cases.
+ * If this fails, ignore the error - but expect sd-pam threads
+ * to fail to exit normally. setresuid() reports the reason
+ * for a failure through errno, so that is what gets logged. */
+ if (setresuid(uid, uid, uid) < 0)
+ log_error("Error: Failed to setresuid() in sd-pam: %s", strerror(errno));
+
+ /* Wait until our parent died. This will only work if
+ * the above setresuid() succeeds, otherwise the kernel
+ * will not allow unprivileged parents kill their privileged
+ * children this way. We rely on the control groups kill logic
* to do the rest for us. */
if (prctl(PR_SET_PDEATHSIG, SIGTERM) < 0)
goto child_finish;
}
#endif
-static int do_capability_bounding_set_drop(uint64_t drop) {
- unsigned long i;
- cap_t old_cap = NULL, new_cap = NULL;
- cap_flag_value_t fv;
- int r;
-
- /* If we are run as PID 1 we will lack CAP_SETPCAP by default
- * in the effective set (yes, the kernel drops that when
- * executing init!), so get it back temporarily so that we can
- * call PR_CAPBSET_DROP. */
-
- old_cap = cap_get_proc();
- if (!old_cap)
- return -errno;
-
- if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (fv != CAP_SET) {
- static const cap_value_t v = CAP_SETPCAP;
-
- new_cap = cap_dup(old_cap);
- if (!new_cap) {
- r = -errno;
- goto finish;
- }
-
- if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (cap_set_proc(new_cap) < 0) {
- r = -errno;
- goto finish;
- }
- }
-
- for (i = 0; i <= cap_last_cap(); i++)
- if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
- if (prctl(PR_CAPBSET_DROP, i) < 0) {
- r = -errno;
- goto finish;
- }
- }
-
- r = 0;
-
-finish:
- if (new_cap)
- cap_free(new_cap);
-
- if (old_cap) {
- cap_set_proc(old_cap);
- cap_free(old_cap);
- }
-
- return r;
-}
-
static void rename_process_from_path(const char *path) {
char process_name[11];
const char *p;
/* This resulting string must fit in 10 chars (i.e. the length
* of "/sbin/init") to look pretty in /bin/ps */
- p = file_name_from_path(path);
+ p = path_get_file_name(path);
if (isempty(p)) {
rename_process("(...)");
return;
rename_process(process_name);
}
+/* Builds a seccomp filter program from a syscall bitmap and installs it
+ * for the calling process.
+ *
+ * The program is: the fixed header (VALIDATE_ARCHITECTURE,
+ * EXAMINE_SYSCALL), then for every syscall number whose bit is set a
+ * "jump if equal / return SECCOMP_RET_ALLOW" pair, then the fixed
+ * footer (_KILL_PROCESS).
+ *
+ * syscall_filter: bitmap of 32-bit words, must not be NULL. Syscall
+ *                 number i is looked up in word i >> 4 at bit i & 31.
+ *
+ * Returns 0 on success or -errno if prctl() rejects the program. */
+static int apply_seccomp(uint32_t *syscall_filter) {
+        static const struct sock_filter header[] = {
+                VALIDATE_ARCHITECTURE,
+                EXAMINE_SYSCALL
+        };
+        static const struct sock_filter footer[] = {
+                _KILL_PROCESS
+        };
+
+        int i;
+        unsigned n;
+        struct sock_filter *f;
+        struct sock_fprog prog;
+
+        assert(syscall_filter);
+
+        /* NOTE(review): the word index is i >> 4 although each word
+         * holds 32 bits. It is kept as is because it has to agree with
+         * the code that fills the bitmap, which is not visible here -
+         * confirm both sides before changing either.
+         *
+         * The mask is built from an unsigned 32-bit one: shifting a
+         * plain int 1 by 31 would shift into the sign bit. */
+
+        /* First: count the syscalls to check for */
+        for (i = 0, n = 0; i < syscall_max(); i++)
+                if (syscall_filter[i >> 4] & ((uint32_t) 1 << (i & 31)))
+                        n++;
+
+        /* Second: build the filter program from a header the syscall
+         * matches and the footer */
+        f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
+        memcpy(f, header, sizeof(header));
+
+        for (i = 0, n = 0; i < syscall_max(); i++)
+                if (syscall_filter[i >> 4] & ((uint32_t) 1 << (i & 31))) {
+                        /* On a match fall through to the ALLOW return,
+                         * otherwise skip over it to the next pair. */
+                        struct sock_filter item[] = {
+                                BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, i, 0, 1),
+                                BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
+                        };
+
+                        assert_cc(ELEMENTSOF(item) == 2);
+
+                        f[ELEMENTSOF(header) + 2*n] = item[0];
+                        f[ELEMENTSOF(header) + 2*n+1] = item[1];
+
+                        n++;
+                }
+
+        memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
+
+        /* Third: install the filter */
+        zero(prog);
+        prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
+        prog.filter = f;
+        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
+                return -errno;
+
+        return 0;
+}
+
int exec_spawn(ExecCommand *command,
char **argv,
const ExecContext *context,
CGroupBonding *cgroup_bondings,
CGroupAttribute *cgroup_attributes,
const char *cgroup_suffix,
+ const char *unit_id,
+ int idle_pipe[2],
pid_t *ret) {
pid_t pid;
if (!argv)
argv = command->argv;
- if (!(line = exec_command_line(argv))) {
+ line = exec_command_line(argv);
+ if (!line) {
r = -ENOMEM;
goto fail_parent;
}
gid_t gid = (gid_t) -1;
char **our_env = NULL, **pam_env = NULL, **final_env = NULL, **final_argv = NULL;
unsigned n_env = 0;
- int saved_stdout = -1, saved_stdin = -1;
- bool keep_stdout = false, keep_stdin = false, set_access = false;
+ bool set_access = false;
/* child */
goto fail_child;
}
+ if (idle_pipe) {
+ if (idle_pipe[1] >= 0)
+ close_nointr_nofail(idle_pipe[1]);
+ if (idle_pipe[0] >= 0) {
+ fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT_USEC);
+ close_nointr_nofail(idle_pipe[0]);
+ }
+ }
+
/* Close sockets very early to make sure we don't
* block init reexecution because it cannot bind its
* sockets */
exec_context_tty_reset(context);
- /* We skip the confirmation step if we shall not apply the TTY */
- if (confirm_spawn &&
- (!is_terminal_input(context->std_input) || apply_tty_stdin)) {
+ if (confirm_spawn) {
char response;
- /* Set up terminal for the question */
- if ((r = setup_confirm_stdio(context,
- &saved_stdin, &saved_stdout))) {
- err = -errno;
- goto fail_child;
- }
-
- /* Now ask the question. */
- if (!(line = exec_command_line(argv))) {
- err = -ENOMEM;
- r = EXIT_MEMORY;
- goto fail_child;
- }
-
- r = ask(&response, "yns", "Execute %s? [Yes, No, Skip] ", line);
- free(line);
-
- if (r < 0 || response == 'n') {
+ err = ask_for_confirmation(&response, argv);
+ if (err == -ETIMEDOUT)
+ write_confirm_message("Confirmation question timed out, assuming positive response.\n");
+ else if (err < 0)
+ write_confirm_message("Couldn't ask confirmation question, assuming positive response: %s\n", strerror(-err));
+ else if (response == 's') {
+ write_confirm_message("Skipping execution.\n");
err = -ECANCELED;
r = EXIT_CONFIRM;
goto fail_child;
- } else if (response == 's') {
+ } else if (response == 'n') {
+ write_confirm_message("Failing execution.\n");
err = r = 0;
goto fail_child;
}
-
- /* Release terminal for the question */
- if ((r = restore_confirm_stdio(context,
- &saved_stdin, &saved_stdout,
- &keep_stdin, &keep_stdout))) {
- err = -errno;
- goto fail_child;
- }
}
/* If a socket is connected to STDIN/STDOUT/STDERR, we
if (socket_fd >= 0)
fd_nonblock(socket_fd, false);
- if (!keep_stdin) {
- err = setup_input(context, socket_fd, apply_tty_stdin);
- if (err < 0) {
- r = EXIT_STDIN;
- goto fail_child;
- }
+ err = setup_input(context, socket_fd, apply_tty_stdin);
+ if (err < 0) {
+ r = EXIT_STDIN;
+ goto fail_child;
}
- if (!keep_stdout) {
- err = setup_output(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
- if (err < 0) {
- r = EXIT_STDOUT;
- goto fail_child;
- }
+ err = setup_output(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
+ if (err < 0) {
+ r = EXIT_STDOUT;
+ goto fail_child;
}
- err = setup_error(context, socket_fd, file_name_from_path(command->path), apply_tty_stdin);
+ err = setup_error(context, socket_fd, path_get_file_name(command->path), unit_id, apply_tty_stdin);
if (err < 0) {
r = EXIT_STDERR;
goto fail_child;
char_array_0(t);
if (write_one_line_file("/proc/self/oom_score_adj", t) < 0) {
- /* Compatibility with Linux <= 2.6.35 */
-
- int adj;
-
- adj = (context->oom_score_adjust * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX;
- adj = CLAMP(adj, OOM_DISABLE, OOM_ADJUST_MAX);
-
- snprintf(t, sizeof(t), "%i", adj);
- char_array_0(t);
-
- if (write_one_line_file("/proc/self/oom_adj", t) < 0
- && errno != EACCES) {
- err = -errno;
- r = EXIT_OOM_ADJUST;
- goto fail_child;
- }
+ err = -errno;
+ r = EXIT_OOM_ADJUST;
+ goto fail_child;
}
}
goto fail_child;
}
- if (context->timer_slack_nsec_set)
+ if (context->timer_slack_nsec != (nsec_t) -1)
if (prctl(PR_SET_TIMERSLACK, context->timer_slack_nsec) < 0) {
err = -errno;
r = EXIT_TIMERSLACK;
if (context->user) {
username = context->user;
- err = get_user_creds(&username, &uid, &gid, &home);
+ err = get_user_creds(&username, &uid, &gid, &home, NULL);
if (err < 0) {
r = EXIT_USER;
goto fail_child;
umask(context->umask);
#ifdef HAVE_PAM
- if (context->pam_name && username) {
- err = setup_pam(context->pam_name, username, context->tty_path, &pam_env, fds, n_fds);
+ if (apply_permissions && context->pam_name && username) {
+ err = setup_pam(context->pam_name, username, uid, context->tty_path, &pam_env, fds, n_fds);
if (err < 0) {
r = EXIT_PAM;
goto fail_child;
if (strv_length(context->read_write_dirs) > 0 ||
strv_length(context->read_only_dirs) > 0 ||
strv_length(context->inaccessible_dirs) > 0 ||
- context->mount_flags != MS_SHARED ||
+ context->mount_flags != 0 ||
context->private_tmp) {
err = setup_namespace(context->read_write_dirs,
context->read_only_dirs,
}
if (context->capability_bounding_set_drop) {
- err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
+ err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
if (err < 0) {
r = EXIT_CAPABILITIES;
goto fail_child;
r = EXIT_CAPABILITIES;
goto fail_child;
}
+
+ if (context->no_new_privileges)
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
+ err = -errno;
+ r = EXIT_NO_NEW_PRIVILEGES;
+ goto fail_child;
+ }
+
+ if (context->syscall_filter) {
+ err = apply_seccomp(context->syscall_filter);
+ if (err < 0) {
+ r = EXIT_SECCOMP;
+ goto fail_child;
+ }
+ }
}
if (!(our_env = new0(char*, 7))) {
strv_free(files_env);
strv_free(final_argv);
- if (saved_stdin >= 0)
- close_nointr_nofail(saved_stdin);
-
- if (saved_stdout >= 0)
- close_nointr_nofail(saved_stdout);
-
_exit(r);
}
c->cpu_sched_policy = SCHED_OTHER;
c->syslog_priority = LOG_DAEMON|LOG_INFO;
c->syslog_level_prefix = true;
- c->mount_flags = MS_SHARED;
- c->kill_signal = SIGTERM;
- c->send_sigkill = true;
c->control_group_persistent = -1;
c->ignore_sigpipe = true;
+ c->timer_slack_nsec = (nsec_t) -1;
}
void exec_context_done(ExecContext *c) {
free(c->utmp_id);
c->utmp_id = NULL;
+
+ free(c->syscall_filter);
+ c->syscall_filter = NULL;
}
void exec_command_done(ExecCommand *c) {
"%sPrivateTmp: %s\n"
"%sControlGroupModify: %s\n"
"%sControlGroupPersistent: %s\n"
- "%sPrivateNetwork: %s\n",
+ "%sPrivateNetwork: %s\n"
+ "%sIgnoreSIGPIPE: %s\n",
prefix, c->umask,
prefix, c->working_directory ? c->working_directory : "/",
prefix, c->root_directory ? c->root_directory : "/",
prefix, yes_no(c->private_tmp),
prefix, yes_no(c->control_group_modify),
prefix, yes_no(c->control_group_persistent),
- prefix, yes_no(c->private_network));
+ prefix, yes_no(c->private_network),
+ prefix, yes_no(c->ignore_sigpipe));
STRV_FOREACH(e, c->environment)
fprintf(f, "%sEnvironment: %s\n", prefix, *e);
fputs("\n", f);
}
- if (c->timer_slack_nsec_set)
- fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, c->timer_slack_nsec);
+ if (c->timer_slack_nsec != (nsec_t) -1)
+ fprintf(f, "%sTimerSlackNSec: %lu\n", prefix, (unsigned long)c->timer_slack_nsec);
fprintf(f,
"%sStandardInput: %s\n"
fputs("\n", f);
}
- fprintf(f,
- "%sKillMode: %s\n"
- "%sKillSignal: SIG%s\n"
- "%sSendSIGKILL: %s\n"
- "%sIgnoreSIGPIPE: %s\n",
- prefix, kill_mode_to_string(c->kill_mode),
- prefix, signal_to_string(c->kill_signal),
- prefix, yes_no(c->send_sigkill),
- prefix, yes_no(c->ignore_sigpipe));
-
if (c->utmp_id)
fprintf(f,
"%sUtmpIdentifier: %s\n",
};
DEFINE_STRING_TABLE_LOOKUP(exec_output, ExecOutput);
-
-static const char* const kill_mode_table[_KILL_MODE_MAX] = {
- [KILL_CONTROL_GROUP] = "control-group",
- [KILL_PROCESS] = "process",
- [KILL_NONE] = "none"
-};
-
-DEFINE_STRING_TABLE_LOOKUP(kill_mode, KillMode);
-
-static const char* const kill_who_table[_KILL_WHO_MAX] = {
- [KILL_MAIN] = "main",
- [KILL_CONTROL] = "control",
- [KILL_ALL] = "all"
-};
-
-DEFINE_STRING_TABLE_LOOKUP(kill_who, KillWho);