#include <linux/fs.h>
#include <linux/oom.h>
#include <sys/poll.h>
-#include <linux/seccomp-bpf.h>
#include <glob.h>
+#include <sys/personality.h>
#include <libgen.h>
#undef basename
#include <selinux/selinux.h>
#endif
+#ifdef HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
+#ifdef HAVE_APPARMOR
+#include <sys/apparmor.h>
+#endif
+
#include "execute.h"
#include "strv.h"
#include "macro.h"
#include "utmp-wtmp.h"
#include "def.h"
#include "path-util.h"
-#include "syscall-list.h"
#include "env-util.h"
#include "fileio.h"
#include "unit.h"
#include "async.h"
#include "selinux-util.h"
+#include "errno-list.h"
+#include "af-list.h"
+#include "mkdir.h"
+#include "apparmor-util.h"
+
+#ifdef HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
#define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
#define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
if ((nfd = fcntl(fds[i], F_DUPFD, i+3)) < 0)
return -errno;
- close_nointr_nofail(fds[i]);
+ safe_close(fds[i]);
fds[i] = nfd;
/* Hmm, the fd we wanted isn't free? Then
if (fd != nfd) {
r = dup2(fd, nfd) < 0 ? -errno : nfd;
- close_nointr_nofail(fd);
+ safe_close(fd);
} else
r = nfd;
r = connect(fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(sa.un.sun_path));
if (r < 0) {
- close_nointr_nofail(fd);
+ safe_close(fd);
return -errno;
}
if (shutdown(fd, SHUT_RD) < 0) {
- close_nointr_nofail(fd);
+ safe_close(fd);
return -errno;
}
if (fd != nfd) {
r = dup2(fd, nfd) < 0 ? -errno : nfd;
- close_nointr_nofail(fd);
+ safe_close(fd);
} else
r = nfd;
if (fd != nfd) {
r = dup2(fd, nfd) < 0 ? -errno : nfd;
- close_nointr_nofail(fd);
+ safe_close(fd);
} else
r = nfd;
if (fd != STDIN_FILENO) {
r = dup2(fd, STDIN_FILENO) < 0 ? -errno : STDIN_FILENO;
- close_nointr_nofail(fd);
+ safe_close(fd);
} else
r = STDIN_FILENO;
}
if (fd >= 2)
- close_nointr_nofail(fd);
+ safe_close(fd);
*_saved_stdin = saved_stdin;
*_saved_stdout = saved_stdout;
return 0;
fail:
- if (saved_stdout >= 0)
- close_nointr_nofail(saved_stdout);
-
- if (saved_stdin >= 0)
- close_nointr_nofail(saved_stdin);
-
- if (fd >= 0)
- close_nointr_nofail(fd);
+ safe_close(saved_stdout);
+ safe_close(saved_stdin);
+ safe_close(fd);
return r;
}
_printf_(1, 2) static int write_confirm_message(const char *format, ...) {
- int fd;
+ _cleanup_close_ int fd = -1;
va_list ap;
assert(format);
vdprintf(fd, format, ap);
va_end(ap);
- close_nointr_nofail(fd);
-
return 0;
}
if (dup2(*saved_stdout, STDOUT_FILENO) < 0)
r = -errno;
- if (*saved_stdin >= 0)
- close_nointr_nofail(*saved_stdin);
-
- if (*saved_stdout >= 0)
- close_nointr_nofail(*saved_stdout);
+ safe_close(*saved_stdin);
+ safe_close(*saved_stdout);
return r;
}
rename_process(process_name);
}
-static int apply_seccomp(uint32_t *syscall_filter) {
- static const struct sock_filter header[] = {
- VALIDATE_ARCHITECTURE,
- EXAMINE_SYSCALL
- };
- static const struct sock_filter footer[] = {
- _KILL_PROCESS
- };
+#ifdef HAVE_SECCOMP
- int i;
- unsigned n;
- struct sock_filter *f;
- struct sock_fprog prog = {};
+static int apply_seccomp(ExecContext *c) {
+ uint32_t negative_action, action;
+ scmp_filter_ctx *seccomp;
+ Iterator i;
+ void *id;
+ int r;
- assert(syscall_filter);
+ assert(c);
+
+ negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
- /* First: count the syscalls to check for */
- for (i = 0, n = 0; i < syscall_max(); i++)
- if (syscall_filter[i >> 4] & (1 << (i & 31)))
- n++;
+ seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
- /* Second: build the filter program from a header the syscall
- * matches and the footer */
- f = alloca(sizeof(struct sock_filter) * (ELEMENTSOF(header) + 2*n + ELEMENTSOF(footer)));
- memcpy(f, header, sizeof(header));
+ if (c->syscall_archs) {
- for (i = 0, n = 0; i < syscall_max(); i++)
- if (syscall_filter[i >> 4] & (1 << (i & 31))) {
- struct sock_filter item[] = {
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, INDEX_TO_SYSCALL(i), 0, 1),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
- };
+ SET_FOREACH(id, c->syscall_archs, i) {
+ r = seccomp_arch_add(seccomp, PTR_TO_UINT32(id) - 1);
+ if (r == -EEXIST)
+ continue;
+ if (r < 0)
+ goto finish;
+ }
+
+ } else {
+ r = seccomp_add_secondary_archs(seccomp);
+ if (r < 0)
+ goto finish;
+ }
- assert_cc(ELEMENTSOF(item) == 2);
+ action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
+ SET_FOREACH(id, c->syscall_filter, i) {
+ r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
+ if (r < 0)
+ goto finish;
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
- f[ELEMENTSOF(header) + 2*n] = item[0];
- f[ELEMENTSOF(header) + 2*n+1] = item[1];
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
+}
+
+static int apply_address_families(ExecContext *c) {
+ scmp_filter_ctx *seccomp;
+ Iterator i;
+ int r;
+
+ assert(c);
+
+ seccomp = seccomp_init(SCMP_ACT_ALLOW);
+ if (!seccomp)
+ return -ENOMEM;
- n++;
+ r = seccomp_add_secondary_archs(seccomp);
+ if (r < 0)
+ goto finish;
+
+ if (c->address_families_whitelist) {
+ int af, first = 0, last = 0;
+ void *afp;
+
+ /* If this is a whitelist, we first block the address
+ * families that are out of range and then everything
+ * that is not in the set. First, we find the lowest
+ * and highest address family in the set. */
+
+ SET_FOREACH(afp, c->address_families, i) {
+ af = PTR_TO_INT(afp);
+
+ if (af <= 0 || af >= af_max())
+ continue;
+
+ if (first == 0 || af < first)
+ first = af;
+
+ if (last == 0 || af > last)
+ last = af;
}
- memcpy(f + (ELEMENTSOF(header) + 2*n), footer, sizeof(footer));
+ assert((first == 0) == (last == 0));
- /* Third: install the filter */
- prog.len = ELEMENTSOF(header) + ELEMENTSOF(footer) + 2*n;
- prog.filter = f;
- if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0)
- return -errno;
+ if (first == 0) {
- return 0;
+ /* No entries in the valid range, block everything */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+ SCMP_SYS(socket),
+ 0);
+ if (r < 0)
+ goto finish;
+
+ } else {
+
+ /* Block everything below the first entry */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_LT, first));
+ if (r < 0)
+ goto finish;
+
+ /* Block everything above the last entry */
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_GT, last));
+ if (r < 0)
+ goto finish;
+
+ /* Block everything between the first and last
+ * entry */
+ for (af = 1; af < af_max(); af++) {
+
+ if (set_contains(c->address_families, INT_TO_PTR(af)))
+ continue;
+
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, af));
+ if (r < 0)
+ goto finish;
+ }
+ }
+
+ } else {
+ void *af;
+
+ /* If this is a blacklist, then generate one rule for
+ * each address family that are then combined in OR
+ * checks. */
+
+ SET_FOREACH(af, c->address_families, i) {
+
+ r = seccomp_rule_add(
+ seccomp,
+ SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+ SCMP_SYS(socket),
+ 1,
+ SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+ if (r < 0)
+ goto finish;
+ }
+ }
+
+ r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+ if (r < 0)
+ goto finish;
+
+ r = seccomp_load(seccomp);
+
+finish:
+ seccomp_release(seccomp);
+ return r;
}
+#endif
+
static void do_idle_pipe_dance(int idle_pipe[4]) {
assert(idle_pipe);
- if (idle_pipe[1] >= 0)
- close_nointr_nofail(idle_pipe[1]);
- if (idle_pipe[2] >= 0)
- close_nointr_nofail(idle_pipe[2]);
+
+ safe_close(idle_pipe[1]);
+ safe_close(idle_pipe[2]);
if (idle_pipe[0] >= 0) {
int r;
fd_wait_for_event(idle_pipe[0], POLLHUP, IDLE_TIMEOUT2_USEC);
}
- close_nointr_nofail(idle_pipe[0]);
+ safe_close(idle_pipe[0]);
}
- if (idle_pipe[3] >= 0)
- close_nointr_nofail(idle_pipe[3]);
+ safe_close(idle_pipe[3]);
}
static int build_environment(
bool confirm_spawn,
CGroupControllerMask cgroup_supported,
const char *cgroup_path,
+ const char *runtime_prefix,
const char *unit_id,
usec_t watchdog_usec,
int idle_pipe[4],
goto fail_child;
}
+ if (context->personality != 0xffffffffUL)
+ if (personality(context->personality) < 0) {
+ err = -errno;
+ r = EXIT_PERSONALITY;
+ goto fail_child;
+ }
+
if (context->utmp_id)
utmp_put_init_process(context->utmp_id, getpid(), getsid(0), context->tty_path);
}
#endif
+ if (!strv_isempty(context->runtime_directory) && runtime_prefix) {
+ char **rt;
+
+ STRV_FOREACH(rt, context->runtime_directory) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(runtime_prefix, "/", *rt, NULL);
+ if (!p) {
+ r = EXIT_RUNTIME_DIRECTORY;
+ err = -ENOMEM;
+ goto fail_child;
+ }
+
+ err = mkdir_safe(p, context->runtime_directory_mode, uid, gid);
+ if (err < 0) {
+ r = EXIT_RUNTIME_DIRECTORY;
+ goto fail_child;
+ }
+ }
+ }
+
if (apply_permissions) {
err = enforce_groups(context, username, gid);
if (err < 0) {
if (apply_permissions) {
- for (i = 0; i < RLIMIT_NLIMITS; i++) {
+ for (i = 0; i < _RLIMIT_MAX; i++) {
if (!context->rlimit[i])
continue;
goto fail_child;
}
- if (context->syscall_filter) {
- err = apply_seccomp(context->syscall_filter);
+#ifdef HAVE_SECCOMP
+ if (context->address_families_whitelist ||
+ !set_isempty(context->address_families)) {
+ err = apply_address_families(context);
+ if (err < 0) {
+ r = EXIT_ADDRESS_FAMILIES;
+ goto fail_child;
+ }
+ }
+
+ if (context->syscall_whitelist ||
+ !set_isempty(context->syscall_filter) ||
+ !set_isempty(context->syscall_archs)) {
+ err = apply_seccomp(context);
if (err < 0) {
r = EXIT_SECCOMP;
goto fail_child;
}
}
+#endif
+
#ifdef HAVE_SELINUX
if (context->selinux_context && use_selinux()) {
- bool ignore;
- char* c;
-
- c = context->selinux_context;
- if (c[0] == '-') {
- c++;
- ignore = true;
- } else
- ignore = false;
-
- err = setexeccon(c);
- if (err < 0 && !ignore) {
+ err = setexeccon(context->selinux_context);
+ if (err < 0 && !context->selinux_context_ignore) {
r = EXIT_SELINUX_CONTEXT;
goto fail_child;
}
}
#endif
+
+#ifdef HAVE_APPARMOR
+ if (context->apparmor_profile && use_apparmor()) {
+ err = aa_change_onexec(context->apparmor_profile);
+ if (err < 0 && !context->apparmor_profile_ignore) {
+ r = EXIT_APPARMOR_PROFILE;
+ goto fail_child;
+ }
+ }
+#endif
}
err = build_environment(context, n_fds, watchdog_usec, home, username, shell, &our_env);
c->syslog_level_prefix = true;
c->ignore_sigpipe = true;
c->timer_slack_nsec = (nsec_t) -1;
+ c->personality = 0xffffffffUL;
+ c->runtime_directory_mode = 0755;
}
void exec_context_done(ExecContext *c) {
free(c->selinux_context);
c->selinux_context = NULL;
- free(c->syscall_filter);
+ free(c->apparmor_profile);
+ c->apparmor_profile = NULL;
+
+ set_free(c->syscall_filter);
c->syscall_filter = NULL;
+
+ set_free(c->syscall_archs);
+ c->syscall_archs = NULL;
+
+ set_free(c->address_families);
+ c->address_families = NULL;
+
+ strv_free(c->runtime_directory);
+ c->runtime_directory = NULL;
+}
+
+int exec_context_destroy_runtime_directory(ExecContext *c, const char *runtime_prefix) {
+ char **i;
+
+ assert(c);
+
+ if (!runtime_prefix)
+ return 0;
+
+ STRV_FOREACH(i, c->runtime_directory) {
+ _cleanup_free_ char *p;
+
+ p = strjoin(runtime_prefix, "/", *i, NULL);
+ if (!p)
+ return -ENOMEM;
+
+ /* We execute this synchronously, since we need to be
+ * sure this is gone when we start the service
+ * next. */
+ rm_rf_dangerous(p, false, true, false);
+ }
+
+ return 0;
}
void exec_command_done(ExecCommand *c) {
fprintf(f, "%s%s: %llu\n", prefix, rlimit_to_string(i), (unsigned long long) c->rlimit[i]->rlim_max);
if (c->ioprio_set) {
- char *class_str;
- int r;
+ _cleanup_free_ char *class_str = NULL;
- r = ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
- if (r < 0)
- class_str = NULL;
+ ioprio_class_to_string_alloc(IOPRIO_PRIO_CLASS(c->ioprio), &class_str);
fprintf(f,
"%sIOSchedulingClass: %s\n"
"%sIOPriority: %i\n",
prefix, strna(class_str),
prefix, (int) IOPRIO_PRIO_DATA(c->ioprio));
- free(class_str);
}
if (c->cpu_sched_set) {
- char *policy_str;
- int r;
+ _cleanup_free_ char *policy_str = NULL;
- r = sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
- if (r < 0)
- policy_str = NULL;
+ sched_policy_to_string_alloc(c->cpu_sched_policy, &policy_str);
fprintf(f,
"%sCPUSchedulingPolicy: %s\n"
"%sCPUSchedulingPriority: %i\n"
prefix, strna(policy_str),
prefix, c->cpu_sched_priority,
prefix, yes_no(c->cpu_sched_reset_on_fork));
- free(policy_str);
}
if (c->cpuset) {
if (c->selinux_context)
fprintf(f,
- "%sSELinuxContext: %s\n",
- prefix, c->selinux_context);
+ "%sSELinuxContext: %s%s\n",
+ prefix, c->selinux_context_ignore ? "-" : "", c->selinux_context);
+
+ if (c->personality != 0xffffffffUL)
+ fprintf(f,
+ "%sPersonality: %s\n",
+ prefix, strna(personality_to_string(c->personality)));
+
+ if (c->syscall_filter) {
+#ifdef HAVE_SECCOMP
+ Iterator j;
+ void *id;
+ bool first = true;
+#endif
+
+ fprintf(f,
+ "%sSystemCallFilter: ",
+ prefix);
+
+ if (!c->syscall_whitelist)
+ fputc('~', f);
+
+#ifdef HAVE_SECCOMP
+ SET_FOREACH(id, c->syscall_filter, j) {
+ _cleanup_free_ char *name = NULL;
+
+ if (first)
+ first = false;
+ else
+ fputc(' ', f);
+
+ name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+ fputs(strna(name), f);
+ }
+#endif
+
+ fputc('\n', f);
+ }
+
+ if (c->syscall_archs) {
+#ifdef HAVE_SECCOMP
+ Iterator j;
+ void *id;
+#endif
+
+ fprintf(f,
+ "%sSystemCallArchitectures:",
+ prefix);
+#ifdef HAVE_SECCOMP
+ SET_FOREACH(id, c->syscall_archs, j)
+ fprintf(f, " %s", strna(seccomp_arch_to_string(PTR_TO_UINT32(id) - 1)));
+#endif
+ fputc('\n', f);
+ }
+
+ if (c->syscall_errno != 0)
+ fprintf(f,
+ "%sSystemCallErrorNumber: %s\n",
+ prefix, strna(errno_to_name(c->syscall_errno)));
+
+ if (c->apparmor_profile)
+ fprintf(f,
+ "%sAppArmorProfile: %s%s\n",
+ prefix, c->apparmor_profile_ignore ? "-" : "", c->apparmor_profile);
}
void exec_status_start(ExecStatus *s, pid_t pid) {
if (r->n_ref <= 0) {
free(r->tmp_dir);
free(r->var_tmp_dir);
- close_pipe(r->netns_storage_socket);
+ safe_close_pair(r->netns_storage_socket);
free(r);
}
if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
else {
- if ((*rt)->netns_storage_socket[0] >= 0)
- close_nointr_nofail((*rt)->netns_storage_socket[0]);
-
+ safe_close((*rt)->netns_storage_socket[0]);
(*rt)->netns_storage_socket[0] = fdset_remove(fds, fd);
}
} else if (streq(key, "netns-socket-1")) {
if (safe_atoi(value, &fd) < 0 || !fdset_contains(fds, fd))
log_debug_unit(u->id, "Failed to parse netns socket value %s", value);
else {
- if ((*rt)->netns_storage_socket[1] >= 0)
- close_nointr_nofail((*rt)->netns_storage_socket[1]);
-
+ safe_close((*rt)->netns_storage_socket[1]);
(*rt)->netns_storage_socket[1] = fdset_remove(fds, fd);
}
} else
}
void exec_runtime_destroy(ExecRuntime *rt) {
+ int r;
+
if (!rt)
return;
if (rt->tmp_dir) {
log_debug("Spawning thread to nuke %s", rt->tmp_dir);
- asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
+
+ r = asynchronous_job(remove_tmpdir_thread, rt->tmp_dir);
+ if (r < 0) {
+ log_warning("Failed to nuke %s: %s", rt->tmp_dir, strerror(-r));
+ free(rt->tmp_dir);
+ }
+
rt->tmp_dir = NULL;
}
if (rt->var_tmp_dir) {
log_debug("Spawning thread to nuke %s", rt->var_tmp_dir);
- asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
+
+ r = asynchronous_job(remove_tmpdir_thread, rt->var_tmp_dir);
+ if (r < 0) {
+ log_warning("Failed to nuke %s: %s", rt->var_tmp_dir, strerror(-r));
+ free(rt->var_tmp_dir);
+ }
+
rt->var_tmp_dir = NULL;
}
- close_pipe(rt->netns_storage_socket);
+ safe_close_pair(rt->netns_storage_socket);
}
static const char* const exec_input_table[_EXEC_INPUT_MAX] = {