From 17df7223be064b1542dbe868e3b35cca977ee639 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 12 Feb 2014 18:28:21 +0100 Subject: [PATCH] core: rework syscall filter - Allow configuration of an errno error to return from blacklisted syscalls, instead of immediately terminating a process. - Fix parsing logic when libseccomp support is turned off - Only keep the actual syscall set in the ExecContext, and generate the string version only on demand. --- configure.ac | 4 +- man/systemd.exec.xml | 54 +++++--- src/core/dbus-execute.c | 57 +++++++- src/core/execute.c | 86 ++++++++---- src/core/execute.h | 9 +- src/core/load-fragment-gperf.gperf.m4 | 6 +- src/core/load-fragment.c | 182 +++++++++++++++----------- src/core/load-fragment.h | 1 + 8 files changed, 273 insertions(+), 126 deletions(-) diff --git a/configure.ac b/configure.ac index 518f545bd..48d63e879 100644 --- a/configure.ac +++ b/configure.ac @@ -327,7 +327,9 @@ have_seccomp=no AC_ARG_ENABLE(seccomp, AS_HELP_STRING([--disable-seccomp], [Disable optional SECCOMP support])) if test "x$enable_seccomp" != "xno"; then PKG_CHECK_MODULES(SECCOMP, [libseccomp >= 1.0.0], - [AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available]) have_seccomp=yes], + [AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available]) + have_seccomp=yes + M4_DEFINES="$M4_DEFINES -DHAVE_SECCOMP"], [have_seccomp=no]) if test "x$have_seccomp" = "xno" -a "x$enable_seccomp" = "xyes"; then AC_MSG_ERROR([*** seccomp support requested but libraries not found]) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 0c6ca5acf..86ad7e223 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1001,7 +1001,7 @@ list of system call names. If this setting is used, all system calls executed by the unit - process except for the listed ones + processes except for the listed ones will result in immediate process termination with the SIGSYS signal @@ -1031,23 +1031,47 @@ prior assignments will have no effect. 
- If you specify both types of this option - (i.e. whitelisting and blacklisting) the first - encountered will take precedence and will - dictate the default action (termination - or approval of a system call). Then the - next occurrences of this option will add or - delete the listed system calls from the set - of the filtered system calls, depending of - its type and the default action (e.g. You - have started with a whitelisting of - read and write - and right after it add a blacklisting of - write, then - write will be removed from the set) + If you specify both types of + this option (i.e. whitelisting and + blacklisting) the first encountered + will take precedence and will dictate + the default action (termination or + approval of a system call). Then the + next occurrences of this option will + add or delete the listed system calls + from the set of the filtered system + calls, depending on its type and the + default action (e.g. you have started + with a whitelisting of + read and + write and right + after it add a blacklisting of + write, then + write will be + removed from the set). + + SystemCallErrorNumber= + + Takes an + errno error number + name to return when the system call + filter configured with + SystemCallFilter= + is triggered, instead of terminating + the process immediately. Takes an + error name such as + EPERM, + EACCES or + EUCLEAN. When this + setting is not used, or when the empty + string is assigned, the process will be + terminated immediately when the filter + is triggered. 
+ + diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 2ed7a3c73..34d8eedc0 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -347,13 +347,67 @@ static int property_get_syscall_filter( void *userdata, sd_bus_error *error) { + ExecContext *c = userdata; + _cleanup_strv_free_ char **l = NULL; + _cleanup_free_ char *t = NULL; + Iterator i; + void *id; + int r; + + assert(bus); + assert(reply); + assert(c); + + SET_FOREACH(id, c->syscall_filter, i) { + char *name; + + name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1); + if (!name) + continue; + + r = strv_push(&l, name); + if (r < 0) { + free(name); + return -ENOMEM; + } + } + + strv_sort(l); + + t = strv_join(l, " "); + if (!t) + return -ENOMEM; + + if (!c->syscall_whitelist) { + char *d; + + d = strappend("~", t); + if (!d) + return -ENOMEM; + + free(t); + t = d; + } + + return sd_bus_message_append(reply, "s", t); +} + +static int property_get_syscall_errno( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + ExecContext *c = userdata; assert(bus); assert(reply); assert(c); - return sd_bus_message_append(reply, "s", c->syscall_filter_string); + return sd_bus_message_append(reply, "i", (int32_t) c->syscall_errno); } const sd_bus_vtable bus_exec_vtable[] = { @@ -419,6 +473,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("NoNewPrivileges", "b", bus_property_get_bool, offsetof(ExecContext, no_new_privileges), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SystemCallFilter", "s", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; diff --git a/src/core/execute.c 
b/src/core/execute.c index d2e5b740b..118477729 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -40,11 +40,6 @@ #include #include #include -#ifdef HAVE_SECCOMP -#include - -#include "set.h" -#endif #undef basename #ifdef HAVE_PAM @@ -55,6 +50,10 @@ #include #endif +#ifdef HAVE_SECCOMP +#include +#endif + #include "execute.h" #include "strv.h" #include "macro.h" @@ -76,6 +75,7 @@ #include "unit.h" #include "async.h" #include "selinux-util.h" +#include "errno-list.h" #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC) #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC) @@ -937,29 +937,36 @@ static void rename_process_from_path(const char *path) { } #ifdef HAVE_SECCOMP + static int apply_seccomp(ExecContext *c) { - uint32_t action = SCMP_ACT_ALLOW; + uint32_t negative_action, action; + scmp_filter_ctx *seccomp; Iterator i; void *id; + int r; assert(c); - c->syscall_filter = seccomp_init(c->syscall_filter_default_action); - if (!c->syscall_filter) - return -1; + negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno); + + seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; - if (c->syscall_filter_default_action == SCMP_ACT_ALLOW) - action = SCMP_ACT_KILL; + action = c->syscall_whitelist ? 
SCMP_ACT_ALLOW : negative_action; - SET_FOREACH(id, c->filtered_syscalls, i) { - int r = seccomp_rule_add(c->syscall_filter, action, PTR_TO_INT(id) - 1, 0); + SET_FOREACH(id, c->syscall_filter, i) { + r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0); if (r < 0) { - log_error("Failed to add syscall filter"); + seccomp_release(seccomp); return r; } } - return seccomp_load(c->syscall_filter); + r = seccomp_load(seccomp); + seccomp_release(seccomp); + + return r; } #endif @@ -1541,7 +1548,7 @@ int exec_spawn(ExecCommand *command, } #ifdef HAVE_SECCOMP - if (context->filtered_syscalls) { + if (context->syscall_filter) { err = apply_seccomp(context); if (err < 0) { r = EXIT_SECCOMP; @@ -1549,6 +1556,7 @@ int exec_spawn(ExecCommand *command, } } #endif + #ifdef HAVE_SELINUX if (context->selinux_context && use_selinux()) { bool ignore; @@ -1729,19 +1737,9 @@ void exec_context_done(ExecContext *c) { free(c->selinux_context); c->selinux_context = NULL; - free(c->syscall_filter); - c->syscall_filter = NULL; - - free(c->syscall_filter_string); - c->syscall_filter_string = NULL; - #ifdef HAVE_SECCOMP - if (c->syscall_filter) { - seccomp_release(c->syscall_filter); - c->syscall_filter = NULL; - } - set_free(c->filtered_syscalls); - c->filtered_syscalls = NULL; + set_free(c->syscall_filter); + c->syscall_filter = NULL; #endif } @@ -2115,6 +2113,38 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { fprintf(f, "%sSELinuxContext: %s\n", prefix, c->selinux_context); + + if (c->syscall_filter) { + Iterator j; + void *id; + bool first = true; + + fprintf(f, + "%sSystemCallFilter: ", + prefix); + + if (!c->syscall_whitelist) + fputc('~', f); + + SET_FOREACH(id, c->syscall_filter, j) { + _cleanup_free_ char *name = NULL; + + if (first) + first = false; + else + fputc(' ', f); + + name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1); /* arch token first, then the syscall number with the +1 storage offset undone */ + fputs(strna(name), f); + } + + fputc('\n', f); + } + + if (c->syscall_errno != 0) + fprintf(f, + 
"%sSystemCallErrorNumber: %s\n", + prefix, strna(errno_to_name(c->syscall_errno))); } void exec_status_start(ExecStatus *s, pid_t pid) { diff --git a/src/core/execute.h b/src/core/execute.h index b2d70d7d8..baf430a04 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -167,12 +167,9 @@ struct ExecContext { * don't enter a trigger loop. */ bool same_pgrp; -#ifdef HAVE_SECCOMP - scmp_filter_ctx syscall_filter; - Set *filtered_syscalls; - uint32_t syscall_filter_default_action; -#endif - char *syscall_filter_string; + Set *syscall_filter; + int syscall_errno; + bool syscall_whitelist:1; bool oom_score_adjust_set:1; bool nice_set:1; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 7d405788d..16c9e2578 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -49,7 +49,11 @@ $1.SecureBits, config_parse_exec_secure_bits, 0, $1.CapabilityBoundingSet, config_parse_bounding_set, 0, offsetof($1, exec_context.capability_bounding_set_drop) $1.TimerSlackNSec, config_parse_nsec, 0, offsetof($1, exec_context.timer_slack_nsec) $1.NoNewPrivileges, config_parse_bool, 0, offsetof($1, exec_context.no_new_privileges) -$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) +m4_ifdef(`HAVE_SECCOMP', +`$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) +$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)', +`$1.SystemCallFilter, config_parse_warn_compat, 0, 0 +$1.SystemCallErrorNumber, config_parse_warn_compat, 0, 0') $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) $1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 06ff18b57..1b5856e27 100644 --- a/src/core/load-fragment.c +++ 
b/src/core/load-fragment.c @@ -33,10 +33,9 @@ #include #include #include + #ifdef HAVE_SECCOMP #include - -#include "set.h" #endif #include "sd-messages.h" @@ -56,18 +55,20 @@ #include "cgroup.h" #include "bus-util.h" #include "bus-error.h" +#include "errno-list.h" #if !defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP) -int config_parse_warn_compat(const char *unit, - const char *filename, - unsigned line, - const char *section, - unsigned section_line, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +int config_parse_warn_compat( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { log_syntax(unit, LOG_DEBUG, filename, line, EINVAL, "Support for option %s= has been disabled at compile time and is ignored", @@ -1921,33 +1922,31 @@ int config_parse_documentation(const char *unit, } #ifdef HAVE_SECCOMP -int config_parse_syscall_filter(const char *unit, - const char *filename, - unsigned line, - const char *section, - unsigned section_line, - const char *lvalue, - int ltype, - const char *rvalue, - void *data, - void *userdata) { +int config_parse_syscall_filter( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + static const char default_syscalls[] = + "execve\0" + "exit\0" + "exit_group\0" + "rt_sigreturn\0" + "sigreturn\0"; + ExecContext *c = data; Unit *u = userdata; bool invert = false; - char *w; + char *w, *state; size_t l; - char *state; - _cleanup_strv_free_ char **syscalls = strv_new(NULL, NULL); - _cleanup_free_ char *sorted_syscalls = NULL; - uint32_t action = SCMP_ACT_ALLOW; - Iterator i; - void *e; - static char const *default_syscalls[] = {"execve", - "exit", - "exit_group", - "rt_sigreturn", - "sigreturn", - 
NULL}; + int r; assert(filename); assert(lvalue); @@ -1956,42 +1955,51 @@ int config_parse_syscall_filter(const char *unit, if (isempty(rvalue)) { /* Empty assignment resets the list */ - set_free(c->filtered_syscalls); - c->filtered_syscalls= NULL; - free(c->syscall_filter_string); - c->syscall_filter_string = NULL; + set_free(c->syscall_filter); + c->syscall_filter = NULL; + c->syscall_whitelist = false; return 0; } if (rvalue[0] == '~') { invert = true; - action = SCMP_ACT_KILL; rvalue++; } - if (!c->filtered_syscalls) { - c->filtered_syscalls = set_new(trivial_hash_func, trivial_compare_func); + if (!c->syscall_filter) { + c->syscall_filter = set_new(trivial_hash_func, trivial_compare_func); + if (!c->syscall_filter) + return log_oom(); + if (invert) - c->syscall_filter_default_action = SCMP_ACT_ALLOW; + /* Allow everything but the ones listed */ + c->syscall_whitelist = false; else { - char const **syscall; + const char *i; + + /* Allow nothing but the ones listed */ + c->syscall_whitelist = true; - c->syscall_filter_default_action = SCMP_ACT_KILL; + /* Accept default syscalls if we are on a whitelist */ + NULSTR_FOREACH(i, default_syscalls) { + int id; - /* accept default syscalls if we are on a whitelist */ - STRV_FOREACH(syscall, default_syscalls) { - int id = seccomp_syscall_resolve_name(*syscall); + id = seccomp_syscall_resolve_name(i); if (id < 0) continue; - set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1)); + r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + if (r == -EEXIST) + continue; + if (r < 0) + return log_oom(); } } } FOREACH_WORD_QUOTED(w, l, rvalue, state) { - int id; _cleanup_free_ char *t = NULL; + int id; t = strndup(w, l); if (!t) @@ -1999,34 +2007,60 @@ int config_parse_syscall_filter(const char *unit, id = seccomp_syscall_resolve_name(t); if (id < 0) { - log_syntax(unit, LOG_ERR, filename, line, EINVAL, - "Failed to parse syscall, ignoring: %s", t); + log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse system 
call, ignoring: %s", t); continue; } - /* If we previously wanted to forbid a syscall - * and now we want to allow it, then remove it from the list - * libseccomp will also return -EPERM if we try to add - * a rule with the same action as the default + /* If we previously wanted to forbid a syscall and now + * we want to allow it, then remove it from the list */ - if (action == c->syscall_filter_default_action) - set_remove(c->filtered_syscalls, INT_TO_PTR(id + 1)); - else - set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1)); + if (!invert == c->syscall_whitelist) { + r = set_put(c->syscall_filter, INT_TO_PTR(id + 1)); + if (r == -EEXIST) + continue; + if (r < 0) + return log_oom(); + } else + set_remove(c->syscall_filter, INT_TO_PTR(id + 1)); } - SET_FOREACH(e, c->filtered_syscalls, i) { - char *name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(e) - 1); - strv_push(&syscalls, name); + c->no_new_privileges = true; + + return 0; +} + +int config_parse_syscall_errno( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + int e; + + assert(filename); + assert(lvalue); + assert(rvalue); + + if (isempty(rvalue)) { + /* Empty assignment resets to KILL */ + c->syscall_errno = 0; + return 0; } - sorted_syscalls = strv_join(strv_sort(syscalls), " "); - if (invert) - c->syscall_filter_string = strv_join(STRV_MAKE("~", sorted_syscalls, NULL), ""); - else - c->syscall_filter_string = strdup(sorted_syscalls); - c->no_new_privileges = true; + e = errno_from_name(rvalue); + if (e < 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse error number, ignoring: %s", rvalue); + return 0; + } + c->syscall_errno = e; return 0; } #endif @@ -2742,6 +2776,9 @@ void unit_dump_config_items(FILE *f) { const ConfigParserCallback callback; const char *rvalue; } table[] = { +#if 
!defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP) + { config_parse_warn_compat, "NOTSUPPORTED" }, +#endif { config_parse_int, "INTEGER" }, { config_parse_unsigned, "UNSIGNED" }, { config_parse_bytes_size, "SIZE" }, @@ -2773,8 +2810,6 @@ void unit_dump_config_items(FILE *f) { { config_parse_service_restart, "SERVICERESTART" }, #ifdef HAVE_SYSV_COMPAT { config_parse_sysv_priority, "SYSVPRIORITY" }, -#else - { config_parse_warn_compat, "NOTSUPPORTED" }, #endif { config_parse_kill_mode, "KILLMODE" }, { config_parse_kill_signal, "SIGNAL" }, @@ -2803,9 +2838,8 @@ void unit_dump_config_items(FILE *f) { { config_parse_service_sockets, "SOCKETS" }, { config_parse_environ, "ENVIRON" }, #ifdef HAVE_SECCOMP - { config_parse_syscall_filter, "SYSCALL" }, -#else - { config_parse_warn_compat, "NOTSUPPORTED" }, + { config_parse_syscall_filter, "SYSCALLS" }, + { config_parse_syscall_errno, "ERRNO" }, #endif { config_parse_cpu_shares, "SHARES" }, { config_parse_memory_limit, "LIMIT" }, diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index dcd09ad1f..4cefa7615 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -74,6 +74,7 @@ int config_parse_notify_access(const char *unit, const char *filename, unsigned int config_parse_start_limit_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_syscall_errno(const char *unit, const char *filename, unsigned line, const char *section, 
unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); -- 2.30.2