chiark / gitweb /
core: rework syscall filter
authorLennart Poettering <lennart@poettering.net>
Wed, 12 Feb 2014 17:28:21 +0000 (18:28 +0100)
committerLennart Poettering <lennart@poettering.net>
Wed, 12 Feb 2014 17:30:36 +0000 (18:30 +0100)
- Allow configuration of an errno error to return from blacklisted
  syscalls, instead of immediately terminating a process.

- Fix parsing logic when libseccomp support is turned off

- Only keep the actual syscall set in the ExecContext, and generate the
  string version only on demand.

configure.ac
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h

index 518f545..48d63e8 100644 (file)
@@ -327,7 +327,9 @@ have_seccomp=no
 AC_ARG_ENABLE(seccomp, AS_HELP_STRING([--disable-seccomp], [Disable optional SECCOMP support]))
 if test "x$enable_seccomp" != "xno"; then
         PKG_CHECK_MODULES(SECCOMP, [libseccomp >= 1.0.0],
-               [AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available]) have_seccomp=yes],
+               [AC_DEFINE(HAVE_SECCOMP, 1, [Define if seccomp is available])
+                have_seccomp=yes
+                M4_DEFINES="$M4_DEFINES -DHAVE_SECCOMP"],
                [have_seccomp=no])
         if test "x$have_seccomp" = "xno" -a "x$enable_seccomp" = "xyes"; then
                 AC_MSG_ERROR([*** seccomp support requested but libraries not found])
index 0c6ca5a..86ad7e2 100644 (file)
                                 list of system call
                                 names. If this setting is used, all
                                 system calls executed by the unit
-                                process except for the listed ones
+                                processes except for the listed ones
                                 will result in immediate process
                                 termination with the
                                 <constant>SIGSYS</constant> signal
                                 prior assignments will have no
                                 effect.</para>
 
-                                <para>If you specify both types of this option
-                                (i.e. whitelisting and blacklisting) the first
-                                encountered will take precedence and will
-                                dictate the default action (termination
-                                or approval of a system call). Then the
-                                next occurrences of this option will add or
-                                delete the listed system calls from the set
-                                of the filtered system calls, depending of
-                                its type and the default action (e.g. You
-                                have started with a whitelisting of <function>
-                                read</function> and <function>write</function>
-                                and right after it add a blacklisting of
-                                <function>write</function>, then <function>
-                                write</function> will be removed from the set)
+                                <para>If you specify both types of
+                                this option (i.e. whitelisting and
+                                blacklisting) the first encountered
+                                will take precedence and will dictate
+                                the default action (termination or
+                                approval of a system call). Then the
+                                next occurrences of this option will
+                                add or delete the listed system calls
+                                from the set of the filtered system
+                                calls, depending of its type and the
+                                default action (e.g. You have started
+                                with a whitelisting of
+                                <function>read</function> and
+                                <function>write</function> and right
+                                after it add a blacklisting of
+                                <function>write</function>, then
+                                <function>write</function> will be
+                                removed from the set).
                                 </para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><varname>SystemCallErrorNumber=</varname></term>
+
+                                <listitem><para>Takes an
+                                <literal>errno</literal> error number
+                                name to return when the system call
+                                filter configured with
+                                <varname>SystemCallFilter=</varname>
+                                is triggered, instead of terminating
+                                the process immediately. Takes an
+                                error name such as
+                                <literal>EPERM</literal>,
+                                <literal>EACCES</literal> or
+                                <literal>EUCLEAN</literal>. When this
+                                setting is not used, or when the empty
+                                string is assigned the process will be
+                                terminated immediately when the filter
+                                is triggered.</para></listitem>
+                        </varlistentry>
+
                 </variablelist>
         </refsect1>
 
index 2ed7a3c..34d8eed 100644 (file)
@@ -348,12 +348,66 @@ static int property_get_syscall_filter(
                 sd_bus_error *error) {
 
         ExecContext *c = userdata;
+        _cleanup_strv_free_ char **l = NULL;
+        _cleanup_free_ char *t = NULL;
+        Iterator i;
+        void *id;
+        int r;
+
+        assert(bus);
+        assert(reply);
+        assert(c);
+
+        SET_FOREACH(id, c->syscall_filter, i) {
+                char *name;
+
+                name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(id) - 1);
+                if (!name)
+                        continue;
+
+                r = strv_push(&l, name);
+                if (r < 0) {
+                        free(name);
+                        return -ENOMEM;
+                }
+        }
+
+        strv_sort(l);
+
+        t = strv_join(l, " ");
+        if (!t)
+                return -ENOMEM;
+
+        if (!c->syscall_whitelist) {
+                char *d;
+
+                d = strappend("~", t);
+                if (!d)
+                        return -ENOMEM;
+
+                free(t);
+                t = d;
+        }
+
+        return sd_bus_message_append(reply, "s", t);
+}
+
+static int property_get_syscall_errno(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
 
         assert(bus);
         assert(reply);
         assert(c);
 
-        return sd_bus_message_append(reply, "s", c->syscall_filter_string);
+        return sd_bus_message_append(reply, "i", (int32_t) c->syscall_errno);
 }
 
 const sd_bus_vtable bus_exec_vtable[] = {
@@ -419,6 +473,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("NoNewPrivileges", "b", bus_property_get_bool, offsetof(ExecContext, no_new_privileges), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SystemCallFilter", "s", property_get_syscall_filter, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_VTABLE_END
 };
 
index d2e5b74..1184777 100644 (file)
 #include <sys/poll.h>
 #include <glob.h>
 #include <libgen.h>
-#ifdef HAVE_SECCOMP
-#include <seccomp.h>
-
-#include "set.h"
-#endif
 #undef basename
 
 #ifdef HAVE_PAM
 #include <selinux/selinux.h>
 #endif
 
+#ifdef HAVE_SECCOMP
+#include <seccomp.h>
+#endif
+
 #include "execute.h"
 #include "strv.h"
 #include "macro.h"
@@ -76,6 +75,7 @@
 #include "unit.h"
 #include "async.h"
 #include "selinux-util.h"
+#include "errno-list.h"
 
 #define IDLE_TIMEOUT_USEC (5*USEC_PER_SEC)
 #define IDLE_TIMEOUT2_USEC (1*USEC_PER_SEC)
@@ -937,29 +937,36 @@ static void rename_process_from_path(const char *path) {
 }
 
 #ifdef HAVE_SECCOMP
+
 static int apply_seccomp(ExecContext *c) {
-        uint32_t action = SCMP_ACT_ALLOW;
+        uint32_t negative_action, action;
+        scmp_filter_ctx *seccomp;
         Iterator i;
         void *id;
+        int r;
 
         assert(c);
 
-        c->syscall_filter = seccomp_init(c->syscall_filter_default_action);
-        if (!c->syscall_filter)
-                return -1;
+        negative_action = c->syscall_errno == 0 ? SCMP_ACT_KILL : SCMP_ACT_ERRNO(c->syscall_errno);
+
+        seccomp = seccomp_init(c->syscall_whitelist ? negative_action : SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
 
-        if (c->syscall_filter_default_action == SCMP_ACT_ALLOW)
-                action = SCMP_ACT_KILL;
+        action = c->syscall_whitelist ? SCMP_ACT_ALLOW : negative_action;
 
-        SET_FOREACH(id, c->filtered_syscalls, i) {
-                int r = seccomp_rule_add(c->syscall_filter, action, PTR_TO_INT(id) - 1, 0);
+        SET_FOREACH(id, c->syscall_filter, i) {
+                r = seccomp_rule_add(seccomp, action, PTR_TO_INT(id) - 1, 0);
                 if (r < 0) {
-                        log_error("Failed to add syscall filter");
+                        seccomp_release(seccomp);
                         return r;
                 }
         }
 
-        return seccomp_load(c->syscall_filter);
+        r = seccomp_load(seccomp);
+        seccomp_release(seccomp);
+
+        return r;
 }
 #endif
 
@@ -1541,7 +1548,7 @@ int exec_spawn(ExecCommand *command,
                                 }
 
 #ifdef HAVE_SECCOMP
-                        if (context->filtered_syscalls) {
+                        if (context->syscall_filter) {
                                 err = apply_seccomp(context);
                                 if (err < 0) {
                                         r = EXIT_SECCOMP;
@@ -1549,6 +1556,7 @@ int exec_spawn(ExecCommand *command,
                                 }
                         }
 #endif
+
 #ifdef HAVE_SELINUX
                         if (context->selinux_context && use_selinux()) {
                                 bool ignore;
@@ -1729,19 +1737,9 @@ void exec_context_done(ExecContext *c) {
         free(c->selinux_context);
         c->selinux_context = NULL;
 
-        free(c->syscall_filter);
-        c->syscall_filter = NULL;
-
-        free(c->syscall_filter_string);
-        c->syscall_filter_string = NULL;
-
 #ifdef HAVE_SECCOMP
-        if (c->syscall_filter) {
-                seccomp_release(c->syscall_filter);
-                c->syscall_filter = NULL;
-        }
-        set_free(c->filtered_syscalls);
-        c->filtered_syscalls = NULL;
+        set_free(c->syscall_filter);
+        c->syscall_filter = NULL;
 #endif
 }
 
@@ -2115,6 +2113,38 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 fprintf(f,
                         "%sSELinuxContext: %s\n",
                         prefix, c->selinux_context);
+
+        if (c->syscall_filter) {
+                Iterator j;
+                void *id;
+                bool first = true;
+
+                fprintf(f,
+                        "%sSystemCallFilter: \n",
+                        prefix);
+
+                if (!c->syscall_whitelist)
+                        fputc('~', f);
+
+                SET_FOREACH(id, c->syscall_filter, j) {
+                        _cleanup_free_ char *name = NULL;
+
+                        if (first)
+                                first = false;
+                        else
+                                fputc(' ', f);
+
+                        name = seccomp_syscall_resolve_num_arch(PTR_TO_INT(id)-1, SCMP_ARCH_NATIVE);
+                        fputs(strna(name), f);
+                }
+
+                fputc('\n', f);
+        }
+
+        if (c->syscall_errno != 0)
+                fprintf(f,
+                        "%sSystemCallErrorNumber: %s\n",
+                        prefix, strna(errno_to_name(c->syscall_errno)));
 }
 
 void exec_status_start(ExecStatus *s, pid_t pid) {
index b2d70d7..baf430a 100644 (file)
@@ -167,12 +167,9 @@ struct ExecContext {
          * don't enter a trigger loop. */
         bool same_pgrp;
 
-#ifdef HAVE_SECCOMP
-        scmp_filter_ctx syscall_filter;
-        Set *filtered_syscalls;
-        uint32_t syscall_filter_default_action;
-#endif
-        char *syscall_filter_string;
+        Set *syscall_filter;
+        int syscall_errno;
+        bool syscall_whitelist:1;
 
         bool oom_score_adjust_set:1;
         bool nice_set:1;
index 7d40578..16c9e25 100644 (file)
@@ -49,7 +49,11 @@ $1.SecureBits,                   config_parse_exec_secure_bits,      0,
 $1.CapabilityBoundingSet,        config_parse_bounding_set,          0,                             offsetof($1, exec_context.capability_bounding_set_drop)
 $1.TimerSlackNSec,               config_parse_nsec,                  0,                             offsetof($1, exec_context.timer_slack_nsec)
 $1.NoNewPrivileges,              config_parse_bool,                  0,                             offsetof($1, exec_context.no_new_privileges)
-$1.SystemCallFilter,             config_parse_syscall_filter,        0,                             offsetof($1, exec_context)
+m4_ifdef(`HAVE_SECCOMP',
+`$1.SystemCallFilter,            config_parse_syscall_filter,        0,                             offsetof($1, exec_context)
+$1.SystemCallErrorNumber,        config_parse_syscall_errno,         0,                             offsetof($1, exec_context)',
+`$1.SystemCallFilter,            config_parse_warn_compat,           0,                             0
+$1.SystemCallErrorNumber,        config_parse_warn_compat,           0,                             0')
 $1.LimitCPU,                     config_parse_limit,                 RLIMIT_CPU,                    offsetof($1, exec_context.rlimit)
 $1.LimitFSIZE,                   config_parse_limit,                 RLIMIT_FSIZE,                  offsetof($1, exec_context.rlimit)
 $1.LimitDATA,                    config_parse_limit,                 RLIMIT_DATA,                   offsetof($1, exec_context.rlimit)
index 06ff18b..1b5856e 100644 (file)
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/resource.h>
+
 #ifdef HAVE_SECCOMP
 #include <seccomp.h>
-
-#include "set.h"
 #endif
 
 #include "sd-messages.h"
 #include "cgroup.h"
 #include "bus-util.h"
 #include "bus-error.h"
+#include "errno-list.h"
 
 #if !defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP)
-int config_parse_warn_compat(const char *unit,
-                             const char *filename,
-                             unsigned line,
-                             const char *section,
-                             unsigned section_line,
-                             const char *lvalue,
-                             int ltype,
-                             const char *rvalue,
-                             void *data,
-                             void *userdata) {
+int config_parse_warn_compat(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
 
         log_syntax(unit, LOG_DEBUG, filename, line, EINVAL,
                    "Support for option %s= has been disabled at compile time and is ignored",
@@ -1921,33 +1922,31 @@ int config_parse_documentation(const char *unit,
 }
 
 #ifdef HAVE_SECCOMP
-int config_parse_syscall_filter(const char *unit,
-                                const char *filename,
-                                unsigned line,
-                                const char *section,
-                                unsigned section_line,
-                                const char *lvalue,
-                                int ltype,
-                                const char *rvalue,
-                                void *data,
-                                void *userdata) {
+int config_parse_syscall_filter(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        static const char default_syscalls[] =
+                "execve\0"
+                "exit\0"
+                "exit_group\0"
+                "rt_sigreturn\0"
+                "sigreturn\0";
+
         ExecContext *c = data;
         Unit *u = userdata;
         bool invert = false;
-        char *w;
+        char *w, *state;
         size_t l;
-        char *state;
-        _cleanup_strv_free_ char **syscalls = strv_new(NULL, NULL);
-        _cleanup_free_ char *sorted_syscalls = NULL;
-        uint32_t action = SCMP_ACT_ALLOW;
-        Iterator i;
-        void *e;
-        static char const *default_syscalls[] = {"execve",
-                                                 "exit",
-                                                 "exit_group",
-                                                 "rt_sigreturn",
-                                                 "sigreturn",
-                                                 NULL};
+        int r;
 
         assert(filename);
         assert(lvalue);
@@ -1956,42 +1955,51 @@ int config_parse_syscall_filter(const char *unit,
 
         if (isempty(rvalue)) {
                 /* Empty assignment resets the list */
-                set_free(c->filtered_syscalls);
-                c->filtered_syscalls= NULL;
-                free(c->syscall_filter_string);
-                c->syscall_filter_string = NULL;
+                set_free(c->syscall_filter);
+                c->syscall_filter = NULL;
+                c->syscall_whitelist = false;
                 return 0;
         }
 
         if (rvalue[0] == '~') {
                 invert = true;
-                action = SCMP_ACT_KILL;
                 rvalue++;
         }
 
-        if (!c->filtered_syscalls) {
-                c->filtered_syscalls = set_new(trivial_hash_func, trivial_compare_func);
+        if (!c->syscall_filter) {
+                c->syscall_filter = set_new(trivial_hash_func, trivial_compare_func);
+                if (!c->syscall_filter)
+                        return log_oom();
+
                 if (invert)
-                        c->syscall_filter_default_action = SCMP_ACT_ALLOW;
+                        /* Allow everything but the ones listed */
+                        c->syscall_whitelist = false;
                 else {
-                        char const **syscall;
+                        const char *i;
+
+                        /* Allow nothing but the ones listed */
+                        c->syscall_whitelist = true;
 
-                        c->syscall_filter_default_action = SCMP_ACT_KILL;
+                        /* Accept default syscalls if we are on a whitelist */
+                        NULSTR_FOREACH(i, default_syscalls)  {
+                                int id;
 
-                        /* accept default syscalls if we are on a whitelist */
-                        STRV_FOREACH(syscall, default_syscalls) {
-                                int id = seccomp_syscall_resolve_name(*syscall);
+                                id = seccomp_syscall_resolve_name(i);
                                 if (id < 0)
                                         continue;
 
-                                set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1));
+                                r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
+                                if (r == -EEXIST)
+                                        continue;
+                                if (r < 0)
+                                        return log_oom();
                         }
                 }
         }
 
         FOREACH_WORD_QUOTED(w, l, rvalue, state) {
-                int id;
                 _cleanup_free_ char *t = NULL;
+                int id;
 
                 t = strndup(w, l);
                 if (!t)
@@ -1999,34 +2007,60 @@ int config_parse_syscall_filter(const char *unit,
 
                 id = seccomp_syscall_resolve_name(t);
                 if (id < 0)  {
-                        log_syntax(unit, LOG_ERR, filename, line, EINVAL,
-                                   "Failed to parse syscall, ignoring: %s", t);
+                        log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse system call, ignoring: %s", t);
                         continue;
                 }
 
-                /* If we previously wanted to forbid a syscall
-                 * and now we want to allow it, then remove it from the list
-                 * libseccomp will also return -EPERM if we try to add
-                 * a rule with the same action as the default
+                /* If we previously wanted to forbid a syscall and now
+                 * we want to allow it, then remove it from the list
                  */
-                if (action == c->syscall_filter_default_action)
-                        set_remove(c->filtered_syscalls, INT_TO_PTR(id + 1));
-                else
-                        set_replace(c->filtered_syscalls, INT_TO_PTR(id + 1));
+                if (!invert == c->syscall_whitelist)  {
+                        r = set_put(c->syscall_filter, INT_TO_PTR(id + 1));
+                        if (r == -EEXIST)
+                                continue;
+                        if (r < 0)
+                                return log_oom();
+                } else
+                        set_remove(c->syscall_filter, INT_TO_PTR(id + 1));
         }
 
-        SET_FOREACH(e, c->filtered_syscalls, i) {
-                char *name = seccomp_syscall_resolve_num_arch(SCMP_ARCH_NATIVE, PTR_TO_INT(e) - 1);
-                strv_push(&syscalls, name);
+        c->no_new_privileges = true;
+
+        return 0;
+}
+
+int config_parse_syscall_errno(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        ExecContext *c = data;
+        int e;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+
+        if (isempty(rvalue)) {
+                /* Empty assignment resets to KILL */
+                c->syscall_errno = 0;
+                return 0;
         }
 
-        sorted_syscalls = strv_join(strv_sort(syscalls), " ");
-        if (invert)
-                c->syscall_filter_string = strv_join(STRV_MAKE("~", sorted_syscalls, NULL), "");
-        else
-                c->syscall_filter_string = strdup(sorted_syscalls);
-        c->no_new_privileges = true;
+        e = errno_from_name(rvalue);
+        if (e < 0) {
+                log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse error number, ignoring: %s", rvalue);
+                return 0;
+        }
 
+        c->syscall_errno = e;
         return 0;
 }
 #endif
@@ -2742,6 +2776,9 @@ void unit_dump_config_items(FILE *f) {
                 const ConfigParserCallback callback;
                 const char *rvalue;
         } table[] = {
+#if !defined(HAVE_SYSV_COMPAT) || !defined(HAVE_SECCOMP)
+                { config_parse_warn_compat,           "NOTSUPPORTED" },
+#endif
                 { config_parse_int,                   "INTEGER" },
                 { config_parse_unsigned,              "UNSIGNED" },
                 { config_parse_bytes_size,            "SIZE" },
@@ -2773,8 +2810,6 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_service_restart,       "SERVICERESTART" },
 #ifdef HAVE_SYSV_COMPAT
                 { config_parse_sysv_priority,         "SYSVPRIORITY" },
-#else
-                { config_parse_warn_compat,           "NOTSUPPORTED" },
 #endif
                 { config_parse_kill_mode,             "KILLMODE" },
                 { config_parse_kill_signal,           "SIGNAL" },
@@ -2803,9 +2838,8 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_service_sockets,       "SOCKETS" },
                 { config_parse_environ,               "ENVIRON" },
 #ifdef HAVE_SECCOMP
-                { config_parse_syscall_filter,        "SYSCALL" },
-#else
-                { config_parse_warn_compat,           "NOTSUPPORTED" },
+                { config_parse_syscall_filter,        "SYSCALLS" },
+                { config_parse_syscall_errno,         "ERRNO" },
 #endif
                 { config_parse_cpu_shares,            "SHARES" },
                 { config_parse_memory_limit,          "LIMIT" },
index dcd09ad..4cefa76 100644 (file)
@@ -74,6 +74,7 @@ int config_parse_notify_access(const char *unit, const char *filename, unsigned
 int config_parse_start_limit_action(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_requires_mounts_for(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_syscall_filter(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_syscall_errno(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_environ(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_slice(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_cpu_shares(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);