chiark / gitweb /
core: add new RestrictAddressFamilies= switch
authorLennart Poettering <lennart@poettering.net>
Tue, 25 Feb 2014 19:37:03 +0000 (20:37 +0100)
committerLennart Poettering <lennart@poettering.net>
Wed, 26 Feb 2014 01:19:28 +0000 (02:19 +0100)
This new unit settings allows restricting which address families are
available to processes. This is an effective way to minimize the attack
surface of services, by turning off entire network stacks for them.

This is based on seccomp, and does not work on x86-32, since seccomp
cannot filter socketcall() syscalls on that platform.

13 files changed:
Makefile.am
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h
src/shared/.gitignore
src/shared/af-list.c [new file with mode: 0644]
src/shared/af-list.h [new file with mode: 0644]
src/shared/exit-status.c
src/shared/exit-status.h

index 529b525..dd067f6 100644 (file)
@@ -764,6 +764,8 @@ libsystemd_shared_la_SOURCES = \
        src/shared/net-util.h \
        src/shared/errno-list.c \
        src/shared/errno-list.h \
+       src/shared/af-list.c \
+       src/shared/af-list.h \
        src/shared/audit.c \
        src/shared/audit.h \
        src/shared/xml.c \
@@ -775,7 +777,9 @@ libsystemd_shared_la_SOURCES = \
 
 nodist_libsystemd_shared_la_SOURCES = \
        src/shared/errno-from-name.h \
-       src/shared/errno-to-name.h
+       src/shared/errno-to-name.h \
+       src/shared/af-from-name.h \
+       src/shared/af-to-name.h
 
 libsystemd_shared_la_CFLAGS = \
        $(AM_CFLAGS) \
@@ -1059,11 +1063,15 @@ CLEANFILES += \
        src/core/load-fragment-gperf.c \
        src/core/load-fragment-gperf-nulstr.c \
        src/shared/errno-list.txt \
-       src/shared/errno-from-name.gperf
+       src/shared/errno-from-name.gperf \
+       src/shared/af-list.txt \
+       src/shared/af-from-name.gperf
 
 BUILT_SOURCES += \
        src/shared/errno-from-name.h \
-       src/shared/errno-to-name.h
+       src/shared/errno-to-name.h \
+       src/shared/af-from-name.h \
+       src/shared/af-to-name.h
 
 src/shared/errno-list.txt:
        $(AM_V_at)$(MKDIR_P) $(dir $@)
@@ -1081,6 +1089,22 @@ src/shared/errno-to-name.h: src/shared/errno-list.txt
        $(AM_V_at)$(MKDIR_P) $(dir $@)
        $(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const errno_names[] = { "} { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
 
+src/shared/af-list.txt:
+       $(AM_V_at)$(MKDIR_P) $(dir $@)
+       $(AM_V_GEN)$(CPP) $(CFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) -dM -include sys/socket.h - < /dev/null | grep -v AF_UNSPEC | grep -v AF_MAX | $(AWK) '/^#define[ \t]+AF_[^ \t]+[ \t]+PF_[^ \t]/ { print $$2; }'  > $@
+
+src/shared/af-from-name.gperf: src/shared/af-list.txt
+       $(AM_V_at)$(MKDIR_P) $(dir $@)
+       $(AM_V_GEN)$(AWK) 'BEGIN{ print "struct af_name { const char* name; int id; };"; print "%null-strings"; print "%%";} { printf "%s, %s\n", $$1, $$1 }' < $< > $@
+
+src/shared/af-from-name.h: src/shared/af-from-name.gperf
+       $(AM_V_at)$(MKDIR_P) $(dir $@)
+       $(AM_V_GPERF)$(GPERF) -L ANSI-C -t --ignore-case -N lookup_af -H hash_af_name -p -C < $< > $@
+
+src/shared/af-to-name.h: src/shared/af-list.txt
+       $(AM_V_at)$(MKDIR_P) $(dir $@)
+       $(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const af_names[] = { "} !/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@
+
 # ------------------------------------------------------------------------------
 systemd_SOURCES = \
        src/core/main.c
index 1983993..413d81d 100644 (file)
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>RestrictAddressFamilies=</varname></term>
+
+                                <listitem><para>Restricts the set of
+                                socket address families accessible to
+                                the processes of this unit. Takes a
+                                space-separated list of address family
+                                names to whitelist, such as
+                                <constant>AF_UNIX</constant>,
+                                <constant>AF_INET</constant> or
+                                <constant>AF_INET6</constant>. When
+                                prefixed with <constant>~</constant>
+                                the listed address families will be
+                                applied as blacklist, otherwise as
+                                whitelist. Note that this restricts
+                                access to the
+                                <citerefentry><refentrytitle>socket</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+                                system call only. Sockets passed into
+                                the process by other means (for
+                                example, by using socket activation
+                                with socket units, see
+                                <citerefentry><refentrytitle>systemd.socket</refentrytitle><manvolnum>5</manvolnum></citerefentry>)
+                                are unaffected. Also, sockets created
+                                with <function>socketpair()</function>
+                                (which creates connected AF_UNIX
+                                sockets only) are unaffected. Note
+                                that this option has no effect on
+                                32bit x86 and is ignored (but works
+                                correctly on x86-64). By default no
+                                restriction applies, all address
+                                families are accessible to
+                                processes. If assigned the empty
+                                string any previous list changes are
+                                undone.</para>
+
+                                <para>Use this option to limit
+                                exposure of processes to remote
+                                systems, in particular via exotic
+                                network protocols. Note that in most
+                                cases the local
+                                <constant>AF_UNIX</constant> address
+                                family should be included in the
+                                configured whitelist as it is
+                                frequently used for local
+                                communication, including for
+                                <citerefentry><refentrytitle>syslog</refentrytitle><manvolnum>2</manvolnum></citerefentry>
+                                logging.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>Personality=</varname></term>
 
                                 <listitem><para>Controls which
                                 host system's
                                 kernel.</para></listitem>
                         </varlistentry>
+
                 </variablelist>
         </refsect1>
 
index 935c62b..02e2a6d 100644 (file)
@@ -34,6 +34,7 @@
 #include "dbus-execute.h"
 #include "capability.h"
 #include "env-util.h"
+#include "af-list.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -518,6 +519,54 @@ static int property_get_personality(
         return sd_bus_message_append(reply, "s", personality_to_string(c->personality));
 }
 
+static int property_get_address_families(
+                sd_bus *bus,
+                const char *path,
+                const char *interface,
+                const char *property,
+                sd_bus_message *reply,
+                void *userdata,
+                sd_bus_error *error) {
+
+        ExecContext *c = userdata;
+        _cleanup_strv_free_ char **l = NULL;
+        Iterator i;
+        void *af;
+        int r;
+
+        assert(bus);
+        assert(reply);
+        assert(c);
+
+        r = sd_bus_message_open_container(reply, 'r', "bas");
+        if (r < 0)
+                return r;
+
+        r = sd_bus_message_append(reply, "b", c->address_families_whitelist);
+        if (r < 0)
+                return r;
+
+        SET_FOREACH(af, c->address_families, i) {
+                const char *name;
+
+                name = af_to_name(PTR_TO_INT(af));
+                if (!name)
+                        continue;
+
+                r = strv_extend(&l, name);
+                if (r < 0)
+                        return -ENOMEM;
+        }
+
+        strv_sort(l);
+
+        r = sd_bus_message_append_strv(reply, l);
+        if (r < 0)
+                return r;
+
+        return sd_bus_message_close_container(reply);
+}
+
 const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_VTABLE_START(0),
         SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST),
@@ -585,6 +634,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_VTABLE_END
 };
 
index aeddd2e..fff25c2 100644 (file)
@@ -81,6 +81,7 @@
 #include "async.h"
 #include "selinux-util.h"
 #include "errno-list.h"
+#include "af-list.h"
 #include "apparmor-util.h"
 
 #ifdef HAVE_SECCOMP
@@ -994,9 +995,130 @@ static int apply_seccomp(ExecContext *c) {
 
 finish:
         seccomp_release(seccomp);
+        return r;
+}
+
+static int apply_address_families(ExecContext *c) {
+        scmp_filter_ctx *seccomp;
+        Iterator i;
+        int r;
+
+        assert(c);
+
+        seccomp = seccomp_init(SCMP_ACT_ALLOW);
+        if (!seccomp)
+                return -ENOMEM;
+
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0)
+                goto finish;
+
+        if (c->address_families_whitelist) {
+                int af, first = 0, last = 0;
+                void *afp;
+
+                /* If this is a whitelist, we first block the address
+                 * families that are out of range and then everything
+                 * that is not in the set. First, we find the lowest
+                 * and highest address family in the set. */
+
+                SET_FOREACH(afp, c->address_families, i) {
+                        af = PTR_TO_INT(afp);
 
+                        if (af <= 0 || af >= af_max())
+                                continue;
+
+                        if (first == 0 || af < first)
+                                first = af;
+
+                        if (last == 0 || af > last)
+                                last = af;
+                }
+
+                assert((first == 0) == (last == 0));
+
+                if (first == 0) {
+
+                        /* No entries in the valid range, block everything */
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        0);
+                        if (r < 0)
+                                goto finish;
+
+                } else {
+
+                        /* Block everything below the first entry */
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        1,
+                                        SCMP_A0(SCMP_CMP_LT, first));
+                        if (r < 0)
+                                goto finish;
+
+                        /* Block everything above the last entry */
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        1,
+                                        SCMP_A0(SCMP_CMP_GT, last));
+                        if (r < 0)
+                                goto finish;
+
+                        /* Block everything between the first and last
+                         * entry */
+                        for (af = 1; af < af_max(); af++) {
+
+                                if (set_contains(c->address_families, INT_TO_PTR(af)))
+                                        continue;
+
+                                r = seccomp_rule_add(
+                                                seccomp,
+                                                SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                                SCMP_SYS(socket),
+                                                1,
+                                                SCMP_A0(SCMP_CMP_EQ, af));
+                                if (r < 0)
+                                        goto finish;
+                        }
+                }
+
+        } else {
+                void *af;
+
+                /* If this is a blacklist, then generate one rule for
+                 * each address family that are then combined in OR
+                 * checks. */
+
+                SET_FOREACH(af, c->address_families, i) {
+
+                        r = seccomp_rule_add(
+                                        seccomp,
+                                        SCMP_ACT_ERRNO(EPROTONOSUPPORT),
+                                        SCMP_SYS(socket),
+                                        1,
+                                        SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af)));
+                        if (r < 0)
+                                goto finish;
+                }
+        }
+
+        r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
+        if (r < 0)
+                goto finish;
+
+        r = seccomp_load(seccomp);
+
+finish:
+        seccomp_release(seccomp);
         return r;
 }
+
 #endif
 
 static void do_idle_pipe_dance(int idle_pipe[4]) {
@@ -1584,6 +1706,14 @@ int exec_spawn(ExecCommand *command,
                                 }
 
 #ifdef HAVE_SECCOMP
+                        if (context->address_families) {
+                                err = apply_address_families(context);
+                                if (err < 0) {
+                                        r = EXIT_ADDRESS_FAMILIES;
+                                        goto fail_child;
+                                }
+                        }
+
                         if (context->syscall_filter || context->syscall_archs) {
                                 err = apply_seccomp(context);
                                 if (err < 0) {
@@ -1777,13 +1907,14 @@ void exec_context_done(ExecContext *c) {
         free(c->apparmor_profile);
         c->apparmor_profile = NULL;
 
-#ifdef HAVE_SECCOMP
         set_free(c->syscall_filter);
         c->syscall_filter = NULL;
 
         set_free(c->syscall_archs);
         c->syscall_archs = NULL;
-#endif
+
+        set_free(c->address_families);
+        c->address_families = NULL;
 }
 
 void exec_command_done(ExecCommand *c) {
index 2bfe227..3c905ce 100644 (file)
@@ -178,6 +178,9 @@ struct ExecContext {
         int syscall_errno;
         bool syscall_whitelist:1;
 
+        Set *address_families;
+        bool address_families_whitelist:1;
+
         bool oom_score_adjust_set:1;
         bool nice_set:1;
         bool ioprio_set:1;
index 26146b1..beff290 100644 (file)
@@ -52,10 +52,12 @@ $1.NoNewPrivileges,              config_parse_bool,                  0,
 m4_ifdef(`HAVE_SECCOMP',
 `$1.SystemCallFilter,            config_parse_syscall_filter,        0,                             offsetof($1, exec_context)
 $1.SystemCallArchitectures,      config_parse_syscall_archs,         0,                             offsetof($1, exec_context.syscall_archs)
-$1.SystemCallErrorNumber,        config_parse_syscall_errno,         0,                             offsetof($1, exec_context)',
+$1.SystemCallErrorNumber,        config_parse_syscall_errno,         0,                             offsetof($1, exec_context)
+$1.RestrictAddressFamilies,      config_parse_address_families,      0,                             offsetof($1, exec_context)',
 `$1.SystemCallFilter,            config_parse_warn_compat,           0,                             0
 $1.SystemCallArchitectures,      config_parse_warn_compat,           0,                             0
-$1.SystemCallErrorNumber,        config_parse_warn_compat,           0,                             0')
+$1.SystemCallErrorNumber,        config_parse_warn_compat,           0,                             0
+$1.RestrictAddressFamilies,      config_parse_warn_compat,           0,                             0')
 $1.LimitCPU,                     config_parse_limit,                 RLIMIT_CPU,                    offsetof($1, exec_context.rlimit)
 $1.LimitFSIZE,                   config_parse_limit,                 RLIMIT_FSIZE,                  offsetof($1, exec_context.rlimit)
 $1.LimitDATA,                    config_parse_limit,                 RLIMIT_DATA,                   offsetof($1, exec_context.rlimit)
index 82aed1e..478d22c 100644 (file)
@@ -56,6 +56,7 @@
 #include "bus-util.h"
 #include "bus-error.h"
 #include "errno-list.h"
+#include "af-list.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -2216,6 +2217,81 @@ int config_parse_syscall_errno(
         c->syscall_errno = e;
         return 0;
 }
+
+int config_parse_address_families(
+                const char *unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        ExecContext *c = data;
+        Unit *u = userdata;
+        bool invert = false;
+        char *w, *state;
+        size_t l;
+        int r;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(u);
+
+        if (isempty(rvalue)) {
+                /* Empty assignment resets the list */
+                set_free(c->address_families);
+                c->address_families = NULL;
+                c->address_families_whitelist = false;
+                return 0;
+        }
+
+        if (rvalue[0] == '~') {
+                invert = true;
+                rvalue++;
+        }
+
+        if (!c->address_families) {
+                c->address_families = set_new(trivial_hash_func, trivial_compare_func);
+                if (!c->address_families)
+                        return log_oom();
+
+                c->address_families_whitelist = !invert;
+        }
+
+        FOREACH_WORD_QUOTED(w, l, rvalue, state) {
+                _cleanup_free_ char *t = NULL;
+                int af;
+
+                t = strndup(w, l);
+                if (!t)
+                        return log_oom();
+
+                af = af_from_name(t);
+                if (af <= 0)  {
+                        log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse address family, ignoring: %s", t);
+                        continue;
+                }
+
+                /* If we previously wanted to forbid an address family and now
+                 * we want to allow it, then remove it from the list
+                 */
+                if (!invert == c->address_families_whitelist)  {
+                        r = set_put(c->address_families, INT_TO_PTR(af));
+                        if (r == -EEXIST)
+                                continue;
+                        if (r < 0)
+                                return log_oom();
+                } else
+                        set_remove(c->address_families, INT_TO_PTR(af));
+        }
+
+        return 0;
+}
 #endif
 
 int config_parse_unit_slice(
@@ -3024,6 +3100,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_syscall_filter,        "SYSCALLS" },
                 { config_parse_syscall_archs,         "ARCHS" },
                 { config_parse_syscall_errno,         "ERRNO" },
+                { config_parse_address_families,      "FAMILIES" },
 #endif
                 { config_parse_cpu_shares,            "SHARES" },
                 { config_parse_memory_limit,          "LIMIT" },
@@ -3039,6 +3116,7 @@ void unit_dump_config_items(FILE *f) {
 #endif
                 { config_parse_job_mode,              "MODE" },
                 { config_parse_job_mode_isolate,      "BOOLEAN" },
+                { config_parse_personality,           "PERSONALITY" },
         };
 
         const char *prev = NULL;
index 4a5ec35..c5dbe61 100644 (file)
@@ -90,6 +90,7 @@ int config_parse_job_mode_isolate(const char *unit, const char *filename, unsign
 int config_parse_exec_selinux_context(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_personality(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_apparmor_profile(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_address_families(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
index c9b5f81..9f4ec9f 100644 (file)
@@ -2,3 +2,7 @@
 /errno-from-name.h
 /errno-list.txt
 /errno-to-name.h
+/af-from-name.gperf
+/af-from-name.h
+/af-list.txt
+/af-to-name.h
diff --git a/src/shared/af-list.c b/src/shared/af-list.c
new file mode 100644 (file)
index 0000000..f396115
--- /dev/null
@@ -0,0 +1,58 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2013 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/socket.h>
+#include <string.h>
+
+#include "util.h"
+#include "af-list.h"
+
+static const struct af_name* lookup_af(register const char *str, register unsigned int len);
+
+#include "af-to-name.h"
+#include "af-from-name.h"
+
+const char *af_to_name(int id) {
+
+        if (id <= 0)
+                return NULL;
+
+        if (id >= (int) ELEMENTSOF(af_names))
+                return NULL;
+
+        return af_names[id];
+}
+
+int af_from_name(const char *name) {
+        const struct af_name *sc;
+
+        assert(name);
+
+        sc = lookup_af(name, strlen(name));
+        if (!sc)
+                return AF_UNSPEC;
+
+        return sc->id;
+}
+
+int af_max(void) {
+        return ELEMENTSOF(af_names);
+}
diff --git a/src/shared/af-list.h b/src/shared/af-list.h
new file mode 100644 (file)
index 0000000..e346ab8
--- /dev/null
@@ -0,0 +1,27 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2014 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+const char *af_to_name(int id);
+int af_from_name(const char *name);
+
+int af_max(void);
index 902f55a..c1b04a3 100644 (file)
@@ -139,6 +139,9 @@ const char* exit_status_to_string(ExitStatus status, ExitStatusLevel level) {
 
                 case EXIT_APPARMOR_PROFILE:
                         return "APPARMOR";
+
+                case EXIT_ADDRESS_FAMILIES:
+                        return "ADDRESS_FAMILIES";
                 }
         }
 
index de379f1..e7f1203 100644 (file)
@@ -70,7 +70,8 @@ typedef enum ExitStatus {
         EXIT_SECCOMP,
         EXIT_SELINUX_CONTEXT,
         EXIT_PERSONALITY,  /* 230 */
-        EXIT_APPARMOR_PROFILE
+        EXIT_APPARMOR_PROFILE,
+        EXIT_ADDRESS_FAMILIES,
 } ExitStatus;
 
 typedef enum ExitStatusLevel {