From: Lennart Poettering Date: Tue, 25 Feb 2014 19:37:03 +0000 (+0100) Subject: core: add new RestrictAddressFamilies= switch X-Git-Tag: v211~203 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=commitdiff_plain;h=4298d0b5128326621c8f537107c4c8b459490721 core: add new RestrictAddressFamilies= switch This new unit setting allows restricting which address families are available to processes. This is an effective way to minimize the attack surface of services, by turning off entire network stacks for them. This is based on seccomp, and does not work on x86-32, since seccomp cannot filter socketcall() syscalls on that platform. --- diff --git a/Makefile.am b/Makefile.am index 529b52588..dd067f6d0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -764,6 +764,8 @@ libsystemd_shared_la_SOURCES = \ src/shared/net-util.h \ src/shared/errno-list.c \ src/shared/errno-list.h \ + src/shared/af-list.c \ + src/shared/af-list.h \ src/shared/audit.c \ src/shared/audit.h \ src/shared/xml.c \ @@ -775,7 +777,9 @@ libsystemd_shared_la_SOURCES = \ nodist_libsystemd_shared_la_SOURCES = \ src/shared/errno-from-name.h \ - src/shared/errno-to-name.h + src/shared/errno-to-name.h \ + src/shared/af-from-name.h \ + src/shared/af-to-name.h libsystemd_shared_la_CFLAGS = \ $(AM_CFLAGS) \ @@ -1059,11 +1063,15 @@ CLEANFILES += \ src/core/load-fragment-gperf.c \ src/core/load-fragment-gperf-nulstr.c \ src/shared/errno-list.txt \ - src/shared/errno-from-name.gperf + src/shared/errno-from-name.gperf \ + src/shared/af-list.txt \ + src/shared/af-from-name.gperf BUILT_SOURCES += \ src/shared/errno-from-name.h \ - src/shared/errno-to-name.h + src/shared/errno-to-name.h \ + src/shared/af-from-name.h \ + src/shared/af-to-name.h src/shared/errno-list.txt: $(AM_V_at)$(MKDIR_P) $(dir $@) @@ -1081,6 +1089,22 @@ src/shared/errno-to-name.h: src/shared/errno-list.txt $(AM_V_at)$(MKDIR_P) $(dir $@) $(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const errno_names[] = { "} { printf
"[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@ +src/shared/af-list.txt: + $(AM_V_at)$(MKDIR_P) $(dir $@) + $(AM_V_GEN)$(CPP) $(CFLAGS) $(AM_CPPFLAGS) $(CPPFLAGS) -dM -include sys/socket.h - < /dev/null | grep -v AF_UNSPEC | grep -v AF_MAX | $(AWK) '/^#define[ \t]+AF_[^ \t]+[ \t]+PF_[^ \t]/ { print $$2; }' > $@ + +src/shared/af-from-name.gperf: src/shared/af-list.txt + $(AM_V_at)$(MKDIR_P) $(dir $@) + $(AM_V_GEN)$(AWK) 'BEGIN{ print "struct af_name { const char* name; int id; };"; print "%null-strings"; print "%%";} { printf "%s, %s\n", $$1, $$1 }' < $< > $@ + +src/shared/af-from-name.h: src/shared/af-from-name.gperf + $(AM_V_at)$(MKDIR_P) $(dir $@) + $(AM_V_GPERF)$(GPERF) -L ANSI-C -t --ignore-case -N lookup_af -H hash_af_name -p -C < $< > $@ + +src/shared/af-to-name.h: src/shared/af-list.txt + $(AM_V_at)$(MKDIR_P) $(dir $@) + $(AM_V_GEN)$(AWK) 'BEGIN{ print "static const char* const af_names[] = { "} !/AF_FILE/ && !/AF_ROUTE/ && !/AF_LOCAL/ { printf "[%s] = \"%s\",\n", $$1, $$1 } END{print "};"}' < $< > $@ + # ------------------------------------------------------------------------------ systemd_SOURCES = \ src/core/main.c diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 19839937c..413d81d33 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1121,6 +1121,55 @@ applied. + + RestrictAddressFamilies= + + Restricts the set of + socket address families accessible to + the processes of this unit. Takes a + space-separated list of address family + names to whitelist, such as + AF_UNIX, + AF_INET or + AF_INET6. When + prefixed with ~ + the listed address families will be + applied as blacklist, otherwise as + whitelist. Note that this restricts + access to the + socket(2) + system call only. Sockets passed into + the process by other means (for + example, by using socket activation + with socket units, see + systemd.socket(5)) + are unaffected.
Also, sockets created + with socketpair() + (which creates connected AF_UNIX + sockets only) are unaffected. Note + that this option has no effect on + 32bit x86 and is ignored (but works + correctly on x86-64). By default no + restriction applies, all address + families are accessible to + processes. If assigned the empty + string any previous list changes are + undone. + + Use this option to limit + exposure of processes to remote + systems, in particular via exotic + network protocols. Note that in most + cases the local + AF_UNIX address + family should be included in the + configured whitelist as it is + frequently used for local + communication, including for + syslog(2) + logging. + + Personality= @@ -1138,6 +1187,7 @@ host system's kernel. + diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 935c62bdf..02e2a6d3d 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -34,6 +34,7 @@ #include "dbus-execute.h" #include "capability.h" #include "env-util.h" +#include "af-list.h" #ifdef HAVE_SECCOMP #include "seccomp-util.h" @@ -518,6 +519,54 @@ static int property_get_personality( return sd_bus_message_append(reply, "s", personality_to_string(c->personality)); } +static int property_get_address_families( + sd_bus *bus, + const char *path, + const char *interface, + const char *property, + sd_bus_message *reply, + void *userdata, + sd_bus_error *error) { + + ExecContext *c = userdata; + _cleanup_strv_free_ char **l = NULL; + Iterator i; + void *af; + int r; + + assert(bus); + assert(reply); + assert(c); + + r = sd_bus_message_open_container(reply, 'r', "bas"); + if (r < 0) + return r; + + r = sd_bus_message_append(reply, "b", c->address_families_whitelist); + if (r < 0) + return r; + + SET_FOREACH(af, c->address_families, i) { + const char *name; + + name = af_to_name(PTR_TO_INT(af)); + if (!name) + continue; + + r = strv_extend(&l, name); + if (r < 0) + return -ENOMEM; + } + + strv_sort(l); + + r = sd_bus_message_append_strv(reply,
l); + if (r < 0) + return r; + + return sd_bus_message_close_container(reply); +} + const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_VTABLE_START(0), SD_BUS_PROPERTY("Environment", "as", NULL, offsetof(ExecContext, environment), SD_BUS_VTABLE_PROPERTY_CONST), @@ -585,6 +634,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("SystemCallArchitectures", "as", property_get_syscall_archs, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SystemCallErrorNumber", "i", property_get_syscall_errno, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("Personality", "s", property_get_personality, 0, SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("RestrictAddressFamilies", "(bas)", property_get_address_families, 0, SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_VTABLE_END }; diff --git a/src/core/execute.c b/src/core/execute.c index aeddd2e41..fff25c2b2 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -81,6 +81,7 @@ #include "async.h" #include "selinux-util.h" #include "errno-list.h" +#include "af-list.h" #include "apparmor-util.h" #ifdef HAVE_SECCOMP @@ -994,9 +995,130 @@ static int apply_seccomp(ExecContext *c) { finish: seccomp_release(seccomp); + return r; +} + +static int apply_address_families(ExecContext *c) { + scmp_filter_ctx *seccomp; + Iterator i; + int r; + + assert(c); + + seccomp = seccomp_init(SCMP_ACT_ALLOW); + if (!seccomp) + return -ENOMEM; + + r = seccomp_add_secondary_archs(seccomp); + if (r < 0) + goto finish; + + if (c->address_families_whitelist) { + int af, first = 0, last = 0; + void *afp; + + /* If this is a whitelist, we first block the address + * families that are out of range and then everything + * that is not in the set. First, we find the lowest + * and highest address family in the set. 
*/ + + SET_FOREACH(afp, c->address_families, i) { + af = PTR_TO_INT(afp); + if (af <= 0 || af >= af_max()) + continue; + + if (first == 0 || af < first) + first = af; + + if (last == 0 || af > last) + last = af; + } + + assert((first == 0) == (last == 0)); + + if (first == 0) { + + /* No entries in the valid range, block everything */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPROTONOSUPPORT), + SCMP_SYS(socket), + 0); + if (r < 0) + goto finish; + + } else { + + /* Block everything below the first entry */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPROTONOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_LT, first)); + if (r < 0) + goto finish; + + /* Block everything above the last entry */ + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPROTONOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_GT, last)); + if (r < 0) + goto finish; + + /* Block everything between the first and last + * entry */ + for (af = 1; af < af_max(); af++) { + + if (set_contains(c->address_families, INT_TO_PTR(af))) + continue; + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPROTONOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_EQ, af)); + if (r < 0) + goto finish; + } + } + + } else { + void *af; + + /* If this is a blacklist, then generate one rule for + * each address family that are then combined in OR + * checks. 
*/ + + SET_FOREACH(af, c->address_families, i) { + + r = seccomp_rule_add( + seccomp, + SCMP_ACT_ERRNO(EPROTONOSUPPORT), + SCMP_SYS(socket), + 1, + SCMP_A0(SCMP_CMP_EQ, PTR_TO_INT(af))); + if (r < 0) + goto finish; + } + } + + r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0); + if (r < 0) + goto finish; + + r = seccomp_load(seccomp); + +finish: + seccomp_release(seccomp); return r; } + #endif static void do_idle_pipe_dance(int idle_pipe[4]) { @@ -1584,6 +1706,14 @@ int exec_spawn(ExecCommand *command, } #ifdef HAVE_SECCOMP + if (context->address_families) { + err = apply_address_families(context); + if (err < 0) { + r = EXIT_ADDRESS_FAMILIES; + goto fail_child; + } + } + if (context->syscall_filter || context->syscall_archs) { err = apply_seccomp(context); if (err < 0) { @@ -1777,13 +1907,14 @@ void exec_context_done(ExecContext *c) { free(c->apparmor_profile); c->apparmor_profile = NULL; -#ifdef HAVE_SECCOMP set_free(c->syscall_filter); c->syscall_filter = NULL; set_free(c->syscall_archs); c->syscall_archs = NULL; -#endif + + set_free(c->address_families); + c->address_families = NULL; } void exec_command_done(ExecCommand *c) { diff --git a/src/core/execute.h b/src/core/execute.h index 2bfe22714..3c905cefa 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -178,6 +178,9 @@ struct ExecContext { int syscall_errno; bool syscall_whitelist:1; + Set *address_families; + bool address_families_whitelist:1; + bool oom_score_adjust_set:1; bool nice_set:1; bool ioprio_set:1; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 26146b192..beff29025 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -52,10 +52,12 @@ $1.NoNewPrivileges, config_parse_bool, 0, m4_ifdef(`HAVE_SECCOMP', `$1.SystemCallFilter, config_parse_syscall_filter, 0, offsetof($1, exec_context) $1.SystemCallArchitectures, config_parse_syscall_archs, 0, offsetof($1, exec_context.syscall_archs) 
-$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context)', +$1.SystemCallErrorNumber, config_parse_syscall_errno, 0, offsetof($1, exec_context) +$1.RestrictAddressFamilies, config_parse_address_families, 0, offsetof($1, exec_context)', `$1.SystemCallFilter, config_parse_warn_compat, 0, 0 $1.SystemCallArchitectures, config_parse_warn_compat, 0, 0 -$1.SystemCallErrorNumber, config_parse_warn_compat, 0, 0') +$1.SystemCallErrorNumber, config_parse_warn_compat, 0, 0 +$1.RestrictAddressFamilies, config_parse_warn_compat, 0, 0') $1.LimitCPU, config_parse_limit, RLIMIT_CPU, offsetof($1, exec_context.rlimit) $1.LimitFSIZE, config_parse_limit, RLIMIT_FSIZE, offsetof($1, exec_context.rlimit) $1.LimitDATA, config_parse_limit, RLIMIT_DATA, offsetof($1, exec_context.rlimit) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 82aed1eb9..478d22c4b 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -56,6 +56,7 @@ #include "bus-util.h" #include "bus-error.h" #include "errno-list.h" +#include "af-list.h" #ifdef HAVE_SECCOMP #include "seccomp-util.h" @@ -2216,6 +2217,81 @@ int config_parse_syscall_errno( c->syscall_errno = e; return 0; } + +int config_parse_address_families( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + Unit *u = userdata; + bool invert = false; + char *w, *state; + size_t l; + int r; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(u); + + if (isempty(rvalue)) { + /* Empty assignment resets the list */ + set_free(c->address_families); + c->address_families = NULL; + c->address_families_whitelist = false; + return 0; + } + + if (rvalue[0] == '~') { + invert = true; + rvalue++; + } + + if (!c->address_families) { + c->address_families = set_new(trivial_hash_func, trivial_compare_func); + if 
(!c->address_families) + return log_oom(); + + c->address_families_whitelist = !invert; + } + + FOREACH_WORD_QUOTED(w, l, rvalue, state) { + _cleanup_free_ char *t = NULL; + int af; + + t = strndup(w, l); + if (!t) + return log_oom(); + + af = af_from_name(t); + if (af <= 0) { + log_syntax(unit, LOG_ERR, filename, line, EINVAL, "Failed to parse address family, ignoring: %s", t); + continue; + } + + /* If we previously wanted to forbid an address family and now + * we want to allow it, then remove it from the list + */ + if (!invert == c->address_families_whitelist) { + r = set_put(c->address_families, INT_TO_PTR(af)); + if (r == -EEXIST) + continue; + if (r < 0) + return log_oom(); + } else + set_remove(c->address_families, INT_TO_PTR(af)); + } + + return 0; +} #endif int config_parse_unit_slice( @@ -3024,6 +3100,7 @@ void unit_dump_config_items(FILE *f) { { config_parse_syscall_filter, "SYSCALLS" }, { config_parse_syscall_archs, "ARCHS" }, { config_parse_syscall_errno, "ERRNO" }, + { config_parse_address_families, "FAMILIES" }, #endif { config_parse_cpu_shares, "SHARES" }, { config_parse_memory_limit, "LIMIT" }, @@ -3039,6 +3116,7 @@ void unit_dump_config_items(FILE *f) { #endif { config_parse_job_mode, "MODE" }, { config_parse_job_mode_isolate, "BOOLEAN" }, + { config_parse_personality, "PERSONALITY" }, }; const char *prev = NULL; diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 4a5ec35cb..c5dbe6157 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -90,6 +90,7 @@ int config_parse_job_mode_isolate(const char *unit, const char *filename, unsign int config_parse_exec_selinux_context(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_personality(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const 
char *rvalue, void *data, void *userdata); int config_parse_exec_apparmor_profile(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_address_families(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/shared/.gitignore b/src/shared/.gitignore index c9b5f8171..9f4ec9f9c 100644 --- a/src/shared/.gitignore +++ b/src/shared/.gitignore @@ -2,3 +2,7 @@ /errno-from-name.h /errno-list.txt /errno-to-name.h +/af-from-name.gperf +/af-from-name.h +/af-list.txt +/af-to-name.h diff --git a/src/shared/af-list.c b/src/shared/af-list.c new file mode 100644 index 000000000..f396115a3 --- /dev/null +++ b/src/shared/af-list.c @@ -0,0 +1,58 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +/*** + This file is part of systemd. + + Copyright 2013 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/ + +#include +#include + +#include "util.h" +#include "af-list.h" + +static const struct af_name* lookup_af(register const char *str, register unsigned int len); + +#include "af-to-name.h" +#include "af-from-name.h" + +const char *af_to_name(int id) { + + if (id <= 0) + return NULL; + + if (id >= (int) ELEMENTSOF(af_names)) + return NULL; + + return af_names[id]; +} + +int af_from_name(const char *name) { + const struct af_name *sc; + + assert(name); + + sc = lookup_af(name, strlen(name)); + if (!sc) + return AF_UNSPEC; + + return sc->id; +} + +int af_max(void) { + return ELEMENTSOF(af_names); +} diff --git a/src/shared/af-list.h b/src/shared/af-list.h new file mode 100644 index 000000000..e346ab87f --- /dev/null +++ b/src/shared/af-list.h @@ -0,0 +1,27 @@ +/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ + +#pragma once + +/*** + This file is part of systemd. + + Copyright 2014 Lennart Poettering + + systemd is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or + (at your option) any later version. + + systemd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/ + +const char *af_to_name(int id); +int af_from_name(const char *name); + +int af_max(void); diff --git a/src/shared/exit-status.c b/src/shared/exit-status.c index 902f55ac6..c1b04a386 100644 --- a/src/shared/exit-status.c +++ b/src/shared/exit-status.c @@ -139,6 +139,9 @@ const char* exit_status_to_string(ExitStatus status, ExitStatusLevel level) { case EXIT_APPARMOR_PROFILE: return "APPARMOR"; + + case EXIT_ADDRESS_FAMILIES: + return "ADDRESS_FAMILIES"; } } diff --git a/src/shared/exit-status.h b/src/shared/exit-status.h index de379f1aa..e7f12032e 100644 --- a/src/shared/exit-status.h +++ b/src/shared/exit-status.h @@ -70,7 +70,8 @@ typedef enum ExitStatus { EXIT_SECCOMP, EXIT_SELINUX_CONTEXT, EXIT_PERSONALITY, /* 230 */ - EXIT_APPARMOR_PROFILE + EXIT_APPARMOR_PROFILE, + EXIT_ADDRESS_FAMILIES, } ExitStatus; typedef enum ExitStatusLevel {