From 417116f23432073162ebfcb286a7800846482eed Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 3 Jun 2014 23:41:44 +0200 Subject: [PATCH] core: add new ReadOnlySystem= and ProtectedHome= settings for service units ReadOnlySystem= uses fs namespaces to mount /usr and /boot read-only for a service. ProtectedHome= uses fs namespaces to mount /home and /run/user inaccessible or read-only for a service. This patch also enables these settings for all our long-running services. Together they should be good building block for a minimal service sandbox, removing the ability for services to modify the operating system or access the user's private data. --- man/systemd.exec.xml | 61 ++++++++++++++++++++++- src/core/dbus-execute.c | 5 ++ src/core/execute.c | 11 +++- src/core/execute.h | 3 ++ src/core/load-fragment-gperf.gperf.m4 | 2 + src/core/load-fragment.c | 43 ++++++++++++++++ src/core/load-fragment.h | 1 + src/core/namespace.c | 26 +++++++++- src/core/namespace.h | 15 ++++++ src/test/test-ns.c | 2 + units/systemd-hostnamed.service.in | 2 + units/systemd-journal-gatewayd.service.in | 2 + units/systemd-journald.service.in | 2 + units/systemd-localed.service.in | 2 + units/systemd-logind.service.in | 2 + units/systemd-machined.service.in | 2 + units/systemd-networkd.service.in | 2 + units/systemd-resolved.service.in | 2 + units/systemd-timedated.service.in | 2 + units/systemd-timesyncd.service.in | 2 + units/systemd-udevd.service.in | 2 + 21 files changed, 187 insertions(+), 4 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 3f27d13c3..366430349 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -764,7 +764,7 @@ capability sets as documented in cap_from_text3. Note that these capability sets are - usually influenced by the capabilities + usually influenced (and filtered) by the capabilities attached to the executed file. Due to that CapabilityBoundingSet= @@ -934,6 +934,63 @@ accessible). 
+ + ReadOnlySystem= + + Takes a boolean + argument. If true, mounts the + /usr and + /boot directories + read-only for processes invoked by + this unit. This setting ensures that + any modification of the vendor + supplied operating system is + prohibited for the service. It is + recommended to enable this setting for + all long-running services, unless they + are involved with system updates or + need to modify the operating system in + other ways. Note however, that + processes retaining the CAP_SYS_ADMIN + capability can undo the effect of this + setting. This setting is hence + particularly useful for daemons which + have this capability removed, for + example with + CapabilityBoundingSet=. Defaults + to off. + + + + ProtectedHome= + + Takes a boolean + argument or + read-only. If true, + the directories + /home and + /run/user are + made inaccessible and empty for + processes invoked by this unit. If set + to read-only the + two directories are made read-only + instead. It is recommended to enable + this setting for all long-running + services (in particular network-facing + ones), to ensure they cannot get access + to private user data, unless the + services actually require access to + the user's private data. Note however, + that processes retaining the + CAP_SYS_ADMIN capability can undo the + effect of this setting. This setting + is hence particularly useful for + daemons which have this capability + removed, for example with + CapabilityBoundingSet=. Defaults + to off. 
+ + MountFlags= @@ -968,6 +1025,8 @@ namespace related options (PrivateTmp=, PrivateDevices=, + ReadOnlySystem=, + ProtectedHome=, ReadOnlyDirectories=, InaccessibleDirectories= and diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 13b3d0dd1..2aa08c143 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -35,6 +35,7 @@ #include "capability.h" #include "env-util.h" #include "af-list.h" +#include "namespace.h" #ifdef HAVE_SECCOMP #include "seccomp-util.h" @@ -44,6 +45,8 @@ BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutp static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput); +static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_protected_home, protected_home, ProtectedHome); + static int property_get_environment_files( sd_bus *bus, const char *path, @@ -626,6 +629,8 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectedHome", "s", bus_property_get_protected_home, offsetof(ExecContext, protected_home), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ReadOnlySystem", "b", bus_property_get_bool, offsetof(ExecContext, read_only_system), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/execute.c b/src/core/execute.c index 
af8e7c725..ce8b9bcb8 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1569,7 +1569,9 @@ int exec_spawn(ExecCommand *command, !strv_isempty(context->inaccessible_dirs) || context->mount_flags != 0 || (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) || - context->private_devices) { + context->private_devices || + context->read_only_system || + context->protected_home != PROTECTED_HOME_NO) { char *tmp = NULL, *var = NULL; @@ -1593,8 +1595,9 @@ int exec_spawn(ExecCommand *command, tmp, var, context->private_devices, + context->protected_home, + context->read_only_system, context->mount_flags); - if (err < 0) { r = EXIT_NAMESPACE; goto fail_child; @@ -2111,6 +2114,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sPrivateTmp: %s\n" "%sPrivateNetwork: %s\n" "%sPrivateDevices: %s\n" + "%sProtectedHome: %s\n" + "%sReadOnlySystem: %s\n" "%sIgnoreSIGPIPE: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", @@ -2119,6 +2124,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->private_tmp), prefix, yes_no(c->private_network), prefix, yes_no(c->private_devices), + prefix, protected_home_to_string(c->protected_home), + prefix, yes_no(c->read_only_system), prefix, yes_no(c->ignore_sigpipe)); STRV_FOREACH(e, c->environment) diff --git a/src/core/execute.h b/src/core/execute.h index c9e29ffc8..3d6f77c8e 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -39,6 +39,7 @@ typedef struct ExecRuntime ExecRuntime; #include "set.h" #include "fdset.h" #include "missing.h" +#include "namespace.h" typedef enum ExecInput { EXEC_INPUT_NULL, @@ -156,6 +157,8 @@ struct ExecContext { bool private_tmp; bool private_network; bool private_devices; + bool read_only_system; + ProtectedHome protected_home; bool no_new_privileges; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 4c066a8b9..97382d474 100644 
--- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -80,6 +80,8 @@ $1.InaccessibleDirectories, config_parse_namespace_path_strv, 0, $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) +$1.ReadOnlySystem, config_parse_bool, 0, offsetof($1, exec_context.read_only_system) +$1.ProtectedHome, config_parse_protected_home, 0, offsetof($1, exec_context) $1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context) $1.Personality, config_parse_personality, 0, offsetof($1, exec_context.personality) $1.RuntimeDirectoryMode, config_parse_mode, 0, offsetof($1, exec_context.runtime_directory_mode) diff --git a/src/core/load-fragment.c b/src/core/load-fragment.c index 6403e4111..9df78082a 100644 --- a/src/core/load-fragment.c +++ b/src/core/load-fragment.c @@ -3044,6 +3044,49 @@ int config_parse_no_new_privileges( return 0; } +int config_parse_protected_home( + const char* unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + ExecContext *c = data; + int k; + + assert(filename); + assert(lvalue); + assert(rvalue); + assert(data); + + /* Our enum shall be a superset of booleans, hence first try + * to parse as a boolean, and then as enum */ + + k = parse_boolean(rvalue); + if (k > 0) + c->protected_home = PROTECTED_HOME_YES; + else if (k == 0) + c->protected_home = PROTECTED_HOME_NO; + else { + ProtectedHome h; + + h = protected_home_from_string(rvalue); + if (h < 0){ + log_syntax(unit, LOG_ERR, filename, line, -h, "Failed to parse protected home value, ignoring: %s", rvalue); + return 0; + } + + c->protected_home = h; + } + + return 0; +} + #define FOLLOW_MAX 8 static int open_follow(char **filename, FILE 
**_f, Set *names, char **_final) { diff --git a/src/core/load-fragment.h b/src/core/load-fragment.h index 9ef9caa99..279efa983 100644 --- a/src/core/load-fragment.h +++ b/src/core/load-fragment.h @@ -97,6 +97,7 @@ int config_parse_set_status(const char *unit, const char *filename, unsigned lin int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); +int config_parse_protected_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata); /* gperf prototypes */ const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length); diff --git a/src/core/namespace.c b/src/core/namespace.c index 9f15211cb..de09e9f2c 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -331,6 +331,8 @@ int setup_namespace( char* tmp_dir, char* var_tmp_dir, bool private_dev, + ProtectedHome protected_home, + bool read_only_system, unsigned mount_flags) { BindMount *m, *mounts = NULL; @@ -347,7 +349,9 @@ int setup_namespace( strv_length(read_write_dirs) + strv_length(read_only_dirs) + strv_length(inaccessible_dirs) + - private_dev; + private_dev + + (protected_home != PROTECTED_HOME_NO ? 2 : 0) + + (read_only_system ? 
2 : 0); if (n > 0) { m = mounts = (BindMount *) alloca(n * sizeof(BindMount)); @@ -381,6 +385,18 @@ int setup_namespace( m++; } + if (protected_home != PROTECTED_HOME_NO) { + r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE); + if (r < 0) + return r; + } + + if (read_only_system) { + r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY); + if (r < 0) + return r; + } + assert(mounts + n == m); qsort(mounts, n, sizeof(BindMount), mount_path_compare); @@ -581,3 +597,11 @@ fail: return r; } + +static const char *const protected_home_table[_PROTECTED_HOME_MAX] = { + [PROTECTED_HOME_NO] = "no", + [PROTECTED_HOME_YES] = "yes", + [PROTECTED_HOME_READ_ONLY] = "read-only", +}; + +DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome); diff --git a/src/core/namespace.h b/src/core/namespace.h index fb1fc6ec0..b985bdf51 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -23,12 +23,24 @@ #include +#include "macro.h" + +typedef enum ProtectedHome { + PROTECTED_HOME_NO, + PROTECTED_HOME_YES, + PROTECTED_HOME_READ_ONLY, + _PROTECTED_HOME_MAX, + _PROTECTED_HOME_INVALID = -1 +} ProtectedHome; + int setup_namespace(char **read_write_dirs, char **read_only_dirs, char **inaccessible_dirs, char *tmp_dir, char *var_tmp_dir, bool private_dev, + ProtectedHome protected_home, + bool read_only_system, unsigned mount_flags); int setup_tmp_dirs(const char *id, @@ -36,3 +48,6 @@ int setup_tmp_dirs(const char *id, char **var_tmp_dir); int setup_netns(int netns_storage_socket[2]); + +const char* protected_home_to_string(ProtectedHome p) _const_; +ProtectedHome protected_home_from_string(const char *s) _pure_; diff --git a/src/test/test-ns.c b/src/test/test-ns.c index ad0d0419c..71581934c 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -60,6 +60,8 @@ int main(int argc, char *argv[]) { tmp_dir, var_tmp_dir, true, + PROTECTED_HOME_NO, + false, 0); if (r < 0) { log_error("Failed to setup 
namespace: %s", strerror(-r)); diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in index 79e22c1d4..497b8d997 100644 --- a/units/systemd-hostnamed.service.in +++ b/units/systemd-hostnamed.service.in @@ -18,3 +18,5 @@ WatchdogSec=1min PrivateTmp=yes PrivateDevices=yes PrivateNetwork=yes +ReadOnlySystem=yes +ProtectedHome=yes diff --git a/units/systemd-journal-gatewayd.service.in b/units/systemd-journal-gatewayd.service.in index e8e571e69..3695240cb 100644 --- a/units/systemd-journal-gatewayd.service.in +++ b/units/systemd-journal-gatewayd.service.in @@ -17,6 +17,8 @@ SupplementaryGroups=systemd-journal PrivateTmp=yes PrivateDevices=yes PrivateNetwork=yes +ReadOnlySystem=yes +ProtectedHome=yes [Install] Also=systemd-journal-gatewayd.socket diff --git a/units/systemd-journald.service.in b/units/systemd-journald.service.in index de9387933..ba3f84720 100644 --- a/units/systemd-journald.service.in +++ b/units/systemd-journald.service.in @@ -20,6 +20,8 @@ RestartSec=0 NotifyAccess=all StandardOutput=null CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID +ReadOnlySystem=yes +ProtectedHome=yes WatchdogSec=1min # Increase the default a bit in order to allow many simultaneous diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in index ae1c5e59d..e1792d654 100644 --- a/units/systemd-localed.service.in +++ b/units/systemd-localed.service.in @@ -18,3 +18,5 @@ WatchdogSec=1min PrivateTmp=yes PrivateDevices=yes PrivateNetwork=yes +ReadOnlySystem=yes +ProtectedHome=yes diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in index c6cbd1c8d..68803fb38 100644 --- a/units/systemd-logind.service.in +++ b/units/systemd-logind.service.in @@ -25,6 +25,8 @@ RestartSec=0 BusName=org.freedesktop.login1 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH 
CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG WatchdogSec=1min +ReadOnlySystem=yes +ProtectedHome=yes # Increase the default a bit in order to allow many simultaneous # logins since we keep one fd open per session. diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in index 1a27c6e42..07522e12a 100644 --- a/units/systemd-machined.service.in +++ b/units/systemd-machined.service.in @@ -20,3 +20,5 @@ WatchdogSec=1min PrivateTmp=yes PrivateDevices=yes PrivateNetwork=yes +ReadOnlySystem=yes +ProtectedHome=yes diff --git a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in index 3538295df..a92899920 100644 --- a/units/systemd-networkd.service.in +++ b/units/systemd-networkd.service.in @@ -20,6 +20,8 @@ Restart=always RestartSec=0 ExecStart=@rootlibexecdir@/systemd-networkd CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER +ReadOnlySystem=yes +ProtectedHome=yes WatchdogSec=1min [Install] diff --git a/units/systemd-resolved.service.in b/units/systemd-resolved.service.in index 9d422ca7f..787fde2c4 100644 --- a/units/systemd-resolved.service.in +++ b/units/systemd-resolved.service.in @@ -16,6 +16,8 @@ Restart=always RestartSec=0 ExecStart=@rootlibexecdir@/systemd-resolved CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER +ReadOnlySystem=yes +ProtectedHome=yes [Install] WantedBy=multi-user.target diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in index 5c90290cd..9658149ee 100644 --- a/units/systemd-timedated.service.in +++ b/units/systemd-timedated.service.in @@ -16,3 +16,5 @@ BusName=org.freedesktop.timedate1 CapabilityBoundingSet=CAP_SYS_TIME WatchdogSec=1min PrivateTmp=yes +ReadOnlySystem=yes +ProtectedHome=yes diff --git a/units/systemd-timesyncd.service.in b/units/systemd-timesyncd.service.in index cbde3ff67..030e4a042 100644 --- 
a/units/systemd-timesyncd.service.in +++ b/units/systemd-timesyncd.service.in @@ -23,6 +23,8 @@ ExecStart=@rootlibexecdir@/systemd-timesyncd CapabilityBoundingSet=CAP_SYS_TIME CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER PrivateTmp=yes PrivateDevices=yes +ReadOnlySystem=yes +ProtectedHome=yes WatchdogSec=1min [Install] diff --git a/units/systemd-udevd.service.in b/units/systemd-udevd.service.in index ddee01537..82275f052 100644 --- a/units/systemd-udevd.service.in +++ b/units/systemd-udevd.service.in @@ -22,3 +22,5 @@ Restart=always RestartSec=0 ExecStart=@rootlibexecdir@/systemd-udevd MountFlags=slave +ReadOnlySystem=yes +ProtectedHome=yes -- 2.30.2