chiark / gitweb /
core: add new ReadOnlySystem= and ProtectedHome= settings for service units
authorLennart Poettering <lennart@poettering.net>
Tue, 3 Jun 2014 21:41:44 +0000 (23:41 +0200)
committerLennart Poettering <lennart@poettering.net>
Tue, 3 Jun 2014 21:57:51 +0000 (23:57 +0200)
ReadOnlySystem= uses fs namespaces to mount /usr and /boot read-only for
a service.

ProtectedHome= uses fs namespaces to mount /home and /run/user
inaccessible or read-only for a service.

This patch also enables these settings for all our long-running services.

Together they should be a good building block for a minimal service
sandbox, removing the ability for services to modify the operating
system or access the user's private data.

21 files changed:
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h
src/core/namespace.c
src/core/namespace.h
src/test/test-ns.c
units/systemd-hostnamed.service.in
units/systemd-journal-gatewayd.service.in
units/systemd-journald.service.in
units/systemd-localed.service.in
units/systemd-logind.service.in
units/systemd-machined.service.in
units/systemd-networkd.service.in
units/systemd-resolved.service.in
units/systemd-timedated.service.in
units/systemd-timesyncd.service.in
units/systemd-udevd.service.in

index 3f27d13..3664303 100644 (file)
                                 capability sets as documented in
                                 <citerefentry><refentrytitle>cap_from_text</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
                                 Note that these capability sets are
-                                usually influenced by the capabilities
+                                usually influenced (and filtered) by the capabilities
                                 attached to the executed file. Due to
                                 that
                                 <varname>CapabilityBoundingSet=</varname>
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>ReadOnlySystem=</varname></term>
+
+                                <listitem><para>Takes a boolean
+                                argument. If true, mounts the
+                                <filename>/usr</filename> and
+                                <filename>/boot</filename> directories
+                                read-only for processes invoked by
+                                this unit. This setting ensures that
+                                any modification of the vendor
+                                supplied operating system is
+                                prohibited for the service. It is
+                                recommended to enable this setting for
+                                all long-running services, unless they
+                                are involved with system updates or
+                                need to modify the operating system in
+                                other ways. Note however, that
+                                processes retaining the CAP_SYS_ADMIN
+                                capability can undo the effect of this
+                                setting. This setting is hence
+                                particularly useful for daemons which
+                                have this capability removed, for
+                                example with
+                                <varname>CapabilityBoundingSet=</varname>. Defaults
+                                to off.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
+                                <term><varname>ProtectedHome=</varname></term>
+
+                                <listitem><para>Takes a boolean
+                                argument or
+                                <literal>read-only</literal>. If true,
+                                the directories
+                                <filename>/home</filename> and
+                                <filename>/run/user</filename> are
+                                made inaccessible and empty for
+                                processes invoked by this unit. If set
+                                to <literal>read-only</literal> the
+                                two directories are made read-only
+                                instead. It is recommended to enable
+                                this setting for all long-running
+                                services (in particular network-facing
+                                ones), to ensure they cannot get access
+                                to private user data, unless the
+                                services actually require access to
+                                the user's private data. Note however,
+                                that processes retaining the
+                                CAP_SYS_ADMIN capability can undo the
+                                effect of this setting. This setting
+                                is hence particularly useful for
+                                daemons which have this capability
+                                removed, for example with
+                                <varname>CapabilityBoundingSet=</varname>. Defaults
+                                to off.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>MountFlags=</varname></term>
 
                                 <listitem><para>Takes a mount
                                 namespace related options
                                 (<varname>PrivateTmp=</varname>,
                                 <varname>PrivateDevices=</varname>,
+                                <varname>ReadOnlySystem=</varname>,
+                                <varname>ProtectedHome=</varname>,
                                 <varname>ReadOnlyDirectories=</varname>,
                                 <varname>InaccessibleDirectories=</varname>
                                 and
index 13b3d0d..2aa08c1 100644 (file)
@@ -35,6 +35,7 @@
 #include "capability.h"
 #include "env-util.h"
 #include "af-list.h"
+#include "namespace.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -44,6 +45,8 @@ BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_exec_output, exec_output, ExecOutp
 
 static BUS_DEFINE_PROPERTY_GET_ENUM(property_get_exec_input, exec_input, ExecInput);
 
+static BUS_DEFINE_PROPERTY_GET_ENUM(bus_property_get_protected_home, protected_home, ProtectedHome);
+
 static int property_get_environment_files(
                 sd_bus *bus,
                 const char *path,
@@ -626,6 +629,8 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ProtectedHome", "s", bus_property_get_protected_home, offsetof(ExecContext, protected_home), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("ReadOnlySystem", "b", bus_property_get_bool, offsetof(ExecContext, read_only_system), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SELinuxContext", "(bs)", property_get_selinux_context, 0, SD_BUS_VTABLE_PROPERTY_CONST),
index af8e7c7..ce8b9bc 100644 (file)
@@ -1569,7 +1569,9 @@ int exec_spawn(ExecCommand *command,
                     !strv_isempty(context->inaccessible_dirs) ||
                     context->mount_flags != 0 ||
                     (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
-                    context->private_devices) {
+                    context->private_devices ||
+                    context->read_only_system ||
+                    context->protected_home != PROTECTED_HOME_NO) {
 
                         char *tmp = NULL, *var = NULL;
 
@@ -1593,8 +1595,9 @@ int exec_spawn(ExecCommand *command,
                                         tmp,
                                         var,
                                         context->private_devices,
+                                        context->protected_home,
+                                        context->read_only_system,
                                         context->mount_flags);
-
                         if (err < 0) {
                                 r = EXIT_NAMESPACE;
                                 goto fail_child;
@@ -2111,6 +2114,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 "%sPrivateTmp: %s\n"
                 "%sPrivateNetwork: %s\n"
                 "%sPrivateDevices: %s\n"
+                "%sProtectedHome: %s\n"
+                "%sReadOnlySystem: %s\n"
                 "%sIgnoreSIGPIPE: %s\n",
                 prefix, c->umask,
                 prefix, c->working_directory ? c->working_directory : "/",
@@ -2119,6 +2124,8 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->private_tmp),
                 prefix, yes_no(c->private_network),
                 prefix, yes_no(c->private_devices),
+                prefix, protected_home_to_string(c->protected_home),
+                prefix, yes_no(c->read_only_system),
                 prefix, yes_no(c->ignore_sigpipe));
 
         STRV_FOREACH(e, c->environment)
index c9e29ff..3d6f77c 100644 (file)
@@ -39,6 +39,7 @@ typedef struct ExecRuntime ExecRuntime;
 #include "set.h"
 #include "fdset.h"
 #include "missing.h"
+#include "namespace.h"
 
 typedef enum ExecInput {
         EXEC_INPUT_NULL,
@@ -156,6 +157,8 @@ struct ExecContext {
         bool private_tmp;
         bool private_network;
         bool private_devices;
+        bool read_only_system;
+        ProtectedHome protected_home;
 
         bool no_new_privileges;
 
index 4c066a8..97382d4 100644 (file)
@@ -80,6 +80,8 @@ $1.InaccessibleDirectories,      config_parse_namespace_path_strv,   0,
 $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)
 $1.PrivateNetwork,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_network)
 $1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)
+$1.ReadOnlySystem,               config_parse_bool,                  0,                             offsetof($1, exec_context.read_only_system)
+$1.ProtectedHome,                config_parse_protected_home,        0,                             offsetof($1, exec_context)
 $1.MountFlags,                   config_parse_exec_mount_flags,      0,                             offsetof($1, exec_context)
 $1.Personality,                  config_parse_personality,           0,                             offsetof($1, exec_context.personality)
 $1.RuntimeDirectoryMode,         config_parse_mode,                  0,                             offsetof($1, exec_context.runtime_directory_mode)
index 6403e41..9df7808 100644 (file)
@@ -3044,6 +3044,49 @@ int config_parse_no_new_privileges(
         return 0;
 }
 
+int config_parse_protected_home(
+                const char* unit,
+                const char *filename,
+                unsigned line,
+                const char *section,
+                unsigned section_line,
+                const char *lvalue,
+                int ltype,
+                const char *rvalue,
+                void *data,
+                void *userdata) {
+
+        ExecContext *c = data; /* context whose protected_home field is written below */
+        int k;
+
+        assert(filename);
+        assert(lvalue);
+        assert(rvalue);
+        assert(data);
+
+        /* Config parser for the ProtectedHome= setting: stores the
+         * parsed value in c->protected_home and always returns 0.
+         *
+         * Our enum shall be a superset of booleans, hence first try
+         * to parse as boolean, and then as enum. A value that is
+         * neither a boolean nor a known enum name is logged and
+         * ignored, leaving c->protected_home untouched.
+         *
+         * NOTE(review): on failure -h is handed to log_syntax() as
+         * the error argument. If protected_home_from_string() yields
+         * _PROTECTED_HOME_INVALID (-1), that evaluates to 1, which is
+         * presumably not the intended errno (EINVAL?) -- confirm
+         * against log_syntax()'s contract. */
+
+        k = parse_boolean(rvalue);
+        if (k > 0) /* boolean true */
+                c->protected_home = PROTECTED_HOME_YES;
+        else if (k == 0) /* boolean false */
+                c->protected_home = PROTECTED_HOME_NO;
+        else { /* not a boolean: fall back to the enum's string names */
+                ProtectedHome h;
+
+                h = protected_home_from_string(rvalue);
+                if (h < 0){
+                        log_syntax(unit, LOG_ERR, filename, line, -h, "Failed to parse protected home value, ignoring: %s", rvalue);
+                        return 0; /* bad value is not fatal: report success so parsing continues */
+                }
+
+                c->protected_home = h;
+        }
+
+        return 0;
+}
+
 #define FOLLOW_MAX 8
 
 static int open_follow(char **filename, FILE **_f, Set *names, char **_final) {
index 9ef9caa..279efa9 100644 (file)
@@ -97,6 +97,7 @@ int config_parse_set_status(const char *unit, const char *filename, unsigned lin
 int config_parse_namespace_path_strv(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_no_new_privileges(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_cpu_quota(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_protected_home(const char* unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 
 /* gperf prototypes */
 const struct ConfigPerfItem* load_fragment_gperf_lookup(const char *key, unsigned length);
index 9f15211..de09e9f 100644 (file)
@@ -331,6 +331,8 @@ int setup_namespace(
                 char* tmp_dir,
                 char* var_tmp_dir,
                 bool private_dev,
+                ProtectedHome protected_home,
+                bool read_only_system,
                 unsigned mount_flags) {
 
         BindMount *m, *mounts = NULL;
@@ -347,7 +349,9 @@ int setup_namespace(
                 strv_length(read_write_dirs) +
                 strv_length(read_only_dirs) +
                 strv_length(inaccessible_dirs) +
-                private_dev;
+                private_dev +
+                (protected_home != PROTECTED_HOME_NO ? 2 : 0) +
+                (read_only_system ? 2 : 0);
 
         if (n > 0) {
                 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
@@ -381,6 +385,18 @@ int setup_namespace(
                         m++;
                 }
 
+                if (protected_home != PROTECTED_HOME_NO) {
+                        r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user"), protected_home == PROTECTED_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+                        if (r < 0)
+                                return r;
+                }
+
+                if (read_only_system) {
+                        r = append_mounts(&m, STRV_MAKE("/usr", "-/boot"), READONLY);
+                        if (r < 0)
+                                return r;
+                }
+
                 assert(mounts + n == m);
 
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
@@ -581,3 +597,11 @@ fail:
 
         return r;
 }
+
+static const char *const protected_home_table[_PROTECTED_HOME_MAX] = { /* string name of each ProtectedHome value, indexed by the enum */
+        [PROTECTED_HOME_NO] = "no",
+        [PROTECTED_HOME_YES] = "yes",
+        [PROTECTED_HOME_READ_ONLY] = "read-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protected_home, ProtectedHome); /* presumably expands to protected_home_to_string()/protected_home_from_string() over the table above -- confirm against the macro definition */
index fb1fc6e..b985bdf 100644 (file)
 
 #include <stdbool.h>
 
+#include "macro.h"
+
+typedef enum ProtectedHome { /* modes of the ProtectedHome= setting, consumed by setup_namespace() */
+        PROTECTED_HOME_NO, /* setting disabled: setup_namespace() adds no /home or /run/user mounts */
+        PROTECTED_HOME_YES, /* /home and /run/user are mounted inaccessible */
+        PROTECTED_HOME_READ_ONLY, /* /home and /run/user are mounted read-only */
+        _PROTECTED_HOME_MAX, /* number of valid values; used to size the string table */
+        _PROTECTED_HOME_INVALID = -1 /* negative sentinel; callers test parse results with "< 0" */
+} ProtectedHome;
+
 int setup_namespace(char **read_write_dirs,
                     char **read_only_dirs,
                     char **inaccessible_dirs,
                     char *tmp_dir,
                     char *var_tmp_dir,
                     bool private_dev,
+                    ProtectedHome protected_home,
+                    bool read_only_system,
                     unsigned mount_flags);
 
 int setup_tmp_dirs(const char *id,
@@ -36,3 +48,6 @@ int setup_tmp_dirs(const char *id,
                   char **var_tmp_dir);
 
 int setup_netns(int netns_storage_socket[2]);
+
+const char* protected_home_to_string(ProtectedHome p) _const_;
+ProtectedHome protected_home_from_string(const char *s) _pure_;
index ad0d041..7158193 100644 (file)
@@ -60,6 +60,8 @@ int main(int argc, char *argv[]) {
                             tmp_dir,
                             var_tmp_dir,
                             true,
+                            PROTECTED_HOME_NO,
+                            false,
                             0);
         if (r < 0) {
                 log_error("Failed to setup namespace: %s", strerror(-r));
index 79e22c1..497b8d9 100644 (file)
@@ -18,3 +18,5 @@ WatchdogSec=1min
 PrivateTmp=yes
 PrivateDevices=yes
 PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
index e8e571e..3695240 100644 (file)
@@ -17,6 +17,8 @@ SupplementaryGroups=systemd-journal
 PrivateTmp=yes
 PrivateDevices=yes
 PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
 
 [Install]
 Also=systemd-journal-gatewayd.socket
index de93879..ba3f847 100644 (file)
@@ -20,6 +20,8 @@ RestartSec=0
 NotifyAccess=all
 StandardOutput=null
 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE CAP_SYS_PTRACE CAP_SYSLOG CAP_AUDIT_CONTROL CAP_CHOWN CAP_DAC_READ_SEARCH CAP_FOWNER CAP_SETUID CAP_SETGID
+ReadOnlySystem=yes
+ProtectedHome=yes
 WatchdogSec=1min
 
 # Increase the default a bit in order to allow many simultaneous
index ae1c5e5..e1792d6 100644 (file)
@@ -18,3 +18,5 @@ WatchdogSec=1min
 PrivateTmp=yes
 PrivateDevices=yes
 PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
index c6cbd1c..68803fb 100644 (file)
@@ -25,6 +25,8 @@ RestartSec=0
 BusName=org.freedesktop.login1
 CapabilityBoundingSet=CAP_SYS_ADMIN CAP_AUDIT_CONTROL CAP_CHOWN CAP_KILL CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_FOWNER CAP_SYS_TTY_CONFIG
 WatchdogSec=1min
+ReadOnlySystem=yes
+ProtectedHome=yes
 
 # Increase the default a bit in order to allow many simultaneous
 # logins since we keep one fd open per session.
index 1a27c6e..07522e1 100644 (file)
@@ -20,3 +20,5 @@ WatchdogSec=1min
 PrivateTmp=yes
 PrivateDevices=yes
 PrivateNetwork=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
index 3538295..a928999 100644 (file)
@@ -20,6 +20,8 @@ Restart=always
 RestartSec=0
 ExecStart=@rootlibexecdir@/systemd-networkd
 CapabilityBoundingSet=CAP_NET_ADMIN CAP_NET_BIND_SERVICE CAP_NET_BROADCAST CAP_NET_RAW CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
+ReadOnlySystem=yes
+ProtectedHome=yes
 WatchdogSec=1min
 
 [Install]
index 9d422ca..787fde2 100644 (file)
@@ -16,6 +16,8 @@ Restart=always
 RestartSec=0
 ExecStart=@rootlibexecdir@/systemd-resolved
 CapabilityBoundingSet=CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
+ReadOnlySystem=yes
+ProtectedHome=yes
 
 [Install]
 WantedBy=multi-user.target
index 5c90290..9658149 100644 (file)
@@ -16,3 +16,5 @@ BusName=org.freedesktop.timedate1
 CapabilityBoundingSet=CAP_SYS_TIME
 WatchdogSec=1min
 PrivateTmp=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
index cbde3ff..030e4a0 100644 (file)
@@ -23,6 +23,8 @@ ExecStart=@rootlibexecdir@/systemd-timesyncd
 CapabilityBoundingSet=CAP_SYS_TIME CAP_SETUID CAP_SETGID CAP_SETPCAP CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER
 PrivateTmp=yes
 PrivateDevices=yes
+ReadOnlySystem=yes
+ProtectedHome=yes
 WatchdogSec=1min
 
 [Install]
index ddee015..82275f0 100644 (file)
@@ -22,3 +22,5 @@ Restart=always
 RestartSec=0
 ExecStart=@rootlibexecdir@/systemd-udevd
 MountFlags=slave
+ReadOnlySystem=yes
+ProtectedHome=yes