chiark / gitweb /
main: add configuration option to alter capability bounding set for PID 1
authorLennart Poettering <lennart@poettering.net>
Thu, 24 May 2012 02:00:56 +0000 (04:00 +0200)
committerLennart Poettering <lennart@poettering.net>
Thu, 24 May 2012 02:00:56 +0000 (04:00 +0200)
This also ensures that caps dropped from the bounding set are also
dropped from the inheritable set, to be extra-secure. Usually that should
change very little though as the inheritable set is empty for all our uses
anyway.

12 files changed:
TODO
man/systemd.conf.xml
man/systemd.exec.xml
src/core/execute.c
src/core/load-fragment-gperf.gperf.m4
src/core/load-fragment.c
src/core/load-fragment.h
src/core/main.c
src/core/system.conf
src/nspawn/nspawn.c
src/shared/capability.c
src/shared/capability.h

diff --git a/TODO b/TODO
index 6553cb0..6d5eb73 100644 (file)
--- a/TODO
+++ b/TODO
@@ -23,7 +23,9 @@ Bugfixes:
 
 Features:
 
-* For Type=idle don't get confused by ExecStartPre= getting the effect of the idle hup but delaying jobs going away
+* make use of /sys/power/wake_lock in inhibitors
+
+* introduce "systemctl help" which invokes man for the man pages listed in Documentation=
 
 * drop accountsservice's StandardOutput=syslog and Type=dbus fields
 
@@ -140,8 +142,6 @@ Features:
 
 * There's currently no way to cancel fsck (used to be possible via C-c or c on the console)
 
-* keep an eye on https://bugzilla.gnome.org/show_bug.cgi?id=670100
-
 * journal: sanely deal with entries which are larger than the individual file size, but where the componets would fit
 
 * add command to systemctl to plot dependency graph as tree (see rhbz 795365)
@@ -152,9 +152,6 @@ Features:
 
 * default unix qlen is too small (10). bump sysctl? add sockopt?
 
-* support units generated by a generator and placed in /run/systemd/system/; the directory is
-  currently ignored because it is empty before the generatores are executed
-
 * Possibly, detect whether SysV init scripts can do reloading by looking for "echo Usage:" lines
 
 * figure out whether we should leave dbus around during shutdown
@@ -213,8 +210,6 @@ Features:
 
 * when an instanced service exits, remove its parent cgroup too if possible.
 
-* as Tom Gundersen pointed out there's a always a dep loop if people use crypto file systems with random keys
-
 * automatically escape unit names passed on the service (i.e. think "systemctl start serial-getty.service@serial/by-path/jshdfjsdfhkjh" being automatically escaped as necessary.
 
 * if we can not get user quota for tmpfs, mount a separate tmpfs instance
@@ -361,10 +356,6 @@ External:
    - allow disabling of UID passing for AUTH EXTERNAL
    - always pass cred data along each message
 
-* systemd --user
-    PR_SET_CHILD_REAPER patch: https://lkml.org/lkml/2011/7/28/426
-    (patch in linux-next, on the way to the next kernel)
-
 * fix alsa mixer restore to not print error when no config is stored
 
 * gnome-shell python script/glxinfo/is-accelerated must die
@@ -379,9 +370,6 @@ External:
   we are in 11-minutes-mode. When we trust the system time to NTP we also want
   the RTC to sync up.
 
-* patch kernel for cpu feature modalias for autoloading aes/kvm/...
-    (patches in linux-next, on the way to the next kernel)
-
 * kernel: add device_type = "fb", "fbcon" to class "graphics"
 
 Regularly:
index 7dfaa18..2659f9a 100644 (file)
                         </varlistentry>
 
                         <varlistentry>
+                                <term><varname>CapabilityBoundingSet=</varname></term>
+
+                                <listitem><para>Controls which
+                                capabilities to include in the
+                                capability bounding set for PID 1 and
+                                its children. See
+                                <citerefentry><refentrytitle>capabilities</refentrytitle><manvolnum>7</manvolnum></citerefentry>
+                                for details. Takes a whitespace
+                                separated list of capability names as
+                                read by
+                                <citerefentry><refentrytitle>cap_from_name</refentrytitle><manvolnum>3</manvolnum></citerefentry>.
+                                Capabilities listed will be included
+                                in the bounding set, all others are
+                                removed. If the list of capabilities
+                                is prefixed with ~ all but the listed
+                                capabilities will be included, the
+                                effect of the assignment
+                                inverted. Note that this option also
+                                effects the respective capabilities in
+                                the effective, permitted and
+                                inheritable capability sets. The
+                                capability bounding set may also be
+                                individually configured for units
+                                using the
+                                <varname>CapabilityBoundingSet=</varname>
+                                directive for units, but note that
+                                capabilities dropped for PID 1 cannot
+                                be regained in individual units, they
+                                are lost for good.</para></listitem>
+                        </varlistentry>
+
+                        <varlistentry>
                                 <term><varname>DefaultLimitCPU=</varname></term>
                                 <term><varname>DefaultLimitFSIZE=</varname></term>
                                 <term><varname>DefaultLimitDATA=</varname></term>
                                 <term><varname>DefaultLimitNICE=</varname></term>
                                 <term><varname>DefaultLimitRTPRIO=</varname></term>
                                 <term><varname>DefaultLimitRTTIME=</varname></term>
+
                                 <listitem><para>These settings control
-                                various default resource limits for units. See
+                                various default resource limits for
+                                units. See
                                 <citerefentry><refentrytitle>setrlimit</refentrytitle><manvolnum>2</manvolnum></citerefentry>
                                 for details. Use the string
                                 <varname>infinity</varname> to
                                 configure no limit on a specific
-                               resource. They can be overriden in units files
-                               using corresponding LimitXXXX parameter.</para></listitem>
+                                resource. These settings may be
+                                overriden in individual units
+                                using the corresponding LimitXXX=
+                                directives. Note that these resource
+                                limits are only defaults for units,
+                                they are not applied to PID 1
+                                itself.</para></listitem>
                         </varlistentry>
                 </variablelist>
         </refsect1>
index 219733b..0dc2ed4 100644 (file)
                                 is prefixed with ~ all but the listed
                                 capabilities will be included, the
                                 effect of the assignment
-                                inverted. Note that this option does
-                                not actually set or unset any
-                                capabilities in the effective,
-                                permitted or inherited capability
-                                sets. That's what
-                                <varname>Capabilities=</varname> is
-                                for. If this option is not used the
+                                inverted. Note that this option also
+                                effects the respective capabilities in
+                                the effective, permitted and
+                                inheritable capability sets, on top of
+                                what <varname>Capabilities=</varname>
+                                does. If this option is not used the
                                 capability bounding set is not
                                 modified on process execution, hence
                                 no limits on the capabilities of the
-                                process are enforced.</para></listitem>
+                                process are
+                                enforced.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
index bb841b7..9c2006e 100644 (file)
@@ -870,68 +870,6 @@ fail:
 }
 #endif
 
-static int do_capability_bounding_set_drop(uint64_t drop) {
-        unsigned long i;
-        cap_t old_cap = NULL, new_cap = NULL;
-        cap_flag_value_t fv;
-        int r;
-
-        /* If we are run as PID 1 we will lack CAP_SETPCAP by default
-         * in the effective set (yes, the kernel drops that when
-         * executing init!), so get it back temporarily so that we can
-         * call PR_CAPBSET_DROP. */
-
-        old_cap = cap_get_proc();
-        if (!old_cap)
-                return -errno;
-
-        if (cap_get_flag(old_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
-                r = -errno;
-                goto finish;
-        }
-
-        if (fv != CAP_SET) {
-                static const cap_value_t v = CAP_SETPCAP;
-
-                new_cap = cap_dup(old_cap);
-                if (!new_cap) {
-                        r = -errno;
-                        goto finish;
-                }
-
-                if (cap_set_flag(new_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
-                        r = -errno;
-                        goto finish;
-                }
-
-                if (cap_set_proc(new_cap) < 0) {
-                        r = -errno;
-                        goto finish;
-                }
-        }
-
-        for (i = 0; i <= cap_last_cap(); i++)
-                if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
-                        if (prctl(PR_CAPBSET_DROP, i) < 0) {
-                                r = -errno;
-                                goto finish;
-                        }
-                }
-
-        r = 0;
-
-finish:
-        if (new_cap)
-                cap_free(new_cap);
-
-        if (old_cap) {
-                cap_set_proc(old_cap);
-                cap_free(old_cap);
-        }
-
-        return r;
-}
-
 static void rename_process_from_path(const char *path) {
         char process_name[11];
         const char *p;
@@ -1398,7 +1336,7 @@ int exec_spawn(ExecCommand *command,
                         }
 
                         if (context->capability_bounding_set_drop) {
-                                err = do_capability_bounding_set_drop(context->capability_bounding_set_drop);
+                                err = capability_bounding_set_drop(context->capability_bounding_set_drop, false);
                                 if (err < 0) {
                                         r = EXIT_CAPABILITIES;
                                         goto fail_child;
index ea502bd..6f2a0d6 100644 (file)
@@ -46,7 +46,7 @@ $1.SyslogLevel,                  config_parse_level,                 0,
 $1.SyslogLevelPrefix,            config_parse_bool,                  0,                             offsetof($1, exec_context.syslog_level_prefix)
 $1.Capabilities,                 config_parse_exec_capabilities,     0,                             offsetof($1, exec_context)
 $1.SecureBits,                   config_parse_exec_secure_bits,      0,                             offsetof($1, exec_context)
-$1.CapabilityBoundingSet,        config_parse_exec_bounding_set,     0,                             offsetof($1, exec_context)
+$1.CapabilityBoundingSet,        config_parse_bounding_set,          0,                             offsetof($1, exec_context.capability_bounding_set_drop)
 $1.TimerSlackNSec,               config_parse_exec_timer_slack_nsec, 0,                             offsetof($1, exec_context)
 $1.LimitCPU,                     config_parse_limit,                 RLIMIT_CPU,                    offsetof($1, exec_context.rlimit)
 $1.LimitFSIZE,                   config_parse_limit,                 RLIMIT_FSIZE,                  offsetof($1, exec_context.rlimit)
index d226772..ff6e13e 100644 (file)
@@ -931,7 +931,7 @@ int config_parse_exec_secure_bits(
         return 0;
 }
 
-int config_parse_exec_bounding_set(
+int config_parse_bounding_set(
                 const char *filename,
                 unsigned line,
                 const char *section,
@@ -941,7 +941,7 @@ int config_parse_exec_bounding_set(
                 void *data,
                 void *userdata) {
 
-        ExecContext *c = data;
+        uint64_t *capability_bounding_set_drop = data;
         char *w;
         size_t l;
         char *state;
@@ -968,7 +968,8 @@ int config_parse_exec_bounding_set(
                 int r;
                 cap_value_t cap;
 
-                if (!(t = strndup(w, l)))
+                t = strndup(w, l);
+                if (!t)
                         return -ENOMEM;
 
                 r = cap_from_name(t, &cap);
@@ -983,9 +984,9 @@ int config_parse_exec_bounding_set(
         }
 
         if (invert)
-                c->capability_bounding_set_drop |= sum;
+                *capability_bounding_set_drop |= sum;
         else
-                c->capability_bounding_set_drop |= ~sum;
+                *capability_bounding_set_drop |= ~sum;
 
         return 0;
 }
@@ -2447,7 +2448,7 @@ void unit_dump_config_items(FILE *f) {
                 { config_parse_level,                 "LEVEL" },
                 { config_parse_exec_capabilities,     "CAPABILITIES" },
                 { config_parse_exec_secure_bits,      "SECUREBITS" },
-                { config_parse_exec_bounding_set,     "BOUNDINGSET" },
+                { config_parse_bounding_set,          "BOUNDINGSET" },
                 { config_parse_exec_timer_slack_nsec, "TIMERSLACK" },
                 { config_parse_limit,                 "LIMIT" },
                 { config_parse_unit_cgroup,           "CGROUP [...]" },
index 3b2ed09..6b38249 100644 (file)
@@ -56,7 +56,7 @@ int config_parse_exec_cpu_sched_prio(const char *filename, unsigned line, const
 int config_parse_exec_cpu_affinity(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_capabilities(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_secure_bits(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
-int config_parse_exec_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
+int config_parse_bounding_set(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_exec_timer_slack_nsec(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_limit(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
 int config_parse_unit_cgroup(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
index 4d9a2d4..9248c38 100644 (file)
@@ -50,6 +50,7 @@
 #include "watchdog.h"
 #include "path-util.h"
 #include "switch-root.h"
+#include "capability.h"
 
 #include "mount-setup.h"
 #include "loopback-setup.h"
@@ -88,6 +89,7 @@ static ExecOutput arg_default_std_error = EXEC_OUTPUT_INHERIT;
 static usec_t arg_runtime_watchdog = 0;
 static usec_t arg_shutdown_watchdog = 10 * USEC_PER_MINUTE;
 static struct rlimit *arg_default_rlimit[RLIMIT_NLIMITS] = {};
+static uint64_t arg_capability_bounding_set_drop = 0;
 
 static FILE* serialization = NULL;
 
@@ -678,6 +680,7 @@ static int parse_config_file(void) {
                 { "Manager", "JoinControllers",       config_parse_join_controllers, 0, &arg_join_controllers },
                 { "Manager", "RuntimeWatchdogSec",    config_parse_usec,         0, &arg_runtime_watchdog    },
                 { "Manager", "ShutdownWatchdogSec",   config_parse_usec,         0, &arg_shutdown_watchdog   },
+                { "Manager", "CapabilityBoundingSet", config_parse_bounding_set, 0, &arg_capability_bounding_set_drop },
                 { "Manager", "DefaultLimitCPU",       config_parse_limit,        0, &arg_default_rlimit[RLIMIT_CPU]},
                 { "Manager", "DefaultLimitFSIZE",     config_parse_limit,        0, &arg_default_rlimit[RLIMIT_FSIZE]},
                 { "Manager", "DefaultLimitDATA",      config_parse_limit,        0, &arg_default_rlimit[RLIMIT_DATA]},
@@ -1484,6 +1487,14 @@ int main(int argc, char *argv[]) {
         if (arg_running_as == MANAGER_SYSTEM && arg_runtime_watchdog > 0)
                 watchdog_set_timeout(&arg_runtime_watchdog);
 
+        if (arg_capability_bounding_set_drop) {
+                r = capability_bounding_set_drop(arg_capability_bounding_set_drop, true);
+                if (r < 0) {
+                        log_error("Failed to drop capability bounding set: %s", strerror(-r));
+                        goto finish;
+                }
+        }
+
         r = manager_new(arg_running_as, &m);
         if (r < 0) {
                 log_error("Failed to allocate manager object: %s", strerror(-r));
index 2b14d3e..7b9171b 100644 (file)
 #JoinControllers=cpu,cpuacct
 #RuntimeWatchdogSec=0
 #ShutdownWatchdogSec=10min
+#CapabilityBoundingSet=
+#DefaultLimitCPU=
+#DefaultLimitFSIZE=
+#DefaultLimitDATA=
+#DefaultLimitSTACK=
+#DefaultLimitCORE=
+#DefaultLimitRSS=
+#DefaultLimitNOFILE=
+#DefaultLimitAS=
+#DefaultLimitNPROC=
+#DefaultLimitMEMLOCK=
+#DefaultLimitLOCKS=
+#DefaultLimitSIGPENDING=
+#DefaultLimitMSGQUEUE=
+#DefaultLimitNICE=
+#DefaultLimitRTPRIO=
+#DefaultLimitRTTIME=
index 31e8b01..8a5eb34 100644 (file)
@@ -544,49 +544,31 @@ static int setup_hostname(void) {
 }
 
 static int drop_capabilities(void) {
-        static const unsigned long retain[] = {
-                CAP_CHOWN,
-                CAP_DAC_OVERRIDE,
-                CAP_DAC_READ_SEARCH,
-                CAP_FOWNER,
-                CAP_FSETID,
-                CAP_IPC_OWNER,
-                CAP_KILL,
-                CAP_LEASE,
-                CAP_LINUX_IMMUTABLE,
-                CAP_NET_BIND_SERVICE,
-                CAP_NET_BROADCAST,
-                CAP_NET_RAW,
-                CAP_SETGID,
-                CAP_SETFCAP,
-                CAP_SETPCAP,
-                CAP_SETUID,
-                CAP_SYS_ADMIN,
-                CAP_SYS_CHROOT,
-                CAP_SYS_NICE,
-                CAP_SYS_PTRACE,
-                CAP_SYS_TTY_CONFIG
-        };
-
-        unsigned long l;
-
-        for (l = 0; l <= cap_last_cap(); l++) {
-                unsigned i;
-
-                for (i = 0; i < ELEMENTSOF(retain); i++)
-                        if (retain[i] == l)
-                                break;
-
-                if (i < ELEMENTSOF(retain))
-                        continue;
 
-                if (prctl(PR_CAPBSET_DROP, l) < 0) {
-                        log_error("PR_CAPBSET_DROP failed: %m");
-                        return -errno;
-                }
-        }
-
-        return 0;
+        static const uint64_t retain =
+                (1ULL << CAP_CHOWN) |
+                (1ULL << CAP_DAC_OVERRIDE) |
+                (1ULL << CAP_DAC_READ_SEARCH) |
+                (1ULL << CAP_FOWNER) |
+                (1ULL << CAP_FSETID) |
+                (1ULL << CAP_IPC_OWNER) |
+                (1ULL << CAP_KILL) |
+                (1ULL << CAP_LEASE) |
+                (1ULL << CAP_LINUX_IMMUTABLE) |
+                (1ULL << CAP_NET_BIND_SERVICE) |
+                (1ULL << CAP_NET_BROADCAST) |
+                (1ULL << CAP_NET_RAW) |
+                (1ULL << CAP_SETGID) |
+                (1ULL << CAP_SETFCAP) |
+                (1ULL << CAP_SETPCAP) |
+                (1ULL << CAP_SETUID) |
+                (1ULL << CAP_SYS_ADMIN) |
+                (1ULL << CAP_SYS_CHROOT) |
+                (1ULL << CAP_SYS_NICE) |
+                (1ULL << CAP_SYS_PTRACE) |
+                (1ULL << CAP_SYS_TTY_CONFIG);
+
+        return capability_bounding_set_drop(~retain, false);
 }
 
 static int is_os_tree(const char *path) {
@@ -1041,8 +1023,10 @@ int main(int argc, char *argv[]) {
 
                 loopback_setup();
 
-                if (drop_capabilities() < 0)
+                if (drop_capabilities() < 0) {
+                        log_error("drop_capabilities() failed: %m");
                         goto child_fail;
+                }
 
                 if (arg_user) {
 
index b2cd9ed..08b7a20 100644 (file)
@@ -40,7 +40,8 @@ int have_effective_cap(int value) {
         cap_flag_value_t fv;
         int r;
 
-        if (!(cap = cap_get_proc()))
+        cap = cap_get_proc();
+        if (!cap)
                 return -errno;
 
         if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
@@ -84,3 +85,89 @@ unsigned long cap_last_cap(void) {
 
         return p;
 }
+
+int capability_bounding_set_drop(uint64_t drop, bool right_now) {
+        unsigned long i;
+        cap_t after_cap = NULL, temp_cap = NULL;
+        cap_flag_value_t fv;
+        int r;
+
+        /* If we are run as PID 1 we will lack CAP_SETPCAP by default
+         * in the effective set (yes, the kernel drops that when
+         * executing init!), so get it back temporarily so that we can
+         * call PR_CAPBSET_DROP. */
+
+        after_cap = cap_get_proc();
+        if (!after_cap)
+                return -errno;
+
+        if (cap_get_flag(after_cap, CAP_SETPCAP, CAP_EFFECTIVE, &fv) < 0) {
+                cap_free(after_cap);
+                return -errno;
+        }
+
+        if (fv != CAP_SET) {
+                static const cap_value_t v = CAP_SETPCAP;
+
+                temp_cap = cap_dup(after_cap);
+                if (!temp_cap) {
+                        r = -errno;
+                        goto finish;
+                }
+
+                if (cap_set_flag(temp_cap, CAP_EFFECTIVE, 1, &v, CAP_SET) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
+
+                if (cap_set_proc(temp_cap) < 0) {
+                        r = -errno;
+                        goto finish;
+                }
+        }
+
+        for (i = 0; i <= cap_last_cap(); i++) {
+
+                if (drop & ((uint64_t) 1ULL << (uint64_t) i)) {
+                        cap_value_t v;
+
+                        /* Drop it from the bounding set */
+                        if (prctl(PR_CAPBSET_DROP, i) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
+                        v = i;
+
+                        /* Also drop it from the inheritable set, so
+                         * that anything we exec() loses the
+                         * capability for good. */
+                        if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
+                                r = -errno;
+                                goto finish;
+                        }
+
+                        /* If we shall apply this right now drop it
+                         * also from our own capability sets. */
+                        if (right_now) {
+                                if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
+                                    cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
+                                        r = -errno;
+                                        goto finish;
+                                }
+                        }
+                }
+        }
+
+        r = 0;
+
+finish:
+        if (temp_cap)
+                cap_free(temp_cap);
+
+        if (after_cap) {
+                cap_set_proc(after_cap);
+                cap_free(after_cap);
+        }
+
+        return r;
+}
index 9f9c49c..0cc5dd0 100644 (file)
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+#include <inttypes.h>
+#include <stdbool.h>
+
 unsigned long cap_last_cap(void);
 int have_effective_cap(int value);
+int capability_bounding_set_drop(uint64_t caps, bool right_now);
+
 #endif