chiark / gitweb /
exec: introduce PrivateDevices= switch to provide services with a private /dev
author Lennart Poettering <lennart@poettering.net>
Mon, 20 Jan 2014 18:54:51 +0000 (19:54 +0100)
committer Lennart Poettering <lennart@poettering.net>
Mon, 20 Jan 2014 20:28:37 +0000 (21:28 +0100)
Similar to PrivateNetwork= and PrivateTmp=, introduce PrivateDevices=, which
sets up a private /dev with only the API pseudo-devices like /dev/null,
/dev/zero and /dev/random, but without any physical devices in it.

13 files changed:
man/systemd.exec.xml
src/core/dbus-execute.c
src/core/execute.c
src/core/execute.h
src/core/load-fragment-gperf.gperf.m4
src/core/mount-setup.c
src/core/namespace.c
src/core/namespace.h
src/nspawn/nspawn.c
src/shared/def.h
src/shared/dev-setup.c
src/shared/dev-setup.h
src/test/test-ns.c

index 610c821dc095025117444e72bb5d9cfee1964970..7eaf52bc5bd59571a924cb613a67103d32dd623a 100644 (file)
                                 for details.</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><varname>PrivateDevices=</varname></term>
+
+                                <listitem><para>Takes a boolean
+                                argument. If true, sets up a new /dev
+                                namespace for the executed processes
+                                and only adds API pseudo devices such
+                                as <filename>/dev/null</filename>,
+                                <filename>/dev/zero</filename> or
+                                <filename>/dev/random</filename> to
+                                it, but no physical devices such as
+                                <filename>/dev/sda</filename>. This is
+                                useful to securely turn off physical
+                                device access by the executed
+                                process. Defaults to
+                                false.</para></listitem>
+                        </varlistentry>
+
                         <varlistentry>
                                 <term><varname>MountFlags=</varname></term>
 
index 4e9529708d1742a859bb524b2759f831a0b87ad4..edf84f89fd6747b102b9ddb530faf03a86ef7e92 100644 (file)
@@ -415,6 +415,7 @@ const sd_bus_vtable bus_exec_vtable[] = {
         SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST),
+        SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST),
         SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST),
index 4317afad8ad0531fcb6d373e5c6078e574a0d6f1..91e4352f9a0a44cb045b16fb96561d03fa1a530b 100644 (file)
@@ -1427,7 +1427,8 @@ int exec_spawn(ExecCommand *command,
                     !strv_isempty(context->read_only_dirs) ||
                     !strv_isempty(context->inaccessible_dirs) ||
                     context->mount_flags != 0 ||
-                    (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))) {
+                    (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) ||
+                    context->private_devices) {
 
                         char *tmp = NULL, *var = NULL;
 
@@ -1450,6 +1451,7 @@ int exec_spawn(ExecCommand *command,
                                         context->inaccessible_dirs,
                                         tmp,
                                         var,
+                                        context->private_devices,
                                         context->mount_flags);
 
                         if (err < 0) {
@@ -1896,6 +1898,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 "%sNonBlocking: %s\n"
                 "%sPrivateTmp: %s\n"
                 "%sPrivateNetwork: %s\n"
+                "%sPrivateDevices: %s\n"
                 "%sIgnoreSIGPIPE: %s\n",
                 prefix, c->umask,
                 prefix, c->working_directory ? c->working_directory : "/",
@@ -1903,6 +1906,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) {
                 prefix, yes_no(c->non_blocking),
                 prefix, yes_no(c->private_tmp),
                 prefix, yes_no(c->private_network),
+                prefix, yes_no(c->private_devices),
                 prefix, yes_no(c->ignore_sigpipe));
 
         STRV_FOREACH(e, c->environment)
index 989373f4818f20a9c40c364dccd5fdb32cee312e..4851152743ba7fd9e9e870fe6ebc6371410056b5 100644 (file)
@@ -149,6 +149,7 @@ struct ExecContext {
         bool non_blocking;
         bool private_tmp;
         bool private_network;
+        bool private_devices;
 
         bool no_new_privileges;
 
index a5033b224b564d44c49afba7dea04b03bb047e83..59b2a645d02f91bcd10bffc4c1fd91c59a9cf866 100644 (file)
@@ -71,6 +71,7 @@ $1.ReadOnlyDirectories,          config_parse_path_strv,             0,
 $1.InaccessibleDirectories,      config_parse_path_strv,             0,                             offsetof($1, exec_context.inaccessible_dirs)
 $1.PrivateTmp,                   config_parse_bool,                  0,                             offsetof($1, exec_context.private_tmp)
 $1.PrivateNetwork,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_network)
+$1.PrivateDevices,               config_parse_bool,                  0,                             offsetof($1, exec_context.private_devices)
 $1.MountFlags,                   config_parse_exec_mount_flags,      0,                             offsetof($1, exec_context)
 $1.TCPWrapName,                  config_parse_unit_string_printf,    0,                             offsetof($1, exec_context.tcpwrap_name)
 $1.PAMName,                      config_parse_unit_string_printf,    0,                             offsetof($1, exec_context.pam_name)
index c601c9742c7d3fd249a54d59093b01f075c61c2a..387030abfd6e7106a382a54e5d03c404e4b31c53 100644 (file)
 #include "virt.h"
 #include "efivars.h"
 #include "smack-util.h"
-
-#ifndef TTY_GID
-#define TTY_GID 5
-#endif
+#include "def.h"
 
 typedef enum MountMode {
         MNT_NONE  =        0,
index 85147be13017c0fab13f97c0d58291b7e17dab34..c034bfd161f0a287deb027302fe523dbb0df564b 100644 (file)
@@ -39,6 +39,9 @@
 #include "missing.h"
 #include "execute.h"
 #include "loopback-setup.h"
+#include "mkdir.h"
+#include "dev-setup.h"
+#include "def.h"
 
 typedef enum MountMode {
         /* This is ordered by priority! */
@@ -46,6 +49,7 @@ typedef enum MountMode {
         READONLY,
         PRIVATE_TMP,
         PRIVATE_VAR_TMP,
+        PRIVATE_DEV,
         READWRITE
 } MountMode;
 
@@ -129,6 +133,77 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
         *n = t - m;
 }
 
+/* Sets up a private /dev that contains only a fixed set of pseudo-devices.
+ *
+ * The mode and device number of every node listed in devnodes[] are
+ * recorded with stat() before anything is mounted over /dev. Afterwards a
+ * tmpfs is mounted on /dev, a new devpts instance on /dev/pts and a tmpfs
+ * on /dev/shm, the recorded nodes are recreated with mknod(), and
+ * dev_setup(NULL) is called.
+ *
+ * Nodes that do not exist (stat() fails with ENOENT) are skipped. A listed
+ * path that exists but is neither a block nor a character device yields
+ * -EINVAL.
+ *
+ * Returns 0 on success, or a negative errno-style value on failure. A
+ * failing mount() or mknod() is reported as 0 when m->ignore is set. */
+static int mount_dev(BindMount *m) {
+        static const char devnodes[] =
+                "/dev/null\0"
+                "/dev/zero\0"
+                "/dev/full\0"
+                "/dev/random\0"
+                "/dev/urandom\0"
+                "/dev/tty\0";
+
+        /* One slot per entry of devnodes[]; zero-initialized so that
+         * entries stat() never filled in keep st_rdev == 0. */
+        struct stat devnodes_stat[6] = {};
+        const char *d;
+        unsigned n = 0;
+        _cleanup_umask_ mode_t u;
+        int r;
+
+        assert(m);
+
+        u = umask(0000);
+
+        /* First: record device mode_t and dev_t */
+        NULSTR_FOREACH(d, devnodes) {
+                r = stat(d, &devnodes_stat[n]);
+                if (r < 0) {
+                        if (errno != ENOENT)
+                                return -errno;
+                } else {
+                        if (!S_ISBLK(devnodes_stat[n].st_mode) &&
+                            !S_ISCHR(devnodes_stat[n].st_mode))
+                                return -EINVAL;
+                }
+
+                n++;
+        }
+
+        assert(n == ELEMENTSOF(devnodes_stat));
+
+        r = mount("tmpfs", "/dev", "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755");
+        if (r < 0)
+                return m->ignore ? 0 : -errno;
+
+        mkdir_p("/dev/pts", 0755);
+
+        r = mount("devpts", "/dev/pts", "devpts", MS_NOSUID|MS_NOEXEC, "newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID));
+        if (r < 0)
+                return m->ignore ? 0 : -errno;
+
+        mkdir_p("/dev/shm", 0755);
+
+        r = mount("tmpfs", "/dev/shm", "tmpfs", MS_NOSUID|MS_NODEV|MS_STRICTATIME, "mode=1777");
+        if (r < 0)
+                return m->ignore ? 0 : -errno;
+
+        /* Second: actually create it */
+        n = 0;
+        NULSTR_FOREACH(d, devnodes) {
+                /* Advance the index on every iteration, including skipped
+                 * entries, so that devnodes_stat[] stays in step with d.
+                 * Otherwise a single missing node would pin the index on
+                 * its zeroed slot and suppress all following nodes. */
+                const struct stat *st = &devnodes_stat[n++];
+
+                /* Slot was never filled in: the node did not exist. */
+                if (st->st_rdev == 0)
+                        continue;
+
+                r = mknod(d, st->st_mode, st->st_rdev);
+                if (r < 0)
+                        return m->ignore ? 0 : -errno;
+        }
+
+        dev_setup(NULL);
+
+        return 0;
+}
+
+
 static int apply_mount(
                 BindMount *m,
                 const char *tmp_dir,
@@ -141,6 +216,9 @@ static int apply_mount(
 
         switch (m->mode) {
 
+        case PRIVATE_DEV:
+                return mount_dev(m);
+
         case INACCESSIBLE:
                 what = "/run/systemd/inaccessible";
                 break;
@@ -194,6 +272,7 @@ int setup_namespace(
                 char** inaccessible_dirs,
                 char* tmp_dir,
                 char* var_tmp_dir,
+                bool private_dev,
                 unsigned mount_flags) {
 
         BindMount *m, *mounts = NULL;
@@ -209,7 +288,8 @@ int setup_namespace(
         n = !!tmp_dir + !!var_tmp_dir +
                 strv_length(read_write_dirs) +
                 strv_length(read_only_dirs) +
-                strv_length(inaccessible_dirs);
+                strv_length(inaccessible_dirs) +
+                private_dev;
 
         if (n > 0) {
                 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
@@ -237,6 +317,12 @@ int setup_namespace(
                         m++;
                 }
 
+                if (private_dev) {
+                        m->path = "/dev";
+                        m->mode = PRIVATE_DEV;
+                        m++;
+                }
+
                 assert(mounts + n == m);
 
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
index af0c01df9a0102ad6bdef55d4496d9545d0f14bb..fb1fc6ec0dadadd5a23d04d933a49b4c046b4bcf 100644 (file)
@@ -28,6 +28,7 @@ int setup_namespace(char **read_write_dirs,
                     char **inaccessible_dirs,
                     char *tmp_dir,
                     char *var_tmp_dir,
+                    bool private_dev,
                     unsigned mount_flags);
 
 int setup_tmp_dirs(const char *id,
index caf1aa9bedfee1373d5aca396cbf26a1dc50d37f..38ec89b9a69f9e1f6d646d3376922f3b28d78a9d 100644 (file)
 #include "ptyfwd.h"
 #include "bus-kernel.h"
 #include "env-util.h"
-
-#ifndef TTY_GID
-#define TTY_GID 5
-#endif
+#include "def.h"
 
 typedef enum LinkJournal {
         LINK_NO,
@@ -110,7 +107,8 @@ static uint64_t arg_retain =
         (1ULL << CAP_SYS_RESOURCE) |
         (1ULL << CAP_SYS_BOOT) |
         (1ULL << CAP_AUDIT_WRITE) |
-        (1ULL << CAP_AUDIT_CONTROL);
+        (1ULL << CAP_AUDIT_CONTROL) |
+        (1ULL << CAP_MKNOD);
 static char **arg_bind = NULL;
 static char **arg_bind_ro = NULL;
 static char **arg_setenv = NULL;
@@ -639,40 +637,30 @@ static int copy_devnodes(const char *dest) {
         u = umask(0000);
 
         NULSTR_FOREACH(d, devnodes) {
-                struct stat st;
                 _cleanup_free_ char *from = NULL, *to = NULL;
+                struct stat st;
 
-                asprintf(&from, "/dev/%s", d);
-                asprintf(&to, "%s/dev/%s", dest, d);
-
-                if (!from || !to) {
-                        log_oom();
-
-                        if (r == 0)
-                                r = -ENOMEM;
-
-                        break;
-                }
+                from = strappend("/dev/", d);
+                to = strjoin(dest, "/dev/", d, NULL);
+                if (!from || !to)
+                        return log_oom();
 
                 if (stat(from, &st) < 0) {
 
                         if (errno != ENOENT) {
                                 log_error("Failed to stat %s: %m", from);
-                                if (r == 0)
-                                        r = -errno;
+                                return -errno;
                         }
 
                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
 
                         log_error("%s is not a char or block device, cannot copy", from);
-                        if (r == 0)
-                                r = -EIO;
+                        return -EIO;
 
                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
 
                         log_error("mknod(%s) failed: %m", dest);
-                        if (r == 0)
-                                r = -errno;
+                        return  -errno;
                 }
         }
 
index ac325bf8f96028d005ade948b0d55dde85e48840..a2304fddda8beef06a68b7f2dcce3efe450371be 100644 (file)
@@ -71,3 +71,7 @@
 
 #define UNIX_USER_BUS_FMT "unix:path=%s/bus"
 #define KERNEL_USER_BUS_FMT "kernel:path=/dev/kdbus/%lu-user/bus"
+
+#ifndef TTY_GID
+#define TTY_GID 5
+#endif
index 50a187fda914c764a0493bbee9ff5c2ee27805f8..e025e17bbeb014b341694be34af246e930ac0655 100644 (file)
@@ -50,7 +50,7 @@ static int symlink_and_label(const char *old_path, const char *new_path) {
         return r;
 }
 
-void dev_setup(const char *prefix) {
+int dev_setup(const char *prefix) {
         const char *j, *k;
 
         static const char symlinks[] =
@@ -69,16 +69,16 @@ void dev_setup(const char *prefix) {
                 }
 
                 if (prefix) {
-                        char *linkname;
+                        _cleanup_free_ char *link_name = NULL;
 
-                        if (asprintf(&linkname, "%s/%s", prefix, k) < 0) {
-                                log_oom();
-                                break;
-                        }
+                        link_name = strjoin(prefix, "/", k, NULL);
+                        if (!link_name)
+                                return -ENOMEM;
 
-                        symlink_and_label(j, linkname);
-                        free(linkname);
+                        symlink_and_label(j, link_name);
                 } else
                         symlink_and_label(j, k);
         }
+
+        return 0;
 }
index 320c0b30ba040a97476feb0b8a4bec9f93e300e8..d41b6eefbae897c378e4efe02911d0d3e8c91d71 100644 (file)
@@ -21,4 +21,4 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
-void dev_setup(const char *pathprefix);
+int dev_setup(const char *pathprefix);
index 37d0998e5e90ea4b0dfe25a557ff3bc337b01826..ad0d0419c424f41311a9c5264283a70c1ea0485b 100644 (file)
@@ -59,6 +59,7 @@ int main(int argc, char *argv[]) {
                             (char **) inaccessible,
                             tmp_dir,
                             var_tmp_dir,
+                            true,
                             0);
         if (r < 0) {
                 log_error("Failed to setup namespace: %s", strerror(-r));