From 7f112f50fea585411ea2d493b3582bea77eb4d6e Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 20 Jan 2014 19:54:51 +0100 Subject: [PATCH] exec: introduce PrivateDevices= switch to provide services with a private /dev Similar to PrivateNetwork=, PrivateTmp= introduce PrivateDevices= that sets up a private /dev with only the API pseudo-devices like /dev/null, /dev/zero, /dev/random, but not any physical devices in them. --- man/systemd.exec.xml | 18 ++++++ src/core/dbus-execute.c | 1 + src/core/execute.c | 6 +- src/core/execute.h | 1 + src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/mount-setup.c | 5 +- src/core/namespace.c | 88 ++++++++++++++++++++++++++- src/core/namespace.h | 1 + src/nspawn/nspawn.c | 34 ++++------- src/shared/def.h | 4 ++ src/shared/dev-setup.c | 16 ++--- src/shared/dev-setup.h | 2 +- src/test/test-ns.c | 1 + 13 files changed, 140 insertions(+), 38 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 610c821dc..7eaf52bc5 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -894,6 +894,24 @@ for details. + + PrivateDevices= + + Takes a boolean + argument. If true, sets up a new /dev + namespace for the executed processes + and only adds API pseudo devices such + as /dev/null, + /dev/zero or + /dev/random to + it, but no physical devices such as + /dev/sda. This is + useful to securely turn off physical + device access by the executed + process. Defaults to + false. 
+ + MountFlags= diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 4e9529708..edf84f89f 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -415,6 +415,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("MountFlags", "t", bus_property_get_ulong, offsetof(ExecContext, mount_flags), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateTmp", "b", bus_property_get_bool, offsetof(ExecContext, private_tmp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("SameProcessGroup", "b", bus_property_get_bool, offsetof(ExecContext, same_pgrp), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("UtmpIdentifier", "s", NULL, offsetof(ExecContext, utmp_id), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("IgnoreSIGPIPE", "b", bus_property_get_bool, offsetof(ExecContext, ignore_sigpipe), SD_BUS_VTABLE_PROPERTY_CONST), diff --git a/src/core/execute.c b/src/core/execute.c index 4317afad8..91e4352f9 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1427,7 +1427,8 @@ int exec_spawn(ExecCommand *command, !strv_isempty(context->read_only_dirs) || !strv_isempty(context->inaccessible_dirs) || context->mount_flags != 0 || - (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir))) { + (context->private_tmp && runtime && (runtime->tmp_dir || runtime->var_tmp_dir)) || + context->private_devices) { char *tmp = NULL, *var = NULL; @@ -1450,6 +1451,7 @@ int exec_spawn(ExecCommand *command, context->inaccessible_dirs, tmp, var, + context->private_devices, context->mount_flags); if (err < 0) { @@ -1896,6 +1898,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { "%sNonBlocking: %s\n" "%sPrivateTmp: %s\n" "%sPrivateNetwork: %s\n" + 
"%sPrivateDevices: %s\n" "%sIgnoreSIGPIPE: %s\n", prefix, c->umask, prefix, c->working_directory ? c->working_directory : "/", @@ -1903,6 +1906,7 @@ void exec_context_dump(ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->non_blocking), prefix, yes_no(c->private_tmp), prefix, yes_no(c->private_network), + prefix, yes_no(c->private_devices), prefix, yes_no(c->ignore_sigpipe)); STRV_FOREACH(e, c->environment) diff --git a/src/core/execute.h b/src/core/execute.h index 989373f48..485115274 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -149,6 +149,7 @@ struct ExecContext { bool non_blocking; bool private_tmp; bool private_network; + bool private_devices; bool no_new_privileges; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index a5033b224..59b2a645d 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -71,6 +71,7 @@ $1.ReadOnlyDirectories, config_parse_path_strv, 0, $1.InaccessibleDirectories, config_parse_path_strv, 0, offsetof($1, exec_context.inaccessible_dirs) $1.PrivateTmp, config_parse_bool, 0, offsetof($1, exec_context.private_tmp) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) +$1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) $1.MountFlags, config_parse_exec_mount_flags, 0, offsetof($1, exec_context) $1.TCPWrapName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.tcpwrap_name) $1.PAMName, config_parse_unit_string_printf, 0, offsetof($1, exec_context.pam_name) diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index c601c9742..387030abf 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -43,10 +43,7 @@ #include "virt.h" #include "efivars.h" #include "smack-util.h" - -#ifndef TTY_GID -#define TTY_GID 5 -#endif +#include "def.h" typedef enum MountMode { MNT_NONE = 0, diff --git a/src/core/namespace.c b/src/core/namespace.c index 
85147be13..c034bfd16 100644
--- a/src/core/namespace.c
+++ b/src/core/namespace.c
@@ -39,6 +39,9 @@
 #include "missing.h"
 #include "execute.h"
 #include "loopback-setup.h"
+#include "mkdir.h"
+#include "dev-setup.h"
+#include "def.h"
 
 typedef enum MountMode {
         /* This is ordered by priority! */
@@ -46,6 +49,7 @@ typedef enum MountMode {
         READONLY,
         PRIVATE_TMP,
         PRIVATE_VAR_TMP,
+        PRIVATE_DEV,
         READWRITE
 } MountMode;
 
@@ -129,6 +133,86 @@ static void drop_duplicates(BindMount *m, unsigned *n) {
         *n = t - m;
 }
 
+/* Populates a private /dev: mounts a fresh tmpfs on /dev, a new devpts
+ * instance on /dev/pts and a tmpfs on /dev/shm, then re-creates the API
+ * pseudo-device nodes listed in devnodes[] with the mode and device
+ * numbers recorded before /dev was overmounted.
+ *
+ * Returns 0 on success and a negative errno-style error otherwise. If
+ * m->ignore is set, mount() and mknod() failures are reported as 0. */
+static int mount_dev(BindMount *m) {
+        static const char devnodes[] =
+                "/dev/null\0"
+                "/dev/zero\0"
+                "/dev/full\0"
+                "/dev/random\0"
+                "/dev/urandom\0"
+                "/dev/tty\0";
+
+        struct stat devnodes_stat[6] = {};
+        const char *d;
+        unsigned n = 0;
+        _cleanup_umask_ mode_t u;
+        int r;
+
+        assert(m);
+
+        /* Clear the umask so that mknod() below applies the recorded modes as they are */
+        u = umask(0000);
+
+        /* First: record device mode_t and dev_t while the original /dev is
+         * still visible. Nodes that do not exist keep their zeroed entry. */
+        NULSTR_FOREACH(d, devnodes) {
+                r = stat(d, &devnodes_stat[n]);
+                if (r < 0) {
+                        if (errno != ENOENT)
+                                return -errno;
+                } else {
+                        if (!S_ISBLK(devnodes_stat[n].st_mode) &&
+                            !S_ISCHR(devnodes_stat[n].st_mode))
+                                return -EINVAL;
+                }
+
+                n++;
+        }
+
+        assert(n == ELEMENTSOF(devnodes_stat));
+
+        r = mount("tmpfs", "/dev", "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755");
+        if (r < 0)
+                return m->ignore ? 0 : -errno;
+
+        mkdir_p("/dev/pts", 0755);
+
+        r = mount("devpts", "/dev/pts", "devpts", MS_NOSUID|MS_NOEXEC, "newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID));
+        if (r < 0)
+                return m->ignore ? 0 : -errno;
+
+        mkdir_p("/dev/shm", 0755);
+
+        r = mount("tmpfs", "/dev/shm", "tmpfs", MS_NOSUID|MS_NODEV|MS_STRICTATIME, "mode=1777");
+        if (r < 0)
+                return m->ignore ? 0 : -errno;
+
+        /* Second: actually create it. The index advances on every iteration,
+         * including for skipped nodes, so that it stays in sync with d. */
+        n = 0;
+        NULSTR_FOREACH(d, devnodes) {
+                const struct stat *st = &devnodes_stat[n++];
+
+                if (st->st_rdev == 0)
+                        continue;
+
+                r = mknod(d, st->st_mode, st->st_rdev);
+                if (r < 0)
+                        return m->ignore ? 0 : -errno;
+        }
+
+        dev_setup(NULL);
+
+        return 0;
+}
+
 static int apply_mount(
                 BindMount *m,
                 const char *tmp_dir,
@@ -141,6 +225,9 @@ static int apply_mount(
 
         switch (m->mode) {
 
+        case PRIVATE_DEV:
+                return mount_dev(m);
+
         case INACCESSIBLE:
                 what = "/run/systemd/inaccessible";
                 break;
@@ -194,6 +281,7 @@ int setup_namespace(
                 char** inaccessible_dirs,
                 char* tmp_dir,
                 char* var_tmp_dir,
+                bool private_dev,
                 unsigned mount_flags) {
 
         BindMount *m, *mounts = NULL;
@@ -209,7 +297,8 @@ int setup_namespace(
         n = !!tmp_dir + !!var_tmp_dir +
                 strv_length(read_write_dirs) +
                 strv_length(read_only_dirs) +
-                strv_length(inaccessible_dirs);
+                strv_length(inaccessible_dirs) +
+                private_dev;
 
         if (n > 0) {
                 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
@@ -237,6 +326,12 @@ int setup_namespace(
                         m++;
                 }
 
+                if (private_dev) {
+                        m->path = "/dev";
+                        m->mode = PRIVATE_DEV;
+                        m++;
+                }
+
                 assert(mounts + n == m);
 
                 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
diff --git a/src/core/namespace.h b/src/core/namespace.h
index af0c01df9..fb1fc6ec0 100644
--- a/src/core/namespace.h
+++ b/src/core/namespace.h
@@ -28,6 +28,7 @@ int setup_namespace(char **read_write_dirs,
                     char **inaccessible_dirs,
                     char *tmp_dir,
                     char *var_tmp_dir,
+                    bool private_dev,
                     unsigned mount_flags);
 
 int setup_tmp_dirs(const char *id,
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index caf1aa9be..38ec89b9a 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -64,10 +64,7 @@
 #include "ptyfwd.h"
 #include "bus-kernel.h"
 #include "env-util.h"
-
-#ifndef TTY_GID
-#define TTY_GID 5
-#endif
+#include "def.h"
 
 typedef enum LinkJournal {
         LINK_NO,
@@ -110,7 +107,8 @@ static uint64_t arg_retain =
         (1ULL << CAP_SYS_RESOURCE) |
         (1ULL << CAP_SYS_BOOT) |
         (1ULL << CAP_AUDIT_WRITE) |
-        (1ULL << CAP_AUDIT_CONTROL);
+        (1ULL << CAP_AUDIT_CONTROL) |
+        (1ULL << CAP_MKNOD);
 static char **arg_bind = NULL;
 static char **arg_bind_ro = NULL;
 static char **arg_setenv = NULL;
@@ -639,40 +637,30 @@ static int
copy_devnodes(const char *dest) { u = umask(0000); NULSTR_FOREACH(d, devnodes) { - struct stat st; _cleanup_free_ char *from = NULL, *to = NULL; + struct stat st; - asprintf(&from, "/dev/%s", d); - asprintf(&to, "%s/dev/%s", dest, d); - - if (!from || !to) { - log_oom(); - - if (r == 0) - r = -ENOMEM; - - break; - } + from = strappend("/dev/", d); + to = strjoin(dest, "/dev/", d, NULL); + if (!from || !to) + return log_oom(); if (stat(from, &st) < 0) { if (errno != ENOENT) { log_error("Failed to stat %s: %m", from); - if (r == 0) - r = -errno; + return -errno; } } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { log_error("%s is not a char or block device, cannot copy", from); - if (r == 0) - r = -EIO; + return -EIO; } else if (mknod(to, st.st_mode, st.st_rdev) < 0) { log_error("mknod(%s) failed: %m", dest); - if (r == 0) - r = -errno; + return -errno; } } diff --git a/src/shared/def.h b/src/shared/def.h index ac325bf8f..a2304fddd 100644 --- a/src/shared/def.h +++ b/src/shared/def.h @@ -71,3 +71,7 @@ #define UNIX_USER_BUS_FMT "unix:path=%s/bus" #define KERNEL_USER_BUS_FMT "kernel:path=/dev/kdbus/%lu-user/bus" + +#ifndef TTY_GID +#define TTY_GID 5 +#endif diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c index 50a187fda..e025e17bb 100644 --- a/src/shared/dev-setup.c +++ b/src/shared/dev-setup.c @@ -50,7 +50,7 @@ static int symlink_and_label(const char *old_path, const char *new_path) { return r; } -void dev_setup(const char *prefix) { +int dev_setup(const char *prefix) { const char *j, *k; static const char symlinks[] = @@ -69,16 +69,16 @@ void dev_setup(const char *prefix) { } if (prefix) { - char *linkname; + _cleanup_free_ char *link_name = NULL; - if (asprintf(&linkname, "%s/%s", prefix, k) < 0) { - log_oom(); - break; - } + link_name = strjoin(prefix, "/", k, NULL); + if (!link_name) + return -ENOMEM; - symlink_and_label(j, linkname); - free(linkname); + symlink_and_label(j, link_name); } else symlink_and_label(j, k); } + + return 0; } diff 
--git a/src/shared/dev-setup.h b/src/shared/dev-setup.h index 320c0b30b..d41b6eefb 100644 --- a/src/shared/dev-setup.h +++ b/src/shared/dev-setup.h @@ -21,4 +21,4 @@ along with systemd; If not, see <http://www.gnu.org/licenses/>. ***/ -void dev_setup(const char *pathprefix); +int dev_setup(const char *pathprefix); diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 37d0998e5..ad0d0419c 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -59,6 +59,7 @@ int main(int argc, char *argv[]) { (char **) inaccessible, tmp_dir, var_tmp_dir, + true, 0); if (r < 0) { log_error("Failed to setup namespace: %s", strerror(-r)); -- 2.30.2