#include "missing.h"
#include "execute.h"
#include "loopback-setup.h"
+#include "mkdir.h"
+#include "dev-setup.h"
+#include "def.h"
+#include "label.h"
typedef enum MountMode {
/* This is ordered by priority! */
READONLY,
PRIVATE_TMP,
PRIVATE_VAR_TMP,
+ PRIVATE_DEV,
+ PRIVATE_BUS_ENDPOINT,
READWRITE
} MountMode;
STRV_FOREACH(i, strv) {
(*p)->ignore = false;
+ (*p)->done = false;
- if ((mode == INACCESSIBLE || mode == READONLY) && (*i)[0] == '-') {
+ if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
(*p)->ignore = true;
(*i)++;
}
if (previous && path_equal(f->path, previous->path))
continue;
- t->path = f->path;
- t->mode = f->mode;
+ *t = *f;
previous = t;
*n = t - m;
}
+static int mount_dev(BindMount *m) {
+ static const char devnodes[] =
+ "/dev/null\0"
+ "/dev/zero\0"
+ "/dev/full\0"
+ "/dev/random\0"
+ "/dev/urandom\0"
+ "/dev/tty\0";
+ /* Builds a private /dev: a tmpfs is mounted in a temporary directory
+  * below /tmp, populated with bind mounts, symlinks and copies of the
+  * device nodes listed in devnodes, and finally moved over "/dev/".
+  *
+  * m is only checked for being non-NULL; the target is hard-coded.
+  * Returns 0 on success or a negative errno-style value on failure.
+  *
+  * NOTE(review): the strappenda() results are never freed here;
+  * presumably they are stack-allocated — confirm against the macro. */
+ char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
+ const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
+ _cleanup_umask_ mode_t u;
+ int r;
+
+ assert(m);
+ /* Clear the umask so the modes passed to mkdir()/mknod() below are
+  * applied as given. NOTE(review): _cleanup_umask_ presumably restores
+  * the previous value when u goes out of scope — confirm. */
+ u = umask(0000);
+
+ if (!mkdtemp(temporary_mount))
+ return -errno;
+ /* The new /dev is a tmpfs mounted inside the temporary directory;
+  * failing to mount it is fatal. */
+ dev = strappenda(temporary_mount, "/dev");
+ mkdir(dev, 0755);
+ if (mount("tmpfs", dev, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755") < 0) {
+ r = -errno;
+ goto fail;
+ }
+ /* /dev/pts and /dev/shm are bind-mounted from the current /dev;
+  * a failure of either one aborts the setup. */
+ devpts = strappenda(temporary_mount, "/dev/pts");
+ mkdir(devpts, 0755);
+ if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ /* ptmx is a relative symlink into the bind-mounted pts directory;
+  * the result of symlink() is not checked. */
+ devptmx = strappenda(temporary_mount, "/dev/ptmx");
+ symlink("pts/ptmx", devptmx);
+
+ devshm = strappenda(temporary_mount, "/dev/shm");
+ mkdir(devshm, 01777);
+ r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ /* The next three bind mounts (mqueue, kdbus, hugepages) are attempted
+  * without checking the result, so a missing source is tolerated. */
+ devmqueue = strappenda(temporary_mount, "/dev/mqueue");
+ mkdir(devmqueue, 0755);
+ mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
+
+ devkdbus = strappenda(temporary_mount, "/dev/kdbus");
+ mkdir(devkdbus, 0755);
+ mount("/dev/kdbus", devkdbus, NULL, MS_BIND, NULL);
+
+ devhugepages = strappenda(temporary_mount, "/dev/hugepages");
+ mkdir(devhugepages, 0755);
+ mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
+ /* /dev/log becomes a symlink to the journal socket path; the result
+  * of symlink() is not checked. */
+ devlog = strappenda(temporary_mount, "/dev/log");
+ symlink("/run/systemd/journal/dev-log", devlog);
+ /* Recreate every node listed in devnodes inside the new tmpfs, using
+  * the mode and device number of the existing node. */
+ NULSTR_FOREACH(d, devnodes) {
+ _cleanup_free_ char *dn = NULL;
+ struct stat st;
+
+ r = stat(d, &st);
+ if (r < 0) {
+ /* A node that does not exist is skipped; any other error aborts. */
+ if (errno == ENOENT)
+ continue;
+
+ r = -errno;
+ goto fail;
+ }
+ /* Anything that is neither a block nor a character device is rejected. */
+ if (!S_ISBLK(st.st_mode) &&
+ !S_ISCHR(st.st_mode)) {
+ r = -EINVAL;
+ goto fail;
+ }
+ /* Nodes reporting device number 0 are skipped. */
+ if (st.st_rdev == 0)
+ continue;
+
+ dn = strappend(temporary_mount, d);
+ if (!dn) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ /* NOTE(review): presumably brackets mknod() with the SELinux
+  * file-creation label for d — confirm in label.h. */
+ mac_selinux_create_file_prepare(d, st.st_mode);
+ r = mknod(dn, st.st_mode, st.st_rdev);
+ mac_selinux_create_file_clear();
+ /* NOTE(review): errno is read after mac_selinux_create_file_clear();
+  * this assumes that call leaves errno untouched — confirm. */
+ if (r < 0) {
+ r = -errno;
+ goto fail;
+ }
+ }
+ /* NOTE(review): dev_setup() comes from dev-setup.h and is given the
+  * temporary directory as prefix; its result is ignored — confirm what
+  * it creates. */
+ dev_setup(temporary_mount);
+ /* Move the populated tmpfs over "/dev/". */
+ if (mount(dev, "/dev/", NULL, MS_MOVE, NULL) < 0) {
+ r = -errno;
+ goto fail;
+ }
+ /* Remove the mount point directory and then its temporary parent. */
+ rmdir(dev);
+ rmdir(temporary_mount);
+
+ return 0;
+ /* Error path: each path pointer is assigned before its mount is
+  * attempted, so umount() may run on a path that was never mounted;
+  * all results here are ignored. */
+fail:
+ if (devpts)
+ umount(devpts);
+
+ if (devshm)
+ umount(devshm);
+
+ if (devkdbus)
+ umount(devkdbus);
+
+ if (devhugepages)
+ umount(devhugepages);
+
+ if (devmqueue)
+ umount(devmqueue);
+
+ umount(dev);
+ rmdir(dev);
+ rmdir(temporary_mount);
+
+ return r;
+}
+
+/* Exposes a custom kdbus endpoint in place of the regular bus node.
+ *
+ * A tmpfs is mounted in a temporary directory below /tmp, a device node
+ * named "bus" is created in it (copied from /dev/null, mode 0600) and
+ * m->path is bind-mounted on top of that node. The tmpfs is then moved
+ * onto the directory part of m->path.
+ *
+ * Returns 0 on success or a negative errno-style value on failure; on
+ * failure everything created here is unmounted and removed again on a
+ * best-effort basis.
+ *
+ * NOTE(review): strappenda() results are never freed here; presumably
+ * they are stack-allocated — confirm against the macro. */
+static int mount_kdbus(BindMount *m) {
+
+        char temporary_mount[] = "/tmp/kdbus-dev-XXXXXX";
+        _cleanup_free_ char *basepath = NULL;
+        _cleanup_umask_ mode_t u;
+        char *busnode = NULL, *root;
+        struct stat st;
+        int r;
+
+        assert(m);
+
+        /* Clear the umask so the modes given to mkdir()/mknod() are
+         * applied unmodified. */
+        u = umask(0000);
+
+        /* In every error branch errno is captured before logging, so the
+         * returned code cannot be affected by the logging call. */
+        if (!mkdtemp(temporary_mount)) {
+                r = -errno;
+                log_error("Failed create temp dir: %m");
+                return r;
+        }
+
+        root = strappenda(temporary_mount, "/kdbus");
+        mkdir(root, 0755);
+        if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=777") < 0) {
+                r = -errno;
+                goto fail;
+        }
+
+        /* create a new /dev/null dev node copy so we have some fodder to
+         * bind-mount the custom endpoint over. */
+        if (stat("/dev/null", &st) < 0) {
+                r = -errno;
+                log_error("Failed to stat /dev/null: %m");
+                goto fail;
+        }
+
+        busnode = strappenda(root, "/bus");
+        if (mknod(busnode, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
+                r = -errno;
+                log_error("mknod() for %s failed: %m", busnode);
+                goto fail;
+        }
+
+        if (mount(m->path, busnode, "bind", MS_BIND, NULL) < 0) {
+                r = -errno;
+                log_error("bind mount of %s failed: %m", m->path);
+                goto fail;
+        }
+
+        basepath = dirname_malloc(m->path);
+        if (!basepath) {
+                r = -ENOMEM;
+                goto fail;
+        }
+
+        if (mount(root, basepath, NULL, MS_MOVE, NULL) < 0) {
+                r = -errno;
+                /* This is the move, not the bind mount: say so in the log. */
+                log_error("move mount of %s to %s failed: %m", root, basepath);
+                goto fail;
+        }
+
+        /* The tmpfs has been moved away, leaving root as a plain
+         * directory inside temporary_mount. It has to be removed first,
+         * otherwise rmdir(temporary_mount) fails with ENOTEMPTY and the
+         * temporary directory is leaked in /tmp. */
+        rmdir(root);
+        rmdir(temporary_mount);
+        return 0;
+
+fail:
+        /* busnode is set before mknod() and the bind mount are attempted,
+         * so these calls may hit a path that does not exist or is not
+         * mounted; their results are ignored. */
+        if (busnode) {
+                umount(busnode);
+                unlink(busnode);
+        }
+
+        umount(root);
+        rmdir(root);
+        rmdir(temporary_mount);
+
+        return r;
+}
+
static int apply_mount(
BindMount *m,
const char *tmp_dir,
switch (m->mode) {
case INACCESSIBLE:
+
+ /* First, get rid of everything that is below if there
+ * is anything... Then, overmount it with an
+ * inaccessible directory. */
+ umount_recursive(m->path, 0);
+
what = "/run/systemd/inaccessible";
break;
case READONLY:
case READWRITE:
- what = m->path;
- break;
+ /* Nothing to mount here, we just later toggle the
+ * MS_RDONLY bit for the mount point */
+ return 0;
case PRIVATE_TMP:
what = tmp_dir;
what = var_tmp_dir;
break;
+ case PRIVATE_DEV:
+ return mount_dev(m);
+
+ case PRIVATE_BUS_ENDPOINT:
+ return mount_kdbus(m);
+
default:
assert_not_reached("Unknown mode");
}
if (r >= 0)
log_debug("Successfully mounted %s to %s", what, m->path);
else if (m->ignore && errno == ENOENT)
- r = 0;
+ return 0;
return r;
}
assert(m);
- if (m->mode != INACCESSIBLE && m->mode != READONLY)
- return 0;
+ if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+ r = bind_remount_recursive(m->path, true);
+ else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV))
+ r = bind_remount_recursive(m->path, false);
+ else
+ r = 0;
- r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
- if (r < 0 && !(m->ignore && errno == ENOENT))
- return -errno;
+ if (m->ignore && r == -ENOENT)
+ return 0;
- return 0;
+ return r;
}
int setup_namespace(
char** read_write_dirs,
char** read_only_dirs,
char** inaccessible_dirs,
- char* tmp_dir,
- char* var_tmp_dir,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ const char* bus_endpoint_path,
+ bool private_dev,
+ ProtectHome protect_home,
+ ProtectSystem protect_system,
unsigned mount_flags) {
BindMount *m, *mounts = NULL;
if (unshare(CLONE_NEWNS) < 0)
return -errno;
- n = !!tmp_dir + !!var_tmp_dir +
+ n = !!tmp_dir + !!var_tmp_dir + !!bus_endpoint_path +
strv_length(read_write_dirs) +
strv_length(read_only_dirs) +
- strv_length(inaccessible_dirs);
+ strv_length(inaccessible_dirs) +
+ private_dev +
+ (protect_home != PROTECT_HOME_NO ? 3 : 0) +
+ (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
+ (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
if (n > 0) {
- m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
+ m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
r = append_mounts(&m, read_write_dirs, READWRITE);
if (r < 0)
return r;
m++;
}
+ if (private_dev) {
+ m->path = "/dev";
+ m->mode = PRIVATE_DEV;
+ m++;
+ }
+
+ if (bus_endpoint_path) {
+ m->path = bus_endpoint_path;
+ m->mode = PRIVATE_BUS_ENDPOINT;
+ m++;
+ }
+
+ if (protect_home != PROTECT_HOME_NO) {
+ r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user", "-/root"), protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ if (r < 0)
+ return r;
+ }
+
+ if (protect_system != PROTECT_SYSTEM_NO) {
+ r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL ? STRV_MAKE("/usr", "-/boot", "/etc") : STRV_MAKE("/usr", "-/boot"), READONLY);
+ if (r < 0)
+ return r;
+ }
+
assert(mounts + n == m);
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
drop_duplicates(mounts, &n);
}
- /* Remount / as SLAVE so that nothing now mounted in the namespace
- shows up in the parent */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
- return -errno;
+ if (n > 0) {
+ /* Remount / as SLAVE so that nothing now mounted in the namespace
+ shows up in the parent */
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ return -errno;
- for (m = mounts; m < mounts + n; ++m) {
- r = apply_mount(m, tmp_dir, var_tmp_dir);
- if (r < 0)
- goto fail;
- }
+ for (m = mounts; m < mounts + n; ++m) {
+ r = apply_mount(m, tmp_dir, var_tmp_dir);
+ if (r < 0)
+ goto fail;
+ }
- for (m = mounts; m < mounts + n; ++m) {
- r = make_read_only(m);
- if (r < 0)
- goto fail;
+ for (m = mounts; m < mounts + n; ++m) {
+ r = make_read_only(m);
+ if (r < 0)
+ goto fail;
+ }
}
- /* Remount / as the desired mode */
+ /* Remount / as the desired mode. Note that this will not
+ * reestablish propagation from our side to the host, since
+ * what's disconnected is disconnected. */
if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
r = -errno;
goto fail;
return 0;
fail:
- for (m = mounts; m < mounts + n; ++m)
- if (m->done)
- umount2(m->path, MNT_DETACH);
+ if (n > 0) {
+ for (m = mounts; m < mounts + n; ++m)
+ if (m->done)
+ umount2(m->path, MNT_DETACH);
+ }
return r;
}
static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
_cleanup_free_ char *x = NULL;
+ char bid[SD_ID128_STRING_MAX];
+ sd_id128_t boot_id;
+ int r;
assert(id);
assert(prefix);
assert(path);
- x = strjoin(prefix, "/systemd-", id, "-XXXXXX", NULL);
+ /* We include the boot id in the directory so that after a
+ * reboot we can easily identify obsolete directories. */
+
+ r = sd_id128_get_boot(&boot_id);
+ if (r < 0)
+ return r;
+
+ x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
if (!x)
return -ENOMEM;
return r;
}
+/* String names for the ProtectHome values, indexed by the enum value.
+ * NOTE(review): presumably consumed by the conversion helpers that
+ * DEFINE_STRING_TABLE_LOOKUP generates below — confirm against the
+ * macro definition. */
+static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
+ [PROTECT_HOME_NO] = "no",
+ [PROTECT_HOME_YES] = "yes",
+ [PROTECT_HOME_READ_ONLY] = "read-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_home, ProtectHome);
+/* String names for the ProtectSystem values, indexed by the enum value.
+ * NOTE(review): presumably consumed by the conversion helpers that
+ * DEFINE_STRING_TABLE_LOOKUP generates below — confirm against the
+ * macro definition. */
+static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
+ [PROTECT_SYSTEM_NO] = "no",
+ [PROTECT_SYSTEM_YES] = "yes",
+ [PROTECT_SYSTEM_FULL] = "full",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);