#include "mkdir.h"
#include "dev-setup.h"
#include "def.h"
+#include "label.h"
typedef enum MountMode {
/* This is ordered by priority! */
PRIVATE_TMP,
PRIVATE_VAR_TMP,
PRIVATE_DEV,
+ PRIVATE_BUS_ENDPOINT,
READWRITE
} MountMode;
STRV_FOREACH(i, strv) {
(*p)->ignore = false;
+ (*p)->done = false;
if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
(*p)->ignore = true;
if (previous && path_equal(f->path, previous->path))
continue;
- t->path = f->path;
- t->mode = f->mode;
+ *t = *f;
previous = t;
"/dev/tty\0";
char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
- const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL;
+ const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
_cleanup_umask_ mode_t u;
int r;
goto fail;
}
+ devptmx = strappenda(temporary_mount, "/dev/ptmx");
+ symlink("pts/ptmx", devptmx);
+
devshm = strappenda(temporary_mount, "/dev/shm");
mkdir(devshm, 01777);
r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
mkdir(devhugepages, 0755);
mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
+ devlog = strappenda(temporary_mount, "/dev/log");
+ symlink("/run/systemd/journal/dev-log", devlog);
+
NULSTR_FOREACH(d, devnodes) {
_cleanup_free_ char *dn = NULL;
struct stat st;
goto fail;
}
+ label_context_set(d, st.st_mode);
r = mknod(dn, st.st_mode, st.st_rdev);
+ label_context_clear();
+
if (r < 0) {
r = -errno;
goto fail;
return r;
}
+static int mount_kdbus(BindMount *m) {
+
+ char temporary_mount[] = "/tmp/kdbus-dev-XXXXXX";
+ _cleanup_free_ char *basepath = NULL;
+ _cleanup_umask_ mode_t u;
+ char *busnode = NULL, *root;
+ struct stat st;
+ int r;
+
+ assert(m);
+
+ u = umask(0000);
+
+ if (!mkdtemp(temporary_mount)) {
+ log_error("Failed create temp dir: %m");
+ return -errno;
+ }
+
+ root = strappenda(temporary_mount, "/kdbus");
+ mkdir(root, 0755);
+ if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=777") < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ /* create a new /dev/null dev node copy so we have some fodder to
+ * bind-mount the custom endpoint over. */
+ if (stat("/dev/null", &st) < 0) {
+ log_error("Failed to stat /dev/null: %m");
+ r = -errno;
+ goto fail;
+ }
+
+ busnode = strappenda(root, "/bus");
+ if (mknod(busnode, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
+ log_error("mknod() for %s failed: %m", busnode);
+ r = -errno;
+ goto fail;
+ }
+
+ r = mount(m->path, busnode, "bind", MS_BIND, NULL);
+ if (r < 0) {
+ log_error("bind mount of %s failed: %m", m->path);
+ r = -errno;
+ goto fail;
+ }
+
+ basepath = dirname_malloc(m->path);
+ if (!basepath) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (mount(root, basepath, NULL, MS_MOVE, NULL) < 0) {
+ log_error("bind mount of %s failed: %m", basepath);
+ r = -errno;
+ goto fail;
+ }
+
+ rmdir(temporary_mount);
+ return 0;
+
+fail:
+ if (busnode) {
+ umount(busnode);
+ unlink(busnode);
+ }
+
+ if (root) {
+ umount(root);
+ rmdir(root);
+ }
+
+ rmdir(temporary_mount);
+
+ return r;
+}
+
static int apply_mount(
BindMount *m,
const char *tmp_dir,
switch (m->mode) {
- case PRIVATE_DEV:
- return mount_dev(m);
-
case INACCESSIBLE:
+
+ /* First, get rid of everything that is below if there
+ * is anything... Then, overmount it with an
+ * inaccessible directory. */
+ umount_recursive(m->path, 0);
+
what = "/run/systemd/inaccessible";
break;
case READONLY:
case READWRITE:
- what = m->path;
- break;
+ /* Nothing to mount here, we just later toggle the
+ * MS_RDONLY bit for the mount point */
+ return 0;
case PRIVATE_TMP:
what = tmp_dir;
what = var_tmp_dir;
break;
+ case PRIVATE_DEV:
+ return mount_dev(m);
+
+ case PRIVATE_BUS_ENDPOINT:
+ return mount_kdbus(m);
+
default:
assert_not_reached("Unknown mode");
}
if (r >= 0)
log_debug("Successfully mounted %s to %s", what, m->path);
else if (m->ignore && errno == ENOENT)
- r = 0;
+ return 0;
return r;
}
assert(m);
- if (m->mode != INACCESSIBLE && m->mode != READONLY)
- return 0;
+ if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+ r = bind_remount_recursive(m->path, true);
+ else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV))
+ r = bind_remount_recursive(m->path, false);
+ else
+ r = 0;
- r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
- if (r < 0 && !(m->ignore && errno == ENOENT))
- return -errno;
+ if (m->ignore && r == -ENOENT)
+ return 0;
- return 0;
+ return r;
}
int setup_namespace(
char** inaccessible_dirs,
char* tmp_dir,
char* var_tmp_dir,
+ char* bus_endpoint_path,
bool private_dev,
+ ProtectHome protect_home,
+ ProtectSystem protect_system,
unsigned mount_flags) {
BindMount *m, *mounts = NULL;
if (unshare(CLONE_NEWNS) < 0)
return -errno;
- n = !!tmp_dir + !!var_tmp_dir +
+ n = !!tmp_dir + !!var_tmp_dir + !!bus_endpoint_path +
strv_length(read_write_dirs) +
strv_length(read_only_dirs) +
strv_length(inaccessible_dirs) +
- private_dev;
+ private_dev +
+ (protect_home != PROTECT_HOME_NO ? 3 : 0) +
+ (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
+ (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
if (n > 0) {
- m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
+ m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
r = append_mounts(&m, read_write_dirs, READWRITE);
if (r < 0)
return r;
m++;
}
+ if (bus_endpoint_path) {
+ m->path = bus_endpoint_path;
+ m->mode = PRIVATE_BUS_ENDPOINT;
+ m++;
+ }
+
+ if (protect_home != PROTECT_HOME_NO) {
+ r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user", "-/root"), protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ if (r < 0)
+ return r;
+ }
+
+ if (protect_system != PROTECT_SYSTEM_NO) {
+ r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL ? STRV_MAKE("/usr", "-/boot", "/etc") : STRV_MAKE("/usr", "-/boot"), READONLY);
+ if (r < 0)
+ return r;
+ }
+
assert(mounts + n == m);
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
drop_duplicates(mounts, &n);
}
- /* Remount / as SLAVE so that nothing now mounted in the namespace
- shows up in the parent */
- if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
- return -errno;
+ if (n > 0) {
+ /* Remount / as SLAVE so that nothing now mounted in the namespace
+ shows up in the parent */
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
+ return -errno;
- for (m = mounts; m < mounts + n; ++m) {
- r = apply_mount(m, tmp_dir, var_tmp_dir);
- if (r < 0)
- goto fail;
- }
+ for (m = mounts; m < mounts + n; ++m) {
+ r = apply_mount(m, tmp_dir, var_tmp_dir);
+ if (r < 0)
+ goto fail;
+ }
- for (m = mounts; m < mounts + n; ++m) {
- r = make_read_only(m);
- if (r < 0)
- goto fail;
+ for (m = mounts; m < mounts + n; ++m) {
+ r = make_read_only(m);
+ if (r < 0)
+ goto fail;
+ }
}
- /* Remount / as the desired mode */
+ /* Remount / as the desired mode. Not that this will not
+ * reestablish propagation from our side to the host, since
+ * what's disconnected is disconnected. */
if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
r = -errno;
goto fail;
return 0;
fail:
- for (m = mounts; m < mounts + n; ++m)
- if (m->done)
- umount2(m->path, MNT_DETACH);
+ if (n > 0) {
+ for (m = mounts; m < mounts + n; ++m)
+ if (m->done)
+ umount2(m->path, MNT_DETACH);
+ }
return r;
}
return r;
}
+
+static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
+ [PROTECT_HOME_NO] = "no",
+ [PROTECT_HOME_YES] = "yes",
+ [PROTECT_HOME_READ_ONLY] = "read-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_home, ProtectHome);
+
+static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
+ [PROTECT_SYSTEM_NO] = "no",
+ [PROTECT_SYSTEM_YES] = "yes",
+ [PROTECT_SYSTEM_FULL] = "full",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);