#include "mkdir.h"
#include "dev-setup.h"
#include "def.h"
+#include "label.h"
typedef enum MountMode {
/* This is ordered by priority! */
PRIVATE_TMP,
PRIVATE_VAR_TMP,
PRIVATE_DEV,
+ PRIVATE_BUS_ENDPOINT,
READWRITE
} MountMode;
STRV_FOREACH(i, strv) {
(*p)->ignore = false;
+ (*p)->done = false;
if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
(*p)->ignore = true;
if (previous && path_equal(f->path, previous->path))
continue;
- t->path = f->path;
- t->mode = f->mode;
+ *t = *f;
previous = t;
"/dev/tty\0";
char temporary_mount[] = "/tmp/namespace-dev-XXXXXX";
- const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devkdbus = NULL, *devhugepages = NULL, *devmqueue = NULL;
+ const char *d, *dev = NULL, *devpts = NULL, *devshm = NULL, *devhugepages = NULL, *devmqueue = NULL, *devlog = NULL, *devptmx = NULL;
_cleanup_umask_ mode_t u;
int r;
return -errno;
dev = strappenda(temporary_mount, "/dev");
- mkdir(dev, 0755);
+ (void)mkdir(dev, 0755);
if (mount("tmpfs", dev, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755") < 0) {
r = -errno;
goto fail;
}
devpts = strappenda(temporary_mount, "/dev/pts");
- mkdir(devpts, 0755);
+ (void)mkdir(devpts, 0755);
if (mount("/dev/pts", devpts, NULL, MS_BIND, NULL) < 0) {
r = -errno;
goto fail;
}
+ devptmx = strappenda(temporary_mount, "/dev/ptmx");
+ symlink("pts/ptmx", devptmx);
+
devshm = strappenda(temporary_mount, "/dev/shm");
- mkdir(devshm, 01777);
+ (void)mkdir(devshm, 01777);
r = mount("/dev/shm", devshm, NULL, MS_BIND, NULL);
if (r < 0) {
r = -errno;
}
devmqueue = strappenda(temporary_mount, "/dev/mqueue");
- mkdir(devmqueue, 0755);
+ (void)mkdir(devmqueue, 0755);
mount("/dev/mqueue", devmqueue, NULL, MS_BIND, NULL);
- devkdbus = strappenda(temporary_mount, "/dev/kdbus");
- mkdir(devkdbus, 0755);
- mount("/dev/kdbus", devkdbus, NULL, MS_BIND, NULL);
-
devhugepages = strappenda(temporary_mount, "/dev/hugepages");
- mkdir(devhugepages, 0755);
+ (void)mkdir(devhugepages, 0755);
mount("/dev/hugepages", devhugepages, NULL, MS_BIND, NULL);
+ devlog = strappenda(temporary_mount, "/dev/log");
+ symlink("/run/systemd/journal/dev-log", devlog);
+
NULSTR_FOREACH(d, devnodes) {
_cleanup_free_ char *dn = NULL;
struct stat st;
goto fail;
}
+ mac_selinux_create_file_prepare(d, st.st_mode);
r = mknod(dn, st.st_mode, st.st_rdev);
+ mac_selinux_create_file_clear();
+
if (r < 0) {
r = -errno;
goto fail;
if (devshm)
umount(devshm);
- if (devkdbus)
- umount(devkdbus);
-
if (devhugepages)
umount(devhugepages);
if (devmqueue)
umount(devmqueue);
- if (dev) {
- umount(dev);
- rmdir(dev);
+ umount(dev);
+ rmdir(dev);
+ rmdir(temporary_mount);
+
+ return r;
+}
+
+static int mount_kdbus(BindMount *m) {
+
+ char temporary_mount[] = "/tmp/kdbus-dev-XXXXXX";
+ _cleanup_free_ char *basepath = NULL;
+ _cleanup_umask_ mode_t u;
+ char *busnode = NULL, *root;
+ struct stat st;
+ int r;
+
+ assert(m);
+
+ u = umask(0000);
+
+ if (!mkdtemp(temporary_mount))
+ return log_error_errno(errno, "Failed create temp dir: %m");
+
+ root = strappenda(temporary_mount, "/kdbus");
+ (void)mkdir(root, 0755);
+ if (mount("tmpfs", root, "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=777") < 0) {
+ r = -errno;
+ goto fail;
+ }
+
+ /* create a new /dev/null dev node copy so we have some fodder to
+ * bind-mount the custom endpoint over. */
+ if (stat("/dev/null", &st) < 0) {
+ log_error_errno(errno, "Failed to stat /dev/null: %m");
+ r = -errno;
+ goto fail;
+ }
+
+ busnode = strappenda(root, "/bus");
+ if (mknod(busnode, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
+ log_error_errno(errno, "mknod() for %s failed: %m", busnode);
+ r = -errno;
+ goto fail;
}
+ r = mount(m->path, busnode, "bind", MS_BIND, NULL);
+ if (r < 0) {
+ log_error_errno(errno, "bind mount of %s failed: %m", m->path);
+ r = -errno;
+ goto fail;
+ }
+
+ basepath = dirname_malloc(m->path);
+ if (!basepath) {
+ r = -ENOMEM;
+ goto fail;
+ }
+
+ if (mount(root, basepath, NULL, MS_MOVE, NULL) < 0) {
+ log_error_errno(errno, "bind mount of %s failed: %m", basepath);
+ r = -errno;
+ goto fail;
+ }
+
+ rmdir(temporary_mount);
+ return 0;
+
+fail:
+ if (busnode) {
+ umount(busnode);
+ unlink(busnode);
+ }
+
+ umount(root);
+ rmdir(root);
rmdir(temporary_mount);
return r;
switch (m->mode) {
- case PRIVATE_DEV:
- return mount_dev(m);
-
case INACCESSIBLE:
+
+ /* First, get rid of everything that is below if there
+ * is anything... Then, overmount it with an
+ * inaccessible directory. */
+ umount_recursive(m->path, 0);
+
what = "/run/systemd/inaccessible";
break;
case READONLY:
case READWRITE:
- what = m->path;
- break;
+ /* Nothing to mount here, we just later toggle the
+ * MS_RDONLY bit for the mount point */
+ return 0;
case PRIVATE_TMP:
what = tmp_dir;
what = var_tmp_dir;
break;
+ case PRIVATE_DEV:
+ return mount_dev(m);
+
+ case PRIVATE_BUS_ENDPOINT:
+ return mount_kdbus(m);
+
default:
assert_not_reached("Unknown mode");
}
if (r >= 0)
log_debug("Successfully mounted %s to %s", what, m->path);
else if (m->ignore && errno == ENOENT)
- r = 0;
+ return 0;
return r;
}
assert(m);
- if (m->mode != INACCESSIBLE && m->mode != READONLY)
- return 0;
+ if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+ r = bind_remount_recursive(m->path, true);
+ else if (IN_SET(m->mode, READWRITE, PRIVATE_TMP, PRIVATE_VAR_TMP, PRIVATE_DEV))
+ r = bind_remount_recursive(m->path, false);
+ else
+ r = 0;
- r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
- if (r < 0 && !(m->ignore && errno == ENOENT))
- return -errno;
+ if (m->ignore && r == -ENOENT)
+ return 0;
- return 0;
+ return r;
}
int setup_namespace(
char** read_write_dirs,
char** read_only_dirs,
char** inaccessible_dirs,
- char* tmp_dir,
- char* var_tmp_dir,
+ const char* tmp_dir,
+ const char* var_tmp_dir,
+ const char* bus_endpoint_path,
bool private_dev,
+ ProtectHome protect_home,
+ ProtectSystem protect_system,
unsigned mount_flags) {
BindMount *m, *mounts = NULL;
if (unshare(CLONE_NEWNS) < 0)
return -errno;
- n = !!tmp_dir + !!var_tmp_dir +
+ n = !!tmp_dir + !!var_tmp_dir + !!bus_endpoint_path +
strv_length(read_write_dirs) +
strv_length(read_only_dirs) +
strv_length(inaccessible_dirs) +
- private_dev;
+ private_dev +
+ (protect_home != PROTECT_HOME_NO ? 3 : 0) +
+ (protect_system != PROTECT_SYSTEM_NO ? 2 : 0) +
+ (protect_system == PROTECT_SYSTEM_FULL ? 1 : 0);
if (n > 0) {
- m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
+ m = mounts = (BindMount *) alloca0(n * sizeof(BindMount));
r = append_mounts(&m, read_write_dirs, READWRITE);
if (r < 0)
return r;
m++;
}
+ if (bus_endpoint_path) {
+ m->path = bus_endpoint_path;
+ m->mode = PRIVATE_BUS_ENDPOINT;
+ m++;
+ }
+
+ if (protect_home != PROTECT_HOME_NO) {
+ r = append_mounts(&m, STRV_MAKE("-/home", "-/run/user", "-/root"), protect_home == PROTECT_HOME_READ_ONLY ? READONLY : INACCESSIBLE);
+ if (r < 0)
+ return r;
+ }
+
+ if (protect_system != PROTECT_SYSTEM_NO) {
+ r = append_mounts(&m, protect_system == PROTECT_SYSTEM_FULL ? STRV_MAKE("/usr", "-/boot", "/etc") : STRV_MAKE("/usr", "-/boot"), READONLY);
+ if (r < 0)
+ return r;
+ }
+
assert(mounts + n == m);
qsort(mounts, n, sizeof(BindMount), mount_path_compare);
return r;
}
+
+static const char *const protect_home_table[_PROTECT_HOME_MAX] = {
+ [PROTECT_HOME_NO] = "no",
+ [PROTECT_HOME_YES] = "yes",
+ [PROTECT_HOME_READ_ONLY] = "read-only",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_home, ProtectHome);
+
+static const char *const protect_system_table[_PROTECT_SYSTEM_MAX] = {
+ [PROTECT_SYSTEM_NO] = "no",
+ [PROTECT_SYSTEM_YES] = "yes",
+ [PROTECT_SYSTEM_FULL] = "full",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(protect_system, ProtectSystem);