X-Git-Url: https://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fnspawn%2Fnspawn.c;h=fb672510b4ff6e718a71b581d4350c48ba46d577;hb=4d2b1e0a3f98d8cf93beb943b05868b55231006a;hp=4e465dfe4f6f510a2a4012880e9c493b76c73dfe;hpb=bbb99c30d01a8bcdc27fb151cc6376a7877a6b07;p=elogind.git diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 4e465dfe4..fb672510b 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -962,7 +962,7 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons char *to; int r; - to = strappenda(dest, "/sys/fs/cgroup/", hierarchy); + to = strjoina(dest, "/sys/fs/cgroup/", hierarchy); r = path_is_mount_point(to, false); if (r < 0) @@ -972,9 +972,17 @@ static int mount_cgroup_hierarchy(const char *dest, const char *controller, cons mkdir_p(to, 0755); - if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV|(read_only ? MS_RDONLY : 0), controller) < 0) + /* The superblock mount options of the mount point need to be + * identical to the hosts', and hence writable... */ + if (mount("cgroup", to, "cgroup", MS_NOSUID|MS_NOEXEC|MS_NODEV, controller) < 0) return log_error_errno(errno, "Failed to mount to %s: %m", to); + /* ... hence let's only make the bind mount read-only, not the + * superblock. */ + if (read_only) { + if (mount(NULL, to, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0) + return log_error_errno(errno, "Failed to remount %s read-only: %m", to); + } return 1; } @@ -996,7 +1004,7 @@ static int mount_cgroup(const char *dest) { if (r < 0) return log_error_errno(r, "Failed to determine our own cgroup path: %m"); - cgroup_root = strappenda(dest, "/sys/fs/cgroup"); + cgroup_root = strjoina(dest, "/sys/fs/cgroup"); if (mount("tmpfs", cgroup_root, "tmpfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, "mode=755") < 0) return log_error_errno(errno, "Failed to mount tmpfs to /sys/fs/cgroup: %m"); @@ -1044,17 +1052,17 @@ static int mount_cgroup(const char *dest) { } } - r = mount_cgroup_hierarchy(dest, "name=systemd", "systemd", false); + r = mount_cgroup_hierarchy(dest, "name=systemd,xattr", "systemd", false); if (r < 0) return r; /* Make our own cgroup a (writable) bind mount */ - systemd_own = strappenda(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); + systemd_own = strjoina(dest, "/sys/fs/cgroup/systemd", own_cgroup_path); if (mount(systemd_own, systemd_own, NULL, MS_BIND, NULL) < 0) return log_error_errno(errno, "Failed to turn %s into a bind mount: %m", own_cgroup_path); /* And then remount the systemd cgroup root read-only */ - systemd_root = strappenda(dest, "/sys/fs/cgroup/systemd"); + systemd_root = strjoina(dest, "/sys/fs/cgroup/systemd"); if (mount(NULL, systemd_root, NULL, MS_BIND|MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, NULL) < 0) return log_error_errno(errno, "Failed to mount cgroup root read-only: %m"); @@ -1207,7 +1215,7 @@ static int setup_volatile_state(const char *directory) { if (r < 0) return log_error_errno(r, "Failed to remount %s read-only: %m", directory); - p = strappenda(directory, "/var"); + p = strjoina(directory, "/var"); r = mkdir(p, 0755); if (r < 0 && errno != EEXIST) return log_error_errno(errno, "Failed to create %s: %m", directory); @@ -1243,8 +1251,8 @@ static int setup_volatile(const char *directory) { tmpfs_mounted = true; - f = strappenda(directory, "/usr"); - t = strappenda(template, "/usr"); + f = strjoina(directory, "/usr"); + t = strjoina(template, "/usr"); r = mkdir(t, 0755); if (r < 0 && errno != EEXIST) { @@ -1426,7 +1434,7 @@ static int setup_dev_console(const char *dest, const char *console) { * /dev/console. (Note that the major minor doesn't actually * matter here, since we mount it over anyway). */ - to = strappenda(dest, "/dev/console"); + to = strjoina(dest, "/dev/console"); if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) return log_error_errno(errno, "mknod() for /dev/console failed: %m"); @@ -2477,15 +2485,18 @@ static int setup_seccomp(void) { static const int blacklist[] = { SCMP_SYS(kexec_load), SCMP_SYS(open_by_handle_at), - SCMP_SYS(init_module), - SCMP_SYS(finit_module), - SCMP_SYS(delete_module), SCMP_SYS(iopl), SCMP_SYS(ioperm), SCMP_SYS(swapon), SCMP_SYS(swapoff), }; + static const int kmod_blacklist[] = { + SCMP_SYS(init_module), + SCMP_SYS(finit_module), + SCMP_SYS(delete_module), + }; + scmp_filter_ctx seccomp; unsigned i; int r; @@ -2510,6 +2521,20 @@ static int setup_seccomp(void) { } } + /* If the CAP_SYS_MODULE capability is not requested then + * we'll block the kmod syscalls too */ + if (!(arg_retain & (1ULL << CAP_SYS_MODULE))) { + for (i = 0; i < ELEMENTSOF(kmod_blacklist); i++) { + r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), kmod_blacklist[i], 0); + if (r == -EFAULT) + continue; /* unknown syscall */ + if (r < 0) { + log_error_errno(r, "Failed to block syscall: %m"); + goto finish; + } + } + } + /* Audit is broken in containers, much of the userspace audit hookup will fail if running inside a container. We don't @@ -2556,10 +2581,10 @@ static int setup_propagate(const char *root) { (void) mkdir_p("/run/systemd/nspawn/", 0755); (void) mkdir_p("/run/systemd/nspawn/propagate", 0600); - p = strappenda("/run/systemd/nspawn/propagate/", arg_machine); + p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); (void) mkdir_p(p, 0600); - q = strappenda(root, "/run/systemd/nspawn/incoming"); + q = strjoina(root, "/run/systemd/nspawn/incoming"); mkdir_parents(q, 0755); mkdir_p(q, 0600); @@ -3042,7 +3067,7 @@ static int mount_device(const char *what, const char *where, const char *directo rw = false; if (directory) - p = strappenda(where, directory); + p = strjoina(where, directory); else p = where; @@ -3602,7 +3627,6 @@ int main(int argc, char *argv[]) { } if (arg_ephemeral) { - _cleanup_release_lock_file_ LockFile original_lock = LOCK_FILE_INIT; char *np; /* If the specified path is a mount point we @@ -3678,7 +3702,7 @@ int main(int argc, char *argv[]) { } else { const char *p; - p = strappenda(arg_directory, + p = strjoina(arg_directory, argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/"); if (access(p, F_OK) < 0) { log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory); @@ -4287,7 +4311,7 @@ finish: if (arg_machine) { const char *p; - p = strappenda("/run/systemd/nspawn/propagate/", arg_machine); + p = strjoina("/run/systemd/nspawn/propagate/", arg_machine); (void) rm_rf(p, false, true, false); }