From ac0930c892bc7979b4c9bc2a52e5e844650b025d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 13 Aug 2012 15:27:04 +0200 Subject: [PATCH] namespace: rework namespace support - don't use pivot_root() anymore, just reuse root hierarchy - first create all mounts, then mark them read-only so that we get the right behaviour when people want writable mounts inside of read-only mounts - don't pass invalid combinations of MS_ constants to the kernel --- man/systemd.exec.xml | 19 ++-- src/core/execute.c | 3 +- src/core/namespace.c | 218 ++++++++++++++++++----------------------- src/core/switch-root.c | 6 ++ src/shared/util.c | 3 +- src/test/test-ns.c | 5 +- 6 files changed, 113 insertions(+), 141 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index e1193d2d5..cf6ab1778 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -1043,20 +1043,13 @@ , or , which - control whether namespaces set up with - ReadWriteDirectories=, - ReadOnlyDirectories= - and - InaccessibleDirectories= - receive or propagate new mounts - from/to the main namespace. See + control whether the file system + namespace set up for this unit's + processes will receive or propagate + new mounts. See mount1 - for details. Defaults to - , i.e. the new - namespace will both receive new mount - points from the main namespace as well - as propagate new mounts to - it. + for details. Default to + . diff --git a/src/core/execute.c b/src/core/execute.c index fc0edc6cf..6e2b5e48a 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1304,7 +1304,7 @@ int exec_spawn(ExecCommand *command, if (strv_length(context->read_write_dirs) > 0 || strv_length(context->read_only_dirs) > 0 || strv_length(context->inaccessible_dirs) > 0 || - context->mount_flags != MS_SHARED || + context->mount_flags != 0 || context->private_tmp) { err = setup_namespace(context->read_write_dirs, context->read_only_dirs, @@ -1540,7 +1540,6 @@ void exec_context_init(ExecContext *c) { c->cpu_sched_policy = SCHED_OTHER; c->syslog_priority = LOG_DAEMON|LOG_INFO; c->syslog_level_prefix = true; - c->mount_flags = MS_SHARED; c->control_group_persistent = -1; c->ignore_sigpipe = true; c->timer_slack_nsec = (nsec_t) -1; diff --git a/src/core/namespace.c b/src/core/namespace.c index ce10c7907..5c2a24653 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -41,13 +41,15 @@ typedef enum PathMode { /* This is ordered by priority! */ INACCESSIBLE, READONLY, - PRIVATE, + PRIVATE_TMP, + PRIVATE_VAR_TMP, READWRITE } PathMode; typedef struct Path { const char *path; PathMode mode; + bool done; } Path; static int append_paths(Path **p, char **strv, PathMode mode) { @@ -91,25 +93,22 @@ static int path_compare(const void *a, const void *b) { return 0; } -static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible, bool *need_private) { +static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible) { Path *f, *t, *previous; assert(p); assert(n); assert(need_inaccessible); - assert(need_private); for (f = p, t = p, previous = NULL; f < p+*n; f++) { + /* The first one wins */ if (previous && path_equal(f->path, previous->path)) continue; t->path = f->path; t->mode = f->mode; - if (t->mode == PRIVATE) - *need_private = true; - if (t->mode == INACCESSIBLE) *need_inaccessible = true; @@ -121,65 +120,62 @@ static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible, bool *n = t - p; } -static int apply_mount(Path *p, const char *root_dir, const char *inaccessible_dir, const char *private_dir, unsigned long flags) { +static int apply_mount( + Path *p, + const char *tmp_dir, + const char *var_tmp_dir, + const char *inaccessible_dir) { + const char *what; - char *where; int r; assert(p); - assert(root_dir); - assert(inaccessible_dir); - assert(private_dir); - - where = strappend(root_dir, p->path); - if (!where) - return -ENOMEM; switch (p->mode) { case INACCESSIBLE: what = inaccessible_dir; - flags |= MS_RDONLY; break; case READONLY: - flags |= MS_RDONLY; - /* Fall through */ - case READWRITE: what = p->path; break; - case PRIVATE: - what = private_dir; + case PRIVATE_TMP: + what = tmp_dir; + break; + + case PRIVATE_VAR_TMP: + what = var_tmp_dir; break; default: assert_not_reached("Unknown mode"); } - r = mount(what, where, NULL, MS_BIND|MS_REC, NULL); - if (r >= 0) { - log_debug("Successfully mounted %s to %s", what, where); + assert(what); - /* The bind mount will always inherit the original - * flags. If we want to set any flag we need - * to do so in a second independent step. */ - if (flags) - r = mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|MS_REC|flags, NULL); + r = mount(what, p->path, NULL, MS_BIND, NULL); + if (r >= 0) + log_debug("Successfully mounted %s to %s", what, p->path); - /* Avoid exponential growth of trees */ - if (r >= 0 && path_equal(p->path, "/")) - r = mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL); + return r; +} - if (r < 0) { - r = -errno; - umount2(where, MNT_DETACH); - } - } +static int make_read_only(Path *p) { + int r; - free(where); - return r; + assert(p); + + if (p->mode != INACCESSIBLE && p->mode != READONLY) + return 0; + + r = mount(NULL, p->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); + if (r < 0) + return -errno; + + return 0; } int setup_namespace( @@ -190,30 +186,26 @@ int setup_namespace( unsigned long flags) { char - tmp_dir[] = "/tmp/systemd-namespace-XXXXXX", - root_dir[] = "/tmp/systemd-namespace-XXXXXX/root", - old_root_dir[] = "/tmp/systemd-namespace-XXXXXX/root/tmp/old-root-XXXXXX", - inaccessible_dir[] = "/tmp/systemd-namespace-XXXXXX/inaccessible", - private_dir[] = "/tmp/systemd-namespace-XXXXXX/private"; + tmp_dir[] = "/tmp/systemd-private-XXXXXX", + var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX", + inaccessible_dir[] = "/tmp/systemd-inaccessible-XXXXXX"; Path *paths, *p; unsigned n; - bool need_private = false, need_inaccessible = false; - bool remove_tmp = false, remove_root = false, remove_old_root = false, remove_inaccessible = false, remove_private = false; + bool need_inaccessible = false; + bool remove_tmp = false, remove_var_tmp = false, remove_inaccessible = false; int r; - const char *t; + + if (!flags) + flags = MS_SHARED; n = strv_length(writable) + strv_length(readable) + strv_length(inaccessible) + - (private_tmp ? 3 : 1); - - paths = new(Path, n); - if (!paths) - return -ENOMEM; + (private_tmp ? 2 : 0); - p = paths; + p = paths = alloca(sizeof(Path) * n); if ((r = append_paths(&p, writable, READWRITE)) < 0 || (r = append_paths(&p, readable, READONLY)) < 0 || (r = append_paths(&p, inaccessible, INACCESSIBLE)) < 0) @@ -221,60 +213,70 @@ int setup_namespace( if (private_tmp) { p->path = "/tmp"; - p->mode = PRIVATE; + p->mode = PRIVATE_TMP; p++; p->path = "/var/tmp"; - p->mode = PRIVATE; + p->mode = PRIVATE_VAR_TMP; p++; } - p->path = "/"; - p->mode = READWRITE; - p++; - assert(paths + n == p); qsort(paths, n, sizeof(Path), path_compare); - drop_duplicates(paths, &n, &need_inaccessible, &need_private); + drop_duplicates(paths, &n, &need_inaccessible); - if (!mkdtemp(tmp_dir)) { - r = -errno; - goto fail; - } - remove_tmp = true; + if (need_inaccessible) { + mode_t u; + char *d; - memcpy(root_dir, tmp_dir, sizeof(tmp_dir)-1); - if (mkdir(root_dir, 0777) < 0) { - r = -errno; - goto fail; - } - remove_root = true; + u = umask(0777); + d = mkdtemp(inaccessible_dir); + umask(u); - if (need_inaccessible) { - memcpy(inaccessible_dir, tmp_dir, sizeof(tmp_dir)-1); - if (mkdir(inaccessible_dir, 0) < 0) { + if (!d) { r = -errno; goto fail; } + remove_inaccessible = true; } - if (need_private) { + if (private_tmp) { mode_t u; - - memcpy(private_dir, tmp_dir, sizeof(tmp_dir)-1); + char *d; u = umask(0000); - if (mkdir(private_dir, 0777 + S_ISVTX) < 0) { - umask(u); + d = mkdtemp(tmp_dir); + umask(u); + if (!d) { r = -errno; goto fail; } + remove_tmp = true; + + u = umask(0000); + d = mkdtemp(var_tmp_dir); umask(u); - remove_private = true; + + if (!d) { + r = -errno; + goto fail; + } + + remove_var_tmp = true; + + if (chmod(tmp_dir, 0777 + S_ISVTX) < 0) { + r = -errno; + goto fail; + } + + if (chmod(var_tmp_dir, 0777 + S_ISVTX) < 0) { + r = -errno; + goto fail; + } } if (unshare(CLONE_NEWNS) < 0) { @@ -282,7 +284,7 @@ int setup_namespace( goto fail; } - /* Remount / as SLAVE so that nothing mounted in the namespace + /* Remount / as SLAVE so that nothing now mounted in the namespace shows up in the parent */ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { r = -errno; @@ -290,69 +292,39 @@ int setup_namespace( } for (p = paths; p < paths + n; p++) { - r = apply_mount(p, root_dir, inaccessible_dir, private_dir, flags); + r = apply_mount(p, tmp_dir, var_tmp_dir, inaccessible_dir); if (r < 0) goto undo_mounts; } - memcpy(old_root_dir, tmp_dir, sizeof(tmp_dir)-1); - if (!mkdtemp(old_root_dir)) { - r = -errno; - goto undo_mounts; - } - remove_old_root = true; - - if (chdir(root_dir) < 0) { - r = -errno; - goto undo_mounts; + for (p = paths; p < paths + n; p++) { + r = make_read_only(p); + if (r < 0) + goto undo_mounts; } - if (pivot_root(root_dir, old_root_dir) < 0) { + /* Remount / as the desired mode */ + if (mount(NULL, "/", NULL, flags|MS_REC, NULL) < 0) { r = -errno; goto undo_mounts; } - free(paths); - - t = old_root_dir + sizeof(root_dir) - 1; - if (umount2(t, MNT_DETACH) < 0) - /* At this point it's too late to turn anything back, - * since we are already in the new root. */ - return -errno; - - if (rmdir(t) < 0) - return -errno; - return 0; undo_mounts: - - for (p--; p >= paths; p--) { - char full_path[PATH_MAX]; - - snprintf(full_path, sizeof(full_path), "%s%s", root_dir, p->path); - char_array_0(full_path); - - umount2(full_path, MNT_DETACH); - } + for (p = paths; p < paths + n; p++) + if (p->done) + umount2(p->path, MNT_DETACH); fail: - if (remove_old_root) - rmdir(old_root_dir); - if (remove_inaccessible) rmdir(inaccessible_dir); - if (remove_private) - rmdir(private_dir); - - if (remove_root) - rmdir(root_dir); - if (remove_tmp) rmdir(tmp_dir); - free(paths); + if (remove_var_tmp) + rmdir(var_tmp_dir); return r; } diff --git a/src/core/switch-root.c b/src/core/switch-root.c index efc7d345e..150332a85 100644 --- a/src/core/switch-root.c +++ b/src/core/switch-root.c @@ -115,6 +115,12 @@ int switch_root(const char *new_root) { goto fail; } + if (chdir("/") < 0) { + r = -errno; + log_error("Failed to change directory: %m"); + goto fail; + } + if (old_root_fd >= 0) { struct stat rb; diff --git a/src/shared/util.c b/src/shared/util.c index 946b7d53f..e615195af 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -3011,7 +3011,8 @@ unsigned long long random_ull(void) { uint64_t ull; ssize_t r; - if ((fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY)) < 0) + fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY); + if (fd < 0) goto fallback; r = loop_read(fd, &ull, sizeof(ull), true); diff --git a/src/test/test-ns.c b/src/test/test-ns.c index 102b00588..b1c759fc2 100644 --- a/src/test/test-ns.c +++ b/src/test/test-ns.c @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) { NULL }; - const char * const readable[] = { + const char * const readonly[] = { "/", "/usr", "/boot", @@ -48,7 +48,8 @@ int main(int argc, char *argv[]) { int r; - if ((r = setup_namespace((char**) writable, (char**) readable, (char**) inaccessible, true, MS_SHARED)) < 0) { + r = setup_namespace((char**) writable, (char**) readonly, (char**) inaccessible, true, 0); + if (r < 0) { log_error("Failed to setup namespace: %s", strerror(-r)); return 1; } -- 2.30.2