From d6797c920e9eb70f46a893c00fdd9ecb86d15f84 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 6 Jun 2014 11:42:25 +0200 Subject: [PATCH 1/1] namespace: beef up read-only bind mount logic Instead of blindly creating another bind mount for read-only mounts, check if there's already one we can use, and if so, use it. Also, recursively mark all submounts read-only too. Also, ignore autofs mounts when remounting read-only unless they are already triggered. --- man/systemd.exec.xml | 14 ++--- src/core/namespace.c | 28 +++++---- src/nspawn/nspawn.c | 25 +++++--- src/shared/util.c | 145 ++++++++++++++++++++++++++++++++++++++++++- src/shared/util.h | 2 + 5 files changed, 183 insertions(+), 31 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index c5bb55c55..c419424d9 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -777,8 +777,8 @@ ReadOnlyDirectories= InaccessibleDirectories= - Sets up a new - file system namespace for executed + Sets up a new file + system namespace for executed processes. These options may be used to limit access a process might have to the main file system @@ -799,16 +799,14 @@ processes inside the namespace. Note that restricting access with these options does not extend to submounts - of a directory. You must list - submounts separately in these settings - to ensure the same limited - access. These options may be specified + of a directory that are created later + on. These options may be specified more than once in which case all directories listed will have limited access from within the namespace. If the empty string is assigned to this - option, the specific list is reset, and - all prior assignments have no + option, the specific list is reset, + and all prior assignments have no effect. Paths in ReadOnlyDirectories= diff --git a/src/core/namespace.c b/src/core/namespace.c index 43b904580..f11065ee4 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -280,9 +280,6 @@ static int apply_mount( switch (m->mode) { - case PRIVATE_DEV: - return mount_dev(m); - case INACCESSIBLE: /* First, get rid of everything that is below if there @@ -295,8 +292,9 @@ static int apply_mount( case READONLY: case READWRITE: - what = m->path; - break; + /* Nothing to mount here, we just later toggle the + * MS_RDONLY bit for the mount point */ + return 0; case PRIVATE_TMP: what = tmp_dir; @@ -306,6 +304,9 @@ static int apply_mount( what = var_tmp_dir; break; + case PRIVATE_DEV: + return mount_dev(m); + default: assert_not_reached("Unknown mode"); } @@ -316,7 +317,7 @@ static int apply_mount( if (r >= 0) log_debug("Successfully mounted %s to %s", what, m->path); else if (m->ignore && errno == ENOENT) - r = 0; + return 0; return r; } @@ -326,14 +327,17 @@ static int make_read_only(BindMount *m) { assert(m); - if (m->mode != INACCESSIBLE && m->mode != READONLY) - return 0; + if (IN_SET(m->mode, INACCESSIBLE, READONLY)) + r = bind_remount_recursive(m->path, true); + else if (m->mode == READWRITE) + r = bind_remount_recursive(m->path, false); + else + r = 0; - r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL); - if (r < 0 && !(m->ignore && errno == ENOENT)) - return -errno; + if (m->ignore && r == -ENOENT) + return 0; - return 0; + return r; } int setup_namespace( diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index a1d77244f..19fb086e7 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -635,7 +635,7 @@ static int mount_all(const char *dest) { return r; } -static int mount_binds(const char *dest, char **l, unsigned long flags) { +static int mount_binds(const char *dest, char **l, bool ro) { char **x, **y; STRV_FOREACH_PAIR(x, y, l) { @@ -686,9 +686,12 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) { return -errno; } - if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) { - log_error("mount(%s) failed: %m", where); - return -errno; + if (ro) { + r = bind_remount_recursive(where, true); + if (r < 0) { + log_error("Read-Only bind mount failed: %s", strerror(-r)); + return r; + } } } @@ -2941,15 +2944,17 @@ int main(int argc, char *argv[]) { /* Turn directory into bind mount */ if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) { - log_error("Failed to make bind mount."); + log_error("Failed to make bind mount: %m"); goto child_fail; } - if (arg_read_only) - if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) { - log_error("Failed to make read-only."); + if (arg_read_only) { + k = bind_remount_recursive(arg_directory, true); + if (k < 0) { + log_error("Failed to make tree read-only: %s", strerror(-k)); goto child_fail; } + } if (mount_all(arg_directory) < 0) goto child_fail; @@ -2985,10 +2990,10 @@ int main(int argc, char *argv[]) { if (setup_journal(arg_directory) < 0) goto child_fail; - if (mount_binds(arg_directory, arg_bind, 0) < 0) + if (mount_binds(arg_directory, arg_bind, false) < 0) goto child_fail; - if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0) + if (mount_binds(arg_directory, arg_bind_ro, true) < 0) goto child_fail; if (setup_kdbus(arg_directory, kdbus_domain) < 0) diff --git a/src/shared/util.c b/src/shared/util.c index 011fb36f4..7fa3742b4 100644 --- a/src/shared/util.c +++ b/src/shared/util.c @@ -6540,7 +6540,6 @@ int umount_recursive(const char *prefix, int flags) { "%*s" /* (11) mount options 2 */ "%*[^\n]", /* some rubbish at the end */ &path); - if (k != 1) { if (k == EOF) break; @@ -6570,3 +6569,147 @@ int umount_recursive(const char *prefix, int flags) { return r ? r : n; } + +int bind_remount_recursive(const char *prefix, bool ro) { + _cleanup_set_free_free_ Set *done = NULL; + _cleanup_free_ char *cleaned = NULL; + int r; + + /* Recursively remount a directory (and all its submounts) + * read-only or read-write. If the directory is already + * mounted, we reuse the mount and simply mark it + * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write + * operation). If it isn't we first make it one. Afterwards we + * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all + * submounts we can access, too. When mounts are stacked on + * the same mount point we only care for each individual + * "top-level" mount on each point, as we cannot + * influence/access the underlying mounts anyway. We do not + * have any effect on future submounts that might get + * propagated, they migt be writable. This includes future + * submounts that have been triggered via autofs. */ + + cleaned = strdup(prefix); + if (!cleaned) + return -ENOMEM; + + path_kill_slashes(cleaned); + + done = set_new(string_hash_func, string_compare_func); + if (!done) + return -ENOMEM; + + for (;;) { + _cleanup_fclose_ FILE *proc_self_mountinfo = NULL; + _cleanup_set_free_free_ Set *todo = NULL; + bool top_autofs = false; + char *x; + + todo = set_new(string_hash_func, string_compare_func); + if (!todo) + return -ENOMEM; + + proc_self_mountinfo = fopen("/proc/self/mountinfo", "re"); + if (!proc_self_mountinfo) + return -errno; + + for (;;) { + _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL; + int k; + + k = fscanf(proc_self_mountinfo, + "%*s " /* (1) mount id */ + "%*s " /* (2) parent id */ + "%*s " /* (3) major:minor */ + "%*s " /* (4) root */ + "%ms " /* (5) mount point */ + "%*s" /* (6) mount options (superblock) */ + "%*[^-]" /* (7) optional fields */ + "- " /* (8) separator */ + "%ms " /* (9) file system type */ + "%*s" /* (10) mount source */ + "%*s" /* (11) mount options (bind mount) */ + "%*[^\n]", /* some rubbish at the end */ + &path, + &type); + if (k != 2) { + if (k == EOF) + break; + + continue; + } + + p = cunescape(path); + if (!p) + return -ENOMEM; + + /* Let's ignore autofs mounts. If they aren't + * triggered yet, we want to avoid triggering + * them, as we don't make any guarantees for + * future submounts anyway. If they are + * already triggered, then we will find + * another entry for this. */ + if (streq(type, "autofs")) { + top_autofs = top_autofs || path_equal(cleaned, p); + continue; + } + + if (path_startswith(p, cleaned) && + !set_contains(done, p)) { + + r = set_consume(todo, p); + p = NULL; + + if (r == -EEXIST) + continue; + if (r < 0) + return r; + } + } + + /* If we have no submounts to process anymore and if + * the root is either already done, or an autofs, we + * are done */ + if (set_isempty(todo) && + (top_autofs || set_contains(done, cleaned))) + return 0; + + if (!set_contains(done, cleaned) && + !set_contains(todo, cleaned)) { + /* The prefix directory itself is not yet a + * mount, make it one. */ + if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0) + return -errno; + + if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) + return -errno; + + x = strdup(cleaned); + if (!x) + return -ENOMEM; + + r = set_consume(done, x); + if (r < 0) + return r; + } + + while ((x = set_steal_first(todo))) { + + r = set_consume(done, x); + if (r == -EEXIST) + continue; + if (r < 0) + return r; + + if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) { + + /* Deal with mount points that are + * obstructed by a later mount */ + + if (errno != ENOENT) + return -errno; + } + + } + } +} diff --git a/src/shared/util.h b/src/shared/util.h index ac851fa4d..952239e7e 100644 --- a/src/shared/util.h +++ b/src/shared/util.h @@ -944,3 +944,5 @@ union file_handle_union { int update_reboot_param_file(const char *param); int umount_recursive(const char *target, int flags); + +int bind_remount_recursive(const char *prefix, bool ro); -- 2.30.2