chiark / gitweb /
namespace: beef up read-only bind mount logic
authorLennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2014 09:42:25 +0000 (11:42 +0200)
committerLennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2014 12:37:40 +0000 (14:37 +0200)
Instead of blindly creating another bind mount for read-only mounts,
check if there's already one we can use, and if so, use it. Also,
recursively mark all submounts read-only too. Also, ignore autofs mounts
when remounting read-only unless they are already triggered.

man/systemd.exec.xml
src/core/namespace.c
src/nspawn/nspawn.c
src/shared/util.c
src/shared/util.h

index c5bb55c..c419424 100644 (file)
                                 <term><varname>ReadOnlyDirectories=</varname></term>
                                 <term><varname>InaccessibleDirectories=</varname></term>
 
-                                <listitem><para>Sets up a new
-                                file system namespace for executed
+                                <listitem><para>Sets up a new file
+                                system namespace for executed
                                 processes. These options may be used
                                 to limit access a process might have
                                 to the main file system
                                 processes inside the namespace. Note
                                 that restricting access with these
                                 options does not extend to submounts
-                                of a directory. You must list
-                                submounts separately in these settings
-                                to ensure the same limited
-                                access. These options may be specified
+                                of a directory that are created later
+                                on. These options may be specified
                                 more than once in which case all
                                 directories listed will have limited
                                 access from within the namespace. If
                                 the empty string is assigned to this
-                                option, the specific list is reset, and
-                                all prior assignments have no
+                                option, the specific list is reset,
+                                and all prior assignments have no
                                 effect.</para>
                                 <para>Paths in
                                 <varname>ReadOnlyDirectories=</varname>
index 43b9045..f11065e 100644 (file)
@@ -280,9 +280,6 @@ static int apply_mount(
 
         switch (m->mode) {
 
-        case PRIVATE_DEV:
-                return mount_dev(m);
-
         case INACCESSIBLE:
 
                 /* First, get rid of everything that is below if there
@@ -295,8 +292,9 @@ static int apply_mount(
 
         case READONLY:
         case READWRITE:
-                what = m->path;
-                break;
+                /* Nothing to mount here, we just later toggle the
+                 * MS_RDONLY bit for the mount point */
+                return 0;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
@@ -306,6 +304,9 @@ static int apply_mount(
                 what = var_tmp_dir;
                 break;
 
+        case PRIVATE_DEV:
+                return mount_dev(m);
+
         default:
                 assert_not_reached("Unknown mode");
         }
@@ -316,7 +317,7 @@ static int apply_mount(
         if (r >= 0)
                 log_debug("Successfully mounted %s to %s", what, m->path);
         else if (m->ignore && errno == ENOENT)
-                r = 0;
+                return 0;
 
         return r;
 }
@@ -326,14 +327,17 @@ static int make_read_only(BindMount *m) {
 
         assert(m);
 
-        if (m->mode != INACCESSIBLE && m->mode != READONLY)
-                return 0;
+        if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+                r = bind_remount_recursive(m->path, true);
+        else if (m->mode == READWRITE)
+                r = bind_remount_recursive(m->path, false);
+        else
+                r = 0;
 
-        r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
-        if (r < 0 && !(m->ignore && errno == ENOENT))
-                return -errno;
+        if (m->ignore && r == -ENOENT)
+                return 0;
 
-        return 0;
+        return r;
 }
 
 int setup_namespace(
index a1d7724..19fb086 100644 (file)
@@ -635,7 +635,7 @@ static int mount_all(const char *dest) {
         return r;
 }
 
-static int mount_binds(const char *dest, char **l, unsigned long flags) {
+static int mount_binds(const char *dest, char **l, bool ro) {
         char **x, **y;
 
         STRV_FOREACH_PAIR(x, y, l) {
@@ -686,9 +686,12 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) {
                         return -errno;
                 }
 
-                if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
-                        log_error("mount(%s) failed: %m", where);
-                        return -errno;
+                if (ro) {
+                        r = bind_remount_recursive(where, true);
+                        if (r < 0) {
+                                log_error("Read-Only bind mount failed: %s", strerror(-r));
+                                return r;
+                        }
                 }
         }
 
@@ -2941,15 +2944,17 @@ int main(int argc, char *argv[]) {
 
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
-                                log_error("Failed to make bind mount.");
+                                log_error("Failed to make bind mount: %m");
                                 goto child_fail;
                         }
 
-                        if (arg_read_only)
-                                if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
-                                        log_error("Failed to make read-only.");
+                        if (arg_read_only) {
+                                k = bind_remount_recursive(arg_directory, true);
+                                if (k < 0) {
+                                        log_error("Failed to make tree read-only: %s", strerror(-k));
                                         goto child_fail;
                                 }
+                        }
 
                         if (mount_all(arg_directory) < 0)
                                 goto child_fail;
@@ -2985,10 +2990,10 @@ int main(int argc, char *argv[]) {
                         if (setup_journal(arg_directory) < 0)
                                 goto child_fail;
 
-                        if (mount_binds(arg_directory, arg_bind, 0) < 0)
+                        if (mount_binds(arg_directory, arg_bind, false) < 0)
                                 goto child_fail;
 
-                        if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
+                        if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
                                 goto child_fail;
 
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
index 011fb36..7fa3742 100644 (file)
@@ -6540,7 +6540,6 @@ int umount_recursive(const char *prefix, int flags) {
                                    "%*s"        /* (11) mount options 2 */
                                    "%*[^\n]",   /* some rubbish at the end */
                                    &path);
-
                         if (k != 1) {
                                 if (k == EOF)
                                         break;
@@ -6570,3 +6569,147 @@ int umount_recursive(const char *prefix, int flags) {
 
         return r ? r : n;
 }
+
+int bind_remount_recursive(const char *prefix, bool ro) {
+        _cleanup_set_free_free_ Set *done = NULL;
+        _cleanup_free_ char *cleaned = NULL;
+        int r;
+
+        /* Recursively remount a directory (and all its submounts)
+         * read-only or read-write. If the directory is already
+         * mounted, we reuse the mount and simply mark it
+         * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+         * operation). If it isn't we first make it one. Afterwards we
+         * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
+         * submounts we can access, too. When mounts are stacked on
+         * the same mount point we only care for each individual
+         * "top-level" mount on each point, as we cannot
+         * influence/access the underlying mounts anyway. We do not
+         * have any effect on future submounts that might get
+         * propagated, they migt be writable. This includes future
+         * submounts that have been triggered via autofs. */
+
+        cleaned = strdup(prefix);
+        if (!cleaned)
+                return -ENOMEM;
+
+        path_kill_slashes(cleaned);
+
+        done = set_new(string_hash_func, string_compare_func);
+        if (!done)
+                return -ENOMEM;
+
+        for (;;) {
+                _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+                _cleanup_set_free_free_ Set *todo = NULL;
+                bool top_autofs = false;
+                char *x;
+
+                todo = set_new(string_hash_func, string_compare_func);
+                if (!todo)
+                        return -ENOMEM;
+
+                proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+                if (!proc_self_mountinfo)
+                        return -errno;
+
+                for (;;) {
+                        _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+                        int k;
+
+                        k = fscanf(proc_self_mountinfo,
+                                   "%*s "       /* (1) mount id */
+                                   "%*s "       /* (2) parent id */
+                                   "%*s "       /* (3) major:minor */
+                                   "%*s "       /* (4) root */
+                                   "%ms "       /* (5) mount point */
+                                   "%*s"        /* (6) mount options (superblock) */
+                                   "%*[^-]"     /* (7) optional fields */
+                                   "- "         /* (8) separator */
+                                   "%ms "       /* (9) file system type */
+                                   "%*s"        /* (10) mount source */
+                                   "%*s"        /* (11) mount options (bind mount) */
+                                   "%*[^\n]",   /* some rubbish at the end */
+                                   &path,
+                                   &type);
+                        if (k != 2) {
+                                if (k == EOF)
+                                        break;
+
+                                continue;
+                        }
+
+                        p = cunescape(path);
+                        if (!p)
+                                return -ENOMEM;
+
+                        /* Let's ignore autofs mounts.  If they aren't
+                         * triggered yet, we want to avoid triggering
+                         * them, as we don't make any guarantees for
+                         * future submounts anyway.  If they are
+                         * already triggered, then we will find
+                         * another entry for this. */
+                        if (streq(type, "autofs")) {
+                                top_autofs = top_autofs || path_equal(cleaned, p);
+                                continue;
+                        }
+
+                        if (path_startswith(p, cleaned) &&
+                            !set_contains(done, p)) {
+
+                                r = set_consume(todo, p);
+                                p = NULL;
+
+                                if (r == -EEXIST)
+                                        continue;
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+
+                /* If we have no submounts to process anymore and if
+                 * the root is either already done, or an autofs, we
+                 * are done */
+                if (set_isempty(todo) &&
+                    (top_autofs || set_contains(done, cleaned)))
+                        return 0;
+
+                if (!set_contains(done, cleaned) &&
+                    !set_contains(todo, cleaned)) {
+                        /* The prefix directory itself is not yet a
+                         * mount, make it one. */
+                        if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+                                return -errno;
+
+                        if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+                                return -errno;
+
+                        x = strdup(cleaned);
+                        if (!x)
+                                return -ENOMEM;
+
+                        r = set_consume(done, x);
+                        if (r < 0)
+                                return r;
+                }
+
+                while ((x = set_steal_first(todo))) {
+
+                        r = set_consume(done, x);
+                        if (r == -EEXIST)
+                                continue;
+                        if (r < 0)
+                                return r;
+
+                        if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
+
+                                /* Deal with mount points that are
+                                 * obstructed by a later mount */
+
+                                if (errno != ENOENT)
+                                        return -errno;
+                        }
+
+                }
+        }
+}
index ac851fa..952239e 100644 (file)
@@ -944,3 +944,5 @@ union file_handle_union {
 int update_reboot_param_file(const char *param);
 
 int umount_recursive(const char *target, int flags);
+
+int bind_remount_recursive(const char *prefix, bool ro);