chiark / gitweb /
namespace: beef up read-only bind mount logic
authorLennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2014 09:42:25 +0000 (11:42 +0200)
committerLennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2014 12:37:40 +0000 (14:37 +0200)
Instead of blindly creating another bind mount for read-only mounts,
check if there's already one we can use, and if so, use it. Also,
recursively mark all submounts read-only too. Also, ignore autofs mounts
when remounting read-only unless they are already triggered.

man/systemd.exec.xml
src/core/namespace.c
src/nspawn/nspawn.c
src/shared/util.c
src/shared/util.h

index c5bb55c556b3abc065917dc8890cba3694113e88..c419424d9d6d0603587dc71b8b8e31baabe8bbeb 100644 (file)
                                 <term><varname>ReadOnlyDirectories=</varname></term>
                                 <term><varname>InaccessibleDirectories=</varname></term>
 
                                 <term><varname>ReadOnlyDirectories=</varname></term>
                                 <term><varname>InaccessibleDirectories=</varname></term>
 
-                                <listitem><para>Sets up a new
-                                file system namespace for executed
+                                <listitem><para>Sets up a new file
+                                system namespace for executed
                                 processes. These options may be used
                                 to limit access a process might have
                                 to the main file system
                                 processes. These options may be used
                                 to limit access a process might have
                                 to the main file system
                                 processes inside the namespace. Note
                                 that restricting access with these
                                 options does not extend to submounts
                                 processes inside the namespace. Note
                                 that restricting access with these
                                 options does not extend to submounts
-                                of a directory. You must list
-                                submounts separately in these settings
-                                to ensure the same limited
-                                access. These options may be specified
+                                of a directory that are created later
+                                on. These options may be specified
                                 more than once in which case all
                                 directories listed will have limited
                                 access from within the namespace. If
                                 the empty string is assigned to this
                                 more than once in which case all
                                 directories listed will have limited
                                 access from within the namespace. If
                                 the empty string is assigned to this
-                                option, the specific list is reset, and
-                                all prior assignments have no
+                                option, the specific list is reset,
+                                and all prior assignments have no
                                 effect.</para>
                                 <para>Paths in
                                 <varname>ReadOnlyDirectories=</varname>
                                 effect.</para>
                                 <para>Paths in
                                 <varname>ReadOnlyDirectories=</varname>
index 43b9045800cb175a9b5dce74f52c0e10b89bad33..f11065ee4bdade83d78373d7f4bb09cfda15723a 100644 (file)
@@ -280,9 +280,6 @@ static int apply_mount(
 
         switch (m->mode) {
 
 
         switch (m->mode) {
 
-        case PRIVATE_DEV:
-                return mount_dev(m);
-
         case INACCESSIBLE:
 
                 /* First, get rid of everything that is below if there
         case INACCESSIBLE:
 
                 /* First, get rid of everything that is below if there
@@ -295,8 +292,9 @@ static int apply_mount(
 
         case READONLY:
         case READWRITE:
 
         case READONLY:
         case READWRITE:
-                what = m->path;
-                break;
+                /* Nothing to mount here, we just later toggle the
+                 * MS_RDONLY bit for the mount point */
+                return 0;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
@@ -306,6 +304,9 @@ static int apply_mount(
                 what = var_tmp_dir;
                 break;
 
                 what = var_tmp_dir;
                 break;
 
+        case PRIVATE_DEV:
+                return mount_dev(m);
+
         default:
                 assert_not_reached("Unknown mode");
         }
         default:
                 assert_not_reached("Unknown mode");
         }
@@ -316,7 +317,7 @@ static int apply_mount(
         if (r >= 0)
                 log_debug("Successfully mounted %s to %s", what, m->path);
         else if (m->ignore && errno == ENOENT)
         if (r >= 0)
                 log_debug("Successfully mounted %s to %s", what, m->path);
         else if (m->ignore && errno == ENOENT)
-                r = 0;
+                return 0;
 
         return r;
 }
 
         return r;
 }
@@ -326,14 +327,17 @@ static int make_read_only(BindMount *m) {
 
         assert(m);
 
 
         assert(m);
 
-        if (m->mode != INACCESSIBLE && m->mode != READONLY)
-                return 0;
+        if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+                r = bind_remount_recursive(m->path, true);
+        else if (m->mode == READWRITE)
+                r = bind_remount_recursive(m->path, false);
+        else
+                r = 0;
 
 
-        r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
-        if (r < 0 && !(m->ignore && errno == ENOENT))
-                return -errno;
+        if (m->ignore && r == -ENOENT)
+                return 0;
 
 
-        return 0;
+        return r;
 }
 
 int setup_namespace(
 }
 
 int setup_namespace(
index a1d77244f8c38edab93f1139cff95a3af6355f1a..19fb086e7ab8ef2c9a67278f5329d17f10c80dd8 100644 (file)
@@ -635,7 +635,7 @@ static int mount_all(const char *dest) {
         return r;
 }
 
         return r;
 }
 
-static int mount_binds(const char *dest, char **l, unsigned long flags) {
+static int mount_binds(const char *dest, char **l, bool ro) {
         char **x, **y;
 
         STRV_FOREACH_PAIR(x, y, l) {
         char **x, **y;
 
         STRV_FOREACH_PAIR(x, y, l) {
@@ -686,9 +686,12 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) {
                         return -errno;
                 }
 
                         return -errno;
                 }
 
-                if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
-                        log_error("mount(%s) failed: %m", where);
-                        return -errno;
+                if (ro) {
+                        r = bind_remount_recursive(where, true);
+                        if (r < 0) {
+                                log_error("Read-Only bind mount failed: %s", strerror(-r));
+                                return r;
+                        }
                 }
         }
 
                 }
         }
 
@@ -2941,15 +2944,17 @@ int main(int argc, char *argv[]) {
 
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
 
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
-                                log_error("Failed to make bind mount.");
+                                log_error("Failed to make bind mount: %m");
                                 goto child_fail;
                         }
 
                                 goto child_fail;
                         }
 
-                        if (arg_read_only)
-                                if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
-                                        log_error("Failed to make read-only.");
+                        if (arg_read_only) {
+                                k = bind_remount_recursive(arg_directory, true);
+                                if (k < 0) {
+                                        log_error("Failed to make tree read-only: %s", strerror(-k));
                                         goto child_fail;
                                 }
                                         goto child_fail;
                                 }
+                        }
 
                         if (mount_all(arg_directory) < 0)
                                 goto child_fail;
 
                         if (mount_all(arg_directory) < 0)
                                 goto child_fail;
@@ -2985,10 +2990,10 @@ int main(int argc, char *argv[]) {
                         if (setup_journal(arg_directory) < 0)
                                 goto child_fail;
 
                         if (setup_journal(arg_directory) < 0)
                                 goto child_fail;
 
-                        if (mount_binds(arg_directory, arg_bind, 0) < 0)
+                        if (mount_binds(arg_directory, arg_bind, false) < 0)
                                 goto child_fail;
 
                                 goto child_fail;
 
-                        if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
+                        if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
                                 goto child_fail;
 
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
                                 goto child_fail;
 
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
index 011fb36f498116ef0821273075d4de76484169dc..7fa3742b4afa69f7e83e067d83c166bcee75ced2 100644 (file)
@@ -6540,7 +6540,6 @@ int umount_recursive(const char *prefix, int flags) {
                                    "%*s"        /* (11) mount options 2 */
                                    "%*[^\n]",   /* some rubbish at the end */
                                    &path);
                                    "%*s"        /* (11) mount options 2 */
                                    "%*[^\n]",   /* some rubbish at the end */
                                    &path);
-
                         if (k != 1) {
                                 if (k == EOF)
                                         break;
                         if (k != 1) {
                                 if (k == EOF)
                                         break;
@@ -6570,3 +6569,147 @@ int umount_recursive(const char *prefix, int flags) {
 
         return r ? r : n;
 }
 
         return r ? r : n;
 }
+
+int bind_remount_recursive(const char *prefix, bool ro) {
+        _cleanup_set_free_free_ Set *done = NULL;
+        _cleanup_free_ char *cleaned = NULL;
+        int r;
+
+        /* Recursively remount a directory (and all its submounts)
+         * read-only or read-write. If the directory is already
+         * mounted, we reuse the mount and simply mark it
+         * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+         * operation). If it isn't we first make it one. Afterwards we
+         * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
+         * submounts we can access, too. When mounts are stacked on
+         * the same mount point we only care for each individual
+         * "top-level" mount on each point, as we cannot
+         * influence/access the underlying mounts anyway. We do not
+         * have any effect on future submounts that might get
+         * propagated, they migt be writable. This includes future
+         * submounts that have been triggered via autofs. */
+
+        cleaned = strdup(prefix);
+        if (!cleaned)
+                return -ENOMEM;
+
+        path_kill_slashes(cleaned);
+
+        done = set_new(string_hash_func, string_compare_func);
+        if (!done)
+                return -ENOMEM;
+
+        for (;;) {
+                _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+                _cleanup_set_free_free_ Set *todo = NULL;
+                bool top_autofs = false;
+                char *x;
+
+                todo = set_new(string_hash_func, string_compare_func);
+                if (!todo)
+                        return -ENOMEM;
+
+                proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+                if (!proc_self_mountinfo)
+                        return -errno;
+
+                for (;;) {
+                        _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+                        int k;
+
+                        k = fscanf(proc_self_mountinfo,
+                                   "%*s "       /* (1) mount id */
+                                   "%*s "       /* (2) parent id */
+                                   "%*s "       /* (3) major:minor */
+                                   "%*s "       /* (4) root */
+                                   "%ms "       /* (5) mount point */
+                                   "%*s"        /* (6) mount options (superblock) */
+                                   "%*[^-]"     /* (7) optional fields */
+                                   "- "         /* (8) separator */
+                                   "%ms "       /* (9) file system type */
+                                   "%*s"        /* (10) mount source */
+                                   "%*s"        /* (11) mount options (bind mount) */
+                                   "%*[^\n]",   /* some rubbish at the end */
+                                   &path,
+                                   &type);
+                        if (k != 2) {
+                                if (k == EOF)
+                                        break;
+
+                                continue;
+                        }
+
+                        p = cunescape(path);
+                        if (!p)
+                                return -ENOMEM;
+
+                        /* Let's ignore autofs mounts.  If they aren't
+                         * triggered yet, we want to avoid triggering
+                         * them, as we don't make any guarantees for
+                         * future submounts anyway.  If they are
+                         * already triggered, then we will find
+                         * another entry for this. */
+                        if (streq(type, "autofs")) {
+                                top_autofs = top_autofs || path_equal(cleaned, p);
+                                continue;
+                        }
+
+                        if (path_startswith(p, cleaned) &&
+                            !set_contains(done, p)) {
+
+                                r = set_consume(todo, p);
+                                p = NULL;
+
+                                if (r == -EEXIST)
+                                        continue;
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+
+                /* If we have no submounts to process anymore and if
+                 * the root is either already done, or an autofs, we
+                 * are done */
+                if (set_isempty(todo) &&
+                    (top_autofs || set_contains(done, cleaned)))
+                        return 0;
+
+                if (!set_contains(done, cleaned) &&
+                    !set_contains(todo, cleaned)) {
+                        /* The prefix directory itself is not yet a
+                         * mount, make it one. */
+                        if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+                                return -errno;
+
+                        if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+                                return -errno;
+
+                        x = strdup(cleaned);
+                        if (!x)
+                                return -ENOMEM;
+
+                        r = set_consume(done, x);
+                        if (r < 0)
+                                return r;
+                }
+
+                while ((x = set_steal_first(todo))) {
+
+                        r = set_consume(done, x);
+                        if (r == -EEXIST)
+                                continue;
+                        if (r < 0)
+                                return r;
+
+                        if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
+
+                                /* Deal with mount points that are
+                                 * obstructed by a later mount */
+
+                                if (errno != ENOENT)
+                                        return -errno;
+                        }
+
+                }
+        }
+}
index ac851fa4d6a5e7c8aab5d1f93397f7f730b0bcac..952239e7e6fe1b55fabadc13baf54aefcd2840e4 100644 (file)
@@ -944,3 +944,5 @@ union file_handle_union {
 int update_reboot_param_file(const char *param);
 
 int umount_recursive(const char *target, int flags);
 int update_reboot_param_file(const char *param);
 
 int umount_recursive(const char *target, int flags);
+
+int bind_remount_recursive(const char *prefix, bool ro);