chiark / gitweb /
namespace: beef up read-only bind mount logic
authorLennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2014 09:42:25 +0000 (11:42 +0200)
committerLennart Poettering <lennart@poettering.net>
Fri, 6 Jun 2014 12:37:40 +0000 (14:37 +0200)
Instead of blindly creating another bind mount for read-only mounts,
check if there's already one we can use, and if so, use it. Also,
recursively mark all submounts read-only too. Also, ignore autofs mounts
when remounting read-only unless they are already triggered.

man/systemd.exec.xml
src/core/namespace.c
src/nspawn/nspawn.c
src/shared/util.c
src/shared/util.h

index c5bb55c..c419424 100644 (file)
                                 <term><varname>ReadOnlyDirectories=</varname></term>
                                 <term><varname>InaccessibleDirectories=</varname></term>
 
                                 <term><varname>ReadOnlyDirectories=</varname></term>
                                 <term><varname>InaccessibleDirectories=</varname></term>
 
-                                <listitem><para>Sets up a new
-                                file system namespace for executed
+                                <listitem><para>Sets up a new file
+                                system namespace for executed
                                 processes. These options may be used
                                 to limit access a process might have
                                 to the main file system
                                 processes. These options may be used
                                 to limit access a process might have
                                 to the main file system
                                 processes inside the namespace. Note
                                 that restricting access with these
                                 options does not extend to submounts
                                 processes inside the namespace. Note
                                 that restricting access with these
                                 options does not extend to submounts
-                                of a directory. You must list
-                                submounts separately in these settings
-                                to ensure the same limited
-                                access. These options may be specified
+                                of a directory that are created later
+                                on. These options may be specified
                                 more than once in which case all
                                 directories listed will have limited
                                 access from within the namespace. If
                                 the empty string is assigned to this
                                 more than once in which case all
                                 directories listed will have limited
                                 access from within the namespace. If
                                 the empty string is assigned to this
-                                option, the specific list is reset, and
-                                all prior assignments have no
+                                option, the specific list is reset,
+                                and all prior assignments have no
                                 effect.</para>
                                 <para>Paths in
                                 <varname>ReadOnlyDirectories=</varname>
                                 effect.</para>
                                 <para>Paths in
                                 <varname>ReadOnlyDirectories=</varname>
index 43b9045..f11065e 100644 (file)
@@ -280,9 +280,6 @@ static int apply_mount(
 
         switch (m->mode) {
 
 
         switch (m->mode) {
 
-        case PRIVATE_DEV:
-                return mount_dev(m);
-
         case INACCESSIBLE:
 
                 /* First, get rid of everything that is below if there
         case INACCESSIBLE:
 
                 /* First, get rid of everything that is below if there
@@ -295,8 +292,9 @@ static int apply_mount(
 
         case READONLY:
         case READWRITE:
 
         case READONLY:
         case READWRITE:
-                what = m->path;
-                break;
+                /* Nothing to mount here, we just later toggle the
+                 * MS_RDONLY bit for the mount point */
+                return 0;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
 
         case PRIVATE_TMP:
                 what = tmp_dir;
@@ -306,6 +304,9 @@ static int apply_mount(
                 what = var_tmp_dir;
                 break;
 
                 what = var_tmp_dir;
                 break;
 
+        case PRIVATE_DEV:
+                return mount_dev(m);
+
         default:
                 assert_not_reached("Unknown mode");
         }
         default:
                 assert_not_reached("Unknown mode");
         }
@@ -316,7 +317,7 @@ static int apply_mount(
         if (r >= 0)
                 log_debug("Successfully mounted %s to %s", what, m->path);
         else if (m->ignore && errno == ENOENT)
         if (r >= 0)
                 log_debug("Successfully mounted %s to %s", what, m->path);
         else if (m->ignore && errno == ENOENT)
-                r = 0;
+                return 0;
 
         return r;
 }
 
         return r;
 }
@@ -326,14 +327,17 @@ static int make_read_only(BindMount *m) {
 
         assert(m);
 
 
         assert(m);
 
-        if (m->mode != INACCESSIBLE && m->mode != READONLY)
-                return 0;
+        if (IN_SET(m->mode, INACCESSIBLE, READONLY))
+                r = bind_remount_recursive(m->path, true);
+        else if (m->mode == READWRITE)
+                r = bind_remount_recursive(m->path, false);
+        else
+                r = 0;
 
 
-        r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
-        if (r < 0 && !(m->ignore && errno == ENOENT))
-                return -errno;
+        if (m->ignore && r == -ENOENT)
+                return 0;
 
 
-        return 0;
+        return r;
 }
 
 int setup_namespace(
 }
 
 int setup_namespace(
index a1d7724..19fb086 100644 (file)
@@ -635,7 +635,7 @@ static int mount_all(const char *dest) {
         return r;
 }
 
         return r;
 }
 
-static int mount_binds(const char *dest, char **l, unsigned long flags) {
+static int mount_binds(const char *dest, char **l, bool ro) {
         char **x, **y;
 
         STRV_FOREACH_PAIR(x, y, l) {
         char **x, **y;
 
         STRV_FOREACH_PAIR(x, y, l) {
@@ -686,9 +686,12 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) {
                         return -errno;
                 }
 
                         return -errno;
                 }
 
-                if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
-                        log_error("mount(%s) failed: %m", where);
-                        return -errno;
+                if (ro) {
+                        r = bind_remount_recursive(where, true);
+                        if (r < 0) {
+                                log_error("Read-Only bind mount failed: %s", strerror(-r));
+                                return r;
+                        }
                 }
         }
 
                 }
         }
 
@@ -2941,15 +2944,17 @@ int main(int argc, char *argv[]) {
 
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
 
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
-                                log_error("Failed to make bind mount.");
+                                log_error("Failed to make bind mount: %m");
                                 goto child_fail;
                         }
 
                                 goto child_fail;
                         }
 
-                        if (arg_read_only)
-                                if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
-                                        log_error("Failed to make read-only.");
+                        if (arg_read_only) {
+                                k = bind_remount_recursive(arg_directory, true);
+                                if (k < 0) {
+                                        log_error("Failed to make tree read-only: %s", strerror(-k));
                                         goto child_fail;
                                 }
                                         goto child_fail;
                                 }
+                        }
 
                         if (mount_all(arg_directory) < 0)
                                 goto child_fail;
 
                         if (mount_all(arg_directory) < 0)
                                 goto child_fail;
@@ -2985,10 +2990,10 @@ int main(int argc, char *argv[]) {
                         if (setup_journal(arg_directory) < 0)
                                 goto child_fail;
 
                         if (setup_journal(arg_directory) < 0)
                                 goto child_fail;
 
-                        if (mount_binds(arg_directory, arg_bind, 0) < 0)
+                        if (mount_binds(arg_directory, arg_bind, false) < 0)
                                 goto child_fail;
 
                                 goto child_fail;
 
-                        if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
+                        if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
                                 goto child_fail;
 
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
                                 goto child_fail;
 
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
index 011fb36..7fa3742 100644 (file)
@@ -6540,7 +6540,6 @@ int umount_recursive(const char *prefix, int flags) {
                                    "%*s"        /* (11) mount options 2 */
                                    "%*[^\n]",   /* some rubbish at the end */
                                    &path);
                                    "%*s"        /* (11) mount options 2 */
                                    "%*[^\n]",   /* some rubbish at the end */
                                    &path);
-
                         if (k != 1) {
                                 if (k == EOF)
                                         break;
                         if (k != 1) {
                                 if (k == EOF)
                                         break;
@@ -6570,3 +6569,147 @@ int umount_recursive(const char *prefix, int flags) {
 
         return r ? r : n;
 }
 
         return r ? r : n;
 }
+
+int bind_remount_recursive(const char *prefix, bool ro) {
+        _cleanup_set_free_free_ Set *done = NULL;
+        _cleanup_free_ char *cleaned = NULL;
+        int r;
+
+        /* Recursively remount a directory (and all its submounts)
+         * read-only or read-write. If the directory is already
+         * mounted, we reuse the mount and simply mark it
+         * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
+         * operation). If it isn't we first make it one. Afterwards we
+         * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
+         * submounts we can access, too. When mounts are stacked on
+         * the same mount point we only care for each individual
+         * "top-level" mount on each point, as we cannot
+         * influence/access the underlying mounts anyway. We do not
+         * have any effect on future submounts that might get
+         * propagated, they migt be writable. This includes future
+         * submounts that have been triggered via autofs. */
+
+        cleaned = strdup(prefix);
+        if (!cleaned)
+                return -ENOMEM;
+
+        path_kill_slashes(cleaned);
+
+        done = set_new(string_hash_func, string_compare_func);
+        if (!done)
+                return -ENOMEM;
+
+        for (;;) {
+                _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
+                _cleanup_set_free_free_ Set *todo = NULL;
+                bool top_autofs = false;
+                char *x;
+
+                todo = set_new(string_hash_func, string_compare_func);
+                if (!todo)
+                        return -ENOMEM;
+
+                proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
+                if (!proc_self_mountinfo)
+                        return -errno;
+
+                for (;;) {
+                        _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
+                        int k;
+
+                        k = fscanf(proc_self_mountinfo,
+                                   "%*s "       /* (1) mount id */
+                                   "%*s "       /* (2) parent id */
+                                   "%*s "       /* (3) major:minor */
+                                   "%*s "       /* (4) root */
+                                   "%ms "       /* (5) mount point */
+                                   "%*s"        /* (6) mount options (superblock) */
+                                   "%*[^-]"     /* (7) optional fields */
+                                   "- "         /* (8) separator */
+                                   "%ms "       /* (9) file system type */
+                                   "%*s"        /* (10) mount source */
+                                   "%*s"        /* (11) mount options (bind mount) */
+                                   "%*[^\n]",   /* some rubbish at the end */
+                                   &path,
+                                   &type);
+                        if (k != 2) {
+                                if (k == EOF)
+                                        break;
+
+                                continue;
+                        }
+
+                        p = cunescape(path);
+                        if (!p)
+                                return -ENOMEM;
+
+                        /* Let's ignore autofs mounts.  If they aren't
+                         * triggered yet, we want to avoid triggering
+                         * them, as we don't make any guarantees for
+                         * future submounts anyway.  If they are
+                         * already triggered, then we will find
+                         * another entry for this. */
+                        if (streq(type, "autofs")) {
+                                top_autofs = top_autofs || path_equal(cleaned, p);
+                                continue;
+                        }
+
+                        if (path_startswith(p, cleaned) &&
+                            !set_contains(done, p)) {
+
+                                r = set_consume(todo, p);
+                                p = NULL;
+
+                                if (r == -EEXIST)
+                                        continue;
+                                if (r < 0)
+                                        return r;
+                        }
+                }
+
+                /* If we have no submounts to process anymore and if
+                 * the root is either already done, or an autofs, we
+                 * are done */
+                if (set_isempty(todo) &&
+                    (top_autofs || set_contains(done, cleaned)))
+                        return 0;
+
+                if (!set_contains(done, cleaned) &&
+                    !set_contains(todo, cleaned)) {
+                        /* The prefix directory itself is not yet a
+                         * mount, make it one. */
+                        if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
+                                return -errno;
+
+                        if (mount(NULL, prefix, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
+                                return -errno;
+
+                        x = strdup(cleaned);
+                        if (!x)
+                                return -ENOMEM;
+
+                        r = set_consume(done, x);
+                        if (r < 0)
+                                return r;
+                }
+
+                while ((x = set_steal_first(todo))) {
+
+                        r = set_consume(done, x);
+                        if (r == -EEXIST)
+                                continue;
+                        if (r < 0)
+                                return r;
+
+                        if (mount(NULL, x, NULL, MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
+
+                                /* Deal with mount points that are
+                                 * obstructed by a later mount */
+
+                                if (errno != ENOENT)
+                                        return -errno;
+                        }
+
+                }
+        }
+}
index ac851fa..952239e 100644 (file)
@@ -944,3 +944,5 @@ union file_handle_union {
 int update_reboot_param_file(const char *param);
 
 int umount_recursive(const char *target, int flags);
 int update_reboot_param_file(const char *param);
 
 int umount_recursive(const char *target, int flags);
+
+int bind_remount_recursive(const char *prefix, bool ro);