chiark / gitweb /
namespace: rework namespace support
author: Lennart Poettering <lennart@poettering.net>
Mon, 13 Aug 2012 13:27:04 +0000 (15:27 +0200)
committer: Lennart Poettering <lennart@poettering.net>
Mon, 13 Aug 2012 13:27:04 +0000 (15:27 +0200)
- don't use pivot_root() anymore, just reuse root hierarchy
- first create all mounts, then mark them read-only so that we get the
  right behaviour when people want writable mounts inside of
  read-only mounts
- don't pass invalid combinations of MS_ constants to the kernel

man/systemd.exec.xml
src/core/execute.c
src/core/namespace.c
src/core/switch-root.c
src/shared/util.c
src/test/test-ns.c

index e1193d2d55c64288cf106209bb430218035e984b..cf6ab1778e7b5334cb7303dc4ab3940b6313c81b 100644 (file)
                                 <option>shared</option>,
                                 <option>slave</option> or
                                 <option>private</option>, which
-                                control whether namespaces set up with
-                                <varname>ReadWriteDirectories=</varname>,
-                                <varname>ReadOnlyDirectories=</varname>
-                                and
-                                <varname>InaccessibleDirectories=</varname>
-                                receive or propagate new mounts
-                                from/to the main namespace. See
+                                control whether the file system
+                                namespace set up for this unit's
+                                processes will receive or propagate
+                                new mounts. See
                                 <citerefentry><refentrytitle>mount</refentrytitle><manvolnum>1</manvolnum></citerefentry>
-                                for details. Defaults to
-                                <option>shared</option>, i.e. the new
-                                namespace will both receive new mount
-                                points from the main namespace as well
-                                as propagate new mounts to
-                                it.</para></listitem>
+                                for details. Defaults to
+                                <option>shared</option>.</para></listitem>
                         </varlistentry>
 
                         <varlistentry>
index fc0edc6cfd7387b4c92f66515497c4d9853cb4fa..6e2b5e48a01c7be317ba292392dd0316656a17fa 100644 (file)
@@ -1304,7 +1304,7 @@ int exec_spawn(ExecCommand *command,
                 if (strv_length(context->read_write_dirs) > 0 ||
                     strv_length(context->read_only_dirs) > 0 ||
                     strv_length(context->inaccessible_dirs) > 0 ||
-                    context->mount_flags != MS_SHARED ||
+                    context->mount_flags != 0 ||
                     context->private_tmp) {
                         err = setup_namespace(context->read_write_dirs,
                                               context->read_only_dirs,
@@ -1540,7 +1540,6 @@ void exec_context_init(ExecContext *c) {
         c->cpu_sched_policy = SCHED_OTHER;
         c->syslog_priority = LOG_DAEMON|LOG_INFO;
         c->syslog_level_prefix = true;
-        c->mount_flags = MS_SHARED;
         c->control_group_persistent = -1;
         c->ignore_sigpipe = true;
         c->timer_slack_nsec = (nsec_t) -1;
index ce10c790742c92bd35caf07bea1532d2b14479eb..5c2a24653c72708d87f90a26e72ced8534c55060 100644 (file)
@@ -41,13 +41,15 @@ typedef enum PathMode {
         /* This is ordered by priority! */
         INACCESSIBLE,
         READONLY,
-        PRIVATE,
+        PRIVATE_TMP,
+        PRIVATE_VAR_TMP,
         READWRITE
 } PathMode;
 
 typedef struct Path {
         const char *path;
         PathMode mode;
+        bool done;
 } Path;
 
 static int append_paths(Path **p, char **strv, PathMode mode) {
@@ -91,25 +93,22 @@ static int path_compare(const void *a, const void *b) {
         return 0;
 }
 
-static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible, bool *need_private) {
+static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible) {
         Path *f, *t, *previous;
 
         assert(p);
         assert(n);
         assert(need_inaccessible);
-        assert(need_private);
 
         for (f = p, t = p, previous = NULL; f < p+*n; f++) {
 
+                /* The first one wins */
                 if (previous && path_equal(f->path, previous->path))
                         continue;
 
                 t->path = f->path;
                 t->mode = f->mode;
 
-                if (t->mode == PRIVATE)
-                        *need_private = true;
-
                 if (t->mode == INACCESSIBLE)
                         *need_inaccessible = true;
 
@@ -121,65 +120,62 @@ static void drop_duplicates(Path *p, unsigned *n, bool *need_inaccessible, bool
         *n = t - p;
 }
 
-static int apply_mount(Path *p, const char *root_dir, const char *inaccessible_dir, const char *private_dir, unsigned long flags) {
+static int apply_mount(
+                Path *p,
+                const char *tmp_dir,
+                const char *var_tmp_dir,
+                const char *inaccessible_dir) {
+
         const char *what;
-        char *where;
         int r;
 
         assert(p);
-        assert(root_dir);
-        assert(inaccessible_dir);
-        assert(private_dir);
-
-        where = strappend(root_dir, p->path);
-        if (!where)
-                return -ENOMEM;
 
         switch (p->mode) {
 
         case INACCESSIBLE:
                 what = inaccessible_dir;
-                flags |= MS_RDONLY;
                 break;
 
         case READONLY:
-                flags |= MS_RDONLY;
-                /* Fall through */
-
         case READWRITE:
                 what = p->path;
                 break;
 
-        case PRIVATE:
-                what = private_dir;
+        case PRIVATE_TMP:
+                what = tmp_dir;
+                break;
+
+        case PRIVATE_VAR_TMP:
+                what = var_tmp_dir;
                 break;
 
         default:
                 assert_not_reached("Unknown mode");
         }
 
-        r = mount(what, where, NULL, MS_BIND|MS_REC, NULL);
-        if (r >= 0) {
-                log_debug("Successfully mounted %s to %s", what, where);
+        assert(what);
 
-                /* The bind mount will always inherit the original
-                 * flags. If we want to set any flag we need
-                 * to do so in a second independent step. */
-                if (flags)
-                        r = mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|MS_REC|flags, NULL);
+        r = mount(what, p->path, NULL, MS_BIND, NULL);
+        if (r >= 0)
+                log_debug("Successfully mounted %s to %s", what, p->path);
 
-                /* Avoid exponential growth of trees */
-                if (r >= 0 && path_equal(p->path, "/"))
-                        r = mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL);
+        return r;
+}
 
-                if (r < 0) {
-                        r = -errno;
-                        umount2(where, MNT_DETACH);
-                }
-        }
+static int make_read_only(Path *p) {
+        int r;
 
-        free(where);
-        return r;
+        assert(p);
+
+        if (p->mode != INACCESSIBLE && p->mode != READONLY)
+                return 0;
+
+        r = mount(NULL, p->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
+        if (r < 0)
+                return -errno;
+
+        return 0;
 }
 
 int setup_namespace(
@@ -190,30 +186,26 @@ int setup_namespace(
                 unsigned long flags) {
 
         char
-                tmp_dir[] = "/tmp/systemd-namespace-XXXXXX",
-                root_dir[] = "/tmp/systemd-namespace-XXXXXX/root",
-                old_root_dir[] = "/tmp/systemd-namespace-XXXXXX/root/tmp/old-root-XXXXXX",
-                inaccessible_dir[] = "/tmp/systemd-namespace-XXXXXX/inaccessible",
-                private_dir[] = "/tmp/systemd-namespace-XXXXXX/private";
+                tmp_dir[] = "/tmp/systemd-private-XXXXXX",
+                var_tmp_dir[] = "/var/tmp/systemd-private-XXXXXX",
+                inaccessible_dir[] = "/tmp/systemd-inaccessible-XXXXXX";
 
         Path *paths, *p;
         unsigned n;
-        bool need_private = false, need_inaccessible = false;
-        bool remove_tmp = false, remove_root = false, remove_old_root = false, remove_inaccessible = false, remove_private = false;
+        bool need_inaccessible = false;
+        bool remove_tmp = false, remove_var_tmp = false, remove_inaccessible = false;
         int r;
-        const char *t;
+
+        if (!flags)
+                flags = MS_SHARED;
 
         n =
                 strv_length(writable) +
                 strv_length(readable) +
                 strv_length(inaccessible) +
-                (private_tmp ? 3 : 1);
-
-        paths = new(Path, n);
-        if (!paths)
-                return -ENOMEM;
+                (private_tmp ? 2 : 0);
 
-        p = paths;
+        p = paths = alloca(sizeof(Path) * n);
         if ((r = append_paths(&p, writable, READWRITE)) < 0 ||
             (r = append_paths(&p, readable, READONLY)) < 0 ||
             (r = append_paths(&p, inaccessible, INACCESSIBLE)) < 0)
@@ -221,60 +213,70 @@ int setup_namespace(
 
         if (private_tmp) {
                 p->path = "/tmp";
-                p->mode = PRIVATE;
+                p->mode = PRIVATE_TMP;
                 p++;
 
                 p->path = "/var/tmp";
-                p->mode = PRIVATE;
+                p->mode = PRIVATE_VAR_TMP;
                 p++;
         }
 
-        p->path = "/";
-        p->mode = READWRITE;
-        p++;
-
         assert(paths + n == p);
 
         qsort(paths, n, sizeof(Path), path_compare);
-        drop_duplicates(paths, &n, &need_inaccessible, &need_private);
+        drop_duplicates(paths, &n, &need_inaccessible);
 
-        if (!mkdtemp(tmp_dir)) {
-                r = -errno;
-                goto fail;
-        }
-        remove_tmp = true;
+        if (need_inaccessible) {
+                mode_t u;
+                char *d;
 
-        memcpy(root_dir, tmp_dir, sizeof(tmp_dir)-1);
-        if (mkdir(root_dir, 0777) < 0) {
-                r = -errno;
-                goto fail;
-        }
-        remove_root = true;
+                u = umask(0777);
+                d = mkdtemp(inaccessible_dir);
+                umask(u);
 
-        if (need_inaccessible) {
-                memcpy(inaccessible_dir, tmp_dir, sizeof(tmp_dir)-1);
-                if (mkdir(inaccessible_dir, 0) < 0) {
+                if (!d) {
                         r = -errno;
                         goto fail;
                 }
+
                 remove_inaccessible = true;
         }
 
-        if (need_private) {
+        if (private_tmp) {
                 mode_t u;
-
-                memcpy(private_dir, tmp_dir, sizeof(tmp_dir)-1);
+                char *d;
 
                 u = umask(0000);
-                if (mkdir(private_dir, 0777 + S_ISVTX) < 0) {
-                        umask(u);
+                d = mkdtemp(tmp_dir);
+                umask(u);
 
+                if (!d) {
                         r = -errno;
                         goto fail;
                 }
 
+                remove_tmp = true;
+
+                u = umask(0000);
+                d = mkdtemp(var_tmp_dir);
                 umask(u);
-                remove_private = true;
+
+                if (!d) {
+                        r = -errno;
+                        goto fail;
+                }
+
+                remove_var_tmp = true;
+
+                if (chmod(tmp_dir, 0777 + S_ISVTX) < 0) {
+                        r = -errno;
+                        goto fail;
+                }
+
+                if (chmod(var_tmp_dir, 0777 + S_ISVTX) < 0) {
+                        r = -errno;
+                        goto fail;
+                }
         }
 
         if (unshare(CLONE_NEWNS) < 0) {
@@ -282,7 +284,7 @@ int setup_namespace(
                 goto fail;
         }
 
-        /* Remount / as SLAVE so that nothing mounted in the namespace
+        /* Remount / as SLAVE so that nothing now mounted in the namespace
            shows up in the parent */
         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
                 r = -errno;
@@ -290,69 +292,39 @@ int setup_namespace(
         }
 
         for (p = paths; p < paths + n; p++) {
-                r = apply_mount(p, root_dir, inaccessible_dir, private_dir, flags);
+                r = apply_mount(p, tmp_dir, var_tmp_dir, inaccessible_dir);
                 if (r < 0)
                         goto undo_mounts;
         }
 
-        memcpy(old_root_dir, tmp_dir, sizeof(tmp_dir)-1);
-        if (!mkdtemp(old_root_dir)) {
-                r = -errno;
-                goto undo_mounts;
-        }
-        remove_old_root = true;
-
-        if (chdir(root_dir) < 0) {
-                r = -errno;
-                goto undo_mounts;
+        for (p = paths; p < paths + n; p++) {
+                r = make_read_only(p);
+                if (r < 0)
+                        goto undo_mounts;
         }
 
-        if (pivot_root(root_dir, old_root_dir) < 0) {
+        /* Remount / as the desired mode */
+        if (mount(NULL, "/", NULL, flags|MS_REC, NULL) < 0) {
                 r = -errno;
                 goto undo_mounts;
         }
 
-        free(paths);
-
-        t = old_root_dir + sizeof(root_dir) - 1;
-        if (umount2(t, MNT_DETACH) < 0)
-                /* At this point it's too late to turn anything back,
-                 * since we are already in the new root. */
-                return -errno;
-
-        if (rmdir(t) < 0)
-                return -errno;
-
         return 0;
 
 undo_mounts:
-
-        for (p--; p >= paths; p--) {
-                char full_path[PATH_MAX];
-
-                snprintf(full_path, sizeof(full_path), "%s%s", root_dir, p->path);
-                char_array_0(full_path);
-
-                umount2(full_path, MNT_DETACH);
-        }
+        for (p = paths; p < paths + n; p++)
+                if (p->done)
+                        umount2(p->path, MNT_DETACH);
 
 fail:
-        if (remove_old_root)
-                rmdir(old_root_dir);
-
         if (remove_inaccessible)
                 rmdir(inaccessible_dir);
 
-        if (remove_private)
-                rmdir(private_dir);
-
-        if (remove_root)
-                rmdir(root_dir);
-
         if (remove_tmp)
                 rmdir(tmp_dir);
 
-        free(paths);
+        if (remove_var_tmp)
+                rmdir(var_tmp_dir);
 
         return r;
 }
index efc7d345e1a83ca28a861b69eda9b0ee92cac281..150332a8587b841e8bd6a767fe6c8ce50393dbd3 100644 (file)
@@ -115,6 +115,12 @@ int switch_root(const char *new_root) {
                 goto fail;
         }
 
+        if (chdir("/") < 0) {
+                r = -errno;
+                log_error("Failed to change directory: %m");
+                goto fail;
+        }
+
         if (old_root_fd >= 0) {
                 struct stat rb;
 
index 946b7d53f95c1144b44705a4082a4e458ed24443..e615195af580711d4c919ad4461325612c6e9f80 100644 (file)
@@ -3011,7 +3011,8 @@ unsigned long long random_ull(void) {
         uint64_t ull;
         ssize_t r;
 
-        if ((fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY)) < 0)
+        fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC|O_NOCTTY);
+        if (fd < 0)
                 goto fallback;
 
         r = loop_read(fd, &ull, sizeof(ull), true);
index 102b005880d57a7069513d71d2705a3cfd53cce0..b1c759fc20add3ff7ac0ea59d1aa5b2e7ddd0316 100644 (file)
@@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
                 NULL
         };
 
-        const char * const readable[] = {
+        const char * const readonly[] = {
                 "/",
                 "/usr",
                 "/boot",
@@ -48,7 +48,8 @@ int main(int argc, char *argv[]) {
 
         int r;
 
-        if ((r = setup_namespace((char**) writable, (char**) readable, (char**) inaccessible, true, MS_SHARED)) < 0) {
+        r = setup_namespace((char**) writable, (char**) readonly, (char**) inaccessible, true, 0);
+        if (r < 0) {
                 log_error("Failed to setup namespace: %s", strerror(-r));
                 return 1;
         }