chiark / gitweb /
nspawn: let's avoid using goto to wildly for non-cleanup purposes
[elogind.git] / src / nspawn / nspawn.c
index 26ac1bf7df2369cce29bfdc2d0bf0573923b7fc5..fd61d07761fc9353852b6ca3ca7bee79a5878f28 100644 (file)
@@ -88,6 +88,8 @@
 #include "blkid-util.h"
 #include "gpt.h"
 #include "siphash24.h"
+#include "copy.h"
+#include "base-filesystem.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -145,6 +147,7 @@ static uint64_t arg_retain =
         (1ULL << CAP_MKNOD);
 static char **arg_bind = NULL;
 static char **arg_bind_ro = NULL;
+static char **arg_tmpfs = NULL;
 static char **arg_setenv = NULL;
 static bool arg_quiet = false;
 static bool arg_share_system = false;
@@ -199,6 +202,7 @@ static int help(void) {
                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
                "                            the container\n"
                "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
+               "     --tmpfs=PATH:[OPTIONS] Mount an empty tmpfs to the specified directory\n"
                "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
                "     --share-system         Share system namespaces with host\n"
                "     --register=BOOLEAN     Register container as machine\n"
@@ -221,6 +225,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_LINK_JOURNAL,
                 ARG_BIND,
                 ARG_BIND_RO,
+                ARG_TMPFS,
                 ARG_SETENV,
                 ARG_SHARE_SYSTEM,
                 ARG_REGISTER,
@@ -246,6 +251,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
                 { "bind",                  required_argument, NULL, ARG_BIND              },
                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
+                { "tmpfs",                 required_argument, NULL, ARG_TMPFS             },
                 { "machine",               required_argument, NULL, 'M'                   },
                 { "slice",                 required_argument, NULL, 'S'                   },
                 { "setenv",                required_argument, NULL, ARG_SETENV            },
@@ -468,6 +474,42 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_TMPFS: {
+                        _cleanup_free_ char *a = NULL, *b = NULL;
+                        char *e;
+
+                        e = strchr(optarg, ':');
+                        if (e) {
+                                a = strndup(optarg, e - optarg);
+                                b = strdup(e + 1);
+                        } else {
+                                a = strdup(optarg);
+                                b = strdup("mode=0755");
+                        }
+
+                        if (!a || !b)
+                                return log_oom();
+
+                        if (!path_is_absolute(a)) {
+                                log_error("Invalid tmpfs specification: %s", optarg);
+                                return -EINVAL;
+                        }
+
+                        r = strv_push(&arg_tmpfs, a);
+                        if (r < 0)
+                                return log_oom();
+
+                        a = NULL;
+
+                        r = strv_push(&arg_tmpfs, b);
+                        if (r < 0)
+                                return log_oom();
+
+                        b = NULL;
+
+                        break;
+                }
+
                 case ARG_SETENV: {
                         char **n;
 
@@ -560,17 +602,17 @@ static int mount_all(const char *dest) {
         } MountPoint;
 
         static const MountPoint mount_table[] = {
-                { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
-                { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
-                { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
-                { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
-                { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
+                { "proc",      "/proc",     "proc",  NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV,           true  },
+                { "/proc/sys", "/proc/sys", NULL,    NULL,        MS_BIND,                                true  },   /* Bind mount first */
+                { NULL,        "/proc/sys", NULL,    NULL,        MS_BIND|MS_RDONLY|MS_REMOUNT,           true  },   /* Then, make it r/o */
+                { "sysfs",     "/sys",      "sysfs", NULL,        MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
+                { "tmpfs",     "/dev",      "tmpfs", "mode=755",  MS_NOSUID|MS_STRICTATIME,               true  },
                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
-                { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
-                { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
+                { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,      true  },
+                { "tmpfs",     "/run",      "tmpfs", "mode=755",  MS_NOSUID|MS_NODEV|MS_STRICTATIME,      true  },
 #ifdef HAVE_SELINUX
-                { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
-                { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
+                { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                              false },  /* Bind mount first */
+                { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT,         false },  /* Then, make it r/o */
 #endif
         };
 
@@ -639,7 +681,7 @@ static int mount_binds(const char *dest, char **l, bool ro) {
         char **x, **y;
 
         STRV_FOREACH_PAIR(x, y, l) {
-                char *where;
+                _cleanup_free_ char *where = NULL;
                 struct stat source_st, dest_st;
                 int r;
 
@@ -648,12 +690,14 @@ static int mount_binds(const char *dest, char **l, bool ro) {
                         return -errno;
                 }
 
-                where = strappenda(dest, *y);
+                where = strappend(dest, *y);
+                if (!where)
+                        return log_oom();
+
                 r = stat(where, &dest_st);
                 if (r == 0) {
                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
-                                log_error("The file types of %s and %s do not match. Refusing bind mount",
-                                                *x, where);
+                                log_error("The file types of %s and %s do not match. Refusing bind mount", *x, where);
                                 return -EINVAL;
                         }
                 } else if (errno == ENOENT) {
@@ -666,6 +710,7 @@ static int mount_binds(const char *dest, char **l, bool ro) {
                         log_error("Failed to bind mount %s: %m", *x);
                         return -errno;
                 }
+
                 /* Create the mount point, but be conservative -- refuse to create block
                 * and char devices. */
                 if (S_ISDIR(source_st.st_mode))
@@ -698,6 +743,27 @@ static int mount_binds(const char *dest, char **l, bool ro) {
         return 0;
 }
 
+static int mount_tmpfs(const char *dest) {
+        char **i, **o;
+
+        STRV_FOREACH_PAIR(i, o, arg_tmpfs) {
+                _cleanup_free_ char *where = NULL;
+
+                where = strappend(dest, *i);
+                if (!where)
+                        return log_oom();
+
+                mkdir_label(where, 0755);
+
+                if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, *o) < 0) {
+                        log_error("tmpfs mount to %s failed: %m", where);
+                        return -errno;
+                }
+        }
+
+        return 0;
+}
+
 static int setup_timezone(const char *dest) {
         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
         char *z, *y;
@@ -759,7 +825,7 @@ static int setup_timezone(const char *dest) {
 }
 
 static int setup_resolv_conf(const char *dest) {
-        char _cleanup_free_ *where = NULL;
+        _cleanup_free_ char *where = NULL;
 
         assert(dest);
 
@@ -773,7 +839,7 @@ static int setup_resolv_conf(const char *dest) {
 
         /* We don't really care for the results of this really. If it
          * fails, it fails, but meh... */
-        copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
+        copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW, 0644);
 
         return 0;
 }
@@ -2579,20 +2645,31 @@ static int change_uid_gid(char **_home) {
 }
 
 /*
- * Return 0 in case the container is being rebooted, has been shut
- * down or exited successfully. On failures a negative value is
- * returned.
+ * Return values:
+ * < 0 : wait_for_terminate() failed to get the state of the
+ *       container, the container was terminated by a signal, or
+ *       failed for an unknown reason.  No change is made to the
+ *       container argument.
+ * > 0 : The program executed in the container terminated with an
+ *       error.  The exit code of the program executed in the
+ *       container is returned.  No change is made to the container
+ *       argument.
+ *   0 : The container is being rebooted, has been shut down or exited
+ *       successfully.  The container argument has been set to either
+ *       CONTAINER_TERMINATED or CONTAINER_REBOOTED.
  *
- * The status of the container "CONTAINER_TERMINATED" or
- * "CONTAINER_REBOOTED" will be saved in the container argument
+ * That is, success is indicated by a return value of zero, and an
+ * error is indicated by a non-zero value.
  */
 static int wait_for_container(pid_t pid, ContainerStatus *container) {
         int r;
         siginfo_t status;
 
         r = wait_for_terminate(pid, &status);
-        if (r < 0)
+        if (r < 0) {
+                log_warning("Failed to wait for container: %s", strerror(-r));
                 return r;
+        }
 
         switch (status.si_code) {
         case CLD_EXITED:
@@ -2606,7 +2683,6 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) {
                 } else {
                         log_error("Container %s failed with error code %i.",
                                   arg_machine, status.si_status);
-                        r = -1;
                 }
                 break;
 
@@ -2734,7 +2810,7 @@ int main(int argc, char *argv[]) {
 
                 if (arg_boot) {
                         if (path_is_os_tree(arg_directory) <= 0) {
-                                log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
+                                log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
                                 goto finish;
                         }
                 } else {
@@ -2943,6 +3019,12 @@ int main(int argc, char *argv[]) {
                                           srv_device, srv_device_rw) < 0)
                                 goto child_fail;
 
+                        r = base_filesystem_create(arg_directory);
+                        if (r < 0) {
+                                log_error("Failed to create the base filesystem: %s", strerror(-r));
+                                goto child_fail;
+                        }
+
                         /* Turn directory into bind mount */
                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
                                 log_error("Failed to make bind mount: %m");
@@ -2997,6 +3079,9 @@ int main(int argc, char *argv[]) {
                         if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
                                 goto child_fail;
 
+                        if (mount_tmpfs(arg_directory) < 0)
+                                goto child_fail;
+
                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
                                 goto child_fail;
 
@@ -3162,72 +3247,75 @@ int main(int argc, char *argv[]) {
                  * join its cgroup which might limit what it can do */
                 r = eventfd_child_succeeded(eventfds[1]);
                 eventfds[1] = safe_close(eventfds[1]);
-                if (r < 0)
-                        goto check_container_status;
 
-                r = register_machine(pid);
-                if (r < 0)
-                        goto finish;
+                if (r >= 0) {
+                        r = register_machine(pid);
+                        if (r < 0)
+                                goto finish;
 
-                r = move_network_interfaces(pid);
-                if (r < 0)
-                        goto finish;
+                        r = move_network_interfaces(pid);
+                        if (r < 0)
+                                goto finish;
 
-                r = setup_veth(pid, veth_name);
-                if (r < 0)
-                        goto finish;
+                        r = setup_veth(pid, veth_name);
+                        if (r < 0)
+                                goto finish;
 
-                r = setup_bridge(veth_name);
-                if (r < 0)
-                        goto finish;
+                        r = setup_bridge(veth_name);
+                        if (r < 0)
+                                goto finish;
 
-                r = setup_macvlan(pid);
-                if (r < 0)
-                        goto finish;
+                        r = setup_macvlan(pid);
+                        if (r < 0)
+                                goto finish;
 
-                /* Block SIGCHLD here, before notifying child.
-                 * process_pty() will handle it with the other signals. */
-                r = sigprocmask(SIG_BLOCK, &mask_chld, NULL);
-                if (r < 0)
-                        goto finish;
+                        /* Block SIGCHLD here, before notifying child.
+                         * process_pty() will handle it with the other signals. */
+                        r = sigprocmask(SIG_BLOCK, &mask_chld, NULL);
+                        if (r < 0)
+                                goto finish;
 
-                /* Reset signal to default */
-                r = default_signals(SIGCHLD, -1);
-                if (r < 0)
-                        goto finish;
+                        /* Reset signal to default */
+                        r = default_signals(SIGCHLD, -1);
+                        if (r < 0)
+                                goto finish;
 
-                /* Notify the child that the parent is ready with all
-                 * its setup, and that the child can now hand over
-                 * control to the code to run inside the container. */
-                r = eventfd_send_state(eventfds[0],
-                                       EVENTFD_PARENT_SUCCEEDED);
-                eventfds[0] = safe_close(eventfds[0]);
-                if (r < 0)
-                        goto finish;
+                        /* Notify the child that the parent is ready with all
+                         * its setup, and that the child can now hand over
+                         * control to the code to run inside the container. */
+                        r = eventfd_send_state(eventfds[0], EVENTFD_PARENT_SUCCEEDED);
+                        eventfds[0] = safe_close(eventfds[0]);
+                        if (r < 0)
+                                goto finish;
 
-                k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
-                if (k < 0) {
-                        r = EXIT_FAILURE;
-                        break;
-                }
+                        k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
+                        if (k < 0) {
+                                r = EXIT_FAILURE;
+                                break;
+                        }
 
-                if (!arg_quiet)
-                        putc('\n', stdout);
+                        if (!arg_quiet)
+                                putc('\n', stdout);
 
-                /* Kill if it is not dead yet anyway */
-                terminate_machine(pid);
+                        /* Kill if it is not dead yet anyway */
+                        terminate_machine(pid);
+                }
 
-check_container_status:
-                /* Redundant, but better safe than sorry */
+                /* Normally redundant, but better safe than sorry */
                 kill(pid, SIGKILL);
 
                 r = wait_for_container(pid, &container_status);
                 pid = 0;
 
                 if (r < 0) {
+                        /* We failed to wait for the container, or the
+                         * container exited abnormally */
                         r = EXIT_FAILURE;
                         break;
-                } else if (container_status == CONTAINER_TERMINATED)
+                } else if (r > 0 || container_status == CONTAINER_TERMINATED)
+                        /* The container exited with a non-zero
+                         * status, or with zero status and no reboot
+                         * was requested. */
                         break;
 
                 /* CONTAINER_REBOOTED, loop again */
@@ -3247,6 +3335,7 @@ finish:
         strv_free(arg_network_macvlan);
         strv_free(arg_bind);
         strv_free(arg_bind_ro);
+        strv_free(arg_tmpfs);
 
         return r;
 }