chiark / gitweb /
nspawn: Add try-{host,guest} journal link modes
[elogind.git] / src / nspawn / nspawn.c
index da4c116f363368888ad42360f228842b8105fa17..b4dcf39e83fe924760ae307126bde8b50e448989 100644 (file)
@@ -89,6 +89,7 @@
 #include "copy.h"
 #include "base-filesystem.h"
 #include "barrier.h"
+#include "event-util.h"
 
 #ifdef HAVE_SECCOMP
 #include "seccomp-util.h"
@@ -123,6 +124,7 @@ static bool arg_private_network = false;
 static bool arg_read_only = false;
 static bool arg_boot = false;
 static LinkJournal arg_link_journal = LINK_AUTO;
+static bool arg_link_journal_try = false;
 static uint64_t arg_retain =
         (1ULL << CAP_CHOWN) |
         (1ULL << CAP_DAC_OVERRIDE) |
@@ -201,8 +203,9 @@ static void help(void) {
                "     --capability=CAP       In addition to the default, retain specified\n"
                "                            capability\n"
                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
-               "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
-               "  -j                        Equivalent to --link-journal=host\n"
+               "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host,\n"
+               "                            try-guest, try-host\n"
+               "  -j                        Equivalent to --link-journal=try-guest\n"
                "     --read-only            Mount the root directory read-only\n"
                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
                "                            the container\n"
@@ -427,6 +430,7 @@ static int parse_argv(int argc, char *argv[]) {
 
                 case 'j':
                         arg_link_journal = LINK_GUEST;
+                        arg_link_journal_try = true;
                         break;
 
                 case ARG_LINK_JOURNAL:
@@ -438,7 +442,13 @@ static int parse_argv(int argc, char *argv[]) {
                                 arg_link_journal = LINK_GUEST;
                         else if (streq(optarg, "host"))
                                 arg_link_journal = LINK_HOST;
-                        else {
+                        else if (streq(optarg, "try-guest")) {
+                                arg_link_journal = LINK_GUEST;
+                                arg_link_journal_try = true;
+                        } else if (streq(optarg, "try-host")) {
+                                arg_link_journal = LINK_HOST;
+                                arg_link_journal_try = true;
+                        } else {
                                 log_error("Failed to parse link journal mode %s", optarg);
                                 return -EINVAL;
                         }
@@ -758,7 +768,7 @@ static int mount_binds(const char *dest, char **l, bool ro) {
                  * and char devices. */
                 if (S_ISDIR(source_st.st_mode)) {
                         r = mkdir_label(where, 0755);
-                        if (r < 0) {
+                        if (r < 0 && errno != EEXIST) {
                                 log_error("Failed to create mount point %s: %s", where, strerror(-r));
 
                                 return r;
@@ -1289,7 +1299,7 @@ static int setup_hostname(void) {
         if (arg_share_system)
                 return 0;
 
-        if (sethostname(arg_machine, strlen(arg_machine)) < 0)
+        if (sethostname_idempotent(arg_machine) < 0)
                 return -errno;
 
         return 0;
@@ -1403,8 +1413,13 @@ static int setup_journal(const char *directory) {
         if (arg_link_journal == LINK_GUEST) {
 
                 if (symlink(q, p) < 0) {
-                        log_error("Failed to symlink %s to %s: %m", q, p);
-                        return -errno;
+                        if (arg_link_journal_try) {
+                                log_debug("Failed to symlink %s to %s, skipping journal setup: %m", q, p);
+                                return 0;
+                        } else {
+                                log_error("Failed to symlink %s to %s: %m", q, p);
+                                return -errno;
+                        }
                 }
 
                 r = mkdir_p(q, 0755);
@@ -1414,10 +1429,17 @@ static int setup_journal(const char *directory) {
         }
 
         if (arg_link_journal == LINK_HOST) {
-                r = mkdir_p(p, 0755);
+                /* don't create parents here -- if the host doesn't have
+                 * permanent journal set up, don't force it here */
+                r = mkdir(p, 0755);
                 if (r < 0) {
-                        log_error("Failed to create %s: %m", p);
-                        return r;
+                        if (arg_link_journal_try) {
+                                log_debug("Failed to create %s, skipping journal setup: %m", p);
+                                return 0;
+                        } else {
+                                log_error("Failed to create %s: %m", p);
+                                return r;
+                        }
                 }
 
         } else if (access(p, F_OK) < 0)
@@ -1440,26 +1462,6 @@ static int setup_journal(const char *directory) {
         return 0;
 }
 
-static int setup_kdbus(const char *dest, const char *path) {
-        const char *p;
-
-        if (!path)
-                return 0;
-
-        p = strappenda(dest, "/dev/kdbus");
-        if (mkdir(p, 0755) < 0) {
-                log_error("Failed to create kdbus path: %m");
-                return  -errno;
-        }
-
-        if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
-                log_error("Failed to mount kdbus domain path: %m");
-                return -errno;
-        }
-
-        return 0;
-}
-
 static int drop_capabilities(void) {
         return capability_bounding_set_drop(~arg_retain, false);
 }
@@ -1545,7 +1547,7 @@ static int register_machine(pid_t pid, int local_ifindex) {
                         return r;
                 }
 
-                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 10,
+                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 9,
                                           /* Allow the container to
                                            * access and create the API
                                            * device nodes, so that
@@ -1558,24 +1560,14 @@ static int register_machine(pid_t pid, int local_ifindex) {
                                           "/dev/random", "rwm",
                                           "/dev/urandom", "rwm",
                                           "/dev/tty", "rwm",
+                                          "/dev/net/tun", "rwm",
                                           /* Allow the container
                                            * access to ptys. However,
                                            * do not permit the
                                            * container to ever create
                                            * these device nodes. */
                                           "/dev/pts/ptmx", "rw",
-                                          "char-pts", "rw",
-                                          /* Allow the container
-                                           * access to all kdbus
-                                           * devices. Again, the
-                                           * container cannot create
-                                           * these nodes, only use
-                                           * them. We use a pretty
-                                           * open match here, so that
-                                           * the kernel API can still
-                                           * change. */
-                                          "char-kdbus", "rw",
-                                          "char-kdbus/*", "rw");
+                                          "char-pts", "rw");
                 if (r < 0) {
                         log_error("Failed to add device whitelist: %s", strerror(-r));
                         return r;
@@ -2910,8 +2902,8 @@ static int change_uid_gid(char **_home) {
  *       container argument.
  * > 0 : The program executed in the container terminated with an
  *       error.  The exit code of the program executed in the
- *       container is returned.  No change is made to the container
- *       argument.
+ *       container is returned.  The container argument has been set
+ *       to CONTAINER_TERMINATED.
  *   0 : The container is being rebooted, has been shut down or exited
  *       successfully.  The container argument has been set to either
  *       CONTAINER_TERMINATED or CONTAINER_REBOOTED.
@@ -2920,8 +2912,8 @@ static int change_uid_gid(char **_home) {
  * error is indicated by a non-zero value.
  */
 static int wait_for_container(pid_t pid, ContainerStatus *container) {
-        int r;
         siginfo_t status;
+        int r;
 
         r = wait_for_terminate(pid, &status);
         if (r < 0) {
@@ -2930,51 +2922,40 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) {
         }
 
         switch (status.si_code) {
+
         case CLD_EXITED:
-                r = status.si_status;
-                if (r == 0) {
-                        if (!arg_quiet)
-                                log_debug("Container %s exited successfully.",
-                                          arg_machine);
+                if (status.si_status == 0) {
+                        log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s exited successfully.", arg_machine);
 
-                        *container = CONTAINER_TERMINATED;
-                } else {
-                        log_error("Container %s failed with error code %i.",
-                                  arg_machine, status.si_status);
-                }
-                break;
+                } else
+                        log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s failed with error code %i.", arg_machine, status.si_status);
+
+                *container = CONTAINER_TERMINATED;
+                return status.si_status;
 
         case CLD_KILLED:
                 if (status.si_status == SIGINT) {
-                        if (!arg_quiet)
-                                log_info("Container %s has been shut down.",
-                                         arg_machine);
 
+                        log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s has been shut down.", arg_machine);
                         *container = CONTAINER_TERMINATED;
-                        r = 0;
-                        break;
+                        return 0;
+
                 } else if (status.si_status == SIGHUP) {
-                        if (!arg_quiet)
-                                log_info("Container %s is being rebooted.",
-                                         arg_machine);
 
+                        log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s is being rebooted.", arg_machine);
                         *container = CONTAINER_REBOOTED;
-                        r = 0;
-                        break;
+                        return 0;
                 }
+
                 /* CLD_KILLED fallthrough */
 
         case CLD_DUMPED:
-                log_error("Container %s terminated by signal %s.",
-                          arg_machine, signal_to_string(status.si_status));
-                r = -1;
-                break;
+                log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
+                return -EIO;
 
         default:
-                log_error("Container %s failed due to unknown reason.",
-                          arg_machine);
-                r = -1;
-                break;
+                log_error("Container %s failed due to unknown reason.", arg_machine);
+                return -EIO;
         }
 
         return r;
@@ -2982,11 +2963,27 @@ static int wait_for_container(pid_t pid, ContainerStatus *container) {
 
 static void nop_handler(int sig) {}
 
+static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+        pid_t pid;
+
+        pid = PTR_TO_UINT32(userdata);
+        if (pid > 0) {
+                if (kill(pid, SIGRTMIN+3) >= 0) {
+                        log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
+                        sd_event_source_set_userdata(s, NULL);
+                        return 0;
+                }
+        }
+
+        sd_event_exit(sd_event_source_get_event(s), 0);
+        return 0;
+}
+
 int main(int argc, char *argv[]) {
 
-        _cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
+        _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
         bool root_device_rw = true, home_device_rw = true, srv_device_rw = true;
-        _cleanup_close_ int master = -1, kdbus_fd = -1, image_fd = -1;
+        _cleanup_close_ int master = -1, image_fd = -1;
         _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 };
         _cleanup_fdset_free_ FDSet *fds = NULL;
         int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
@@ -3133,26 +3130,6 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (access("/dev/kdbus/control", F_OK) >= 0) {
-
-                if (arg_share_system) {
-                        kdbus_domain = strdup("/dev/kdbus");
-                        if (!kdbus_domain) {
-                                log_oom();
-                                goto finish;
-                        }
-                } else {
-                        const char *ns;
-
-                        ns = strappenda("machine-", arg_machine);
-                        kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
-                        if (r < 0)
-                                log_debug("Failed to create kdbus domain: %s", strerror(-r));
-                        else
-                                log_debug("Successfully created kdbus domain as %s", kdbus_domain);
-                }
-        }
-
         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
                 log_error("Failed to create kmsg socket pair: %m");
                 goto finish;
@@ -3163,11 +3140,12 @@ int main(int argc, char *argv[]) {
                   "STATUS=Container running.");
 
         assert_se(sigemptyset(&mask) == 0);
-        assert_se(sigemptyset(&mask_chld) == 0);
-        sigaddset(&mask_chld, SIGCHLD);
         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
 
+        assert_se(sigemptyset(&mask_chld) == 0);
+        assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
+
         for (;;) {
                 ContainerStatus container_status;
                 _cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
@@ -3357,13 +3335,10 @@ int main(int argc, char *argv[]) {
                         if (mount_tmpfs(arg_directory) < 0)
                                 _exit(EXIT_FAILURE);
 
-                        if (setup_kdbus(arg_directory, kdbus_domain) < 0)
-                                _exit(EXIT_FAILURE);
-
                         /* Tell the parent that we are ready, and that
                          * it can cgroupify us to that we lack access
                          * to certain devices and resources. */
-                        barrier_place(&barrier);
+                        (void)barrier_place(&barrier);
 
                         if (chdir(arg_directory) < 0) {
                                 log_error("chdir(%s) failed: %m", arg_directory);
@@ -3504,6 +3479,8 @@ int main(int argc, char *argv[]) {
 
                 /* wait for child-setup to be done */
                 if (barrier_place_and_sync(&barrier)) {
+                        _cleanup_event_unref_ sd_event *event = NULL;
+                        _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
                         int ifi = 0;
 
                         r = move_network_interfaces(pid);
@@ -3540,14 +3517,41 @@ int main(int argc, char *argv[]) {
                         /* Notify the child that the parent is ready with all
                          * its setup, and that the child can now hand over
                          * control to the code to run inside the container. */
-                        barrier_place(&barrier);
+                        (void)barrier_place(&barrier);
 
-                        k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
-                        if (k < 0) {
-                                r = EXIT_FAILURE;
-                                break;
+                        r = sd_event_new(&event);
+                        if (r < 0) {
+                                log_error("Failed to get default event source: %s", strerror(-r));
+                                goto finish;
+                        }
+
+                        if (arg_boot) {
+                                /* Try to kill the init system on SIGINT or SIGTERM */
+                                sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid));
+                                sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid));
+                        } else {
+                                /* Immediately exit */
+                                sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+                                sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+                        }
+
+                        /* simply exit on sigchld */
+                        sd_event_add_signal(event, NULL, SIGCHLD, NULL, NULL);
+
+                        r = pty_forward_new(event, master, &forward);
+                        if (r < 0) {
+                                log_error("Failed to create PTY forwarder: %s", strerror(-r));
+                                goto finish;
                         }
 
+                        r = sd_event_loop(event);
+                        if (r < 0) {
+                                log_error("Failed to run event loop: %s", strerror(-r));
+                                return r;
+                        }
+
+                        forward = pty_forward_free(forward);
+
                         if (!arg_quiet)
                                 putc('\n', stdout);