chiark / gitweb /
nspawn: fix reboot event fd reuse
[elogind.git] / src / nspawn / nspawn.c
index b3ca10ea9116c67168e904592f55e532d194e1a3..5352b95ec69776b20c103ac6a1a4736dfe870fa8 100644 (file)
@@ -40,6 +40,7 @@
 #include <sys/un.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
+#include <sys/eventfd.h>
 
 #include "sd-daemon.h"
 #include "sd-bus.h"
 #include "bus-error.h"
 #include "ptyfwd.h"
 #include "bus-kernel.h"
-
-#ifndef TTY_GID
-#define TTY_GID 5
-#endif
+#include "env-util.h"
+#include "def.h"
 
 typedef enum LinkJournal {
         LINK_NO,
@@ -108,9 +107,11 @@ static uint64_t arg_retain =
         (1ULL << CAP_SYS_RESOURCE) |
         (1ULL << CAP_SYS_BOOT) |
         (1ULL << CAP_AUDIT_WRITE) |
-        (1ULL << CAP_AUDIT_CONTROL);
+        (1ULL << CAP_AUDIT_CONTROL) |
+        (1ULL << CAP_MKNOD);
 static char **arg_bind = NULL;
 static char **arg_bind_ro = NULL;
+static char **arg_setenv = NULL;
 
 static int help(void) {
 
@@ -133,7 +134,8 @@ static int help(void) {
                "  -j                       Equivalent to --link-journal=host\n"
                "     --bind=PATH[:PATH]    Bind mount a file or directory from the host into\n"
                "                           the container\n"
-               "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n",
+               "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n"
+               "     --setenv=NAME=VALUE   Pass an environment variable to PID 1\n",
                program_invocation_short_name);
 
         return 0;
@@ -150,7 +152,8 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_DROP_CAPABILITY,
                 ARG_LINK_JOURNAL,
                 ARG_BIND,
-                ARG_BIND_RO
+                ARG_BIND_RO,
+                ARG_SETENV,
         };
 
         static const struct option options[] = {
@@ -169,6 +172,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "bind-ro",         required_argument, NULL, ARG_BIND_RO         },
                 { "machine",         required_argument, NULL, 'M'                 },
                 { "slice",           required_argument, NULL, 'S'                 },
+                { "setenv",          required_argument, NULL, ARG_SETENV          },
                 {}
         };
 
@@ -333,6 +337,23 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case ARG_SETENV: {
+                        char **n;
+
+                        if (!env_assignment_is_valid(optarg)) {
+                                log_error("Environment variable assignment '%s' is not valid.", optarg);
+                                return -EINVAL;
+                        }
+
+                        n = strv_env_set(arg_setenv, optarg);
+                        if (!n)
+                                return log_oom();
+
+                        strv_free(arg_setenv);
+                        arg_setenv = n;
+                        break;
+                }
+
                 case '?':
                         return -EINVAL;
 
@@ -616,40 +637,30 @@ static int copy_devnodes(const char *dest) {
         u = umask(0000);
 
         NULSTR_FOREACH(d, devnodes) {
-                struct stat st;
                 _cleanup_free_ char *from = NULL, *to = NULL;
+                struct stat st;
 
-                asprintf(&from, "/dev/%s", d);
-                asprintf(&to, "%s/dev/%s", dest, d);
-
-                if (!from || !to) {
-                        log_oom();
-
-                        if (r == 0)
-                                r = -ENOMEM;
-
-                        break;
-                }
+                from = strappend("/dev/", d);
+                to = strjoin(dest, "/dev/", d, NULL);
+                if (!from || !to)
+                        return log_oom();
 
                 if (stat(from, &st) < 0) {
 
                         if (errno != ENOENT) {
                                 log_error("Failed to stat %s: %m", from);
-                                if (r == 0)
-                                        r = -errno;
+                                return -errno;
                         }
 
                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
 
                         log_error("%s is not a char or block device, cannot copy", from);
-                        if (r == 0)
-                                r = -EIO;
+                        return -EIO;
 
                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
 
                         log_error("mknod(%s) failed: %m", dest);
-                        if (r == 0)
-                                r = -errno;
+                        return  -errno;
                 }
         }
 
@@ -804,14 +815,11 @@ static int setup_hostname(void) {
 }
 
 static int setup_journal(const char *directory) {
-        sd_id128_t machine_id;
+        sd_id128_t machine_id, this_id;
         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
         char *id;
         int r;
 
-        if (arg_link_journal == LINK_NO)
-                return 0;
-
         p = strappend(directory, "/etc/machine-id");
         if (!p)
                 return log_oom();
@@ -835,6 +843,24 @@ static int setup_journal(const char *directory) {
                 return r;
         }
 
+        r = sd_id128_get_machine(&this_id);
+        if (r < 0) {
+                log_error("Failed to retrieve machine ID: %s", strerror(-r));
+                return r;
+        }
+
+        if (sd_id128_equal(machine_id, this_id)) {
+                log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
+                         "Host and machine ids are equal (%s): refusing to link journals", id);
+                if (arg_link_journal == LINK_AUTO)
+                        return 0;
+                return
+                        -EEXIST;
+        }
+
+        if (arg_link_journal == LINK_NO)
+                return 0;
+
         free(p);
         p = strappend("/var/log/journal/", id);
         q = strjoin(directory, "/var/log/journal/", id, NULL);
@@ -959,7 +985,7 @@ static int drop_capabilities(void) {
         return capability_bounding_set_drop(~arg_retain, false);
 }
 
-static int register_machine(void) {
+static int register_machine(pid_t pid) {
         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
         _cleanup_bus_unref_ sd_bus *bus = NULL;
         int r;
@@ -983,7 +1009,7 @@ static int register_machine(void) {
                         SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
                         "nspawn",
                         "container",
-                        (uint32_t) 0,
+                        (uint32_t) pid,
                         strempty(arg_directory),
                         !isempty(arg_slice), "Slice", "s", arg_slice);
         if (r < 0) {
@@ -1060,7 +1086,7 @@ static bool audit_enabled(void) {
 int main(int argc, char *argv[]) {
         pid_t pid = 0;
         int r = EXIT_FAILURE, k;
-        _cleanup_close_ int master = -1, kdbus_fd = -1;
+        _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
         int n_fd_passed;
         const char *console = NULL;
         sigset_t mask;
@@ -1176,7 +1202,7 @@ int main(int argc, char *argv[]) {
                 log_debug("Successfully created kdbus namespace as %s", kdbus_namespace);
 
         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
-                log_error("Failed to create kmsg socket pair.");
+                log_error("Failed to create kmsg socket pair: %m");
                 goto finish;
         }
 
@@ -1189,6 +1215,12 @@ int main(int argc, char *argv[]) {
         for (;;) {
                 siginfo_t status;
 
+                sync_fd = eventfd(0, EFD_CLOEXEC);
+                if (sync_fd < 0) {
+                        log_error("Failed to create event fd: %m");
+                        goto finish;
+                }
+
                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
                 if (pid < 0) {
                         if (errno == EINVAL)
@@ -1206,7 +1238,7 @@ int main(int argc, char *argv[]) {
                         gid_t gid = (gid_t) -1;
                         unsigned n_env = 2;
                         const char *envp[] = {
-                                "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+                                "PATH=" DEFAULT_PATH_SPLIT_USR,
                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
                                 NULL, /* TERM */
                                 NULL, /* HOME */
@@ -1217,6 +1249,8 @@ int main(int argc, char *argv[]) {
                                 NULL, /* LISTEN_PID */
                                 NULL
                         };
+                        char **env_use;
+                        eventfd_t x;
 
                         envp[n_env] = strv_find_prefix(environ, "TERM=");
                         if (envp[n_env])
@@ -1264,10 +1298,6 @@ int main(int argc, char *argv[]) {
                                 goto child_fail;
                         }
 
-                        r = register_machine();
-                        if (r < 0)
-                                goto finish;
-
                         /* Mark everything as slave, so that we still
                          * receive mounts from the real root, but don't
                          * propagate mounts to the real root. */
@@ -1444,6 +1474,23 @@ int main(int argc, char *argv[]) {
 
                         setup_hostname();
 
+                        eventfd_read(sync_fd, &x);
+                        close_nointr_nofail(sync_fd);
+                        sync_fd = -1;
+
+                        if (!strv_isempty(arg_setenv)) {
+                                char **n;
+
+                                n = strv_env_merge(2, envp, arg_setenv);
+                                if (!n) {
+                                        log_oom();
+                                        goto child_fail;
+                                }
+
+                                env_use = n;
+                        } else
+                                env_use = (char**) envp;
+
                         if (arg_boot) {
                                 char **a;
                                 size_t l;
@@ -1455,18 +1502,18 @@ int main(int argc, char *argv[]) {
                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
 
                                 a[0] = (char*) "/usr/lib/systemd/systemd";
-                                execve(a[0], a, (char**) envp);
+                                execve(a[0], a, env_use);
 
                                 a[0] = (char*) "/lib/systemd/systemd";
-                                execve(a[0], a, (char**) envp);
+                                execve(a[0], a, env_use);
 
                                 a[0] = (char*) "/sbin/init";
-                                execve(a[0], a, (char**) envp);
+                                execve(a[0], a, env_use);
                         } else if (argc > optind)
-                                execvpe(argv[optind], argv + optind, (char**) envp);
+                                execvpe(argv[optind], argv + optind, env_use);
                         else {
                                 chdir(home ? home : "/root");
-                                execle("/bin/bash", "-bash", NULL, (char**) envp);
+                                execle("/bin/bash", "-bash", NULL, env_use);
                         }
 
                         log_error("execv() failed: %m");
@@ -1478,6 +1525,14 @@ int main(int argc, char *argv[]) {
                 fdset_free(fds);
                 fds = NULL;
 
+                r = register_machine(pid);
+                if (r < 0)
+                        goto finish;
+
+                eventfd_write(sync_fd, 1);
+                close_nointr_nofail(sync_fd);
+                sync_fd = -1;
+
                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
                 if (k < 0) {
                         r = EXIT_FAILURE;
@@ -1537,6 +1592,7 @@ finish:
 
         free(arg_directory);
         free(arg_machine);
+        strv_free(arg_setenv); /* string vector from strv_env_set(): release the elements too, not just the array */
 
         return r;
 }