chiark / gitweb /
nspawn: require /etc/os-release only for init
[elogind.git] / src / nspawn / nspawn.c
index a85579b9402a0c8310fd10cd8c4cb20055449c23..be8161c351917c5cb3a38defea971e4d13fd4858 100644 (file)
 #include <sys/un.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
+#include <sys/eventfd.h>
+#ifdef HAVE_SELINUX
+#include <selinux/selinux.h>
+#endif
 
 #include "sd-daemon.h"
 #include "sd-bus.h"
 #include "ptyfwd.h"
 #include "bus-kernel.h"
 #include "env-util.h"
-
-#ifndef TTY_GID
-#define TTY_GID 5
-#endif
+#include "def.h"
 
 typedef enum LinkJournal {
         LINK_NO,
@@ -79,6 +80,8 @@ static char *arg_directory = NULL;
 static char *arg_user = NULL;
 static sd_id128_t arg_uuid = {};
 static char *arg_machine = NULL;
+static char *arg_process_label = NULL;
+static char *arg_apifs_label = NULL;
 static const char *arg_slice = NULL;
 static bool arg_private_network = false;
 static bool arg_read_only = false;
@@ -109,34 +112,41 @@ static uint64_t arg_retain =
         (1ULL << CAP_SYS_RESOURCE) |
         (1ULL << CAP_SYS_BOOT) |
         (1ULL << CAP_AUDIT_WRITE) |
-        (1ULL << CAP_AUDIT_CONTROL);
+        (1ULL << CAP_AUDIT_CONTROL) |
+        (1ULL << CAP_MKNOD);
 static char **arg_bind = NULL;
 static char **arg_bind_ro = NULL;
 static char **arg_setenv = NULL;
+static bool arg_quiet = false;
 
 static int help(void) {
 
         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
-               "  -h --help                Show this help\n"
-               "     --version             Print version string\n"
-               "  -D --directory=NAME      Root directory for the container\n"
-               "  -b --boot                Boot up full system (i.e. invoke init)\n"
-               "  -u --user=USER           Run the command under specified user or uid\n"
-               "     --uuid=UUID           Set a specific machine UUID for the container\n"
-               "  -M --machine=NAME        Set the machine name for the container\n"
-               "  -S --slice=SLICE         Place the container in the specified slice\n"
-               "     --private-network     Disable network in container\n"
-               "     --read-only           Mount the root directory read-only\n"
-               "     --capability=CAP      In addition to the default, retain specified\n"
-               "                           capability\n"
-               "     --drop-capability=CAP Drop the specified capability from the default set\n"
-               "     --link-journal=MODE   Link up guest journal, one of no, auto, guest, host\n"
-               "  -j                       Equivalent to --link-journal=host\n"
-               "     --bind=PATH[:PATH]    Bind mount a file or directory from the host into\n"
-               "                           the container\n"
-               "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n"
-               "     --setenv=NAME=VALUE   Pass an environment variable to PID 1\n",
+               "  -h --help                 Show this help\n"
+               "     --version              Print version string\n"
+               "  -D --directory=NAME       Root directory for the container\n"
+               "  -b --boot                 Boot up full system (i.e. invoke init)\n"
+               "  -u --user=USER            Run the command under specified user or uid\n"
+               "     --uuid=UUID            Set a specific machine UUID for the container\n"
+               "  -M --machine=NAME         Set the machine name for the container\n"
+               "  -S --slice=SLICE          Place the container in the specified slice\n"
+               "  -L --apifs-label=LABEL    Set the MAC file label to be used by API/tmpfs file\n"
+               "                            systems in the container\n"
+               "  -Z --process-label=LABEL  Set the MAC label to be used by processes in\n"
+               "                            the container\n"
+               "     --private-network      Disable network in container\n"
+               "     --read-only            Mount the root directory read-only\n"
+               "     --capability=CAP       In addition to the default, retain specified\n"
+               "                            capability\n"
+               "     --drop-capability=CAP  Drop the specified capability from the default set\n"
+               "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
+               "  -j                        Equivalent to --link-journal=host\n"
+               "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
+               "                            the container\n"
+               "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
+               "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
+               "  -q --quiet                Do not show status information\n",
                program_invocation_short_name);
 
         return 0;
@@ -174,6 +184,9 @@ static int parse_argv(int argc, char *argv[]) {
                 { "machine",         required_argument, NULL, 'M'                 },
                 { "slice",           required_argument, NULL, 'S'                 },
                 { "setenv",          required_argument, NULL, ARG_SETENV          },
+                { "process-label",   required_argument, NULL, 'Z'                 },
+                { "apifs-label",     required_argument, NULL, 'L'                 },
+                { "quiet",           no_argument,       NULL, 'q'                 },
                 {}
         };
 
@@ -182,7 +195,7 @@ static int parse_argv(int argc, char *argv[]) {
         assert(argc >= 0);
         assert(argv);
 
-        while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
+        while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
 
                 switch (c) {
 
@@ -248,6 +261,14 @@ static int parse_argv(int argc, char *argv[]) {
 
                         break;
 
+                case 'L':
+                        arg_apifs_label = optarg;
+                        break;
+
+                case 'Z':
+                        arg_process_label = optarg;
+                        break;
+
                 case ARG_READ_ONLY:
                         arg_read_only = true;
                         break;
@@ -355,6 +376,10 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
                 }
 
+                case 'q':
+                        arg_quiet = true;
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -397,6 +422,10 @@ static int mount_all(const char *dest) {
 
         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
                 _cleanup_free_ char *where = NULL;
+#ifdef HAVE_SELINUX
+                _cleanup_free_ char *options = NULL;
+#endif
+                const char *o;
                 int t;
 
                 where = strjoin(dest, "/", mount_table[k].where, NULL);
@@ -419,11 +448,23 @@ static int mount_all(const char *dest) {
 
                 mkdir_p(where, 0755);
 
+#ifdef HAVE_SELINUX
+                if (arg_apifs_label && (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
+                        options = strjoin(mount_table[k].options, ",context=\"", arg_apifs_label, "\"", NULL);
+                        if (!options)
+                                return log_oom();
+
+                        o = options;
+                } else
+#endif
+                        o = mount_table[k].options;
+
+
                 if (mount(mount_table[k].what,
                           where,
                           mount_table[k].type,
                           mount_table[k].flags,
-                          mount_table[k].options) < 0 &&
+                          o) < 0 &&
                     mount_table[k].fatal) {
 
                         log_error("mount(%s) failed: %m", where);
@@ -638,40 +679,30 @@ static int copy_devnodes(const char *dest) {
         u = umask(0000);
 
         NULSTR_FOREACH(d, devnodes) {
-                struct stat st;
                 _cleanup_free_ char *from = NULL, *to = NULL;
+                struct stat st;
 
-                asprintf(&from, "/dev/%s", d);
-                asprintf(&to, "%s/dev/%s", dest, d);
-
-                if (!from || !to) {
-                        log_oom();
-
-                        if (r == 0)
-                                r = -ENOMEM;
-
-                        break;
-                }
+                from = strappend("/dev/", d);
+                to = strjoin(dest, "/dev/", d, NULL);
+                if (!from || !to)
+                        return log_oom();
 
                 if (stat(from, &st) < 0) {
 
                         if (errno != ENOENT) {
                                 log_error("Failed to stat %s: %m", from);
-                                if (r == 0)
-                                        r = -errno;
+                                return -errno;
                         }
 
                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
 
                         log_error("%s is not a char or block device, cannot copy", from);
-                        if (r == 0)
-                                r = -EIO;
+                        return -EIO;
 
                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
 
-                        log_error("mknod(%s) failed: %m", dest);
-                        if (r == 0)
-                                r = -errno;
+                        log_error("mknod(%s) failed: %m", to);
+                        return -errno;
                 }
         }
 
@@ -985,7 +1016,7 @@ static int setup_kdbus(const char *dest, const char *path) {
         }
 
         if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
-                log_error("Failed to mount kdbus namespace path: %m");
+                log_error("Failed to mount kdbus domain path: %m");
                 return -errno;
         }
 
@@ -996,12 +1027,12 @@ static int drop_capabilities(void) {
         return capability_bounding_set_drop(~arg_retain, false);
 }
 
-static int register_machine(void) {
+static int register_machine(pid_t pid) {
         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
         _cleanup_bus_unref_ sd_bus *bus = NULL;
         int r;
 
-        r = sd_bus_open_system(&bus);
+        r = sd_bus_default_system(&bus);
         if (r < 0) {
                 log_error("Failed to open system bus: %s", strerror(-r));
                 return r;
@@ -1020,7 +1051,7 @@ static int register_machine(void) {
                         SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
                         "nspawn",
                         "container",
-                        (uint32_t) 0,
+                        (uint32_t) pid,
                         strempty(arg_directory),
                         !isempty(arg_slice), "Slice", "s", arg_slice);
         if (r < 0) {
@@ -1097,13 +1128,13 @@ static bool audit_enabled(void) {
 int main(int argc, char *argv[]) {
         pid_t pid = 0;
         int r = EXIT_FAILURE, k;
-        _cleanup_close_ int master = -1, kdbus_fd = -1;
+        _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
         int n_fd_passed;
         const char *console = NULL;
         sigset_t mask;
         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
         _cleanup_fdset_free_ FDSet *fds = NULL;
-        _cleanup_free_ char *kdbus_namespace = NULL;
+        _cleanup_free_ char *kdbus_domain = NULL;
         const char *ns;
 
         log_parse_environment();
@@ -1169,7 +1200,7 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (path_is_os_tree(arg_directory) <= 0) {
+        if (arg_boot && path_is_os_tree(arg_directory) <= 0) {
                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
                 goto finish;
         }
@@ -1198,7 +1229,8 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
+        if (!arg_quiet)
+                log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
 
         if (unlockpt(master) < 0) {
                 log_error("Failed to unlock tty: %m");
@@ -1206,14 +1238,14 @@ int main(int argc, char *argv[]) {
         }
 
         ns = strappenda("machine-", arg_machine);
-        kdbus_fd = bus_kernel_create_namespace(ns, &kdbus_namespace);
-        if (r < 0)
-                log_debug("Failed to create kdbus namespace: %s", strerror(-r));
+        kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
+        if (kdbus_fd < 0)
+                log_debug("Failed to create kdbus domain: %s", strerror(-kdbus_fd));
         else
-                log_debug("Successfully created kdbus namespace as %s", kdbus_namespace);
+                log_debug("Successfully created kdbus domain as %s", kdbus_domain);
 
         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
-                log_error("Failed to create kmsg socket pair.");
+                log_error("Failed to create kmsg socket pair: %m");
                 goto finish;
         }
 
@@ -1226,6 +1258,12 @@ int main(int argc, char *argv[]) {
         for (;;) {
                 siginfo_t status;
 
+                sync_fd = eventfd(0, EFD_CLOEXEC);
+                if (sync_fd < 0) {
+                        log_error("Failed to create event fd: %m");
+                        goto finish;
+                }
+
                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
                 if (pid < 0) {
                         if (errno == EINVAL)
@@ -1243,7 +1281,7 @@ int main(int argc, char *argv[]) {
                         gid_t gid = (gid_t) -1;
                         unsigned n_env = 2;
                         const char *envp[] = {
-                                "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+                                "PATH=" DEFAULT_PATH_SPLIT_USR,
                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
                                 NULL, /* TERM */
                                 NULL, /* HOME */
@@ -1255,6 +1293,7 @@ int main(int argc, char *argv[]) {
                                 NULL
                         };
                         char **env_use;
+                        eventfd_t x;
 
                         envp[n_env] = strv_find_prefix(environ, "TERM=");
                         if (envp[n_env])
@@ -1302,10 +1341,6 @@ int main(int argc, char *argv[]) {
                                 goto child_fail;
                         }
 
-                        r = register_machine();
-                        if (r < 0)
-                                goto finish;
-
                         /* Mark everything as slave, so that we still
                          * receive mounts from the real root, but don't
                          * propagate mounts to the real root. */
@@ -1364,7 +1399,7 @@ int main(int argc, char *argv[]) {
                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
                                 goto child_fail;
 
-                        if (setup_kdbus(arg_directory, kdbus_namespace) < 0)
+                        if (setup_kdbus(arg_directory, kdbus_domain) < 0)
                                 goto child_fail;
 
                         if (chdir(arg_directory) < 0) {
@@ -1482,6 +1517,10 @@ int main(int argc, char *argv[]) {
 
                         setup_hostname();
 
+                        eventfd_read(sync_fd, &x);
+                        close_nointr_nofail(sync_fd);
+                        sync_fd = -1;
+
                         if (!strv_isempty(arg_setenv)) {
                                 char **n;
 
@@ -1495,6 +1534,11 @@ int main(int argc, char *argv[]) {
                         } else
                                 env_use = (char**) envp;
 
+#ifdef HAVE_SELINUX
+                        if (arg_process_label)
+                                if (setexeccon(arg_process_label) < 0)
+                                        log_error("setexeccon(\"%s\") failed: %m", arg_process_label);
+#endif
                         if (arg_boot) {
                                 char **a;
                                 size_t l;
@@ -1529,13 +1573,22 @@ int main(int argc, char *argv[]) {
                 fdset_free(fds);
                 fds = NULL;
 
+                r = register_machine(pid);
+                if (r < 0)
+                        goto finish;
+
+                eventfd_write(sync_fd, 1);
+                close_nointr_nofail(sync_fd);
+                sync_fd = -1;
+
                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
                 if (k < 0) {
                         r = EXIT_FAILURE;
                         break;
                 }
 
-                putc('\n', stdout);
+                if (!arg_quiet)
+                        putc('\n', stdout);
 
                 /* Kill if it is not dead yet anyway */
                 terminate_machine(pid);
@@ -1558,16 +1611,21 @@ int main(int argc, char *argv[]) {
                                 break;
                         }
 
-                        log_debug("Container %s exited successfully.", arg_machine);
+                        if (!arg_quiet)
+                                log_debug("Container %s exited successfully.", arg_machine);
                         break;
                 } else if (status.si_code == CLD_KILLED &&
                            status.si_status == SIGINT) {
-                        log_info("Container %s has been shut down.", arg_machine);
+
+                        if (!arg_quiet)
+                                log_info("Container %s has been shut down.", arg_machine);
                         r = 0;
                         break;
                 } else if (status.si_code == CLD_KILLED &&
                            status.si_status == SIGHUP) {
-                        log_info("Container %s is being rebooted.", arg_machine);
+
+                        if (!arg_quiet)
+                                log_info("Container %s is being rebooted.", arg_machine);
                         continue;
                 } else if (status.si_code == CLD_KILLED ||
                            status.si_code == CLD_DUMPED) {