chiark / gitweb /
timedated: use libsystemd-bus instead of libdbus for bus communication
[elogind.git] / src / nspawn / nspawn.c
index a58cbec38a4bd1fe8d829a7d9d7f8bf455bfdf75..ad0287dbf40ee6dc4c6dcf1f052635613ba45663 100644 (file)
 #include <linux/fs.h>
 #include <sys/un.h>
 #include <sys/socket.h>
-
-#ifdef HAVE_XATTR
-#include <attr/xattr.h>
-#endif
+#include <linux/netlink.h>
 
 #include <systemd/sd-daemon.h>
+#include <systemd/sd-bus.h>
 
 #include "log.h"
 #include "util.h"
@@ -63,6 +61,7 @@
 #include "fdset.h"
 #include "build.h"
 #include "fileio.h"
+#include "bus-util.h"
 
 #ifndef TTY_GID
 #define TTY_GID 5
@@ -77,9 +76,9 @@ typedef enum LinkJournal {
 
 static char *arg_directory = NULL;
 static char *arg_user = NULL;
-static char **arg_controllers = NULL;
-static char *arg_uuid = NULL;
+static sd_id128_t arg_uuid = {};
 static char *arg_machine = NULL;
+static const char *arg_slice = NULL;
 static bool arg_private_network = false;
 static bool arg_read_only = false;
 static bool arg_boot = false;
@@ -122,10 +121,9 @@ static int help(void) {
                "  -D --directory=NAME      Root directory for the container\n"
                "  -b --boot                Boot up full system (i.e. invoke init)\n"
                "  -u --user=USER           Run the command under specified user or uid\n"
-               "  -C --controllers=LIST    Put the container in specified comma-separated\n"
-               "                           cgroup hierarchies\n"
                "     --uuid=UUID           Set a specific machine UUID for the container\n"
                "  -M --machine=NAME        Set the machine name for the container\n"
+               "  -S --slice=SLICE         Place the container in the specified slice\n"
                "     --private-network     Disable network in container\n"
                "     --read-only           Mount the root directory read-only\n"
                "     --capability=CAP      In addition to the default, retain specified\n"
@@ -158,7 +156,6 @@ static int parse_argv(int argc, char *argv[]) {
                 { "version",         no_argument,       NULL, ARG_VERSION         },
                 { "directory",       required_argument, NULL, 'D'                 },
                 { "user",            required_argument, NULL, 'u'                 },
-                { "controllers",     required_argument, NULL, 'C'                 },
                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
                 { "boot",            no_argument,       NULL, 'b'                 },
                 { "uuid",            required_argument, NULL, ARG_UUID            },
@@ -168,15 +165,16 @@ static int parse_argv(int argc, char *argv[]) {
                 { "bind",            required_argument, NULL, ARG_BIND            },
                 { "bind-ro",         required_argument, NULL, ARG_BIND_RO         },
                 { "machine",         required_argument, NULL, 'M'                 },
+                { "slice",           required_argument, NULL, 'S'                 },
                 { NULL,              0,                 NULL, 0                   }
         };
 
-        int c;
+        int c, r;
 
         assert(argc >= 0);
         assert(argv);
 
-        while ((c = getopt_long(argc, argv, "+hD:u:C:bM:j", options, NULL)) >= 0) {
+        while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
 
                 switch (c) {
 
@@ -207,15 +205,6 @@ static int parse_argv(int argc, char *argv[]) {
 
                         break;
 
-                case 'C':
-                        strv_free(arg_controllers);
-                        arg_controllers = strv_split(optarg, ",");
-                        if (!arg_controllers)
-                                return log_oom();
-
-                        cg_shorten_controllers(arg_controllers);
-                        break;
-
                 case ARG_PRIVATE_NETWORK:
                         arg_private_network = true;
                         break;
@@ -225,12 +214,15 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_UUID:
-                        if (!id128_is_valid(optarg)) {
+                        r = sd_id128_from_string(optarg, &arg_uuid);
+                        if (r < 0) {
                                 log_error("Invalid UUID: %s", optarg);
-                                return -EINVAL;
+                                return r;
                         }
+                        break;
 
-                        arg_uuid = optarg;
+                case 'S':
+                        arg_slice = strdup(optarg);
                         break;
 
                 case 'M':
@@ -300,7 +292,6 @@ static int parse_argv(int argc, char *argv[]) {
                         _cleanup_free_ char *a = NULL, *b = NULL;
                         char *e;
                         char ***x;
-                        int r;
 
                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
 
@@ -419,12 +410,39 @@ static int mount_binds(const char *dest, char **l, unsigned long flags) {
 
         STRV_FOREACH_PAIR(x, y, l) {
                 _cleanup_free_ char *where = NULL;
+                struct stat source_st, dest_st;
+
+                if (stat(*x, &source_st) < 0) {
+                        log_error("failed to stat %s: %m", *x);
+                        return -errno;
+                }
 
                 where = strjoin(dest, "/", *y, NULL);
                 if (!where)
                         return log_oom();
 
-                mkdir_p_label(where, 0755);
+                if (stat(where, &dest_st) == 0) {
+                        if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
+                                log_error("The file types of %s and %s do not match. Refusing bind mount",
+                                                *x, where);
+                                return -EINVAL;
+                        }
+                } else {
+                        /* Create the mount point, but be conservative -- refuse to create block
+                         * and char devices. */
+                        if (S_ISDIR(source_st.st_mode))
+                                mkdir_p_label(where, 0755);
+                        else if (S_ISFIFO(source_st.st_mode))
+                                mkfifo(where, 0644);
+                        else if (S_ISSOCK(source_st.st_mode))
+                                mknod(where, 0644 | S_IFSOCK, 0);
+                        else if (S_ISREG(source_st.st_mode))
+                                touch(where);
+                        else {
+                                log_error("Refusing to create mountpoint for file: %s", *x);
+                                return -ENOTSUP;
+                        }
+                }
 
                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
                         log_error("mount(%s) failed: %m", where);
@@ -502,7 +520,6 @@ static int setup_timezone(const char *dest) {
 
 static int setup_resolv_conf(const char *dest) {
         char _cleanup_free_ *where = NULL;
-        _cleanup_close_ int fd = -1;
 
         assert(dest);
 
@@ -514,18 +531,9 @@ static int setup_resolv_conf(const char *dest) {
         if (!where)
                 return log_oom();
 
-        fd = open(where, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, 0644);
-
         /* We don't really care for the results of this really. If it
          * fails, it fails, but meh... */
-        if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) < 0)
-                log_warning("Failed to bind mount /etc/resolv.conf: %m");
-        else
-                if (mount("/etc/resolv.conf", where, "bind",
-                          MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
-                        log_error("Failed to remount /etc/resolv.conf readonly: %m");
-                        return -errno;
-                }
+        copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
 
         return 0;
 }
@@ -911,68 +919,6 @@ static int setup_journal(const char *directory) {
         return 0;
 }
 
-static int setup_cgroup(const char *path) {
-        char **c;
-        int r;
-
-        r = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, path, 1);
-        if (r < 0) {
-                log_error("Failed to create cgroup: %s", strerror(-r));
-                return r;
-        }
-
-        STRV_FOREACH(c, arg_controllers) {
-                r = cg_create_and_attach(*c, path, 1);
-                if (r < 0)
-                        log_warning("Failed to create cgroup in controller %s: %s", *c, strerror(-r));
-        }
-
-        return 0;
-}
-
-static int save_attributes(const char *cgroup, pid_t pid, const char *uuid, const char *directory) {
-#ifdef HAVE_XATTR
-        _cleanup_free_ char *path = NULL;
-        char buf[DECIMAL_STR_MAX(pid_t)];
-        int r = 0, k;
-
-        assert(cgroup);
-        assert(pid >= 0);
-        assert(arg_directory);
-
-        assert_se(snprintf(buf, sizeof(buf), "%lu", (unsigned long) pid) < (int) sizeof(buf));
-
-        r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup, NULL, &path);
-        if (r < 0) {
-                log_error("Failed to get path: %s", strerror(-r));
-                return r;
-        }
-
-        r = setxattr(path, "trusted.init_pid", buf, strlen(buf), XATTR_CREATE);
-        if (r < 0)
-                log_warning("Failed to set %s attribute on %s: %m", "trusted.init_pid", path);
-
-        if (uuid) {
-                k = setxattr(path, "trusted.machine_id", uuid, strlen(uuid), XATTR_CREATE);
-                if (k < 0) {
-                        log_warning("Failed to set %s attribute on %s: %m", "trusted.machine_id", path);
-                        if (r == 0)
-                                r = k;
-                }
-        }
-
-        k = setxattr(path, "trusted.root_directory", directory, strlen(directory), XATTR_CREATE);
-        if (k < 0) {
-                log_warning("Failed to set %s attribute on %s: %m", "trusted.root_directory", path);
-                if (r == 0)
-                        r = k;
-        }
-        return r;
-#else
-        return 0;
-#endif
-}
-
 static int drop_capabilities(void) {
         return capability_bounding_set_drop(~arg_retain, false);
 }
@@ -1219,10 +1165,55 @@ finish:
         return r;
 }
 
+/* Announce this container on the system bus by calling CreateMachine on
+ * org.freedesktop.machine1.Manager.
+ *
+ * The call passes the machine name, the 128-bit machine ID, the literal
+ * strings "nspawn" and "container", a zero uint32 (presumably the leader
+ * PID, with 0 standing for the caller -- TODO confirm against the machine1
+ * API), the root directory, and a single "Slice" property.
+ *
+ * Returns 0 on success. On failure an error is logged and the negative
+ * value returned by the failing sd-bus call is passed on to the caller. */
+static int register_machine(void) {
+        _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
+        _cleanup_bus_unref_ sd_bus *bus = NULL;
+        const char *reason;
+        int r;
+
+        r = sd_bus_open_system(&bus);
+        if (r < 0) {
+                log_error("Failed to open system bus: %s", strerror(-r));
+                return r;
+        }
+
+        r = sd_bus_call_method(
+                        bus,
+                        "org.freedesktop.machine1",
+                        "/org/freedesktop/machine1",
+                        "org.freedesktop.machine1.Manager",
+                        "CreateMachine",
+                        &error,
+                        NULL,
+                        "sayssusa(sv)",
+                        arg_machine,
+                        SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
+                        "nspawn",
+                        "container",
+                        (uint32_t) 0,
+                        strempty(arg_directory),
+                        1, "Slice", "s", strempty(arg_slice));
+        if (r >= 0)
+                return 0;
+
+        /* Prefer the message carried by the bus error; fall back to the
+         * generic errno text only when the error has no message. */
+        reason = error.message;
+        if (!reason)
+                reason = strerror(-r);
+
+        log_error("Failed to register machine: %s", reason);
+        return r;
+}
+
+/* Probe for the kernel audit subsystem by trying to create a
+ * NETLINK_AUDIT netlink socket. Returns true if the socket could be
+ * created (it is closed again right away), false if socket() failed. */
+static bool audit_enabled(void) {
+        int audit_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+
+        if (audit_fd < 0)
+                return false;
+
+        close_nointr_nofail(audit_fd);
+        return true;
+}
+
 int main(int argc, char *argv[]) {
         pid_t pid = 0;
         int r = EXIT_FAILURE, k;
-        _cleanup_free_ char *newcg = NULL;
         _cleanup_close_ int master = -1;
         int n_fd_passed;
         const char *console = NULL;
@@ -1231,7 +1222,7 @@ int main(int argc, char *argv[]) {
         bool saved_attr_valid = false;
         struct winsize ws;
         int kmsg_socket_pair[2] = { -1, -1 };
-        FDSet *fds = NULL;
+        _cleanup_fdset_free_ FDSet *fds = NULL;
 
         log_parse_environment();
         log_open();
@@ -1267,7 +1258,7 @@ int main(int argc, char *argv[]) {
                         goto finish;
                 }
 
-                hostname_cleanup(arg_machine);
+                hostname_cleanup(arg_machine, false);
                 if (isempty(arg_machine)) {
                         log_error("Failed to determine machine name automatically, please use -M.");
                         goto finish;
@@ -1284,6 +1275,13 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        if (arg_boot && audit_enabled()) {
+                log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
+                            "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
+                            "line before using systemd-nspawn. Sleeping for 5s...\n");
+                sleep(5);
+        }
+
         if (path_equal(arg_directory, "/")) {
                 log_error("Spawning container on root directory not supported.");
                 goto finish;
@@ -1306,22 +1304,6 @@ int main(int argc, char *argv[]) {
         fdset_close_others(fds);
         log_open();
 
-        k = cg_get_machine_path(arg_machine, &newcg);
-        if (k < 0) {
-                log_error("Failed to determine machine cgroup path: %s", strerror(-k));
-                goto finish;
-        }
-
-        k = cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
-        if (k <= 0 && k != -ENOENT) {
-                log_error("Container already running.");
-
-                free(newcg);
-                newcg = NULL;
-
-                goto finish;
-        }
-
         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
         if (master < 0) {
                 log_error("Failed to acquire pseudo tty: %m");
@@ -1465,11 +1447,12 @@ int main(int argc, char *argv[]) {
                                 goto child_fail;
                         }
 
-                        if (setup_cgroup(newcg) < 0)
-                                goto child_fail;
-
                         close_pipe(pipefd2);
 
+                        r = register_machine();
+                        if (r < 0)
+                                goto finish;
+
                         /* Mark everything as slave, so that we still
                          * receive mounts from the real root, but don't
                          * propagate mounts to the real root. */
@@ -1620,8 +1603,8 @@ int main(int argc, char *argv[]) {
                                 goto child_fail;
                         }
 
-                        if (arg_uuid) {
-                                if (asprintf((char**)(envp + n_env++), "container_uuid=%s", arg_uuid) < 0) {
+                        if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
+                                if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
                                         log_oom();
                                         goto child_fail;
                                 }
@@ -1635,7 +1618,7 @@ int main(int argc, char *argv[]) {
                                 }
 
                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
-                                    (asprintf((char **)(envp + n_env++), "LISTEN_PID=%lu", (unsigned long) 1) < 0)) {
+                                    (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
                                         log_oom();
                                         goto child_fail;
                                 }
@@ -1683,8 +1666,6 @@ int main(int argc, char *argv[]) {
                 fd_wait_for_event(pipefd2[0], POLLHUP, -1);
                 close_nointr_nofail(pipefd2[0]);
 
-                save_attributes(newcg, pid, arg_uuid, arg_directory);
-
                 fdset_free(fds);
                 fds = NULL;
 
@@ -1737,14 +1718,11 @@ finish:
 
         close_pipe(kmsg_socket_pair);
 
-        if (newcg)
-                cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
+        if (pid > 0)
+                kill(pid, SIGKILL);
 
         free(arg_directory);
         free(arg_machine);
-        strv_free(arg_controllers);
-
-        fdset_free(fds);
 
         return r;
 }