chiark / gitweb /
bus: when connecting to a container's kdbus instance, enter namespace first
author Lennart Poettering <lennart@poettering.net>
Fri, 13 Dec 2013 21:02:47 +0000 (22:02 +0100)
committer Lennart Poettering <lennart@poettering.net>
Sat, 14 Dec 2013 04:10:25 +0000 (05:10 +0100)
Previously we'd open the connection in the originating namespace, which
meant most peers of the bus would not be able to make sense of the
PID/UID/... identity of us since we didn't exist in the namespace they
run in. However they require this identity for privilege decisions,
hence disallowing access to anything from the host.

Instead, when connecting to a container, create a temporary subprocess,
make it join the container's namespace and then connect from there to
the kdbus instance. This is similar to how we do it for socket
connections already.

This also unifies the namespacing code used by machinectl and the bus
APIs.

src/core/load-fragment.c
src/libsystemd-bus/bus-container.c
src/libsystemd-bus/bus-container.h
src/libsystemd-bus/sd-bus.c
src/machine/machinectl.c
src/shared/util.c
src/shared/util.h

index 99cf20dcce86bf1a667df218a57556465e21fdef..183c43d58fff73176960ae4f7f3f0199a9ea83f3 100644 (file)
@@ -2680,31 +2680,28 @@ int unit_load_fragment(Unit *u) {
 
         /* Look for a template */
         if (u->load_state == UNIT_STUB && u->instance) {
-                char *k;
+                _cleanup_free_ char *k;
 
                 k = unit_name_template(u->id);
                 if (!k)
                         return -ENOMEM;
 
                 r = load_from_path(u, k);
-                free(k);
-
                 if (r < 0)
                         return r;
 
                 if (u->load_state == UNIT_STUB)
                         SET_FOREACH(t, u->names, i) {
+                                _cleanup_free_ char *z = NULL;
 
                                 if (t == u->id)
                                         continue;
 
-                                k = unit_name_template(t);
-                                if (!k)
+                                z = unit_name_template(t);
+                                if (!z)
                                         return -ENOMEM;
 
-                                r = load_from_path(u, k);
-                                free(k);
-
+                                r = load_from_path(u, z);
                                 if (r < 0)
                                         return r;
 
index 33478c02decdc925242eb8d476d7ae05b1d15d1e..5d31f5afa72466213db094562fb14c08dafd4258 100644 (file)
 #include "bus-socket.h"
 #include "bus-container.h"
 
-int bus_container_connect(sd_bus *b) {
-        _cleanup_free_ char *s = NULL, *ns = NULL, *root = NULL, *class = NULL;
+int bus_container_connect_socket(sd_bus *b) {
         _cleanup_close_ int nsfd = -1, rootfd = -1;
-        char *p;
-        siginfo_t si;
         pid_t leader, child;
+        siginfo_t si;
         int r;
 
         assert(b);
         assert(b->input_fd < 0);
         assert(b->output_fd < 0);
 
-        p = strappenda("/run/systemd/machines/", b->machine);
-        r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
-        if (r == -ENOENT)
-                return -EHOSTDOWN;
+        r = container_get_leader(b->machine, &leader);
         if (r < 0)
                 return r;
-        if (!s)
-                return -EIO;
-
-        if (!streq_ptr(class, "container"))
-                return -EIO;
 
-        r = parse_pid(s, &leader);
+        r = namespace_open(leader, &nsfd, &rootfd);
         if (r < 0)
                 return r;
-        if (leader <= 1)
-                return -EIO;
-
-        r = asprintf(&ns, "/proc/%lu/ns/mnt", (unsigned long) leader);
-        if (r < 0)
-                return -ENOMEM;
-
-        nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-        if (nsfd < 0)
-                return -errno;
-
-        r = asprintf(&root, "/proc/%lu/root", (unsigned long) leader);
-        if (r < 0)
-                return -ENOMEM;
-
-        rootfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
-        if (rootfd < 0)
-                return -errno;
 
         b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
         if (b->input_fd < 0)
@@ -89,14 +61,9 @@ int bus_container_connect(sd_bus *b) {
                 return -errno;
 
         if (child == 0) {
-                r = setns(nsfd, CLONE_NEWNS);
-                if (r < 0)
-                        _exit(255);
-
-                if (fchdir(rootfd) < 0)
-                        _exit(255);
 
-                if (chroot(".") < 0)
+                r = namespace_enter(nsfd, rootfd);
+                if (r < 0)
                         _exit(255);
 
                 r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
@@ -107,7 +74,7 @@ int bus_container_connect(sd_bus *b) {
                         _exit(255);
                 }
 
-                _exit(0);
+                _exit(EXIT_SUCCESS);
         }
 
         r = wait_for_terminate(child, &si);
@@ -120,8 +87,108 @@ int bus_container_connect(sd_bus *b) {
         if (si.si_status == 1)
                 return 1;
 
-        if (si.si_status != 0)
+        if (si.si_status != EXIT_SUCCESS)
                 return -EIO;
 
         return bus_socket_start_auth(b);
 }
+
+/*
+ * Connects the bus to the kdbus instance of the container named in b->machine.
+ *
+ * A helper process is forked which joins the mount namespace and root
+ * directory of the container's leader, opens b->kernel from in there and
+ * passes the resulting file descriptor back over a socket pair (SCM_RIGHTS).
+ * The received descriptor becomes both the input and the output fd of the bus.
+ *
+ * Returns the result of bus_kernel_take_fd() on success, or a negative
+ * errno-style error. -EIO is returned if the helper did not exit cleanly or
+ * did not hand over exactly one descriptor.
+ */
+int bus_container_connect_kernel(sd_bus *b) {
+        _cleanup_close_pipe_ int pair[2] = { -1, -1 };
+        _cleanup_close_ int nsfd = -1, rootfd = -1;
+        union {
+                struct cmsghdr cmsghdr;
+                uint8_t buf[CMSG_SPACE(sizeof(int))];
+        } control = {};
+        struct msghdr mh = {
+                .msg_control = &control,
+                .msg_controllen = sizeof(control),
+        };
+        struct cmsghdr *cmsg;
+        pid_t leader, child;
+        siginfo_t si;
+        int r;
+        _cleanup_close_ int fd = -1;
+
+        assert(b);
+        assert(b->input_fd < 0);
+        assert(b->output_fd < 0);
+
+        r = container_get_leader(b->machine, &leader);
+        if (r < 0)
+                return r;
+
+        r = namespace_open(leader, &nsfd, &rootfd);
+        if (r < 0)
+                return r;
+
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+                return -errno;
+
+        child = fork();
+        if (child < 0)
+                return -errno;
+
+        if (child == 0) {
+                /* Helper: only ever leaves via _exit(), so none of the
+                 * cleanup handlers of the parent run in here. */
+                close_nointr_nofail(pair[0]);
+                pair[0] = -1;
+
+                r = namespace_enter(nsfd, rootfd);
+                if (r < 0)
+                        _exit(EXIT_FAILURE);
+
+                fd = open(b->kernel, O_RDWR|O_NOCTTY|O_CLOEXEC);
+                if (fd < 0)
+                        _exit(EXIT_FAILURE);
+
+                cmsg = CMSG_FIRSTHDR(&mh);
+                cmsg->cmsg_level = SOL_SOCKET;
+                cmsg->cmsg_type = SCM_RIGHTS;
+                cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+                memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+
+                mh.msg_controllen = cmsg->cmsg_len;
+
+                if (sendmsg(pair[1], &mh, MSG_NOSIGNAL) < 0)
+                        _exit(EXIT_FAILURE);
+
+                _exit(EXIT_SUCCESS);
+        }
+
+        close_nointr_nofail(pair[1]);
+        pair[1] = -1;
+
+        if (recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0) {
+                /* Remember the error before reaping, then collect the helper
+                 * so that it is not left behind as a zombie. */
+                r = -errno;
+                (void) wait_for_terminate(child, &si);
+                return r;
+        }
+
+        for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg))
+                if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+                        int *fds;
+                        unsigned n_fds;
+
+                        fds = (int*) CMSG_DATA(cmsg);
+                        n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+                        if (n_fds != 1) {
+                                close_many(fds, n_fds);
+                                /* Don't leave the helper unreaped on this path either */
+                                (void) wait_for_terminate(child, &si);
+                                return -EIO;
+                        }
+
+                        fd = fds[0];
+                }
+
+        r = wait_for_terminate(child, &si);
+        if (r < 0)
+                return r;
+
+        if (si.si_code != CLD_EXITED)
+                return -EIO;
+
+        if (si.si_status != EXIT_SUCCESS)
+                return -EIO;
+
+        /* The helper claimed success but no descriptor arrived: never hand
+         * an invalid fd to the bus. */
+        if (fd < 0)
+                return -EIO;
+
+        b->input_fd = b->output_fd = fd;
+        fd = -1;
+
+        return bus_kernel_take_fd(b);
+}
index 65f43ab4fd01b2003e7aa8d898ce7cab7abca47a..c6f757a99bcc431a0f1d14409d70fb8d06406e09 100644 (file)
@@ -23,4 +23,5 @@
 
 #include "sd-bus.h"
 
-int bus_container_connect(sd_bus *b);
+int bus_container_connect_socket(sd_bus *b);
+int bus_container_connect_kernel(sd_bus *b);
index 932bf226c58509a9e07963873d22c4fcf94de66f..4eaceeff042fc4273ed9b4d6faf2a84bbeb556c0 100644 (file)
@@ -740,7 +740,7 @@ static int parse_kernel_address(sd_bus *b, const char **p, char **guid) {
         return 0;
 }
 
-static int parse_container_address(sd_bus *b, const char **p, char **guid) {
+static int parse_container_unix_address(sd_bus *b, const char **p, char **guid) {
         _cleanup_free_ char *machine = NULL;
         int r;
 
@@ -782,6 +782,49 @@ static int parse_container_address(sd_bus *b, const char **p, char **guid) {
         return 0;
 }
 
+/* Parses the parameters of an "x-container-kernel:" address up to the end of
+ * the string or the next ';'. The keys "guid" and "machine" are picked up, any
+ * other key is skipped. A machine name that passes filename_is_safe() is
+ * mandatory, otherwise -EINVAL is returned. On success the bus takes over the
+ * machine name and its kernel path is set to "/dev/kdbus/0-system/bus". */
+static int parse_container_kernel_address(sd_bus *b, const char **p, char **guid) {
+        _cleanup_free_ char *name = NULL;
+        int k;
+
+        assert(b);
+        assert(p);
+        assert(*p);
+        assert(guid);
+
+        for (;;) {
+                if (**p == 0 || **p == ';')
+                        break;
+
+                k = parse_address_key(p, "guid", guid);
+                if (k < 0)
+                        return k;
+                if (k > 0)
+                        continue;
+
+                k = parse_address_key(p, "machine", &name);
+                if (k < 0)
+                        return k;
+                if (k == 0)
+                        /* Neither of the keys we know, step over it */
+                        skip_address_key(p);
+        }
+
+        if (!name || !filename_is_safe(name))
+                return -EINVAL;
+
+        /* Hand the parsed name over to the bus object */
+        free(b->machine);
+        b->machine = name;
+        name = NULL;
+
+        free(b->kernel);
+        b->kernel = strdup("/dev/kdbus/0-system/bus");
+
+        return b->kernel ? 0 : -ENOMEM;
+}
+
+
 static void bus_reset_parsed_address(sd_bus *b) {
         assert(b);
 
@@ -855,10 +898,18 @@ static int bus_parse_next_address(sd_bus *b) {
                                 return r;
 
                         break;
-                } else if (startswith(a, "x-container:")) {
+                } else if (startswith(a, "x-container-unix:")) {
+
+                        a += 17;
+                        r = parse_container_unix_address(b, &a, &guid);
+                        if (r < 0)
+                                return r;
+
+                        break;
+                } else if (startswith(a, "x-container-kernel:")) {
 
-                        a += 12;
-                        r = parse_container_address(b, &a, &guid);
+                        a += 19;
+                        r = parse_container_kernel_address(b, &a, &guid);
                         if (r < 0)
                                 return r;
 
@@ -892,10 +943,12 @@ static int bus_start_address(sd_bus *b) {
 
                 if (b->exec_path)
                         r = bus_socket_exec(b);
+                else if (b->machine && b->kernel)
+                        r = bus_container_connect_kernel(b);
+                else if (b->machine && b->sockaddr.sa.sa_family != AF_UNSPEC)
+                        r = bus_container_connect_socket(b);
                 else if (b->kernel)
                         r = bus_kernel_connect(b);
-                else if (b->machine)
-                        r = bus_container_connect(b);
                 else if (b->sockaddr.sa.sa_family != AF_UNSPEC)
                         r = bus_socket_connect(b);
                 else
@@ -1144,9 +1197,9 @@ _public_ int sd_bus_open_system_container(const char *machine, sd_bus **ret) {
                 return -ENOMEM;
 
 #ifdef ENABLE_KDBUS
-        p = strjoin("kernel:path=/dev/kdbus/ns/machine-", e, "/0-system/bus;x-container:machine=", e, NULL);
+        p = strjoin("x-container-kernel:machine=", e, ";x-container-unix:machine=", e, NULL);
 #else
-        p = strjoin("x-container:machine=", e, NULL);
+        p = strjoin("x-container-unix:machine=", e, NULL);
 #endif
         if (!p)
                 return -ENOMEM;
index 7bb7086056a051e6d2dadce399753bd77111133d..f5485b3d4209c4ba24b6ab264814a63b664f13ea 100644 (file)
@@ -398,8 +398,8 @@ static int terminate_machine(sd_bus *bus, char **args, unsigned n) {
 }
 
 static int openpt_in_namespace(pid_t pid, int flags) {
+        _cleanup_close_pipe_ int pair[2] = { -1, -1 };
         _cleanup_close_ int nsfd = -1, rootfd = -1;
-        _cleanup_close_pipe_ int sock[2] = { -1, -1 };
         union {
                 struct cmsghdr cmsghdr;
                 uint8_t buf[CMSG_SPACE(sizeof(int))];
@@ -410,23 +410,14 @@ static int openpt_in_namespace(pid_t pid, int flags) {
         };
         struct cmsghdr *cmsg;
         int master = -1, r;
-        char *ns, *root;
         pid_t child;
         siginfo_t si;
 
-        ns = procfs_file_alloca(pid, "ns/mnt");
-
-        nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
-        if (nsfd < 0)
-                return -errno;
-
-        root = procfs_file_alloca(pid, "root");
-
-        rootfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
-        if (rootfd < 0)
-                return -errno;
+        r = namespace_open(pid, &nsfd, &rootfd);
+        if (r < 0)
+                return r;
 
-        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0)
+        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
                 return -errno;
 
         child = fork();
@@ -434,19 +425,13 @@ static int openpt_in_namespace(pid_t pid, int flags) {
                 return -errno;
 
         if (child == 0) {
-                close_nointr_nofail(sock[0]);
-                sock[0] = -1;
+                close_nointr_nofail(pair[0]);
+                pair[0] = -1;
 
-                r = setns(nsfd, CLONE_NEWNS);
+                r = namespace_enter(nsfd, rootfd);
                 if (r < 0)
                         _exit(EXIT_FAILURE);
 
-                if (fchdir(rootfd) < 0)
-                        _exit(EXIT_FAILURE);
-
-                if (chroot(".") < 0)
-                        _exit(EXIT_FAILURE);
-
                 master = posix_openpt(flags);
                 if (master < 0)
                         _exit(EXIT_FAILURE);
@@ -459,18 +444,16 @@ static int openpt_in_namespace(pid_t pid, int flags) {
 
                 mh.msg_controllen = cmsg->cmsg_len;
 
-                r = sendmsg(sock[1], &mh, MSG_NOSIGNAL);
-                close_nointr_nofail(master);
-                if (r < 0)
+                if (sendmsg(pair[1], &mh, MSG_NOSIGNAL) < 0)
                         _exit(EXIT_FAILURE);
 
                 _exit(EXIT_SUCCESS);
         }
 
-        close_nointr_nofail(sock[1]);
-        sock[1] = -1;
+        close_nointr_nofail(pair[1]);
+        pair[1] = -1;
 
-        if (recvmsg(sock[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0)
+        if (recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0)
                 return -errno;
 
         for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg))
index 1c35edfbb19595027742df31e6fa4c0f4280267a..66276aa54e3925aa6c21225bd39f71a6616de4ec 100644 (file)
@@ -5951,3 +5951,76 @@ int proc_cmdline(char **ret) {
 
         return 1;
 }
+
+/* Reads LEADER and CLASS from /run/systemd/machines/<machine> and stores the
+ * leader PID in *pid. Returns -EHOSTDOWN if that file does not exist, -EIO if
+ * LEADER is missing, the class is not "container" or the PID is <= 1, and any
+ * other parse error unmodified. Returns 0 on success. */
+int container_get_leader(const char *machine, pid_t *pid) {
+        _cleanup_free_ char *leader_str = NULL, *machine_class = NULL;
+        const char *path;
+        pid_t parsed;
+        int k;
+
+        assert(machine);
+        assert(pid);
+
+        path = strappenda("/run/systemd/machines/", machine);
+
+        k = parse_env_file(path, NEWLINE, "LEADER", &leader_str, "CLASS", &machine_class, NULL);
+        if (k < 0)
+                return k == -ENOENT ? -EHOSTDOWN : k;
+
+        if (!leader_str || !streq_ptr(machine_class, "container"))
+                return -EIO;
+
+        k = parse_pid(leader_str, &parsed);
+        if (k < 0)
+                return k;
+        if (parsed <= 1)
+                return -EIO;
+
+        *pid = parsed;
+        return 0;
+}
+
+/* Opens /proc/<pid>/ns/mnt and /proc/<pid>/root (the latter as a directory)
+ * and returns both descriptors through namespace_fd and root_fd. Nothing is
+ * returned to the caller unless both opens succeed. Returns 0 on success,
+ * -errno on failure. */
+int namespace_open(pid_t pid, int *namespace_fd, int *root_fd) {
+        _cleanup_close_ int mntns = -1, rootdir = -1;
+        const char *path;
+
+        assert(pid >= 0);
+        assert(namespace_fd);
+        assert(root_fd);
+
+        path = procfs_file_alloca(pid, "ns/mnt");
+        mntns = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+        if (mntns < 0)
+                return -errno;
+
+        path = procfs_file_alloca(pid, "root");
+        rootdir = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
+        if (rootdir < 0)
+                return -errno;
+
+        /* Pass ownership of both descriptors to the caller */
+        *namespace_fd = mntns;
+        *root_fd = rootdir;
+        mntns = rootdir = -1;
+
+        return 0;
+}
+
+/* Joins the mount namespace referenced by namespace_fd, then changes into the
+ * directory referenced by root_fd and makes it the root directory. The steps
+ * run in this order and stop at the first failure. Returns 0 on success,
+ * -errno otherwise. */
+int namespace_enter(int namespace_fd, int root_fd) {
+        assert(namespace_fd >= 0);
+        assert(root_fd >= 0);
+
+        if (setns(namespace_fd, CLONE_NEWNS) < 0 ||
+            fchdir(root_fd) < 0 ||
+            chroot(".") < 0)
+                return -errno;
+
+        return 0;
+}
index 504f63a28fe02bc5711be3492c9a4054784b1455..d5fa81c6a5a7e230c8696dc3e46c878ffe8efee0 100644 (file)
@@ -789,3 +789,8 @@ static inline void qsort_safe(void *base, size_t nmemb, size_t size,
 }
 
 int proc_cmdline(char **ret);
+
+int container_get_leader(const char *machine, pid_t *pid);
+
+int namespace_open(pid_t pid, int *namespace_fd, int *root_fd);
+int namespace_enter(int namespace_fd, int root_fd);