#include "bus-socket.h"
#include "bus-container.h"
-int bus_container_connect(sd_bus *b) {
- _cleanup_free_ char *p = NULL, *s = NULL, *ns = NULL, *root = NULL, *class = NULL;
- _cleanup_close_ int nsfd = -1, rootfd = -1;
- siginfo_t si;
+int bus_container_connect_socket(sd_bus *b) {
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
pid_t leader, child;
+ siginfo_t si;
int r;
assert(b);
assert(b->input_fd < 0);
assert(b->output_fd < 0);
- p = strappend("/run/systemd/machines/", b->machine);
- if (!p)
- return -ENOMEM;
-
- r = parse_env_file(p, NEWLINE, "LEADER", &s, "CLASS", &class, NULL);
+ r = container_get_leader(b->machine, &leader);
if (r < 0)
return r;
- if (!s)
- return -EIO;
-
- if (!streq_ptr(class, "container"))
- return -EIO;
- r = parse_pid(s, &leader);
+ r = namespace_open(leader, &pidnsfd, &mntnsfd, &rootfd);
if (r < 0)
return r;
- if (leader <= 1)
- return -EIO;
-
- r = asprintf(&ns, "/proc/%lu/ns/mnt", (unsigned long) leader);
- if (r < 0)
- return -ENOMEM;
-
- nsfd = open(ns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
- if (nsfd < 0)
- return -errno;
-
- r = asprintf(&root, "/proc/%lu/root", (unsigned long) leader);
- if (r < 0)
- return -ENOMEM;
-
- rootfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC);
- if (rootfd < 0)
- return -errno;
b->input_fd = socket(b->sockaddr.sa.sa_family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
if (b->input_fd < 0)
return -errno;
if (child == 0) {
- r = setns(nsfd, CLONE_NEWNS);
+ pid_t grandchild;
+
+ r = namespace_enter(pidnsfd, mntnsfd, rootfd);
if (r < 0)
_exit(255);
- if (fchdir(rootfd) < 0)
- _exit(255);
+ /* We just changed PID namespace, however it will only
+ * take effect on the children we now fork. Hence,
+ * let's fork another time, and connect from this
+ * grandchild, so that SO_PEERCRED of our connection
+ * comes from a process from within the container, and
+ * not outside of it */
- if (chroot(".") < 0)
+ grandchild = fork();
+ if (grandchild < 0)
_exit(255);
+ if (grandchild == 0) {
- r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
- if (r < 0) {
- if (errno == EINPROGRESS)
- _exit(1);
+ r = connect(b->input_fd, &b->sockaddr.sa, b->sockaddr_size);
+ if (r < 0) {
+ if (errno == EINPROGRESS)
+ _exit(1);
- _exit(255);
+ _exit(255);
+ }
+
+ _exit(EXIT_SUCCESS);
}
- _exit(0);
+ r = wait_for_terminate(grandchild, &si);
+ if (r < 0)
+ _exit(255);
+
+ if (si.si_code != CLD_EXITED)
+ _exit(255);
+
+ _exit(si.si_status);
}
r = wait_for_terminate(child, &si);
if (si.si_status == 1)
return 1;
- if (si.si_status != 0)
+ if (si.si_status != EXIT_SUCCESS)
return -EIO;
return bus_socket_start_auth(b);
}
+
+int bus_container_connect_kernel(sd_bus *b) {
+ _cleanup_close_pipe_ int pair[2] = { -1, -1 };
+ _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, rootfd = -1;
+ union {
+ struct cmsghdr cmsghdr;
+ uint8_t buf[CMSG_SPACE(sizeof(int))];
+ } control = {};
+ struct msghdr mh = {
+ .msg_control = &control,
+ .msg_controllen = sizeof(control),
+ };
+ struct cmsghdr *cmsg;
+ pid_t leader, child;
+ siginfo_t si;
+ int r;
+ _cleanup_close_ int fd = -1;
+
+ assert(b);
+ assert(b->input_fd < 0);
+ assert(b->output_fd < 0);
+
+ r = container_get_leader(b->machine, &leader);
+ if (r < 0)
+ return r;
+
+ r = namespace_open(leader, &pidnsfd, &mntnsfd, &rootfd);
+ if (r < 0)
+ return r;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, pair) < 0)
+ return -errno;
+
+ child = fork();
+ if (child < 0)
+ return -errno;
+
+ if (child == 0) {
+ pid_t grandchild;
+
+ close_nointr_nofail(pair[0]);
+ pair[0] = -1;
+
+ r = namespace_enter(pidnsfd, mntnsfd, rootfd);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ /* We just changed PID namespace, however it will only
+ * take effect on the children we now fork. Hence,
+ * let's fork another time, and connect from this
+ * grandchild, so that kdbus only sees the credentials
+ * of this process which comes from within the
+ * container, and not outside of it */
+
+ grandchild = fork();
+ if (grandchild < 0)
+ _exit(EXIT_FAILURE);
+
+ if (grandchild == 0) {
+
+ fd = open(b->kernel, O_RDWR|O_NOCTTY|O_CLOEXEC);
+ if (fd < 0)
+ _exit(EXIT_FAILURE);
+
+ cmsg = CMSG_FIRSTHDR(&mh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
+
+ mh.msg_controllen = cmsg->cmsg_len;
+
+ if (sendmsg(pair[1], &mh, MSG_NOSIGNAL) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ r = wait_for_terminate(grandchild, &si);
+ if (r < 0)
+ _exit(EXIT_FAILURE);
+
+ if (si.si_code != CLD_EXITED)
+ _exit(EXIT_FAILURE);
+
+ _exit(si.si_status);
+ }
+
+ close_nointr_nofail(pair[1]);
+ pair[1] = -1;
+
+ if (recvmsg(pair[0], &mh, MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) < 0)
+ return -errno;
+
+ for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg))
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
+ int *fds;
+ unsigned n_fds;
+
+ fds = (int*) CMSG_DATA(cmsg);
+ n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+
+ if (n_fds != 1) {
+ close_many(fds, n_fds);
+ return -EIO;
+ }
+
+ fd = fds[0];
+ }
+
+ r = wait_for_terminate(child, &si);
+ if (r < 0)
+ return r;
+
+ if (si.si_code != CLD_EXITED)
+ return -EIO;
+
+ if (si.si_status != EXIT_SUCCESS)
+ return -EIO;
+
+ b->input_fd = b->output_fd = fd;
+ fd = -1;
+
+ return bus_kernel_take_fd(b);
+}