+static int machine_get_leader(sd_bus *bus, const char *name, pid_t *ret) {
+ _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
+ _cleanup_bus_message_unref_ sd_bus_message *reply = NULL, *reply2 = NULL;
+ const char *object;
+ uint32_t leader;
+ int r;
+
+ assert(bus);
+ assert(name);
+ assert(ret);
+
+ r = sd_bus_call_method(
+ bus,
+ "org.freedesktop.machine1",
+ "/org/freedesktop/machine1",
+ "org.freedesktop.machine1.Manager",
+ "GetMachine",
+ &error,
+ &reply,
+ "s", name);
+ if (r < 0) {
+ log_error("Could not get path to machine: %s", bus_error_message(&error, -r));
+ return r;
+ }
+
+ r = sd_bus_message_read(reply, "o", &object);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ r = sd_bus_get_property(
+ bus,
+ "org.freedesktop.machine1",
+ object,
+ "org.freedesktop.machine1.Machine",
+ "Leader",
+ &error,
+ &reply2,
+ "u");
+ if (r < 0)
+ return log_error_errno(r, "Failed to retrieve PID of leader: %m");
+
+ r = sd_bus_message_read(reply2, "u", &leader);
+ if (r < 0)
+ return bus_log_parse_error(r);
+
+ *ret = leader;
+ return 0;
+}
+
+static int bind_mount(sd_bus *bus, char **args, unsigned n) {
+ char mount_slave[] = "/tmp/propagate.XXXXXX", *mount_tmp, *mount_outside, *p;
+ pid_t child, leader;
+ const char *dest;
+ siginfo_t si;
+ bool mount_slave_created = false, mount_slave_mounted = false,
+ mount_tmp_created = false, mount_tmp_mounted = false,
+ mount_outside_created = false, mount_outside_mounted = false;
+ int r;
+
+ /* One day, when bind mounting /proc/self/fd/n works across
+ * namespace boundaries we should rework this logic to make
+ * use of it... */
+
+ if (n > 4) {
+ log_error("Too many arguments.");
+ return -EINVAL;
+ }
+
+ dest = args[3] ?: args[2];
+ if (!path_is_absolute(dest)) {
+ log_error("Destination path not absolute.");
+ return -EINVAL;
+ }
+
+ p = strappenda("/run/systemd/nspawn/propagate/", args[1], "/");
+ if (access(p, F_OK) < 0) {
+ log_error("Container does not allow propagation of mount points.");
+ return -ENOTSUP;
+ }
+
+ r = machine_get_leader(bus, args[1], &leader);
+ if (r < 0)
+ return r;
+
+ /* Our goal is to install a new bind mount into the container,
+ possibly read-only. This is irritatingly complex
+ unfortunately, currently.
+
+ First, we start by creating a private playground in /tmp,
+ that we can mount MS_SLAVE. (Which is necessary, since
+ MS_MOUNT cannot be applied to mounts with MS_SHARED parent
+ mounts.) */
+
+ if (!mkdtemp(mount_slave))
+ return log_error_errno(errno, "Failed to create playground: %m");
+
+ mount_slave_created = true;
+
+ if (mount(mount_slave, mount_slave, NULL, MS_BIND, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to make bind mount: %m");
+ goto finish;
+ }
+
+ mount_slave_mounted = true;
+
+ if (mount(NULL, mount_slave, NULL, MS_SLAVE, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to remount slave: %m");
+ goto finish;
+ }
+
+ /* Second, we mount the source directory to a directory inside
+ of our MS_SLAVE playground. */
+ mount_tmp = strappenda(mount_slave, "/mount");
+ if (mkdir(mount_tmp, 0700) < 0) {
+ r = log_error_errno(errno, "Failed to create temporary mount: %m");
+ goto finish;
+ }
+
+ mount_tmp_created = true;
+
+ if (mount(args[2], mount_tmp, NULL, MS_BIND, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to overmount: %m");
+ goto finish;
+ }
+
+ mount_tmp_mounted = true;
+
+ /* Third, we remount the new bind mount read-only if requested. */
+ if (arg_read_only)
+ if (mount(NULL, mount_tmp, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to mark read-only: %m");
+ goto finish;
+ }
+
+ /* Fourth, we move the new bind mount into the propagation
+ * directory. This way it will appear there read-only
+ * right-away. */
+
+ mount_outside = strappenda("/run/systemd/nspawn/propagate/", args[1], "/XXXXXX");
+ if (!mkdtemp(mount_outside)) {
+ r = log_error_errno(errno, "Cannot create propagation directory: %m");
+ goto finish;
+ }
+
+ mount_outside_created = true;
+
+ if (mount(mount_tmp, mount_outside, NULL, MS_MOVE, NULL) < 0) {
+ r = log_error_errno(errno, "Failed to move: %m");
+ goto finish;
+ }
+
+ mount_outside_mounted = true;
+ mount_tmp_mounted = false;
+
+ (void) rmdir(mount_tmp);
+ mount_tmp_created = false;
+
+ (void) umount(mount_slave);
+ mount_slave_mounted = false;
+
+ (void) rmdir(mount_slave);
+ mount_slave_created = false;
+
+ child = fork();
+ if (child < 0) {
+ r = log_error_errno(errno, "Failed to fork(): %m");
+ goto finish;
+ }
+
+ if (child == 0) {
+ const char *mount_inside;
+ int mntfd;
+ const char *q;
+
+ q = procfs_file_alloca(leader, "ns/mnt");
+ mntfd = open(q, O_RDONLY|O_NOCTTY|O_CLOEXEC);
+ if (mntfd < 0) {
+ log_error_errno(errno, "Failed to open mount namespace of leader: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (setns(mntfd, CLONE_NEWNS) < 0) {
+ log_error_errno(errno, "Failed to join namespace of leader: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (arg_mkdir)
+ mkdir_p(dest, 0755);
+
+ /* Fifth, move the mount to the right place inside */
+ mount_inside = strappenda("/run/systemd/nspawn/incoming/", basename(mount_outside));
+ if (mount(mount_inside, dest, NULL, MS_MOVE, NULL) < 0) {
+ log_error_errno(errno, "Failed to mount: %m");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ r = wait_for_terminate(child, &si);
+ if (r < 0) {
+ log_error_errno(r, "Failed to wait for client: %m");
+ goto finish;
+ }
+ if (si.si_code != CLD_EXITED) {
+ log_error("Client died abnormally.");
+ r = -EIO;
+ goto finish;
+ }
+ if (si.si_status != EXIT_SUCCESS) {
+ r = -EIO;
+ goto finish;
+ }
+
+ r = 0;
+
+finish:
+ if (mount_outside_mounted)
+ umount(mount_outside);
+ if (mount_outside_created)
+ rmdir(mount_outside);
+
+ if (mount_tmp_mounted)
+ umount(mount_tmp);
+ if (mount_tmp_created)
+ umount(mount_tmp);
+
+ if (mount_slave_mounted)
+ umount(mount_slave);
+ if (mount_slave_created)
+ umount(mount_slave);
+
+ return r;
+}
+