#include <linux/veth.h>
#include <sys/personality.h>
#include <linux/loop.h>
+#include <poll.h>
+#include <sys/file.h>
#ifdef HAVE_SELINUX
#include <selinux/selinux.h>
" and container and add it to an existing bridge on\n"
" the host\n"
" -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
- " Expose a container IP port ont the host\n"
+ " Expose a container IP port on the host\n"
" -Z --selinux-context=SECLABEL\n"
" Set the SELinux security context to be used by\n"
" processes in the container\n"
return r;
if (symlink(combined, target) < 0)
- return log_error_errno(errno, "Failed to create symlink for combined hiearchy: %m");
+ return log_error_errno(errno, "Failed to create symlink for combined hierarchy: %m");
}
}
cmsg = CMSG_FIRSTHDR(&mh);
assert(cmsg->cmsg_level == SOL_SOCKET);
assert(cmsg->cmsg_type == SCM_RIGHTS);
- assert(cmsg->cmsg_len = CMSG_LEN(sizeof(int)));
+ assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
r = sd_rtnl_open_fd(&rtnl, fd, 1, RTNLGRP_IPV4_IFADDR);
return r;
}
+/* Blocks until udev reports the block device identified by devnum as
+ * initialized, then stores a reference to that device in *ret.
+ *
+ * udev   - libudev context used to look up the device and create the monitor
+ * devnum - device number of the block device to wait for
+ * ret    - receives the device reference on success; the caller owns it
+ *
+ * Returns 0 on success. On failure the problem is logged and the value
+ * produced by log_oom()/log_error_errno() is returned; *ret is not written. */
+static int wait_for_block_device(struct udev *udev, dev_t devnum, struct udev_device **ret) {
+        _cleanup_udev_monitor_unref_ struct udev_monitor *monitor = NULL;
+        int r;
+
+        assert(udev);
+        assert(ret);
+
+        for (;;) {
+                _cleanup_udev_device_unref_ struct udev_device *d = NULL;
+                struct pollfd pfd = {
+                        .events = POLLIN
+                };
+
+                /* Take a fresh device object on every pass so that the
+                 * initialization state is re-queried rather than cached. */
+                d = udev_device_new_from_devnum(udev, 'b', devnum);
+                if (!d)
+                        return log_oom();
+
+                r = udev_device_get_is_initialized(d);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to check if device is initialized: %m");
+                if (r > 0) {
+                        /* Hand the reference to the caller and disarm the
+                         * cleanup handler by clearing the local pointer. */
+                        *ret = d;
+                        d = NULL;
+                        return 0;
+                }
+                d = udev_device_unref(d);
+
+                if (!monitor) {
+                        monitor = udev_monitor_new_from_netlink(udev, "udev");
+                        if (!monitor)
+                                return log_oom();
+
+                        r = udev_monitor_filter_add_match_subsystem_devtype(monitor, "block", NULL);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to add block match: %m");
+
+                        r = udev_monitor_enable_receiving(monitor);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to turn on monitor: %m");
+
+                        /* Re-check the device once before blocking, so an
+                         * initialization that completed while the monitor
+                         * was being set up is not missed. */
+                        continue;
+                }
+
+                pfd.fd = udev_monitor_get_fd(monitor);
+                if (pfd.fd < 0)
+                        /* Report the error carried by the fd value itself;
+                         * r is always 0 at this point and would hide the
+                         * failure from the caller. */
+                        return log_error_errno(pfd.fd, "Failed to get udev monitor fd: %m");
+
+                r = poll(&pfd, 1, -1);
+                if (r < 0)
+                        return log_error_errno(errno, "Failed to wait for device initialization: %m");
+
+                /* Consume the pending event. The received object is released
+                 * by the cleanup handler at the end of this iteration; the
+                 * actual state is re-queried at the top of the loop. */
+                d = udev_monitor_receive_device(monitor);
+        }
+
+        return 0;
+}
+
+#define PARTITION_TABLE_BLURB /* shared hint appended to the disk-image error messages that use this macro */ \
+        "Note that the disk image needs to either contain only a single MBR partition of\n" \
+        "type 0x83 that is marked bootable, or follow\n" \
+        "    http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" \
+        "to be bootable with systemd-nspawn."
+
static int dissect_image(
int fd,
char **root_device, bool *root_device_rw,
blkid_partlist pl;
struct stat st;
int r;
+ bool is_gpt, is_mbr;
assert(fd >= 0);
assert(root_device);
errno = 0;
r = blkid_do_safeprobe(b);
if (r == -2 || r == 1) {
- log_error("Failed to identify any partition table on %s.\n"
- "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
+ log_error("Failed to identify any partition table on\n"
+ " %s\n"
+ PARTITION_TABLE_BLURB, arg_image);
return -EINVAL;
} else if (r != 0) {
if (errno == 0)
}
blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
- if (!streq_ptr(pttype, "gpt")) {
- log_error("Image %s does not carry a GUID Partition Table.\n"
- "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
+
+ is_gpt = streq_ptr(pttype, "gpt");
+ is_mbr = streq_ptr(pttype, "dos");
+
+ if (!is_gpt && !is_mbr) {
+ log_error("No GPT or MBR partition table discovered on\n"
+ " %s\n"
+ PARTITION_TABLE_BLURB, arg_image);
return -EINVAL;
}
if (fstat(fd, &st) < 0)
return log_error_errno(errno, "Failed to stat block device: %m");
- d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
- if (!d)
- return log_oom();
+ r = wait_for_block_device(udev, st.st_rdev, &d);
+ if (r < 0)
+ return r;
e = udev_enumerate_new(udev);
if (!e)
first = udev_enumerate_get_list_entry(e);
udev_list_entry_foreach(item, first) {
_cleanup_udev_device_unref_ struct udev_device *q;
- const char *stype, *node;
+ const char *node;
unsigned long long flags;
- sd_id128_t type_id;
blkid_partition pp;
dev_t qn;
int nr;
continue;
flags = blkid_partition_get_flags(pp);
- if (flags & GPT_FLAG_NO_AUTO)
+ if (is_gpt && (flags & GPT_FLAG_NO_AUTO))
+ continue;
+ if (is_mbr && (flags != 0x80)) /* Bootable flag */
continue;
nr = blkid_partition_get_partno(pp);
if (nr < 0)
continue;
- stype = blkid_partition_get_type_string(pp);
- if (!stype)
- continue;
+ if (is_gpt) {
+ sd_id128_t type_id;
+ const char *stype;
- if (sd_id128_from_string(stype, &type_id) < 0)
- continue;
-
- if (sd_id128_equal(type_id, GPT_HOME)) {
+ stype = blkid_partition_get_type_string(pp);
+ if (!stype)
+ continue;
- if (home && nr >= home_nr)
+ if (sd_id128_from_string(stype, &type_id) < 0)
continue;
- home_nr = nr;
- home_rw = !(flags & GPT_FLAG_READ_ONLY);
+ if (sd_id128_equal(type_id, GPT_HOME)) {
- free(home);
- home = strdup(node);
- if (!home)
- return log_oom();
- } else if (sd_id128_equal(type_id, GPT_SRV)) {
+ if (home && nr >= home_nr)
+ continue;
- if (srv && nr >= srv_nr)
- continue;
+ home_nr = nr;
+ home_rw = !(flags & GPT_FLAG_READ_ONLY);
+
+ r = free_and_strdup(&home, node);
+ if (r < 0)
+ return log_oom();
- srv_nr = nr;
- srv_rw = !(flags & GPT_FLAG_READ_ONLY);
+ } else if (sd_id128_equal(type_id, GPT_SRV)) {
- free(srv);
- srv = strdup(node);
- if (!srv)
- return log_oom();
- }
+ if (srv && nr >= srv_nr)
+ continue;
+
+ srv_nr = nr;
+ srv_rw = !(flags & GPT_FLAG_READ_ONLY);
+
+ r = free_and_strdup(&srv, node);
+ if (r < 0)
+ return log_oom();
+ }
#ifdef GPT_ROOT_NATIVE
- else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
+ else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
- if (root && nr >= root_nr)
- continue;
+ if (root && nr >= root_nr)
+ continue;
- root_nr = nr;
- root_rw = !(flags & GPT_FLAG_READ_ONLY);
+ root_nr = nr;
+ root_rw = !(flags & GPT_FLAG_READ_ONLY);
- free(root);
- root = strdup(node);
- if (!root)
- return log_oom();
- }
+ r = free_and_strdup(&root, node);
+ if (r < 0)
+ return log_oom();
+ }
#endif
#ifdef GPT_ROOT_SECONDARY
- else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
+ else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
- if (secondary_root && nr >= secondary_root_nr)
+ if (secondary_root && nr >= secondary_root_nr)
+ continue;
+
+ secondary_root_nr = nr;
+ secondary_root_rw = !(flags & GPT_FLAG_READ_ONLY);
+
+ r = free_and_strdup(&secondary_root, node);
+ if (r < 0)
+ return log_oom();
+ }
+#endif
+
+ } else if (is_mbr) {
+ int type;
+
+ type = blkid_partition_get_type(pp);
+ if (type != 0x83) /* Linux partition */
continue;
- secondary_root_nr = nr;
- secondary_root_rw = !(flags & GPT_FLAG_READ_ONLY);
+ /* Note that there's a certain, intended
+ * asymmetry here: while for GPT we simply
+ * take the first valid partition and ignore
+ * all others of the same type, for MBR we
+ * fail if there are multiple suitable
+ * partitions. This is because the GPT
+ * partition types are defined by us, and
+ * hence we can define their lookup semantics,
+ * while for the MBR logic we reuse existing
+ * definitions, and simply don't want to make
+ * out the situation. */
+
+ if (root) {
+ log_error("Identified multiple bootable Linux 0x83 partitions on\n"
+ " %s\n"
+ PARTITION_TABLE_BLURB, arg_image);
+ return -EINVAL;
+ }
+ root_nr = nr;
- free(secondary_root);
- secondary_root = strdup(node);
- if (!secondary_root)
+ r = free_and_strdup(&root, node);
+ if (r < 0)
return log_oom();
}
-#endif
}
if (!root && !secondary_root) {
- log_error("Failed to identify root partition in disk image %s.\n"
- "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
+ log_error("Failed to identify root partition in disk image\n"
+ " %s\n"
+ PARTITION_TABLE_BLURB, arg_image);
return -EINVAL;
}
if (image_fd && *image_fd >= 0) {
r = ioctl(*image_fd, LOOP_CLR_FD);
if (r < 0)
- log_warning_errno(errno, "Failed to close loop image: %m");
+ log_debug_errno(errno, "Failed to close loop image: %m");
*image_fd = safe_close(*image_fd);
}
r = ioctl(control, LOOP_CTL_REMOVE, nr);
if (r < 0)
- log_warning_errno(errno, "Failed to remove loop %d: %m", nr);
+ log_debug_errno(errno, "Failed to remove loop %d: %m", nr);
}
static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
return -ENOENT;
}
- if (i->type == IMAGE_GPT)
+ if (i->type == IMAGE_RAW)
r = set_sanitized_path(&arg_image, i->path);
else
r = set_sanitized_path(&arg_directory, i->path);
pid_t pid = 0;
int ret = EXIT_SUCCESS;
union in_addr_union exposed = {};
+ _cleanup_release_lock_file_ LockFile tree_global_lock = LOCK_FILE_INIT, tree_local_lock = LOCK_FILE_INIT;
log_parse_environment();
log_open();
goto finish;
}
- if (arg_template) {
- r = btrfs_subvol_snapshot(arg_template, arg_directory, arg_read_only, true);
- if (r == -EEXIST) {
- if (!arg_quiet)
- log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template);
- } else if (r < 0) {
- log_error_errno(r, "Couldn't create snapshort %s from %s: %m", arg_directory, arg_template);
- goto finish;
- } else {
- if (!arg_quiet)
- log_info("Populated %s from template %s.", arg_directory, arg_template);
- }
-
- } else if (arg_ephemeral) {
+ if (arg_ephemeral) {
+ _cleanup_release_lock_file_ LockFile original_lock = LOCK_FILE_INIT;
char *np;
/* If the specified path is a mount point we
goto finish;
}
+ r = image_path_lock(np, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock %s: %m", np);
+ goto finish;
+ }
+
r = btrfs_subvol_snapshot(arg_directory, np, arg_read_only, true);
if (r < 0) {
free(np);
arg_directory = np;
remove_subvol = true;
+
+ } else {
+ r = image_path_lock(arg_directory, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r == -EBUSY) {
+ log_error_errno(r, "Directory tree %s is currently busy.", arg_directory);
+ goto finish;
+ }
+ if (r < 0) {
+ log_error_errno(r, "Failed to lock %s: %m", arg_directory);
+ return r;
+ }
+
+ if (arg_template) {
+ r = btrfs_subvol_snapshot(arg_template, arg_directory, arg_read_only, true);
+ if (r == -EEXIST) {
+ if (!arg_quiet)
+ log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template);
+ } else if (r < 0) {
+ log_error_errno(r, "Couldn't create snapshot %s from %s: %m", arg_directory, arg_template);
+ goto finish;
+ } else {
+ if (!arg_quiet)
+ log_info("Populated %s from template %s.", arg_directory, arg_template);
+ }
+ }
}
if (arg_boot) {
assert(arg_image);
assert(!arg_template);
+ r = image_path_lock(arg_image, (arg_read_only ? LOCK_SH : LOCK_EX) | LOCK_NB, &tree_global_lock, &tree_local_lock);
+ if (r == -EBUSY) {
+ r = log_error_errno(r, "Disk image %s is currently busy.", arg_image);
+ goto finish;
+ }
+ if (r < 0) {
+ r = log_error_errno(r, "Failed to create image lock: %m");
+ goto finish;
+ }
+
if (!mkdtemp(template)) {
log_error_errno(errno, "Failed to create temporary directory: %m");
r = -errno;
_cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
char last_char = 0;
- sd_notify(false,
- "READY=1\n"
- "STATUS=Container running.");
+ sd_notifyf(false,
+ "READY=1\n"
+ "STATUS=Container running.\n"
+ "X_NSPAWN_LEADER_PID=" PID_FMT, pid);
r = sd_event_new(&event);
if (r < 0) {
if (arg_machine) {
const char *p;
- p = strappenda("/run/systemd/nspawn/propagate", arg_machine);
+ p = strappenda("/run/systemd/nspawn/propagate/", arg_machine);
(void) rm_rf(p, false, true, false);
}