#include <linux/veth.h>
#include <sys/personality.h>
#include <linux/loop.h>
+#include <poll.h>
+#include <sys/file.h>
#ifdef HAVE_SELINUX
#include <selinux/selinux.h>
static bool arg_keep_unit = false;
static char **arg_network_interfaces = NULL;
static char **arg_network_macvlan = NULL;
+static char **arg_network_ipvlan = NULL;
static bool arg_network_veth = false;
static const char *arg_network_bridge = NULL;
static unsigned long arg_personality = 0xffffffffLU;
" --network-macvlan=INTERFACE\n"
" Create a macvlan network interface based on an\n"
" existing network interface to the container\n"
+ " --network-ipvlan=INTERFACE\n"
+ " Create a ipvlan network interface based on an\n"
+ " existing network interface to the container\n"
" -n --network-veth Add a virtual ethernet connection between host\n"
" and container\n"
" --network-bridge=INTERFACE\n"
ARG_KEEP_UNIT,
ARG_NETWORK_INTERFACE,
ARG_NETWORK_MACVLAN,
+ ARG_NETWORK_IPVLAN,
ARG_NETWORK_BRIDGE,
ARG_PERSONALITY,
ARG_VOLATILE,
{ "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
{ "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
{ "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
+ { "network-ipvlan", required_argument, NULL, ARG_NETWORK_IPVLAN },
{ "network-veth", no_argument, NULL, 'n' },
{ "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
{ "personality", required_argument, NULL, ARG_PERSONALITY },
if (strv_extend(&arg_network_macvlan, optarg) < 0)
return log_oom();
+ arg_private_network = true;
+ break;
+
+ case ARG_NETWORK_IPVLAN:
+ if (strv_extend(&arg_network_ipvlan, optarg) < 0)
+ return log_oom();
+
/* fall through */
case ARG_PRIVATE_NETWORK:
return 0;
}
+static int setup_ipvlan(pid_t pid) {
+ _cleanup_udev_unref_ struct udev *udev = NULL;
+ _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
+ char **i;
+ int r;
+
+ if (!arg_private_network)
+ return 0;
+
+ if (strv_isempty(arg_network_ipvlan))
+ return 0;
+
+ r = sd_rtnl_open(&rtnl, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to connect to netlink: %m");
+
+ udev = udev_new();
+ if (!udev) {
+ log_error("Failed to connect to udev.");
+ return -ENOMEM;
+ }
+
+ STRV_FOREACH(i, arg_network_ipvlan) {
+ _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
+ _cleanup_free_ char *n = NULL;
+ int ifi;
+
+ ifi = parse_interface(udev, *i);
+ if (ifi < 0)
+ return ifi;
+
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
+ if (r < 0)
+ return log_error_errno(r, "Failed to allocate netlink message: %m");
+
+ r = sd_rtnl_message_append_u32(m, IFLA_LINK, ifi);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface index: %m");
+
+ n = strappend("iv-", *i);
+ if (!n)
+ return log_oom();
+
+ strshorten(n, IFNAMSIZ-1);
+
+ r = sd_rtnl_message_append_string(m, IFLA_IFNAME, n);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink interface name: %m");
+
+ r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add netlink namespace field: %m");
+
+ r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_rtnl_message_open_container_union(m, IFLA_INFO_DATA, "ipvlan");
+ if (r < 0)
+ return log_error_errno(r, "Failed to open netlink container: %m");
+
+ r = sd_rtnl_message_append_u16(m, IFLA_IPVLAN_MODE, IPVLAN_MODE_L2);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add ipvlan mode: %m");
+
+ r = sd_rtnl_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_rtnl_message_close_container(m);
+ if (r < 0)
+ return log_error_errno(r, "Failed to close netlink container: %m");
+
+ r = sd_rtnl_call(rtnl, m, 0, NULL);
+ if (r < 0)
+ return log_error_errno(r, "Failed to add new ipvlan interfaces: %m");
+ }
+
+ return 0;
+}
+
static int setup_seccomp(void) {
#ifdef HAVE_SECCOMP
#define PARTITION_TABLE_BLURB \
"Note that the disk image needs to either contain only a single MBR partition of\n" \
- "type 0x83 that is marked bootable, or follow\n" \
+ "type 0x83 that is marked bootable, or a sinlge GPT partition of type" \
+ "0FC63DAF-8483-4772-8E79-3D69D8477DE4 or follow\n" \
" http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" \
"to be bootable with systemd-nspawn."
#ifdef GPT_ROOT_SECONDARY
int secondary_root_nr = -1;
#endif
-
- _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL;
+ _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL, *generic = NULL;
_cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
_cleanup_udev_device_unref_ struct udev_device *d = NULL;
_cleanup_blkid_free_probe_ blkid_probe b = NULL;
_cleanup_udev_unref_ struct udev *udev = NULL;
struct udev_list_entry *first, *item;
- bool home_rw = true, root_rw = true, secondary_root_rw = true, srv_rw = true;
+ bool home_rw = true, root_rw = true, secondary_root_rw = true, srv_rw = true, generic_rw = true;
+ bool is_gpt, is_mbr, multiple_generic = false;
const char *pttype = NULL;
blkid_partlist pl;
struct stat st;
+ unsigned i;
int r;
- bool is_gpt, is_mbr;
assert(fd >= 0);
assert(root_device);
if (!d)
return log_oom();
- e = udev_enumerate_new(udev);
- if (!e)
- return log_oom();
+ for (i = 0;; i++) {
+ int n, m;
- r = udev_enumerate_add_match_parent(e, d);
- if (r < 0)
- return log_oom();
+ if (i >= 10) {
+ log_error("Kernel partitions never appeared.");
+ return -ENXIO;
+ }
- r = udev_enumerate_scan_devices(e);
- if (r < 0)
- return log_error_errno(r, "Failed to scan for partition devices of %s: %m", arg_image);
+ e = udev_enumerate_new(udev);
+ if (!e)
+ return log_oom();
+
+ r = udev_enumerate_add_match_parent(e, d);
+ if (r < 0)
+ return log_oom();
+
+ r = udev_enumerate_scan_devices(e);
+ if (r < 0)
+ return log_error_errno(r, "Failed to scan for partition devices of %s: %m", arg_image);
+
+ /* Count the partitions enumerated by the kernel */
+ n = 0;
+ first = udev_enumerate_get_list_entry(e);
+ udev_list_entry_foreach(item, first)
+ n++;
+
+ /* Count the partitions enumerated by blkid */
+ m = blkid_partlist_numof_partitions(pl);
+ if (n == m + 1)
+ break;
+ if (n > m + 1) {
+ log_error("blkid and kernel partition list do not match.");
+ return -EIO;
+ }
+ if (n < m + 1) {
+ unsigned j;
+
+ /* The kernel has probed fewer partitions than
+ * blkid? Maybe the kernel prober is still
+ * running or it got EBUSY because udev
+ * already opened the device. Let's reprobe
+ * the device, which is a synchronous call
+ * that waits until probing is complete. */
+
+ for (j = 0; j < 20; j++) {
+
+ r = ioctl(fd, BLKRRPART, 0);
+ if (r < 0)
+ r = -errno;
+ if (r >= 0 || r != -EBUSY)
+ break;
+
+ /* If something else has the device
+ * open, such as an udev rule, the
+ * ioctl will return EBUSY. Since
+ * there's no way to wait until it
+ * isn't busy anymore, let's just wait
+ * a bit, and try again.
+ *
+ * This is really something they
+ * should fix in the kernel! */
+
+ usleep(50 * USEC_PER_MSEC);
+ }
+
+ if (r < 0)
+ return log_error_errno(r, "Failed to reread partition table: %m");
+ }
+
+ e = udev_enumerate_unref(e);
+ }
first = udev_enumerate_get_list_entry(e);
udev_list_entry_foreach(item, first) {
continue;
flags = blkid_partition_get_flags(pp);
- if (is_gpt && (flags & GPT_FLAG_NO_AUTO))
- continue;
- if (is_mbr && (flags != 0x80)) /* Bootable flag */
- continue;
nr = blkid_partition_get_partno(pp);
if (nr < 0)
sd_id128_t type_id;
const char *stype;
+ if (flags & GPT_FLAG_NO_AUTO)
+ continue;
+
stype = blkid_partition_get_type_string(pp);
if (!stype)
continue;
return log_oom();
}
#endif
+ else if (sd_id128_equal(type_id, GPT_LINUX_GENERIC)) {
+
+ if (generic)
+ multiple_generic = true;
+ else {
+ generic_rw = !(flags & GPT_FLAG_READ_ONLY);
+
+ r = free_and_strdup(&generic, node);
+ if (r < 0)
+ return log_oom();
+ }
+ }
} else if (is_mbr) {
int type;
+ if (flags != 0x80) /* Bootable flag */
+ continue;
+
type = blkid_partition_get_type(pp);
if (type != 0x83) /* Linux partition */
continue;
- /* Note that there's a certain, intended
- * asymmetry here: while for GPT we simply
- * take the first valid partition and ignore
- * all others of the same type, for MBR we
- * fail if there are multiple suitable
- * partitions. This is because the GPT
- * partition types are defined by us, and
- * hence we can define their lookup semantics,
- * while for the MBR logic we reuse existing
- * definitions, and simply don't want to make
- * out the situation. */
-
- if (root) {
- log_error("Identified multiple bootable Linux 0x83 partitions on\n"
- " %s\n"
- PARTITION_TABLE_BLURB, arg_image);
- return -EINVAL;
- }
-
- root_nr = nr;
+ if (generic)
+ multiple_generic = true;
+ else {
+ generic_rw = true;
- r = free_and_strdup(&root, node);
- if (r < 0)
- return log_oom();
+ r = free_and_strdup(&root, node);
+ if (r < 0)
+ return log_oom();
+ }
}
}
- if (!root && !secondary_root) {
- log_error("Failed to identify root partition in disk image\n"
- " %s\n"
- PARTITION_TABLE_BLURB, arg_image);
- return -EINVAL;
- }
-
if (root) {
*root_device = root;
root = NULL;
*root_device_rw = secondary_root_rw;
*secondary = true;
+ } else if (generic) {
+
+ /* There were no partitions with precise meanings
+ * around, but we found generic partitions. In this
+ * case, if there's only one, we can go ahead and boot
+ * it, otherwise we bail out, because we really cannot
+ * make any sense of it. */
+
+ if (multiple_generic) {
+ log_error("Identified multiple bootable Linux partitions on\n"
+ " %s\n"
+ PARTITION_TABLE_BLURB, arg_image);
+ return -EINVAL;
+ }
+
+ *root_device = generic;
+ generic = NULL;
+
+ *root_device_rw = generic_rw;
+ *secondary = false;
+ } else {
+ log_error("Failed to identify root partition in disk image\n"
+ " %s\n"
+ PARTITION_TABLE_BLURB, arg_image);
+ return -EINVAL;
}
if (home) {
if (r < 0)
goto finish;
+ r = setup_ipvlan(pid);
+ if (r < 0)
+ goto finish;
+
r = register_machine(pid, ifi);
if (r < 0)
goto finish;
strv_free(arg_setenv);
strv_free(arg_network_interfaces);
strv_free(arg_network_macvlan);
+ strv_free(arg_network_ipvlan);
strv_free(arg_bind);
strv_free(arg_bind_ro);
strv_free(arg_tmpfs);