chiark / gitweb /
nspawn: fix detection of missing /proc/self/loginuid
[elogind.git] / src / nspawn / nspawn.c
index 01e8611e86651d8943c9e2296af91dfd23650f1a..92b67286762350c9b53fe555a404dd8f74235e3c 100644 (file)
@@ -43,6 +43,7 @@
 #include <sys/eventfd.h>
 #include <net/if.h>
 #include <linux/veth.h>
+#include <sys/personality.h>
 
 #ifdef HAVE_SELINUX
 #include <selinux/selinux.h>
 #include "rtnl-util.h"
 #include "udev-util.h"
 
+#ifdef HAVE_SECCOMP
+#include "seccomp-util.h"
+#endif
+
 typedef enum LinkJournal {
         LINK_NO,
         LINK_AUTO,
@@ -90,8 +95,8 @@ static char *arg_directory = NULL;
 static char *arg_user = NULL;
 static sd_id128_t arg_uuid = {};
 static char *arg_machine = NULL;
-static char *arg_selinux_context = NULL;
-static char *arg_selinux_apifs_context = NULL;
+static const char *arg_selinux_context = NULL;
+static const char *arg_selinux_apifs_context = NULL;
 static const char *arg_slice = NULL;
 static bool arg_private_network = false;
 static bool arg_read_only = false;
@@ -132,8 +137,10 @@ static bool arg_share_system = false;
 static bool arg_register = true;
 static bool arg_keep_unit = false;
 static char **arg_network_interfaces = NULL;
+static char **arg_network_macvlan = NULL;
 static bool arg_network_veth = false;
-static char *arg_network_bridge = NULL;
+static const char *arg_network_bridge = NULL;
+static unsigned long arg_personality = 0xffffffffLU;
 
 static int help(void) {
 
@@ -152,6 +159,9 @@ static int help(void) {
                "     --network-interface=INTERFACE\n"
                "                            Assign an existing network interface to the\n"
                "                            container\n"
+               "     --network-macvlan=INTERFACE\n"
+               "                            Create a macvlan network interface based on an\n"
+               "                            existing network interface to the container\n"
                "     --network-veth         Add a virtual ethernet connection between host\n"
                "                            and container\n"
                "     --network-bridge=INTERFACE\n"
@@ -200,8 +210,10 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_REGISTER,
                 ARG_KEEP_UNIT,
                 ARG_NETWORK_INTERFACE,
+                ARG_NETWORK_MACVLAN,
                 ARG_NETWORK_VETH,
                 ARG_NETWORK_BRIDGE,
+                ARG_PERSONALITY,
         };
 
         static const struct option options[] = {
@@ -228,8 +240,10 @@ static int parse_argv(int argc, char *argv[]) {
                 { "register",              required_argument, NULL, ARG_REGISTER          },
                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
+                { "network-macvlan",       required_argument, NULL, ARG_NETWORK_MACVLAN   },
                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
+                { "personality",           required_argument, NULL, ARG_PERSONALITY       },
                 {}
         };
 
@@ -270,9 +284,7 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_NETWORK_BRIDGE:
-                        arg_network_bridge = strdup(optarg);
-                        if (!arg_network_bridge)
-                                return log_oom();
+                        arg_network_bridge = optarg;
 
                         /* fall through */
 
@@ -282,7 +294,14 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case ARG_NETWORK_INTERFACE:
-                        if (strv_push(&arg_network_interfaces, optarg) < 0)
+                        if (strv_extend(&arg_network_interfaces, optarg) < 0)
+                                return log_oom();
+
+                        arg_private_network = true;
+                        break;
+
+                case ARG_NETWORK_MACVLAN:
+                        if (strv_extend(&arg_network_macvlan, optarg) < 0)
                                 return log_oom();
 
                         /* fall through */
@@ -304,10 +323,7 @@ static int parse_argv(int argc, char *argv[]) {
                         break;
 
                 case 'S':
-                        arg_slice = strdup(optarg);
-                        if (!arg_slice)
-                                return log_oom();
-
+                        arg_slice = optarg;
                         break;
 
                 case 'M':
@@ -470,6 +486,16 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_keep_unit = true;
                         break;
 
+                case ARG_PERSONALITY:
+
+                        arg_personality = personality_from_string(optarg);
+                        if (arg_personality == 0xffffffffLU) {
+                                log_error("Unknown or unsupported personality '%s'.", optarg);
+                                return -EINVAL;
+                        }
+
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -725,7 +751,7 @@ static int setup_resolv_conf(const char *dest) {
 
 static int setup_boot_id(const char *dest) {
         _cleanup_free_ char *from = NULL, *to = NULL;
-        sd_id128_t rnd;
+        sd_id128_t rnd = {};
         char as_uuid[37];
         int r;
 
@@ -1170,22 +1196,86 @@ static int register_machine(pid_t pid) {
                                 (uint32_t) pid,
                                 strempty(arg_directory));
         } else {
-                r = sd_bus_call_method(
+                _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
+
+                r = sd_bus_message_new_method_call(
                                 bus,
+                                &m,
                                 "org.freedesktop.machine1",
                                 "/org/freedesktop/machine1",
                                 "org.freedesktop.machine1.Manager",
-                                "CreateMachine",
-                                &error,
-                                NULL,
-                                "sayssusa(sv)",
+                                "CreateMachine");
+                if (r < 0) {
+                        log_error("Failed to create message: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_bus_message_append(
+                                m,
+                                "sayssus",
                                 arg_machine,
                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
                                 "nspawn",
                                 "container",
                                 (uint32_t) pid,
-                                strempty(arg_directory),
-                                !isempty(arg_slice), "Slice", "s", arg_slice);
+                                strempty(arg_directory));
+                if (r < 0) {
+                        log_error("Failed to append message arguments: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_bus_message_open_container(m, 'a', "(sv)");
+                if (r < 0) {
+                        log_error("Failed to open container: %s", strerror(-r));
+                        return r;
+                }
+
+                if (!isempty(arg_slice)) {
+                        r = sd_bus_message_append(m, "(sv)", "Slice", "s", arg_slice);
+                        if (r < 0) {
+                                log_error("Failed to append slice: %s", strerror(-r));
+                                return r;
+                        }
+                }
+
+                r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
+                if (r < 0) {
+                        log_error("Failed to add device policy: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 8,
+                                          /* Allow the container to
+                                           * access and create the API
+                                           * device nodes, so that
+                                           * PrivateDevices= in the
+                                           * container can work
+                                           * fine */
+                                          "/dev/null", "rwm",
+                                          "/dev/zero", "rwm",
+                                          "/dev/full", "rwm",
+                                          "/dev/random", "rwm",
+                                          "/dev/urandom", "rwm",
+                                          "/dev/tty", "rwm",
+                                          /* Allow the container
+                                           * access to ptys. However,
+                                           * do not permit the
+                                           * container to ever create
+                                           * these device nodes. */
+                                          "/dev/pts/ptmx", "rw",
+                                          "char-pts", "rw");
+                if (r < 0) {
+                        log_error("Failed to add device whitelist: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_bus_message_close_container(m);
+                if (r < 0) {
+                        log_error("Failed to close container: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_bus_call(bus, m, 0, &error, NULL);
         }
 
         if (r < 0) {
@@ -1259,7 +1349,7 @@ static int reset_audit_loginuid(void) {
                 return 0;
 
         r = read_one_line_file("/proc/self/loginuid", &p);
-        if (r == -EEXIST)
+        if (r == -ENOENT)
                 return 0;
         if (r < 0) {
                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
@@ -1284,7 +1374,7 @@ static int reset_audit_loginuid(void) {
         return 0;
 }
 
-static int setup_veth(pid_t pid, char iface_name[]) {
+static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ]) {
         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
         int r;
@@ -1295,15 +1385,22 @@ static int setup_veth(pid_t pid, char iface_name[]) {
         if (!arg_network_veth)
                 return 0;
 
+        /* Use two different interface name prefixes depending on
+         * whether we are in bridge mode or not. */
+        if (arg_network_bridge)
+                memcpy(iface_name, "vb-", 3);
+        else
+                memcpy(iface_name, "ve-", 3);
+
         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
 
-        r = sd_rtnl_open(0, &rtnl);
+        r = sd_rtnl_open(&rtnl, 0);
         if (r < 0) {
                 log_error("Failed to connect to netlink: %s", strerror(-r));
                 return r;
         }
 
-        r = sd_rtnl_message_new_link(RTM_NEWLINK, 0, &m);
+        r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
         if (r < 0) {
                 log_error("Failed to allocate netlink message: %s", strerror(-r));
                 return r;
@@ -1398,13 +1495,13 @@ static int setup_bridge(const char veth_name[]) {
                 return -errno;
         }
 
-        r = sd_rtnl_open(0, &rtnl);
+        r = sd_rtnl_open(&rtnl, 0);
         if (r < 0) {
                 log_error("Failed to connect to netlink: %s", strerror(-r));
                 return r;
         }
 
-        r = sd_rtnl_message_new_link(RTM_SETLINK, 0, &m);
+        r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
         if (r < 0) {
                 log_error("Failed to allocate netlink message: %s", strerror(-r));
                 return r;
@@ -1431,6 +1528,32 @@ static int setup_bridge(const char veth_name[]) {
         return 0;
 }
 
+static int parse_interface(struct udev *udev, const char *name) { /* Resolve interface `name` to its index; returns the index (> 0) or a negative errno-style code */
+        _cleanup_udev_device_unref_ struct udev_device *d = NULL;
+        char ifi_str[2 + DECIMAL_STR_MAX(int)]; /* holds the "n<index>" device id built below; presumably sized for 'n', the digits and NUL -- TODO confirm DECIMAL_STR_MAX */
+        int ifi;
+
+        ifi = (int) if_nametoindex(name);
+        if (ifi <= 0) { /* a non-positive result is treated as "could not resolve" */
+                log_error("Failed to resolve interface %s: %m", name);
+                return -errno; /* NOTE(review): relies on errno still being intact after log_error() -- confirm */
+        }
+
+        sprintf(ifi_str, "n%i", ifi); /* device id passed to udev: 'n' followed by the interface index */
+        d = udev_device_new_from_device_id(udev, ifi_str);
+        if (!d) {
+                log_error("Failed to get udev device for interface %s: %m", name);
+                return -errno; /* NOTE(review): same errno-after-log_error() assumption as above */
+        }
+
+        if (udev_device_get_is_initialized(d) <= 0) { /* both "not initialized" (0) and an error (< 0) are reported as -EBUSY */
+                log_error("Network interface %s is not initialized yet.", name);
+                return -EBUSY;
+        }
+
+        return ifi; /* success: hand the interface index back to the caller */
+}
+
 static int move_network_interfaces(pid_t pid) {
         _cleanup_udev_unref_ struct udev *udev = NULL;
         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
@@ -1443,7 +1566,7 @@ static int move_network_interfaces(pid_t pid) {
         if (strv_isempty(arg_network_interfaces))
                 return 0;
 
-        r = sd_rtnl_open(0, &rtnl);
+        r = sd_rtnl_open(&rtnl, 0);
         if (r < 0) {
                 log_error("Failed to connect to netlink: %s", strerror(-r));
                 return r;
@@ -1457,43 +1580,136 @@ static int move_network_interfaces(pid_t pid) {
 
         STRV_FOREACH(i, arg_network_interfaces) {
                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
-                _cleanup_udev_device_unref_ struct udev_device *d = NULL;
-                char ifi_str[2 + DECIMAL_STR_MAX(int)];
                 int ifi;
 
-                ifi = (int) if_nametoindex(*i);
-                if (ifi <= 0) {
-                        log_error("Failed to resolve interface %s: %m", *i);
-                        return -errno;
+                ifi = parse_interface(udev, *i);
+                if (ifi < 0)
+                        return ifi;
+
+                r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
+                if (r < 0) {
+                        log_error("Failed to allocate netlink message: %s", strerror(-r));
+                        return r;
                 }
 
-                sprintf(ifi_str, "n%i", ifi);
-                d = udev_device_new_from_device_id(udev, ifi_str);
-                if (!d) {
-                        log_error("Failed to get udev device for interface %s: %m", *i);
-                        return -errno;
+                r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
+                if (r < 0) {
+                        log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
+                        return r;
                 }
 
-                if (udev_device_get_is_initialized(d) <= 0) {
-                        log_error("Network interface %s is not initialized yet.", *i);
-                        return -EBUSY;
+                r = sd_rtnl_call(rtnl, m, 0, NULL);
+                if (r < 0) {
+                        log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
+                        return r;
                 }
+        }
+
+        return 0;
+}
+
+static int setup_macvlan(pid_t pid) {
+        _cleanup_udev_unref_ struct udev *udev = NULL;
+        _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
+        char **i;
+        int r;
+
+        if (!arg_private_network)
+                return 0;
+
+        if (strv_isempty(arg_network_macvlan))
+                return 0;
+
+        r = sd_rtnl_open(&rtnl, 0);
+        if (r < 0) {
+                log_error("Failed to connect to netlink: %s", strerror(-r));
+                return r;
+        }
 
-                r = sd_rtnl_message_new_link(RTM_NEWLINK, ifi, &m);
+        udev = udev_new();
+        if (!udev) {
+                log_error("Failed to connect to udev.");
+                return -ENOMEM;
+        }
+
+        STRV_FOREACH(i, arg_network_macvlan) {
+                _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
+                _cleanup_free_ char *n = NULL;
+                int ifi;
+
+                ifi = parse_interface(udev, *i);
+                if (ifi < 0)
+                        return ifi;
+
+                r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
                 if (r < 0) {
                         log_error("Failed to allocate netlink message: %s", strerror(-r));
                         return r;
                 }
 
+                r = sd_rtnl_message_append_u32(m, IFLA_LINK, ifi);
+                if (r < 0) {
+                        log_error("Failed to add netlink interface index: %s", strerror(-r));
+                        return r;
+                }
+
+                n = strappend("mv-", *i);
+                if (!n)
+                        return log_oom();
+
+                strshorten(n, IFNAMSIZ-1);
+
+                r = sd_rtnl_message_append_string(m, IFLA_IFNAME, n);
+                if (r < 0) {
+                        log_error("Failed to add netlink interface name: %s", strerror(-r));
+                        return r;
+                }
+
                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
                 if (r < 0) {
-                        log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
+                        log_error("Failed to add netlink namespace field: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
+                if (r < 0) {
+                        log_error("Failed to open netlink container: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "macvlan");
+                if (r < 0) {
+                        log_error("Failed to append netlink kind: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
+                if (r < 0) {
+                        log_error("Failed to open netlink container: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_rtnl_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
+                if (r < 0) {
+                        log_error("Failed to append macvlan mode: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_rtnl_message_close_container(m);
+                if (r < 0) {
+                        log_error("Failed to close netlink container: %s", strerror(-r));
+                        return r;
+                }
+
+                r = sd_rtnl_message_close_container(m);
+                if (r < 0) {
+                        log_error("Failed to close netlink container: %s", strerror(-r));
                         return r;
                 }
 
                 r = sd_rtnl_call(rtnl, m, 0, NULL);
                 if (r < 0) {
-                        log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
+                        log_error("Failed to add new macvlan interfaces: %s", strerror(-r));
                         return r;
                 }
         }
@@ -1521,7 +1737,13 @@ static int audit_still_doesnt_work_in_containers(void) {
         if (!seccomp)
                 return log_oom();
 
-        r = seccomp_rule_add_exact(
+        r = seccomp_add_secondary_archs(seccomp);
+        if (r < 0) {
+                log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
+                goto finish;
+        }
+
+        r = seccomp_rule_add(
                         seccomp,
                         SCMP_ACT_ERRNO(EAFNOSUPPORT),
                         SCMP_SYS(socket),
@@ -1554,7 +1776,7 @@ finish:
 
 int main(int argc, char *argv[]) {
 
-        _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1, netns_fd = -1;
+        _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
         _cleanup_free_ char *kdbus_domain = NULL;
         _cleanup_fdset_free_ FDSet *fds = NULL;
@@ -1563,7 +1785,7 @@ int main(int argc, char *argv[]) {
         int n_fd_passed;
         pid_t pid = 0;
         sigset_t mask;
-        char veth_name[IFNAMSIZ] = "ve-";
+        char veth_name[IFNAMSIZ];
 
         log_parse_environment();
         log_open();
@@ -1973,6 +2195,13 @@ int main(int argc, char *argv[]) {
 
                         setup_hostname();
 
+                        if (arg_personality != 0xffffffffLU) {
+                                if (personality(arg_personality) < 0) {
+                                        log_error("personality() failed: %m");
+                                        goto child_fail;
+                                }
+                        }
+
                         eventfd_read(sync_fd, &x);
                         close_nointr_nofail(sync_fd);
                         sync_fd = -1;
@@ -1992,7 +2221,7 @@ int main(int argc, char *argv[]) {
 
 #ifdef HAVE_SELINUX
                         if (arg_selinux_context)
-                                if (setexeccon(arg_selinux_context) < 0)
+                                if (setexeccon((security_context_t) arg_selinux_context) < 0)
                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
 #endif
                         if (arg_boot) {
@@ -2046,6 +2275,10 @@ int main(int argc, char *argv[]) {
                 if (r < 0)
                         goto finish;
 
+                r = setup_macvlan(pid);
+                if (r < 0)
+                        goto finish;
+
                 eventfd_write(sync_fd, 1);
                 close_nointr_nofail(sync_fd);
                 sync_fd = -1;
@@ -2115,8 +2348,12 @@ finish:
 
         free(arg_directory);
         free(arg_machine);
-        free(arg_setenv);
-        free(arg_network_interfaces);
+        free(arg_user);
+        strv_free(arg_setenv);
+        strv_free(arg_network_interfaces);
+        strv_free(arg_network_macvlan);
+        strv_free(arg_bind);
+        strv_free(arg_bind_ro);
 
         return r;
 }