chiark / gitweb /
nspawn: add new --network-interface= switch to move an existing interface into the...
[elogind.git] / src / nspawn / nspawn.c
index 97ef6c799d4281705e97e4533c6f60feb7bda604..160b50b3ed83c8042c025da6bd46246ab11d7800 100644 (file)
 #include <sys/un.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
+#include <linux/rtnetlink.h>
 #include <sys/eventfd.h>
+#include <net/if.h>
+
 #ifdef HAVE_SELINUX
 #include <selinux/selinux.h>
 #endif
@@ -48,6 +51,7 @@
 #include "sd-daemon.h"
 #include "sd-bus.h"
 #include "sd-id128.h"
+#include "sd-rtnl.h"
 #include "log.h"
 #include "util.h"
 #include "mkdir.h"
@@ -68,6 +72,7 @@
 #include "bus-kernel.h"
 #include "env-util.h"
 #include "def.h"
+#include "rtnl-util.h"
 
 typedef enum LinkJournal {
         LINK_NO,
@@ -121,6 +126,7 @@ static bool arg_quiet = false;
 static bool arg_share_system = false;
 static bool arg_register = true;
 static bool arg_keep_unit = false;
+static char **arg_network_interfaces = NULL;
 
 static int help(void) {
 
@@ -141,6 +147,8 @@ static int help(void) {
                "                            Set the SELinux security context to be used by\n"
                "                            API/tmpfs file systems in the container\n"
                "     --private-network      Disable network in container\n"
+               "     --network-interface=INTERFACE\n"
+               "                            Assign an existing network interface to the container\n"
                "     --share-system         Share system namespaces with host\n"
                "     --read-only            Mount the root directory read-only\n"
                "     --capability=CAP       In addition to the default, retain specified\n"
@@ -176,32 +184,34 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_SETENV,
                 ARG_SHARE_SYSTEM,
                 ARG_REGISTER,
-                ARG_KEEP_UNIT
+                ARG_KEEP_UNIT,
+                ARG_NETWORK_INTERFACE
         };
 
         static const struct option options[] = {
-                { "help",                  no_argument,       NULL, 'h'                 },
-                { "version",               no_argument,       NULL, ARG_VERSION         },
-                { "directory",             required_argument, NULL, 'D'                 },
-                { "user",                  required_argument, NULL, 'u'                 },
-                { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK },
-                { "boot",                  no_argument,       NULL, 'b'                 },
-                { "uuid",                  required_argument, NULL, ARG_UUID            },
-                { "read-only",             no_argument,       NULL, ARG_READ_ONLY       },
-                { "capability",            required_argument, NULL, ARG_CAPABILITY      },
-                { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY },
-                { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL    },
-                { "bind",                  required_argument, NULL, ARG_BIND            },
-                { "bind-ro",               required_argument, NULL, ARG_BIND_RO         },
-                { "machine",               required_argument, NULL, 'M'                 },
-                { "slice",                 required_argument, NULL, 'S'                 },
-                { "setenv",                required_argument, NULL, ARG_SETENV          },
-                { "selinux-context",       required_argument, NULL, 'Z'                 },
-                { "selinux-apifs-context", required_argument, NULL, 'L'                 },
-                { "quiet",                 no_argument,       NULL, 'q'                 },
-                { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM    },
-                { "register",              required_argument, NULL, ARG_REGISTER        },
-                { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT       },
+                { "help",                  no_argument,       NULL, 'h'                   },
+                { "version",               no_argument,       NULL, ARG_VERSION           },
+                { "directory",             required_argument, NULL, 'D'                   },
+                { "user",                  required_argument, NULL, 'u'                   },
+                { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
+                { "boot",                  no_argument,       NULL, 'b'                   },
+                { "uuid",                  required_argument, NULL, ARG_UUID              },
+                { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
+                { "capability",            required_argument, NULL, ARG_CAPABILITY        },
+                { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
+                { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
+                { "bind",                  required_argument, NULL, ARG_BIND              },
+                { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
+                { "machine",               required_argument, NULL, 'M'                   },
+                { "slice",                 required_argument, NULL, 'S'                   },
+                { "setenv",                required_argument, NULL, ARG_SETENV            },
+                { "selinux-context",       required_argument, NULL, 'Z'                   },
+                { "selinux-apifs-context", required_argument, NULL, 'L'                   },
+                { "quiet",                 no_argument,       NULL, 'q'                   },
+                { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
+                { "register",              required_argument, NULL, ARG_REGISTER          },
+                { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
+                { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
                 {}
         };
 
@@ -240,6 +250,12 @@ static int parse_argv(int argc, char *argv[]) {
 
                         break;
 
+                case ARG_NETWORK_INTERFACE:
+                        if (strv_push(&arg_network_interfaces, optarg) < 0)
+                                return log_oom();
+
+                        /* fall through */
+
                 case ARG_PRIVATE_NETWORK:
                         arg_private_network = true;
                         break;
@@ -300,25 +316,29 @@ static int parse_argv(int argc, char *argv[]) {
                         size_t length;
 
                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
+                                _cleanup_free_ char *t;
                                 cap_value_t cap;
-                                char *t;
 
                                 t = strndup(word, length);
                                 if (!t)
                                         return log_oom();
 
-                                if (cap_from_name(t, &cap) < 0) {
-                                        log_error("Failed to parse capability %s.", t);
-                                        free(t);
-                                        return -EINVAL;
+                                if (streq(t, "all")) {
+                                        if (c == ARG_CAPABILITY)
+                                                arg_retain = (uint64_t) -1;
+                                        else
+                                                arg_retain = 0;
+                                } else {
+                                        if (cap_from_name(t, &cap) < 0) {
+                                                log_error("Failed to parse capability %s.", t);
+                                                return -EINVAL;
+                                        }
+
+                                        if (c == ARG_CAPABILITY)
+                                                arg_retain |= 1ULL << (uint64_t) cap;
+                                        else
+                                                arg_retain &= ~(1ULL << (uint64_t) cap);
                                 }
-
-                                free(t);
-
-                                if (c == ARG_CAPABILITY)
-                                        arg_retain |= 1ULL << (uint64_t) cap;
-                                else
-                                        arg_retain &= ~(1ULL << (uint64_t) cap);
                         }
 
                         break;
@@ -1198,15 +1218,86 @@ static int terminate_machine(pid_t pid) {
         return 0;
 }
 
-static bool audit_enabled(void) {
-        int fd;
+/* Resets the audit login UID of the calling process by writing
+ * 4294967295 (the value this function treats as "already reset") to
+ * /proc/self/loginuid.
+ *
+ * Nothing is done when arg_share_system is set, when
+ * /proc/self/loginuid does not exist, or when the value has already
+ * been reset.
+ *
+ * Returns 0 on success and in every skipped case. A failed write is
+ * deliberately non-fatal: it is logged, followed by a 5s pause, and 0 is
+ * still returned. A negative errno-style value is returned only if the
+ * current login UID cannot be read. */
+static int reset_audit_loginuid(void) {
+        _cleanup_free_ char *p = NULL;
+        int r;
+
+        if (arg_share_system)
+                return 0;
+
+        r = read_one_line_file("/proc/self/loginuid", &p);
+        /* Opening a nonexistent file for reading fails with ENOENT (EEXIST
+         * is only produced when creating files), so -ENOENT is the "no
+         * loginuid file available" case and there is nothing to reset.
+         * NOTE(review): relies on read_one_line_file() passing the open
+         * error through as a negative errno value. */
+        if (r == -ENOENT)
+                return 0;
+        if (r < 0) {
+                log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
+                return r;
+        }
+
+        /* Already reset? */
+        if (streq(p, "4294967295"))
+                return 0;
+
+        r = write_string_file("/proc/self/loginuid", "4294967295");
+        if (r < 0) {
+                /* Best effort only: warn loudly and give the user time to
+                 * read the message, but do not abort container start-up. */
+                log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
+                          "old and you have audit enabled. Note that the auditing subsystem is known to\n"
+                          "be incompatible with containers on old kernels. Please make sure to upgrade\n"
+                          "your kernel or to turn off auditing with 'audit=0' on the kernel command line before\n"
+                          "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
+
+                sleep(5);
+        }
+
+        return 0;
+}
+
+/* Moves every interface named in arg_network_interfaces into the network
+ * namespace of the process identified by pid, one rtnetlink request per
+ * interface.
+ *
+ * Returns 0 without doing anything when arg_private_network is unset or
+ * the interface list is empty. Returns 0 once all interfaces have been
+ * handled, or a negative errno-style value on the first failure (the
+ * remaining interfaces are then left untouched). */
+static int move_network_interfaces(pid_t pid) {
+        _cleanup_sd_rtnl_unref_ sd_rtnl *rtnl = NULL;
+        char **i;
+        int r;
+
+        if (!arg_private_network)
+                return 0;
+
+        if (strv_isempty(arg_network_interfaces))
+                return 0;
 
-        fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
-        if (fd >= 0) {
-                close_nointr_nofail(fd);
-                return true;
+        r = sd_rtnl_open(NETLINK_ROUTE, &rtnl);
+        if (r < 0) {
+                log_error("Failed to connect to netlink: %s", strerror(-r));
+                return r;
         }
-        return false;
+
+        STRV_FOREACH(i, arg_network_interfaces) {
+                _cleanup_sd_rtnl_message_unref_ sd_rtnl_message *m = NULL;
+                unsigned ifi;
+
+                /* Translate the interface name into its index; 0 signals
+                 * failure with the reason left in errno. */
+                ifi = if_nametoindex(*i);
+                if (ifi == 0) {
+                        log_error("Failed to resolve interface %s: %m", *i);
+                        return -errno;
+                }
+
+                /* Build an RTM_NEWLINK request addressed to that interface. */
+                r = sd_rtnl_message_link_new(RTM_NEWLINK, ifi, &m);
+                if (r < 0) {
+                        log_error("Failed to allocate netlink message: %s", strerror(-r));
+                        return r;
+                }
+
+                /* IFLA_NET_NS_PID names the target namespace by the PID of
+                 * a process living in it. */
+                r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
+                if (r < 0) {
+                        log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
+                        return r;
+                }
+
+                /* Send the request; no reply message is requested (NULL). */
+                r = sd_rtnl_call(rtnl, m, 0, NULL);
+                if (r < 0) {
+                        log_error("Failed to move interface to namespace: %s", strerror(-r));
+                        return r;
+                }
+        }
+
+        return 0;
 }
 
 int main(int argc, char *argv[]) {
@@ -1271,13 +1362,6 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if (arg_boot && audit_enabled()) {
-                log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
-                            "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
-                            "line before using systemd-nspawn. Sleeping for 5s...\n");
-                sleep(5);
-        }
-
         if (path_equal(arg_directory, "/")) {
                 log_error("Spawning container on root directory not supported.");
                 goto finish;
@@ -1436,6 +1520,9 @@ int main(int argc, char *argv[]) {
                                 goto child_fail;
                         }
 
+                        if (reset_audit_loginuid() < 0)
+                                goto child_fail;
+
                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
                                 log_error("PR_SET_PDEATHSIG failed: %m");
                                 goto child_fail;
@@ -1678,6 +1765,10 @@ int main(int argc, char *argv[]) {
                 if (r < 0)
                         goto finish;
 
+                r = move_network_interfaces(pid);
+                if (r < 0)
+                        goto finish;
+
                 eventfd_write(sync_fd, 1);
                 close_nointr_nofail(sync_fd);
                 sync_fd = -1;
@@ -1748,6 +1839,7 @@ finish:
         free(arg_directory);
         free(arg_machine);
         free(arg_setenv);
+        free(arg_network_interfaces);
 
         return r;
 }