chiark / gitweb /
tree-wide: there is no ENOTSUP on linux
[elogind.git] / src / nspawn / nspawn.c
index 9967423dbc977d1a941f8a65204fff22cafe0b81..136933ec6cdb3d8c07f68da76b1c4c405d059c7a 100644 (file)
 #include <sched.h>
 #include <unistd.h>
 #include <sys/types.h>
-#include <sys/syscall.h>
 #include <sys/mount.h>
-#include <sys/wait.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <errno.h>
 #include <sys/prctl.h>
 #include <getopt.h>
-#include <termios.h>
-#include <sys/signalfd.h>
 #include <grp.h>
 #include <linux/fs.h>
-#include <sys/un.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
 #include <net/if.h>
 #include <linux/veth.h>
 #include <sys/personality.h>
 #include <linux/loop.h>
-#include <poll.h>
 #include <sys/file.h>
 
 #ifdef HAVE_SELINUX
@@ -66,7 +60,6 @@
 #include "util.h"
 #include "mkdir.h"
 #include "macro.h"
-#include "audit.h"
 #include "missing.h"
 #include "cgroup-util.h"
 #include "strv.h"
@@ -79,9 +72,7 @@
 #include "bus-util.h"
 #include "bus-error.h"
 #include "ptyfwd.h"
-#include "bus-kernel.h"
 #include "env-util.h"
-#include "def.h"
 #include "rtnl-util.h"
 #include "udev-util.h"
 #include "blkid-util.h"
@@ -190,6 +181,7 @@ static ExposePort *arg_expose_ports = NULL;
 static char **arg_property = NULL;
 static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
 static bool arg_userns = false;
+static int arg_kill_signal = 0;
 
 static void help(void) {
         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
@@ -238,6 +230,7 @@ static void help(void) {
                "     --capability=CAP       In addition to the default, retain specified\n"
                "                            capability\n"
                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
+               "     --kill-signal=SIGNAL   Select signal to use for shutting down PID 1\n"
                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host,\n"
                "                            try-guest, try-host\n"
                "  -j                        Equivalent to --link-journal=try-guest\n"
@@ -302,6 +295,7 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_TEMPLATE,
                 ARG_PROPERTY,
                 ARG_PRIVATE_USERS,
+                ARG_KILL_SIGNAL,
         };
 
         static const struct option options[] = {
@@ -341,6 +335,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "port",                  required_argument, NULL, 'p'                   },
                 { "property",              required_argument, NULL, ARG_PROPERTY          },
                 { "private-users",         optional_argument, NULL, ARG_PRIVATE_USERS     },
+                { "kill-signal",           required_argument, NULL, ARG_KILL_SIGNAL       },
                 {}
         };
 
@@ -776,6 +771,15 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_userns = true;
                         break;
 
+                case ARG_KILL_SIGNAL:
+                        arg_kill_signal = signal_from_string_try_harder(optarg);
+                        if (arg_kill_signal < 0) {
+                                log_error("Cannot parse signal: %s", optarg);
+                                return -EINVAL;
+                        }
+
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -838,6 +842,9 @@ static int parse_argv(int argc, char *argv[]) {
 
         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
 
+        if (arg_boot && arg_kill_signal <= 0)
+                arg_kill_signal = SIGRTMIN+3;
+
         return 1;
 }
 
@@ -1444,6 +1451,10 @@ static int copy_devnodes(const char *dest) {
 
                         if (mknod(to, st.st_mode, st.st_rdev) < 0)
                                 return log_error_errno(errno, "mknod(%s) failed: %m", to);
+
+                        if (arg_userns && arg_uid_shift != UID_INVALID)
+                                if (lchown(to, arg_uid_shift, arg_uid_shift) < 0)
+                                        return log_error_errno(errno, "chown() of device node %s failed: %m", to);
                 }
         }
 
@@ -1460,6 +1471,10 @@ static int setup_ptmx(const char *dest) {
         if (symlink("pts/ptmx", p) < 0)
                 return log_error_errno(errno, "Failed to create /dev/ptmx symlink: %m");
 
+        if (arg_userns && arg_uid_shift != UID_INVALID)
+                if (lchown(p, arg_uid_shift, arg_uid_shift) < 0)
+                        return log_error_errno(errno, "lchown() of symlink %s failed: %m", p);
+
         return 0;
 }
 
@@ -2552,19 +2567,19 @@ static int setup_ipvlan(pid_t pid) {
 static int setup_seccomp(void) {
 
 #ifdef HAVE_SECCOMP
-        static const int blacklist[] = {
-                SCMP_SYS(kexec_load),
-                SCMP_SYS(open_by_handle_at),
-                SCMP_SYS(iopl),
-                SCMP_SYS(ioperm),
-                SCMP_SYS(swapon),
-                SCMP_SYS(swapoff),
-        };
-
-        static const int kmod_blacklist[] = {
-                SCMP_SYS(init_module),
-                SCMP_SYS(finit_module),
-                SCMP_SYS(delete_module),
+        static const struct {
+                uint64_t capability;
+                int syscall_num;
+        } blacklist[] = {
+                { CAP_SYS_RAWIO,  SCMP_SYS(iopl)},
+                { CAP_SYS_RAWIO,  SCMP_SYS(ioperm)},
+                { CAP_SYS_BOOT,   SCMP_SYS(kexec_load)},
+                { CAP_SYS_ADMIN,  SCMP_SYS(swapon)},
+                { CAP_SYS_ADMIN,  SCMP_SYS(swapoff)},
+                { CAP_SYS_ADMIN,  SCMP_SYS(open_by_handle_at)},
+                { CAP_SYS_MODULE, SCMP_SYS(init_module)},
+                { CAP_SYS_MODULE, SCMP_SYS(finit_module)},
+                { CAP_SYS_MODULE, SCMP_SYS(delete_module)},
         };
 
         scmp_filter_ctx seccomp;
@@ -2582,7 +2597,10 @@ static int setup_seccomp(void) {
         }
 
         for (i = 0; i < ELEMENTSOF(blacklist); i++) {
-                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0);
+                if (arg_retain & (1ULL << blacklist[i].capability))
+                        continue;
+
+                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0);
                 if (r == -EFAULT)
                         continue; /* unknown syscall */
                 if (r < 0) {
@@ -2591,19 +2609,6 @@ static int setup_seccomp(void) {
                 }
         }
 
-        /* If the CAP_SYS_MODULE capability is not requested then
-         * we'll block the kmod syscalls too */
-        if (!(arg_retain & (1ULL << CAP_SYS_MODULE))) {
-                for (i = 0; i < ELEMENTSOF(kmod_blacklist); i++) {
-                        r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), kmod_blacklist[i], 0);
-                        if (r == -EFAULT)
-                                continue; /* unknown syscall */
-                        if (r < 0) {
-                                log_error_errno(r, "Failed to block syscall: %m");
-                                goto finish;
-                        }
-                }
-        }
 
         /*
            Audit is broken in containers, much of the userspace audit
@@ -2746,7 +2751,7 @@ static int setup_image(char **device_path, int *loop_nr) {
 
 #define PARTITION_TABLE_BLURB \
         "Note that the disk image needs to either contain only a single MBR partition of\n" \
-        "type 0x83 that is marked bootable, or a sinlge GPT partition of type" \
+        "type 0x83 that is marked bootable, or a single GPT partition of type " \
         "0FC63DAF-8483-4772-8E79-3D69D8477DE4 or follow\n" \
         "    http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" \
         "to be bootable with systemd-nspawn."
@@ -3120,7 +3125,7 @@ static int dissect_image(
         return 0;
 #else
         log_error("--image= is not supported, compiled without blkid support.");
-        return -ENOTSUP;
+        return -EOPNOTSUPP;
 #endif
 }
 
@@ -3175,7 +3180,7 @@ static int mount_device(const char *what, const char *where, const char *directo
 
         if (streq(fstype, "crypto_LUKS")) {
                 log_error("nspawn currently does not support LUKS disk images.");
-                return -ENOTSUP;
+                return -EOPNOTSUPP;
         }
 
         if (mount(what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), NULL) < 0)
@@ -3184,7 +3189,7 @@ static int mount_device(const char *what, const char *where, const char *directo
         return 0;
 #else
         log_error("--image= is not supported, compiled without blkid support.");
-        return -ENOTSUP;
+        return -EOPNOTSUPP;
 #endif
 }
 
@@ -3560,7 +3565,7 @@ static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo
 
         pid = PTR_TO_UINT32(userdata);
         if (pid > 0) {
-                if (kill(pid, SIGRTMIN+3) >= 0) {
+                if (kill(pid, arg_kill_signal) >= 0) {
                         log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
                         sd_event_source_set_userdata(s, NULL);
                         return 0;
@@ -3730,7 +3735,7 @@ int main(int argc, char *argv[]) {
                 }
 
                 if (arg_ephemeral) {
-                        char *np;
+                        _cleanup_free_ char *np = NULL;
 
                         /* If the specified path is a mount point we
                          * generate the new snapshot immediately
@@ -3760,13 +3765,13 @@ int main(int argc, char *argv[]) {
 
                         r = btrfs_subvol_snapshot(arg_directory, np, arg_read_only, true);
                         if (r < 0) {
-                                free(np);
                                 log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory);
                                 goto finish;
                         }
 
                         free(arg_directory);
                         arg_directory = np;
+                        np = NULL;
 
                         remove_subvol = true;
 
@@ -4357,7 +4362,7 @@ int main(int argc, char *argv[]) {
                                         goto finish;
                                 }
 
-                                if (arg_boot) {
+                                if (arg_kill_signal > 0) {
                                         /* Try to kill the init system on SIGINT or SIGTERM */
                                         sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid));
                                         sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid));