chiark / gitweb /
nspawn: block open_by_handle_at() and others via seccomp
author Lennart Poettering <lennart@poettering.net>
Mon, 30 Jun 2014 14:22:12 +0000 (16:22 +0200)
committer Lennart Poettering <lennart@poettering.net>
Mon, 30 Jun 2014 14:22:12 +0000 (16:22 +0200)
Let's protect ourselves against the recently reported Docker security
issue. Our man page makes clear that we do not make any security
promises anyway, but well, this one is easy to mitigate, so let's do it.
While we are at it, block a few more syscalls that are no good in
containers, too.

src/nspawn/nspawn.c

index fd61d07761fc9353852b6ca3ca7bee79a5878f28..656c1bf9f53b04aaf8775e4aea6e2b120561e162 100644 (file)
@@ -1864,22 +1864,25 @@ static int setup_macvlan(pid_t pid) {
         return 0;
 }
 
-static int audit_still_doesnt_work_in_containers(void) {
+static int setup_seccomp(void) {
 
 #ifdef HAVE_SECCOMP
+        static const int blacklist[] = {
+                SCMP_SYS(kexec_load),
+                SCMP_SYS(open_by_handle_at),
+                SCMP_SYS(init_module),
+                SCMP_SYS(finit_module),
+                SCMP_SYS(delete_module),
+                SCMP_SYS(iopl),
+                SCMP_SYS(ioperm),
+                SCMP_SYS(swapon),
+                SCMP_SYS(swapoff),
+        };
+
         scmp_filter_ctx seccomp;
+        unsigned i;
         int r;
 
-        /*
-           Audit is broken in containers, much of the userspace audit
-           hookup will fail if running inside a container. We don't
-           care and just turn off creation of audit sockets.
-
-           This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
-           with EAFNOSUPPORT which audit userspace uses as indication
-           that audit is disabled in the kernel.
-         */
-
         seccomp = seccomp_init(SCMP_ACT_ALLOW);
         if (!seccomp)
                 return log_oom();
@@ -1890,6 +1893,26 @@ static int audit_still_doesnt_work_in_containers(void) {
                 goto finish;
         }
 
+        for (i = 0; i < ELEMENTSOF(blacklist); i++) {
+                r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0);
+                if (r == -EFAULT)
+                        continue; /* unknown syscall */
+                if (r < 0) {
+                        log_error("Failed to block syscall: %s", strerror(-r));
+                        goto finish;
+                }
+        }
+
+        /*
+           Audit is broken in containers, much of the userspace audit
+           hookup will fail if running inside a container. We don't
+           care and just turn off creation of audit sockets.
+
+           This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
+           with EAFNOSUPPORT which audit userspace uses as indication
+           that audit is disabled in the kernel.
+         */
+
         r = seccomp_rule_add(
                         seccomp,
                         SCMP_ACT_ERRNO(EAFNOSUPPORT),
@@ -3050,7 +3073,7 @@ int main(int argc, char *argv[]) {
 
                         dev_setup(arg_directory);
 
-                        if (audit_still_doesnt_work_in_containers() < 0)
+                        if (setup_seccomp() < 0)
                                 goto child_fail;
 
                         if (setup_dev_console(arg_directory, console) < 0)