#include <sched.h>
#include <unistd.h>
#include <sys/types.h>
-#include <sys/syscall.h>
#include <sys/mount.h>
-#include <sys/wait.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <sys/prctl.h>
#include <getopt.h>
-#include <termios.h>
-#include <sys/signalfd.h>
#include <grp.h>
#include <linux/fs.h>
-#include <sys/un.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <net/if.h>
#include <linux/veth.h>
#include <sys/personality.h>
#include <linux/loop.h>
-#include <poll.h>
#include <sys/file.h>
#ifdef HAVE_SELINUX
#include "util.h"
#include "mkdir.h"
#include "macro.h"
-#include "audit.h"
#include "missing.h"
#include "cgroup-util.h"
#include "strv.h"
#include "bus-util.h"
#include "bus-error.h"
#include "ptyfwd.h"
-#include "bus-kernel.h"
#include "env-util.h"
-#include "def.h"
#include "rtnl-util.h"
#include "udev-util.h"
#include "blkid-util.h"
static char **arg_property = NULL;
static uid_t arg_uid_shift = UID_INVALID, arg_uid_range = 0x10000U;
static bool arg_userns = false;
+static int arg_kill_signal = 0; /* --kill-signal=: signal sent to the container's PID 1 on SIGINT/SIGTERM; 0 means unset (no handlers installed), defaulted to SIGRTMIN+3 when arg_boot is set */
static void help(void) {
printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
" --capability=CAP In addition to the default, retain specified\n"
" capability\n"
" --drop-capability=CAP Drop the specified capability from the default set\n"
+ " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
" --link-journal=MODE Link up guest journal, one of no, auto, guest, host,\n"
" try-guest, try-host\n"
" -j Equivalent to --link-journal=try-guest\n"
ARG_TEMPLATE,
ARG_PROPERTY,
ARG_PRIVATE_USERS,
+ ARG_KILL_SIGNAL,
};
static const struct option options[] = {
{ "port", required_argument, NULL, 'p' },
{ "property", required_argument, NULL, ARG_PROPERTY },
{ "private-users", optional_argument, NULL, ARG_PRIVATE_USERS },
+ { "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
{}
};
arg_userns = true;
break;
+ case ARG_KILL_SIGNAL:
+ arg_kill_signal = signal_from_string_try_harder(optarg);
+ if (arg_kill_signal < 0) {
+ log_error("Cannot parse signal: %s", optarg);
+ return -EINVAL;
+ }
+
+ break;
+
case '?':
return -EINVAL;
arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
+ if (arg_boot && arg_kill_signal <= 0)
+ arg_kill_signal = SIGRTMIN+3;
+
return 1;
}
int r = 0;
for (k = 0; k < ELEMENTSOF(mount_table); k++) {
- _cleanup_free_ char *where = NULL;
-#ifdef HAVE_SELINUX
- _cleanup_free_ char *options = NULL;
-#endif
+ _cleanup_free_ char *where = NULL, *options = NULL;
const char *o;
int t;
if (mknod(to, st.st_mode, st.st_rdev) < 0)
return log_error_errno(errno, "mknod(%s) failed: %m", to);
+
+ if (arg_userns && arg_uid_shift != UID_INVALID)
+ if (lchown(to, arg_uid_shift, arg_uid_shift) < 0)
+ return log_error_errno(errno, "chown() of device node %s failed: %m", to);
}
}
if (symlink("pts/ptmx", p) < 0)
return log_error_errno(errno, "Failed to create /dev/ptmx symlink: %m");
+ if (arg_userns && arg_uid_shift != UID_INVALID)
+ if (lchown(p, arg_uid_shift, arg_uid_shift) < 0)
+ return log_error_errno(errno, "lchown() of symlink %s failed: %m", p);
+
return 0;
}
static int setup_seccomp(void) {
#ifdef HAVE_SECCOMP
- static const int blacklist[] = {
- SCMP_SYS(kexec_load),
- SCMP_SYS(open_by_handle_at),
- SCMP_SYS(iopl),
- SCMP_SYS(ioperm),
- SCMP_SYS(swapon),
- SCMP_SYS(swapoff),
- };
-
- static const int kmod_blacklist[] = {
- SCMP_SYS(init_module),
- SCMP_SYS(finit_module),
- SCMP_SYS(delete_module),
+ static const struct {
+ uint64_t capability;
+ int syscall_num;
+ } blacklist[] = {
+ { CAP_SYS_RAWIO, SCMP_SYS(iopl)},
+ { CAP_SYS_RAWIO, SCMP_SYS(ioperm)},
+ { CAP_SYS_BOOT, SCMP_SYS(kexec_load)},
+ { CAP_SYS_ADMIN, SCMP_SYS(swapon)},
+ { CAP_SYS_ADMIN, SCMP_SYS(swapoff)},
+ { CAP_SYS_ADMIN, SCMP_SYS(open_by_handle_at)},
+ { CAP_SYS_MODULE, SCMP_SYS(init_module)},
+ { CAP_SYS_MODULE, SCMP_SYS(finit_module)},
+ { CAP_SYS_MODULE, SCMP_SYS(delete_module)},
};
scmp_filter_ctx seccomp;
}
for (i = 0; i < ELEMENTSOF(blacklist); i++) {
- r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i], 0);
+ if (arg_retain & (1ULL << blacklist[i].capability))
+ continue;
+
+ r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), blacklist[i].syscall_num, 0);
if (r == -EFAULT)
continue; /* unknown syscall */
if (r < 0) {
}
}
- /* If the CAP_SYS_MODULE capability is not requested then
- * we'll block the kmod syscalls too */
- if (!(arg_retain & (1ULL << CAP_SYS_MODULE))) {
- for (i = 0; i < ELEMENTSOF(kmod_blacklist); i++) {
- r = seccomp_rule_add(seccomp, SCMP_ACT_ERRNO(EPERM), kmod_blacklist[i], 0);
- if (r == -EFAULT)
- continue; /* unknown syscall */
- if (r < 0) {
- log_error_errno(r, "Failed to block syscall: %m");
- goto finish;
- }
- }
- }
/*
Audit is broken in containers, much of the userspace audit
#define PARTITION_TABLE_BLURB \
"Note that the disk image needs to either contain only a single MBR partition of\n" \
- "type 0x83 that is marked bootable, or a sinlge GPT partition of type" \
+ "type 0x83 that is marked bootable, or a single GPT partition of type " \
"0FC63DAF-8483-4772-8E79-3D69D8477DE4 or follow\n" \
" http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/\n" \
"to be bootable with systemd-nspawn."
return 0;
#else
log_error("--image= is not supported, compiled without blkid support.");
- return -ENOTSUP;
+ return -EOPNOTSUPP;
#endif
}
if (streq(fstype, "crypto_LUKS")) {
log_error("nspawn currently does not support LUKS disk images.");
- return -ENOTSUP;
+ return -EOPNOTSUPP;
}
if (mount(what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), NULL) < 0)
return 0;
#else
log_error("--image= is not supported, compiled without blkid support.");
- return -ENOTSUP;
+ return -EOPNOTSUPP;
#endif
}
pid = PTR_TO_UINT32(userdata);
if (pid > 0) {
- if (kill(pid, SIGRTMIN+3) >= 0) {
+ if (kill(pid, arg_kill_signal) >= 0) {
log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
sd_event_source_set_userdata(s, NULL);
return 0;
}
if (arg_ephemeral) {
- char *np;
+ _cleanup_free_ char *np = NULL;
/* If the specified path is a mount point we
* generate the new snapshot immediately
r = btrfs_subvol_snapshot(arg_directory, np, arg_read_only, true);
if (r < 0) {
- free(np);
log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory);
goto finish;
}
free(arg_directory);
arg_directory = np;
+ np = NULL;
remove_subvol = true;
goto finish;
}
- if (arg_boot) {
+ if (arg_kill_signal > 0) {
/* Try to kill the init system on SIGINT or SIGTERM */
sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid));
sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid));