X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fnspawn%2Fnspawn.c;h=1d7511e2ab690aa4d5c5c5090bab1a3c131e6a97;hp=31e8b015df2d1895b7974f7d1d53873dc2735773;hb=db7feb7e9c436ec3ad3b90cf21bd43d8036aad0d;hpb=9eb977db5b89b44f254ab40c1876a76b7d7ea2d0

Review notes. This copy differs from the raw gitweb output in the following
ways; the line counts in every hunk header are unchanged:

  * Line breaks and indentation, which were collapsed in the captured text,
    have been reconstructed. Column padding inside the help strings and the
    option table is a best-effort reconstruction.
  * help(): -j is now documented as equivalent to --link-journal=guest,
    matching parse_argv(), which sets LINK_GUEST for 'j'.
  * setup_journal(): the read_one_line_file() error path jumps to "finish"
    instead of returning directly, so the string held in p is freed.
  * setup_journal(): failures whose error code is carried in r
    (readlink_and_make_absolute(), mkdir_p()) are logged with strerror(-r)
    instead of %m, like the "Failed to read machine ID" message.
  * setup_journal(): comment typo "validaty" corrected.

The blob hashes in the X-Git-Url and index lines refer to the unmodified
commit, not to this edited copy.

diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index 31e8b015d..1d7511e2a 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -52,6 +52,14 @@
 #include "strv.h"
 #include "path-util.h"
 #include "loopback-setup.h"
+#include "sd-id128.h"
+
+typedef enum LinkJournal {
+        LINK_NO,
+        LINK_AUTO,
+        LINK_HOST,
+        LINK_GUEST
+} LinkJournal;
 
 static char *arg_directory = NULL;
 static char *arg_user = NULL;
@@ -60,19 +68,46 @@ static char *arg_uuid = NULL;
 static bool arg_private_network = false;
 static bool arg_read_only = false;
 static bool arg_boot = false;
+static LinkJournal arg_link_journal = LINK_AUTO;
+static uint64_t arg_retain =
+        (1ULL << CAP_CHOWN) |
+        (1ULL << CAP_DAC_OVERRIDE) |
+        (1ULL << CAP_DAC_READ_SEARCH) |
+        (1ULL << CAP_FOWNER) |
+        (1ULL << CAP_FSETID) |
+        (1ULL << CAP_IPC_OWNER) |
+        (1ULL << CAP_KILL) |
+        (1ULL << CAP_LEASE) |
+        (1ULL << CAP_LINUX_IMMUTABLE) |
+        (1ULL << CAP_NET_BIND_SERVICE) |
+        (1ULL << CAP_NET_BROADCAST) |
+        (1ULL << CAP_NET_RAW) |
+        (1ULL << CAP_SETGID) |
+        (1ULL << CAP_SETFCAP) |
+        (1ULL << CAP_SETPCAP) |
+        (1ULL << CAP_SETUID) |
+        (1ULL << CAP_SYS_ADMIN) |
+        (1ULL << CAP_SYS_CHROOT) |
+        (1ULL << CAP_SYS_NICE) |
+        (1ULL << CAP_SYS_PTRACE) |
+        (1ULL << CAP_SYS_TTY_CONFIG) |
+        (1ULL << CAP_SYS_RESOURCE);
 
 static int help(void) {
 
         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
-               "  -h --help             Show this help\n"
-               "  -D --directory=NAME   Root directory for the container\n"
-               "  -b --boot             Boot up full system (i.e. invoke init)\n"
-               "  -u --user=USER        Run the command under specified user or uid\n"
-               "  -C --controllers=LIST Put the container in specified comma-separated cgroup hierarchies\n"
-               "     --uuid=UUID        Set a specific machine UUID for the container\n"
-               "     --private-network  Disable network in container\n"
-               "     --read-only        Mount the root directory read-only\n",
+               "  -h --help                Show this help\n"
+               "  -D --directory=NAME      Root directory for the container\n"
+               "  -b --boot                Boot up full system (i.e. invoke init)\n"
+               "  -u --user=USER           Run the command under specified user or uid\n"
+               "  -C --controllers=LIST    Put the container in specified comma-separated cgroup hierarchies\n"
+               "     --uuid=UUID           Set a specific machine UUID for the container\n"
+               "     --private-network     Disable network in container\n"
+               "     --read-only           Mount the root directory read-only\n"
+               "     --capability=CAP      In addition to the default, retain specified capability\n"
+               "     --link-journal=MODE   Link up guest journal, one of no, auto, guest, host\n"
+               "  -j                       Equivalent to --link-journal=guest\n",
                program_invocation_short_name);
 
         return 0;
@@ -83,7 +118,9 @@ static int parse_argv(int argc, char *argv[]) {
         enum {
                 ARG_PRIVATE_NETWORK = 0x100,
                 ARG_UUID,
-                ARG_READ_ONLY
+                ARG_READ_ONLY,
+                ARG_CAPABILITY,
+                ARG_LINK_JOURNAL
         };
 
         static const struct option options[] = {
@@ -95,6 +132,8 @@ static int parse_argv(int argc, char *argv[]) {
                 { "boot",            no_argument,       NULL, 'b'                 },
                 { "uuid",            required_argument, NULL, ARG_UUID            },
                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
+                { "capability",      required_argument, NULL, ARG_CAPABILITY      },
+                { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
                 { NULL,              0,                 NULL, 0                   }
         };
 
@@ -103,7 +142,7 @@ static int parse_argv(int argc, char *argv[]) {
         assert(argc >= 0);
         assert(argv);
 
-        while ((c = getopt_long(argc, argv, "+hD:u:C:b", options, NULL)) >= 0) {
+        while ((c = getopt_long(argc, argv, "+hD:u:C:bj", options, NULL)) >= 0) {
 
                 switch (c) {
 
@@ -157,6 +196,53 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_read_only = true;
                         break;
 
+                case ARG_CAPABILITY: {
+                        char *state, *word;
+                        size_t length;
+
+                        FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
+                                cap_value_t cap;
+                                char *t;
+
+                                t = strndup(word, length);
+                                if (!t) {
+                                        log_error("Out of memory.");
+                                        return -ENOMEM;
+                                }
+
+                                if (cap_from_name(t, &cap) < 0) {
+                                        log_error("Failed to parse capability %s.", t);
+                                        free(t);
+                                        return -EINVAL;
+                                }
+
+                                free(t);
+                                arg_retain |= 1ULL << (uint64_t) cap;
+                        }
+
+                        break;
+                }
+
+                case 'j':
+                        arg_link_journal = LINK_GUEST;
+                        break;
+
+                case ARG_LINK_JOURNAL:
+                        if (streq(optarg, "auto"))
+                                arg_link_journal = LINK_AUTO;
+                        else if (streq(optarg, "no"))
+                                arg_link_journal = LINK_NO;
+                        else if (streq(optarg, "guest"))
+                                arg_link_journal = LINK_GUEST;
+                        else if (streq(optarg, "host"))
+                                arg_link_journal = LINK_HOST;
+                        else {
+                                log_error("Failed to parse link journal mode %s", optarg);
+                                return -EINVAL;
+                        }
+
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -222,7 +308,7 @@ static int mount_all(const char *dest) {
                         continue;
                 }
 
-                mkdir_p(where, 0755);
+                mkdir_p_label(where, 0755);
 
                 if (mount(mount_table[k].what,
                           where,
@@ -543,50 +629,157 @@ static int setup_hostname(void) {
         return r;
 }
 
-static int drop_capabilities(void) {
-        static const unsigned long retain[] = {
-                CAP_CHOWN,
-                CAP_DAC_OVERRIDE,
-                CAP_DAC_READ_SEARCH,
-                CAP_FOWNER,
-                CAP_FSETID,
-                CAP_IPC_OWNER,
-                CAP_KILL,
-                CAP_LEASE,
-                CAP_LINUX_IMMUTABLE,
-                CAP_NET_BIND_SERVICE,
-                CAP_NET_BROADCAST,
-                CAP_NET_RAW,
-                CAP_SETGID,
-                CAP_SETFCAP,
-                CAP_SETPCAP,
-                CAP_SETUID,
-                CAP_SYS_ADMIN,
-                CAP_SYS_CHROOT,
-                CAP_SYS_NICE,
-                CAP_SYS_PTRACE,
-                CAP_SYS_TTY_CONFIG
-        };
+static int setup_journal(const char *directory) {
+        sd_id128_t machine_id;
+        char *p = NULL, *b = NULL, *l, *q = NULL, *d = NULL;
+        int r;
+
+        if (arg_link_journal == LINK_NO)
+                return 0;
+
+        p = strappend(directory, "/etc/machine-id");
+        if (!p) {
+                log_error("Out of memory");
+                r = -ENOMEM;
+                goto finish;
+        }
 
-        unsigned long l;
+        r = read_one_line_file(p, &b);
+        if (r == -ENOENT && arg_link_journal == LINK_AUTO) {
+                r = 0;
+                goto finish;
+        } else if (r < 0) {
+                log_error("Failed to read machine ID: %s", strerror(-r));
+                goto finish;
+        }
 
-        for (l = 0; l <= cap_last_cap(); l++) {
-                unsigned i;
+        l = strstrip(b);
+        if (isempty(l) && arg_link_journal == LINK_AUTO) {
+                r = 0;
+                goto finish;
+        }
 
-                for (i = 0; i < ELEMENTSOF(retain); i++)
-                        if (retain[i] == l)
-                                break;
+        /* Verify validity */
+        r = sd_id128_from_string(l, &machine_id);
+        if (r < 0) {
+                log_error("Failed to parse machine ID: %s", strerror(-r));
+                goto finish;
+        }
 
-                if (i < ELEMENTSOF(retain))
-                        continue;
+        free(p);
+        p = strappend("/var/log/journal/", l);
+        q = strjoin(directory, "/var/log/journal/", l, NULL);
+        if (!p || !q) {
+                log_error("Out of memory");
+                r = -ENOMEM;
+                goto finish;
+        }
 
-                if (prctl(PR_CAPBSET_DROP, l) < 0) {
-                        log_error("PR_CAPBSET_DROP failed: %m");
-                        return -errno;
+        if (path_is_mount_point(p, false) > 0 ||
+            path_is_mount_point(q, false) > 0) {
+                if (arg_link_journal != LINK_AUTO) {
+                        log_error("Journal already a mount point, refusing.");
+                        r = -EEXIST;
+                        goto finish;
                 }
+
+                r = 0;
+                goto finish;
         }
 
-        return 0;
+        r = readlink_and_make_absolute(p, &d);
+        if (r >= 0) {
+                if ((arg_link_journal == LINK_GUEST ||
+                     arg_link_journal == LINK_AUTO) &&
+                    path_equal(d, q)) {
+
+                        mkdir_p(q, 0755);
+
+                        r = 0;
+                        goto finish;
+                }
+
+                if (unlink(p) < 0) {
+                        log_error("Failed to remove symlink %s: %m", p);
+                        r = -errno;
+                        goto finish;
+                }
+        } else if (r == -EINVAL) {
+
+                if (arg_link_journal == LINK_GUEST &&
+                    rmdir(p) < 0) {
+
+                        if (errno == ENOTDIR)
+                                log_error("%s already exists and is neither symlink nor directory.", p);
+                        else {
+                                log_error("Failed to remove %s: %m", p);
+                                r = -errno;
+                        }
+
+                        goto finish;
+                }
+        } else if (r != -ENOENT) {
+                log_error("readlink(%s) failed: %s", p, strerror(-r));
+                goto finish;
+        }
+
+        if (arg_link_journal == LINK_GUEST) {
+
+                if (symlink(q, p) < 0) {
+                        log_error("Failed to symlink %s to %s: %m", q, p);
+                        r = -errno;
+                        goto finish;
+                }
+
+                mkdir_p(q, 0755);
+
+                r = 0;
+                goto finish;
+        }
+
+        if (arg_link_journal == LINK_HOST) {
+                r = mkdir_p(p, 0755);
+                if (r < 0) {
+                        log_error("Failed to create %s: %s", p, strerror(-r));
+                        goto finish;
+                }
+
+        } else if (access(p, F_OK) < 0) {
+                r = 0;
+                goto finish;
+        }
+
+        if (dir_is_empty(q) == 0) {
+                log_error("%s not empty.", q);
+                r = -ENOTEMPTY;
+                goto finish;
+        }
+
+        r = mkdir_p(q, 0755);
+        if (r < 0) {
+                log_error("Failed to create %s: %s", q, strerror(-r));
+                goto finish;
+        }
+
+        if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
+                log_error("Failed to bind mount journal from host into guest: %m");
+                r = -errno;
+                goto finish;
+        }
+
+        r = 0;
+
+finish:
+        free(p);
+        free(q);
+        free(d);
+        free(b);
+        return r;
+
+}
+
+static int drop_capabilities(void) {
+        return capability_bounding_set_drop(~arg_retain, false);
 }
 
 static int is_os_tree(const char *path) {
@@ -615,13 +808,15 @@ static int process_pty(int master, sigset_t *mask) {
         fd_nonblock(STDOUT_FILENO, 1);
         fd_nonblock(master, 1);
 
-        if ((signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC)) < 0) {
+        signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC);
+        if (signal_fd < 0) {
                 log_error("signalfd(): %m");
                 r = -errno;
                 goto finish;
         }
 
-        if ((ep = epoll_create1(EPOLL_CLOEXEC)) < 0) {
+        ep = epoll_create1(EPOLL_CLOEXEC);
+        if (ep < 0) {
                 log_error("Failed to create epoll: %m");
                 r = -errno;
                 goto finish;
@@ -657,7 +852,8 @@ static int process_pty(int master, sigset_t *mask) {
                 ssize_t k;
                 int i, nfds;
 
-                if ((nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1)) < 0) {
+                nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1);
+                if (nfds < 0) {
 
                         if (errno == EINTR || errno == EAGAIN)
                                 continue;
@@ -692,7 +888,8 @@ static int process_pty(int master, sigset_t *mask) {
                                 struct signalfd_siginfo sfsi;
                                 ssize_t n;
 
-                                if ((n = read(signal_fd, &sfsi, sizeof(sfsi))) != sizeof(sfsi)) {
+                                n = read(signal_fd, &sfsi, sizeof(sfsi));
+                                if (n != sizeof(sfsi)) {
 
                                         if (n >= 0) {
                                                 log_error("Failed to read from signalfd: invalid block size");
@@ -728,7 +925,8 @@ static int process_pty(int master, sigset_t *mask) {
 
                         if (stdin_readable && in_buffer_full < LINE_MAX) {
 
-                                if ((k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full)) < 0) {
+                                k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full);
+                                if (k < 0) {
 
                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                 stdin_readable = false;
@@ -743,7 +941,8 @@ static int process_pty(int master, sigset_t *mask) {
 
                         if (master_writable && in_buffer_full > 0) {
 
-                                if ((k = write(master, in_buffer, in_buffer_full)) < 0) {
+                                k = write(master, in_buffer, in_buffer_full);
+                                if (k < 0) {
 
                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                 master_writable = false;
@@ -762,7 +961,8 @@ static int process_pty(int master, sigset_t *mask) {
 
                         if (master_readable && out_buffer_full < LINE_MAX) {
 
-                                if ((k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full)) < 0) {
+                                k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full);
+                                if (k < 0) {
 
                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                 master_readable = false;
@@ -777,7 +977,8 @@ static int process_pty(int master, sigset_t *mask) {
 
                         if (stdout_writable && out_buffer_full > 0) {
 
-                                if ((k = write(STDOUT_FILENO, out_buffer, out_buffer_full)) < 0) {
+                                k = write(STDOUT_FILENO, out_buffer, out_buffer_full);
+                                if (k < 0) {
 
                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
                                                 stdout_writable = false;
@@ -822,7 +1023,8 @@ int main(int argc, char *argv[]) {
         log_parse_environment();
         log_open();
 
-        if ((r = parse_argv(argc, argv)) <= 0)
+        r = parse_argv(argc, argv);
+        if (r <= 0)
                 goto finish;
 
         if (arg_directory) {
@@ -861,7 +1063,8 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        if ((k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg)) < 0) {
+        k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg);
+        if (k < 0) {
                 log_error("Failed to determine current cgroup: %s", strerror(-k));
                 goto finish;
         }
@@ -877,18 +1080,20 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-        STRV_FOREACH(controller,arg_controllers) {
+        STRV_FOREACH(controller, arg_controllers) {
                 k = cg_create_and_attach(*controller, newcg, 0);
                 if (k < 0)
                         log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k));
         }
 
-        if ((master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY)) < 0) {
+        master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
+        if (master < 0) {
                 log_error("Failed to acquire pseudo tty: %m");
                 goto finish;
         }
 
-        if (!(console = ptsname(master))) {
+        console = ptsname(master);
+        if (!console) {
                 log_error("Failed to determine tty name: %m");
                 goto finish;
         }
@@ -970,15 +1175,26 @@ int main(int argc, char *argv[]) {
                 assert_se(sigemptyset(&mask) == 0);
                 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
 
-                if (setsid() < 0)
+                if (open_terminal(console, O_RDWR) != STDIN_FILENO ||
+                    dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
+                    dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
                         goto child_fail;
 
-                if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+                if (setsid() < 0) {
+                        log_error("setsid() failed: %m");
                         goto child_fail;
+                }
+
+                if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
+                        log_error("PR_SET_PDEATHSIG failed: %m");
+                        goto child_fail;
+                }
 
                 /* Mark / as private, in case somebody marked it shared */
-                if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0)
+                if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) {
+                        log_error("MS_PRIVATE|MS_REC failed: %m");
                         goto child_fail;
+                }
 
                 /* Turn directory into bind mount */
                 if (mount(arg_directory, arg_directory, "bind", MS_BIND, NULL) < 0) {
@@ -1012,16 +1228,14 @@ int main(int argc, char *argv[]) {
                 if (setup_resolv_conf(arg_directory) < 0)
                         goto child_fail;
 
+                if (setup_journal(arg_directory) < 0)
+                        goto child_fail;
+
                 if (chdir(arg_directory) < 0) {
                         log_error("chdir(%s) failed: %m", arg_directory);
                         goto child_fail;
                 }
 
-                if (open_terminal("dev/console", O_RDWR) != STDIN_FILENO ||
-                    dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
-                    dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
-                        goto child_fail;
-
                 if (mount(arg_directory, "/", "bind", MS_MOVE, NULL) < 0) {
                         log_error("mount(MS_BIND) failed: %m");
                         goto child_fail;
@@ -1041,23 +1255,25 @@ int main(int argc, char *argv[]) {
 
                 loopback_setup();
 
-                if (drop_capabilities() < 0)
+                if (drop_capabilities() < 0) {
+                        log_error("drop_capabilities() failed: %m");
                         goto child_fail;
+                }
 
                 if (arg_user) {
 
-                        if (get_user_creds((const char**)&arg_user, &uid, &gid, &home) < 0) {
+                        if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
                                 log_error("get_user_creds() failed: %m");
                                 goto child_fail;
                         }
 
-                        if (mkdir_parents(home, 0775) < 0) {
-                                log_error("mkdir_parents() failed: %m");
+                        if (mkdir_parents_label(home, 0775) < 0) {
+                                log_error("mkdir_parents_label() failed: %m");
                                 goto child_fail;
                         }
 
-                        if (safe_mkdir(home, 0775, uid, gid) < 0) {
-                                log_error("safe_mkdir() failed: %m");
+                        if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
+                                log_error("mkdir_safe_label() failed: %m");
                                 goto child_fail;
                         }
 