X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fnspawn%2Fnspawn.c;h=ffd8fd4ef50a74ac22ebb8fb6c590f1abd09ea85;hp=90c8b94248b406c3baefca74fd272e2a6c609462;hb=b2896c905bef7be7ed9a760d9d61aa6ad0f614a3;hpb=4d46fec56db73e1d2d01076792f9b8f3231d5cb1 diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 90c8b9424..ffd8fd4ef 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -50,12 +50,39 @@ #include "missing.h" #include "cgroup-util.h" #include "strv.h" +#include "path-util.h" #include "loopback-setup.h" static char *arg_directory = NULL; static char *arg_user = NULL; static char **arg_controllers = NULL; +static char *arg_uuid = NULL; static bool arg_private_network = false; +static bool arg_read_only = false; +static bool arg_boot = false; +static uint64_t arg_retain = + (1ULL << CAP_CHOWN) | + (1ULL << CAP_DAC_OVERRIDE) | + (1ULL << CAP_DAC_READ_SEARCH) | + (1ULL << CAP_FOWNER) | + (1ULL << CAP_FSETID) | + (1ULL << CAP_IPC_OWNER) | + (1ULL << CAP_KILL) | + (1ULL << CAP_LEASE) | + (1ULL << CAP_LINUX_IMMUTABLE) | + (1ULL << CAP_NET_BIND_SERVICE) | + (1ULL << CAP_NET_BROADCAST) | + (1ULL << CAP_NET_RAW) | + (1ULL << CAP_SETGID) | + (1ULL << CAP_SETFCAP) | + (1ULL << CAP_SETPCAP) | + (1ULL << CAP_SETUID) | + (1ULL << CAP_SYS_ADMIN) | + (1ULL << CAP_SYS_CHROOT) | + (1ULL << CAP_SYS_NICE) | + (1ULL << CAP_SYS_PTRACE) | + (1ULL << CAP_SYS_TTY_CONFIG) | + (1ULL << CAP_SYS_RESOURCE); static int help(void) { @@ -63,9 +90,13 @@ static int help(void) { "Spawn a minimal namespace container for debugging, testing and building.\n\n" " -h --help Show this help\n" " -D --directory=NAME Root directory for the container\n" + " -b --boot Boot up full system (i.e. invoke init)\n" " -u --user=USER Run the command under specified user or uid\n" " -C --controllers=LIST Put the container in specified comma-separated cgroup hierarchies\n" - " --private-network Disable network in container\n", + " --uuid=UUID Set a specific machine UUID for the container\n" + " --private-network Disable network in container\n" + " --read-only Mount the root directory read-only\n" + " --capability=CAP In addition to the default, retain specified capability\n", program_invocation_short_name); return 0; @@ -74,7 +105,10 @@ static int help(void) { static int parse_argv(int argc, char *argv[]) { enum { - ARG_PRIVATE_NETWORK = 0x100 + ARG_PRIVATE_NETWORK = 0x100, + ARG_UUID, + ARG_READ_ONLY, + ARG_CAPABILITY }; static const struct option options[] = { @@ -83,6 +117,10 @@ static int parse_argv(int argc, char *argv[]) { { "user", required_argument, NULL, 'u' }, { "controllers", required_argument, NULL, 'C' }, { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK }, + { "boot", no_argument, NULL, 'b' }, + { "uuid", required_argument, NULL, ARG_UUID }, + { "read-only", no_argument, NULL, ARG_READ_ONLY }, + { "capability", required_argument, NULL, ARG_CAPABILITY }, { NULL, 0, NULL, 0 } }; @@ -91,7 +129,7 @@ static int parse_argv(int argc, char *argv[]) { assert(argc >= 0); assert(argv); - while ((c = getopt_long(argc, argv, "+hD:u:C:", options, NULL)) >= 0) { + while ((c = getopt_long(argc, argv, "+hD:u:C:b", options, NULL)) >= 0) { switch (c) { @@ -101,8 +139,9 @@ static int parse_argv(int argc, char *argv[]) { case 'D': free(arg_directory); - if (!(arg_directory = strdup(optarg))) { - log_error("Failed to duplicate root directory."); + arg_directory = canonicalize_file_name(optarg); + if (!arg_directory) { + log_error("Failed to canonicalize root directory."); return -ENOMEM; } @@ -132,6 +171,45 @@ static int parse_argv(int argc, char *argv[]) { arg_private_network = true; break; + case 'b': + arg_boot = true; + break; + + case ARG_UUID: + arg_uuid = optarg; + break; + + case ARG_READ_ONLY: + arg_read_only = true; + break; + + case ARG_CAPABILITY: { + char *state, *word; + size_t length; + + FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) { + cap_value_t cap; + char *t; + + t = strndup(word, length); + if (!t) { + log_error("Out of memory."); + return -ENOMEM; + } + + if (cap_from_name(t, &cap) < 0) { + log_error("Failed to parse capability %s.", t); + free(t); + return -EINVAL; + } + + free(t); + arg_retain |= 1ULL << (uint64_t) cap; + } + + break; + } + case '?': return -EINVAL; @@ -197,7 +275,7 @@ static int mount_all(const char *dest) { continue; } - mkdir_p(where, 0755); + mkdir_p_label(where, 0755); if (mount(mount_table[k].what, where, @@ -247,6 +325,28 @@ static int setup_timezone(const char *dest) { return 0; } +static int setup_resolv_conf(const char *dest) { + char *where; + + assert(dest); + + if (arg_private_network) + return 0; + + /* Fix resolv.conf, if possible */ + if (asprintf(&where, "%s/etc/resolv.conf", dest) < 0) { + log_error("Out of memory"); + return -ENOMEM; + } + + if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0) + mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); + + free(where); + + return 0; +} + static int copy_devnodes(const char *dest) { static const char devnodes[] = @@ -394,6 +494,13 @@ static int setup_kmsg(const char *dest, int kmsg_socket) { u = umask(0000); + /* We create the kmsg FIFO as /dev/kmsg, but immediately + * delete it after bind mounting it to /proc/kmsg. While FIFOs + * on the reading side behave very similar to /proc/kmsg, + * their writing side behaves differently from /dev/kmsg in + * that writing blocks when nothing is reading. In order to + * avoid any problems with containers deadlocking due to this + * we simply make /dev/kmsg unavailable to the container. */ if (asprintf(&from, "%s/dev/kmsg", dest) < 0) { log_error("Out of memory"); r = -ENOMEM; @@ -456,6 +563,9 @@ static int setup_kmsg(const char *dest, int kmsg_socket) { goto finish; } + /* And now make the FIFO unavailable as /dev/kmsg... */ + unlink(from); + finish: free(from); free(to); @@ -464,50 +574,30 @@ finish: return r; } -static int drop_capabilities(void) { - static const unsigned long retain[] = { - CAP_CHOWN, - CAP_DAC_OVERRIDE, - CAP_DAC_READ_SEARCH, - CAP_FOWNER, - CAP_FSETID, - CAP_IPC_OWNER, - CAP_KILL, - CAP_LEASE, - CAP_LINUX_IMMUTABLE, - CAP_NET_BIND_SERVICE, - CAP_NET_BROADCAST, - CAP_NET_RAW, - CAP_SETGID, - CAP_SETFCAP, - CAP_SETPCAP, - CAP_SETUID, - CAP_SYS_ADMIN, - CAP_SYS_CHROOT, - CAP_SYS_NICE, - CAP_SYS_PTRACE, - CAP_SYS_TTY_CONFIG - }; - - unsigned long l; +static int setup_hostname(void) { + char *hn; + int r = 0; - for (l = 0; l <= cap_last_cap(); l++) { - unsigned i; + hn = path_get_file_name(arg_directory); + if (hn) { + hn = strdup(hn); + if (!hn) + return -ENOMEM; - for (i = 0; i < ELEMENTSOF(retain); i++) - if (retain[i] == l) - break; + hostname_cleanup(hn); - if (i < ELEMENTSOF(retain)) - continue; + if (!isempty(hn)) + if (sethostname(hn, strlen(hn)) < 0) + r = -errno; - if (prctl(PR_CAPBSET_DROP, l) < 0) { - log_error("PR_CAPBSET_DROP failed: %m"); - return -errno; - } + free(hn); } - return 0; + return r; +} + +static int drop_capabilities(void) { + return capability_bounding_set_drop(~arg_retain, false); } static int is_os_tree(const char *path) { @@ -862,7 +952,6 @@ int main(int argc, char *argv[]) { if (pid == 0) { /* child */ - const char *hn; const char *home = NULL; uid_t uid = (uid_t) -1; gid_t gid = (gid_t) -1; @@ -873,6 +962,7 @@ int main(int argc, char *argv[]) { NULL, /* HOME */ NULL, /* USER */ NULL, /* LOGNAME */ + NULL, /* container_uuid */ NULL }; @@ -901,6 +991,18 @@ int main(int argc, char *argv[]) { if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) goto child_fail; + /* Turn directory into bind mount */ + if (mount(arg_directory, arg_directory, "bind", MS_BIND, NULL) < 0) { + log_error("Failed to make bind mount."); + goto child_fail; + } + + if (arg_read_only) + if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) { + log_error("Failed to make read-only."); + goto child_fail; + } + if (mount_all(arg_directory) < 0) goto child_fail; @@ -918,6 +1020,9 @@ int main(int argc, char *argv[]) { if (setup_timezone(arg_directory) < 0) goto child_fail; + if (setup_resolv_conf(arg_directory) < 0) + goto child_fail; + if (chdir(arg_directory) < 0) { log_error("chdir(%s) failed: %m", arg_directory); goto child_fail; @@ -928,8 +1033,8 @@ int main(int argc, char *argv[]) { dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) goto child_fail; - if (mount(arg_directory, "/", "bind", MS_BIND, NULL) < 0) { - log_error("mount(MS_MOVE) failed: %m"); + if (mount(arg_directory, "/", "bind", MS_MOVE, NULL) < 0) { + log_error("mount(MS_BIND) failed: %m"); goto child_fail; } @@ -947,8 +1052,10 @@ int main(int argc, char *argv[]) { loopback_setup(); - if (drop_capabilities() < 0) + if (drop_capabilities() < 0) { + log_error("drop_capabilities() failed: %m"); goto child_fail; + } if (arg_user) { @@ -957,13 +1064,13 @@ int main(int argc, char *argv[]) { goto child_fail; } - if (mkdir_parents(home, 0775) < 0) { - log_error("mkdir_parents() failed: %m"); + if (mkdir_parents_label(home, 0775) < 0) { + log_error("mkdir_parents_label() failed: %m"); goto child_fail; } - if (safe_mkdir(home, 0775, uid, gid) < 0) { - log_error("safe_mkdir() failed: %m"); + if (mkdir_safe_label(home, 0775, uid, gid) < 0) { + log_error("mkdir_safe_label() failed: %m"); goto child_fail; } @@ -983,17 +1090,41 @@ int main(int argc, char *argv[]) { } } - if ((asprintf((char**)(envp + 3), "HOME=%s", home? home: "/root") < 0) || - (asprintf((char**)(envp + 4), "USER=%s", arg_user? arg_user : "root") < 0) || - (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user? arg_user : "root") < 0)) { + if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) || + (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) || + (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) { log_error("Out of memory"); goto child_fail; } - if ((hn = file_name_from_path(arg_directory))) - sethostname(hn, strlen(hn)); + if (arg_uuid) { + if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) { + log_error("Out of memory"); + goto child_fail; + } + } + + setup_hostname(); + + if (arg_boot) { + char **a; + size_t l; + + /* Automatically search for the init system */ + + l = 1 + argc - optind; + a = newa(char*, l + 1); + memcpy(a + 1, argv + optind, l * sizeof(char*)); + + a[0] = (char*) "/usr/lib/systemd/systemd"; + execve(a[0], a, (char**) envp); + + a[0] = (char*) "/lib/systemd/systemd"; + execve(a[0], a, (char**) envp); - if (argc > optind) + a[0] = (char*) "/sbin/init"; + execve(a[0], a, (char**) envp); + } else if (argc > optind) execvpe(argv[optind], argv + optind, (char**) envp); else { chdir(home ? home : "/root");