X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=blobdiff_plain;f=src%2Fnspawn%2Fnspawn.c;h=7f084ef2d01274a4ddefca9fa6351a9fc957e95a;hp=1d7511e2ab690aa4d5c5c5090bab1a3c131e6a97;hb=d87be9b0af81a6e07d4fb3028e45c4409100dc26;hpb=db7feb7e9c436ec3ad3b90cf21bd43d8036aad0d diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 1d7511e2a..7f084ef2d 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -53,6 +53,7 @@ #include "path-util.h" #include "loopback-setup.h" #include "sd-id128.h" +#include "dev-setup.h" typedef enum LinkJournal { LINK_NO, @@ -91,7 +92,8 @@ static uint64_t arg_retain = (1ULL << CAP_SYS_NICE) | (1ULL << CAP_SYS_PTRACE) | (1ULL << CAP_SYS_TTY_CONFIG) | - (1ULL << CAP_SYS_RESOURCE); + (1ULL << CAP_SYS_RESOURCE) | + (1ULL << CAP_SYS_BOOT); static int help(void) { @@ -205,10 +207,8 @@ static int parse_argv(int argc, char *argv[]) { char *t; t = strndup(word, length); - if (!t) { - log_error("Out of memory."); - return -ENOMEM; - } + if (!t) + return log_oom(); if (cap_from_name(t, &cap) < 0) { log_error("Failed to parse capability %s.", t); @@ -268,16 +268,15 @@ static int mount_all(const char *dest) { static const MountPoint mount_table[] = { { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true }, - { "/proc/sys", "/proc/sys", "bind", NULL, MS_BIND, true }, /* Bind mount first */ - { "/proc/sys", "/proc/sys", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */ - { "/sys", "/sys", "bind", NULL, MS_BIND, true }, /* Bind mount first */ - { "/sys", "/sys", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */ + { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */ + { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */ + { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true }, { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true }, - { "/dev/pts", "/dev/pts", "bind", NULL, MS_BIND, true }, + { "/dev/pts", "/dev/pts", NULL, NULL, MS_BIND, true }, { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true }, #ifdef HAVE_SELINUX - { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND, false }, /* Bind mount first */ - { "/sys/fs/selinux", "/sys/fs/selinux", "bind", NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */ + { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */ + { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */ #endif }; @@ -289,7 +288,7 @@ static int mount_all(const char *dest) { int t; if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) { - log_error("Out of memory"); + log_oom(); if (r == 0) r = -ENOMEM; @@ -297,7 +296,7 @@ static int mount_all(const char *dest) { break; } - t = path_is_mount_point(where, false); + t = path_is_mount_point(where, true); if (t < 0) { log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t)); free(where); @@ -308,6 +307,10 @@ static int mount_all(const char *dest) { continue; } + /* Skip this entry if it is not a remount. */ + if (mount_table[k].what && t > 0) + continue; + mkdir_p_label(where, 0755); if (mount(mount_table[k].what, @@ -335,20 +338,18 @@ static int setup_timezone(const char *dest) { assert(dest); /* Fix the timezone, if possible */ - if (asprintf(&where, "%s/etc/localtime", dest) < 0) { - log_error("Out of memory"); - return -ENOMEM; - } + where = strappend(dest, "/etc/localtime"); + if (!where) + return log_oom(); if (mount("/etc/localtime", where, "bind", MS_BIND, NULL) >= 0) mount("/etc/localtime", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); free(where); - if (asprintf(&where, "%s/etc/timezone", dest) < 0) { - log_error("Out of memory"); - return -ENOMEM; - } + where = strappend(dest, "/etc/timezone"); + if (!where) + return log_oom(); if (mount("/etc/timezone", where, "bind", MS_BIND, NULL) >= 0) mount("/etc/timezone", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); @@ -367,10 +368,9 @@ static int setup_resolv_conf(const char *dest) { return 0; /* Fix resolv.conf, if possible */ - if (asprintf(&where, "%s/etc/resolv.conf", dest) < 0) { - log_error("Out of memory"); - return -ENOMEM; - } + where = strappend(dest, "/etc/resolv.conf"); + if (!where) + return log_oom(); if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0) mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); @@ -380,6 +380,61 @@ static int setup_resolv_conf(const char *dest) { return 0; } +static int setup_boot_id(const char *dest) { + char *from = NULL, *to = NULL; + sd_id128_t rnd; + char as_uuid[37]; + int r; + + assert(dest); + + /* Generate a new randomized boot ID, so that each boot-up of + * the container gets a new one */ + + from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id"); + if (!from) { + r = log_oom(); + goto finish; + } + + to = strappend(dest, "/proc/sys/kernel/random/boot_id"); + if (!to) { + r = log_oom(); + goto finish; + } + + r = sd_id128_randomize(&rnd); + if (r < 0) { + log_error("Failed to generate random boot id: %s", strerror(-r)); + goto finish; + } + + snprintf(as_uuid, sizeof(as_uuid), + "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", + SD_ID128_FORMAT_VAL(rnd)); + char_array_0(as_uuid); + + r = write_one_line_file(from, as_uuid); + if (r < 0) { + log_error("Failed to write boot id: %s", strerror(-r)); + goto finish; + } + + if (mount(from, to, "bind", MS_BIND, NULL) < 0) { + log_error("Failed to bind mount boot id: %m"); + r = -errno; + } else + mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL); + + unlink(from); + +finish: + free(from); + free(to); + + return r; +} + static int copy_devnodes(const char *dest) { static const char devnodes[] = @@ -389,8 +444,7 @@ static int copy_devnodes(const char *dest) { "random\0" "urandom\0" "tty\0" - "ptmx\0" - "rtc0\0"; + "ptmx\0"; const char *d; int r = 0; @@ -480,8 +534,7 @@ static int setup_dev_console(const char *dest, const char *console) { } if (asprintf(&to, "%s/dev/console", dest) < 0) { - log_error("Out of memory"); - r = -ENOMEM; + r = log_oom(); goto finish; } @@ -535,14 +588,12 @@ static int setup_kmsg(const char *dest, int kmsg_socket) { * avoid any problems with containers deadlocking due to this * we simply make /dev/kmsg unavailable to the container. */ if (asprintf(&from, "%s/dev/kmsg", dest) < 0) { - log_error("Out of memory"); - r = -ENOMEM; + r = log_oom(); goto finish; } if (asprintf(&to, "%s/proc/kmsg", dest) < 0) { - log_error("Out of memory"); - r = -ENOMEM; + r = log_oom(); goto finish; } @@ -639,8 +690,7 @@ static int setup_journal(const char *directory) { p = strappend(directory, "/etc/machine-id"); if (!p) { - log_error("Out of memory"); - r = -ENOMEM; + r = log_oom(); goto finish; } @@ -670,8 +720,7 @@ static int setup_journal(const char *directory) { p = strappend("/var/log/journal/", l); q = strjoin(directory, "/var/log/journal/", l, NULL); if (!p || !q) { - log_error("Out of memory"); - r = -ENOMEM; + r = log_oom(); goto finish; } @@ -1119,11 +1168,6 @@ int main(int argc, char *argv[]) { cfmakeraw(&raw_attr); raw_attr.c_lflag &= ~ECHO; - if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) { - log_error("Failed to set terminal attributes: %m"); - goto finish; - } - if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) { log_error("Failed to create kmsg socket pair"); goto finish; @@ -1133,225 +1177,271 @@ int main(int argc, char *argv[]) { sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1); assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0); - pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL); - if (pid < 0) { - if (errno == EINVAL) - log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m"); - else - log_error("clone() failed: %m"); + for (;;) { + siginfo_t status; - goto finish; - } + if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) { + log_error("Failed to set terminal attributes: %m"); + goto finish; + } - if (pid == 0) { - /* child */ - - const char *home = NULL; - uid_t uid = (uid_t) -1; - gid_t gid = (gid_t) -1; - const char *envp[] = { - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */ - NULL, /* TERM */ - NULL, /* HOME */ - NULL, /* USER */ - NULL, /* LOGNAME */ - NULL, /* container_uuid */ - NULL - }; - - envp[2] = strv_find_prefix(environ, "TERM="); + pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL); + if (pid < 0) { + if (errno == EINVAL) + log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m"); + else + log_error("clone() failed: %m"); - close_nointr_nofail(master); + goto finish; + } - close_nointr(STDIN_FILENO); - close_nointr(STDOUT_FILENO); - close_nointr(STDERR_FILENO); + if (pid == 0) { + /* child */ - close_all_fds(&kmsg_socket_pair[1], 1); + const char *home = NULL; + uid_t uid = (uid_t) -1; + gid_t gid = (gid_t) -1; + const char *envp[] = { + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */ + NULL, /* TERM */ + NULL, /* HOME */ + NULL, /* USER */ + NULL, /* LOGNAME */ + NULL, /* container_uuid */ + NULL + }; - reset_all_signal_handlers(); + envp[2] = strv_find_prefix(environ, "TERM="); - assert_se(sigemptyset(&mask) == 0); - assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); + close_nointr_nofail(master); - if (open_terminal(console, O_RDWR) != STDIN_FILENO || - dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO || - dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) - goto child_fail; + close_nointr(STDIN_FILENO); + close_nointr(STDOUT_FILENO); + close_nointr(STDERR_FILENO); - if (setsid() < 0) { - log_error("setsid() failed: %m"); - goto child_fail; - } + close_all_fds(&kmsg_socket_pair[1], 1); - if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) { - log_error("PR_SET_PDEATHSIG failed: %m"); - goto child_fail; - } + reset_all_signal_handlers(); - /* Mark / as private, in case somebody marked it shared */ - if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) { - log_error("MS_PRIVATE|MS_REC failed: %m"); - goto child_fail; - } + assert_se(sigemptyset(&mask) == 0); + assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0); - /* Turn directory into bind mount */ - if (mount(arg_directory, arg_directory, "bind", MS_BIND, NULL) < 0) { - log_error("Failed to make bind mount."); - goto child_fail; - } + if (open_terminal(console, O_RDWR) != STDIN_FILENO || + dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO || + dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) + goto child_fail; - if (arg_read_only) - if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) { - log_error("Failed to make read-only."); + if (setsid() < 0) { + log_error("setsid() failed: %m"); goto child_fail; } - if (mount_all(arg_directory) < 0) - goto child_fail; - - if (copy_devnodes(arg_directory) < 0) - goto child_fail; - - if (setup_dev_console(arg_directory, console) < 0) - goto child_fail; - - if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0) - goto child_fail; + if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) { + log_error("PR_SET_PDEATHSIG failed: %m"); + goto child_fail; + } - close_nointr_nofail(kmsg_socket_pair[1]); + /* Mark everything as slave, so that we still + * receive mounts from the real root, but don't + * propagate mounts to the real root. */ + if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + log_error("MS_SLAVE|MS_REC failed: %m"); + goto child_fail; + } - if (setup_timezone(arg_directory) < 0) - goto child_fail; + /* Turn directory into bind mount */ + if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) { + log_error("Failed to make bind mount."); + goto child_fail; + } - if (setup_resolv_conf(arg_directory) < 0) - goto child_fail; + if (arg_read_only) + if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) { + log_error("Failed to make read-only."); + goto child_fail; + } - if (setup_journal(arg_directory) < 0) - goto child_fail; + if (mount_all(arg_directory) < 0) + goto child_fail; - if (chdir(arg_directory) < 0) { - log_error("chdir(%s) failed: %m", arg_directory); - goto child_fail; - } + if (copy_devnodes(arg_directory) < 0) + goto child_fail; - if (mount(arg_directory, "/", "bind", MS_MOVE, NULL) < 0) { - log_error("mount(MS_BIND) failed: %m"); - goto child_fail; - } + dev_setup(arg_directory); - if (chroot(".") < 0) { - log_error("chroot() failed: %m"); - goto child_fail; - } + if (setup_dev_console(arg_directory, console) < 0) + goto child_fail; - if (chdir("/") < 0) { - log_error("chdir() failed: %m"); - goto child_fail; - } + if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0) + goto child_fail; - umask(0022); + close_nointr_nofail(kmsg_socket_pair[1]); - loopback_setup(); + if (setup_boot_id(arg_directory) < 0) + goto child_fail; - if (drop_capabilities() < 0) { - log_error("drop_capabilities() failed: %m"); - goto child_fail; - } + if (setup_timezone(arg_directory) < 0) + goto child_fail; - if (arg_user) { + if (setup_resolv_conf(arg_directory) < 0) + goto child_fail; - if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) { - log_error("get_user_creds() failed: %m"); + if (setup_journal(arg_directory) < 0) goto child_fail; - } - if (mkdir_parents_label(home, 0775) < 0) { - log_error("mkdir_parents_label() failed: %m"); + if (chdir(arg_directory) < 0) { + log_error("chdir(%s) failed: %m", arg_directory); goto child_fail; } - if (mkdir_safe_label(home, 0775, uid, gid) < 0) { - log_error("mkdir_safe_label() failed: %m"); + if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) { + log_error("mount(MS_MOVE) failed: %m"); goto child_fail; } - if (initgroups((const char*)arg_user, gid) < 0) { - log_error("initgroups() failed: %m"); + if (chroot(".") < 0) { + log_error("chroot() failed: %m"); goto child_fail; } - if (setresgid(gid, gid, gid) < 0) { - log_error("setregid() failed: %m"); + if (chdir("/") < 0) { + log_error("chdir() failed: %m"); goto child_fail; } - if (setresuid(uid, uid, uid) < 0) { - log_error("setreuid() failed: %m"); + umask(0022); + + loopback_setup(); + + if (drop_capabilities() < 0) { + log_error("drop_capabilities() failed: %m"); goto child_fail; } - } - if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) || - (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) || - (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) { - log_error("Out of memory"); - goto child_fail; - } + if (arg_user) { + + if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) { + log_error("get_user_creds() failed: %m"); + goto child_fail; + } + + if (mkdir_parents_label(home, 0775) < 0) { + log_error("mkdir_parents_label() failed: %m"); + goto child_fail; + } + + if (mkdir_safe_label(home, 0775, uid, gid) < 0) { + log_error("mkdir_safe_label() failed: %m"); + goto child_fail; + } + + if (initgroups((const char*)arg_user, gid) < 0) { + log_error("initgroups() failed: %m"); + goto child_fail; + } - if (arg_uuid) { - if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) { - log_error("Out of memory"); + if (setresgid(gid, gid, gid) < 0) { + log_error("setregid() failed: %m"); + goto child_fail; + } + + if (setresuid(uid, uid, uid) < 0) { + log_error("setreuid() failed: %m"); + goto child_fail; + } + } + + if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) || + (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) || + (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) { + log_oom(); goto child_fail; } - } - setup_hostname(); + if (arg_uuid) { + if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) { + log_oom(); + goto child_fail; + } + } + + setup_hostname(); + + if (arg_boot) { + char **a; + size_t l; - if (arg_boot) { - char **a; - size_t l; + /* Automatically search for the init system */ - /* Automatically search for the init system */ + l = 1 + argc - optind; + a = newa(char*, l + 1); + memcpy(a + 1, argv + optind, l * sizeof(char*)); - l = 1 + argc - optind; - a = newa(char*, l + 1); - memcpy(a + 1, argv + optind, l * sizeof(char*)); + a[0] = (char*) "/usr/lib/systemd/systemd"; + execve(a[0], a, (char**) envp); - a[0] = (char*) "/usr/lib/systemd/systemd"; - execve(a[0], a, (char**) envp); + a[0] = (char*) "/lib/systemd/systemd"; + execve(a[0], a, (char**) envp); + + a[0] = (char*) "/sbin/init"; + execve(a[0], a, (char**) envp); + } else if (argc > optind) + execvpe(argv[optind], argv + optind, (char**) envp); + else { + chdir(home ? home : "/root"); + execle("/bin/bash", "-bash", NULL, (char**) envp); + } - a[0] = (char*) "/lib/systemd/systemd"; - execve(a[0], a, (char**) envp); + log_error("execv() failed: %m"); - a[0] = (char*) "/sbin/init"; - execve(a[0], a, (char**) envp); - } else if (argc > optind) - execvpe(argv[optind], argv + optind, (char**) envp); - else { - chdir(home ? home : "/root"); - execle("/bin/bash", "-bash", NULL, (char**) envp); + child_fail: + _exit(EXIT_FAILURE); } - log_error("execv() failed: %m"); + if (process_pty(master, &mask) < 0) + goto finish; - child_fail: - _exit(EXIT_FAILURE); - } - if (process_pty(master, &mask) < 0) - goto finish; + if (saved_attr_valid) + tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); - if (saved_attr_valid) { - tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr); - saved_attr_valid = false; - } + r = wait_for_terminate(pid, &status); + if (r < 0) { + r = EXIT_FAILURE; + break; + } - r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid); + if (status.si_code == CLD_EXITED) { + if (status.si_status != 0) { + log_error("Container failed with error code %i.", status.si_status); + r = status.si_status; + break; + } - if (r < 0) - r = EXIT_FAILURE; + log_debug("Container exited successfully."); + break; + } else if (status.si_code == CLD_KILLED && + status.si_status == SIGINT) { + log_info("Container has been shut down."); + r = 0; + break; + } else if (status.si_code == CLD_KILLED && + status.si_status == SIGHUP) { + log_info("Container is being rebooted."); + continue; + } else if (status.si_code == CLD_KILLED || + status.si_code == CLD_DUMPED) { + + log_error("Container terminated by signal %s.", signal_to_string(status.si_status)); + r = EXIT_FAILURE; + break; + } else { + log_error("Container failed due to unknown reason."); + r = EXIT_FAILURE; + break; + } + } finish: if (saved_attr_valid)