X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?a=blobdiff_plain;f=src%2Fbasic%2Fvirt.c;h=0ce07b8fdac18d77303404c29e009a0dce7cba31;hb=f960075a059ba6e5abef587f51d772daa75034a6;hp=0c08201035b815eec3835cc98c442c6e26597925;hpb=35eec258c4523c92fe985d764198b266ebc3881a;p=elogind.git diff --git a/src/basic/virt.c b/src/basic/virt.c index 0c0820103..0ce07b8fd 100644 --- a/src/basic/virt.c +++ b/src/basic/virt.c @@ -50,6 +50,8 @@ static int detect_vm_cpuid(void) { { "VMwareVMware", VIRTUALIZATION_VMWARE }, /* http://msdn.microsoft.com/en-us/library/ff542428.aspx */ { "Microsoft Hv", VIRTUALIZATION_MICROSOFT }, + /* https://wiki.freebsd.org/bhyve */ + { "bhyve bhyve ", VIRTUALIZATION_BHYVE }, }; uint32_t eax, ecx; @@ -179,6 +181,8 @@ static int detect_vm_dmi(void) { { "Xen", VIRTUALIZATION_XEN }, { "Bochs", VIRTUALIZATION_BOCHS }, { "Parallels", VIRTUALIZATION_PARALLELS }, + /* https://wiki.freebsd.org/bhyve */ + { "BHYVE", VIRTUALIZATION_BHYVE }, }; unsigned i; int r; @@ -405,8 +409,7 @@ int detect_container(void) { if (cached_found >= 0) return cached_found; - /* /proc/vz exists in container and outside of the container, - * /proc/bc only outside of the container. */ + /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */ if (access("/proc/vz", F_OK) >= 0 && access("/proc/bc", F_OK) < 0) { r = VIRTUALIZATION_OPENVZ; @@ -414,50 +417,58 @@ int detect_container(void) { } if (getpid() == 1) { - /* If we are PID 1 we can just check our own - * environment variable */ + /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */ e = getenv("container"); if (isempty(e)) { r = VIRTUALIZATION_NONE; goto finish; } - } else { - - /* Otherwise, PID 1 dropped this information into a - * file in /run. This is better than accessing - * /proc/1/environ, since we don't need CAP_SYS_PTRACE - * for that. */ - - r = read_one_line_file("/run/systemd/container", &m); - if (r == -ENOENT) { - - /* Fallback for cases where PID 1 was not - * systemd (for example, cases where - * init=/bin/sh is used. */ - - r = getenv_for_pid(1, "container", &m); - if (r <= 0) { - - /* If that didn't work, give up, - * assume no container manager. - * - * Note: This means we still cannot - * detect containers if init=/bin/sh - * is passed but privileges dropped, - * as /proc/1/environ is only readable - * with privileges. */ - - r = VIRTUALIZATION_NONE; - goto finish; - } - } - if (r < 0) - return r; + goto translate_name; + } + + /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing + * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */ + r = read_one_line_file("/run/systemd/container", &m); + if (r >= 0) { + e = m; + goto translate_name; + } + if (r != -ENOENT) + return log_debug_errno(r, "Failed to read /run/systemd/container: %m"); + + /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */ + r = getenv_for_pid(1, "container", &m); + if (r > 0) { e = m; + goto translate_name; } + if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */ + log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m"); + + /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown + * there is not 1, we know we are in a PID namespace. and hence a container. */ + r = read_one_line_file("/proc/1/sched", &m); + if (r >= 0) { + const char *t; + + t = strrchr(m, '('); + if (!t) + return -EIO; + + if (!startswith(t, "(1,")) { + r = VIRTUALIZATION_CONTAINER_OTHER; + goto finish; + } + } else if (r != -ENOENT) + return r; + /* If that didn't work, give up, assume no container manager. */ + r = VIRTUALIZATION_NONE; + goto finish; + +translate_name: for (j = 0; j < ELEMENTSOF(value_table); j++) if (streq(e, value_table[j].value)) { r = value_table[j].id; @@ -467,7 +478,7 @@ int detect_container(void) { r = VIRTUALIZATION_CONTAINER_OTHER; finish: - log_debug("Found container virtualization %s", virtualization_to_string(r)); + log_debug("Found container virtualization %s.", virtualization_to_string(r)); cached_found = r; return r; } @@ -482,16 +493,105 @@ int detect_virtualization(void) { return r; } + +static int userns_has_mapping(const char *name) { + _cleanup_fclose_ FILE *f = NULL; + _cleanup_free_ char *buf = NULL; + size_t n_allocated = 0; + ssize_t n; + uint32_t a, b, c; + int r; + + f = fopen(name, "re"); + if (!f) { + log_debug_errno(errno, "Failed to open %s: %m", name); + return errno == ENOENT ? false : -errno; + } + + n = getline(&buf, &n_allocated, f); + if (n < 0) { + if (feof(f)) { + log_debug("%s is empty, we're in an uninitialized user namespace", name); + return true; + } + + return log_debug_errno(errno, "Failed to read %s: %m", name); + } + + r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c); + if (r < 3) + return log_debug_errno(errno, "Failed to parse %s: %m", name); + + if (a == 0 && b == 0 && c == UINT32_MAX) { + /* The kernel calls mappings_overlap() and does not allow overlaps */ + log_debug("%s has a full 1:1 mapping", name); + return false; + } + + /* Anything else implies that we are in a user namespace */ + log_debug("Mapping found in %s, we're in a user namespace", name); + return true; +} + +int running_in_userns(void) { + _cleanup_free_ char *line = NULL; + int r; + + r = userns_has_mapping("/proc/self/uid_map"); + if (r != 0) + return r; + + r = userns_has_mapping("/proc/self/gid_map"); + if (r != 0) + return r; + + /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also + * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups" + * also does not exist. We cannot distinguish those two cases, so assume that + * we're running on a stripped-down recent kernel, rather than on an old one, + * and if the file is not found, return false. + */ + r = read_one_line_file("/proc/self/setgroups", &line); + if (r < 0) { + log_debug_errno(r, "/proc/self/setgroups: %m"); + return r == -ENOENT ? false : r; + } + + truncate_nl(line); + r = streq(line, "deny"); + /* See user_namespaces(7) for a description of this "setgroups" contents. */ + log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in"); + return r; +} #endif // 0 int running_in_chroot(void) { - int ret; + _cleanup_free_ char *self_mnt = NULL, *pid1_mnt = NULL; + int r; - ret = files_same("/proc/1/root", "/"); - if (ret < 0) - return ret; + /* Try to detect whether we are running in a chroot() environment. Specifically, check whether we have a + * different root directory than PID 1, even though we live in the same mount namespace as it. */ + +#if 0 /// elogind does not allow to ignore chroots, we are never init! + if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0) + return 0; +#endif // 0 + + r = files_same("/proc/1/root", "/"); + if (r < 0) + return r; + if (r > 0) + return 0; + + r = readlink_malloc("/proc/self/ns/mnt", &self_mnt); + if (r < 0) + return r; + + r = readlink_malloc("/proc/1/ns/mnt", &pid1_mnt); + if (r < 0) + return r; - return ret == 0; + return streq(self_mnt, pid1_mnt); /* Only if we live in the same namespace! */ } static const char *const virtualization_table[_VIRTUALIZATION_MAX] = { @@ -506,6 +606,7 @@ static const char *const virtualization_table[_VIRTUALIZATION_MAX] = { [VIRTUALIZATION_MICROSOFT] = "microsoft", [VIRTUALIZATION_ZVM] = "zvm", [VIRTUALIZATION_PARALLELS] = "parallels", + [VIRTUALIZATION_BHYVE] = "bhyve", [VIRTUALIZATION_VM_OTHER] = "vm-other", [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",