chiark / gitweb /
virt: swap order of cpuid and dmi again, but properly detect oracle (#5355)
[elogind.git] / src / basic / virt.c
1 /***
2   This file is part of systemd.
3
4   Copyright 2011 Lennart Poettering
5
6   systemd is free software; you can redistribute it and/or modify it
7   under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or
9   (at your option) any later version.
10
11   systemd is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
#if defined(__i386__) || defined(__x86_64__)
#include <cpuid.h>
#endif
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
25
26 #include "alloc-util.h"
27 #include "dirent-util.h"
28 #include "fd-util.h"
29 #include "fileio.h"
30 #include "macro.h"
31 #include "process-util.h"
32 #include "stat-util.h"
33 #include "string-table.h"
34 #include "string-util.h"
35 #include "virt.h"
36
37 #if 0 /// UNNEEDED by elogind
38 static int detect_vm_cpuid(void) {
39
40         /* CPUID is an x86 specific interface. */
41 #if defined(__i386__) || defined(__x86_64__)
42
43         static const struct {
44                 const char *cpuid;
45                 int id;
46         } cpuid_vendor_table[] = {
47                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
48                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
49                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
50                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
51                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
52                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
53                 /* https://wiki.freebsd.org/bhyve */
54                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
55         };
56
57         uint32_t eax, ecx;
58         bool hypervisor;
59
60         /* http://lwn.net/Articles/301888/ */
61
62 #if defined (__i386__)
63 #define REG_a "eax"
64 #define REG_b "ebx"
65 #elif defined (__amd64__)
66 #define REG_a "rax"
67 #define REG_b "rbx"
68 #endif
69
70         /* First detect whether there is a hypervisor */
71         eax = 1;
72         __asm__ __volatile__ (
73                 /* ebx/rbx is being used for PIC! */
74                 "  push %%"REG_b"         \n\t"
75                 "  cpuid                  \n\t"
76                 "  pop %%"REG_b"          \n\t"
77
78                 : "=a" (eax), "=c" (ecx)
79                 : "0" (eax)
80         );
81
82         hypervisor = !!(ecx & 0x80000000U);
83
84         if (hypervisor) {
85                 union {
86                         uint32_t sig32[3];
87                         char text[13];
88                 } sig = {};
89                 unsigned j;
90
91                 /* There is a hypervisor, see what it is */
92                 eax = 0x40000000U;
93                 __asm__ __volatile__ (
94                         /* ebx/rbx is being used for PIC! */
95                         "  push %%"REG_b"         \n\t"
96                         "  cpuid                  \n\t"
97                         "  mov %%ebx, %1          \n\t"
98                         "  pop %%"REG_b"          \n\t"
99
100                         : "=a" (eax), "=r" (sig.sig32[0]), "=c" (sig.sig32[1]), "=d" (sig.sig32[2])
101                         : "0" (eax)
102                 );
103
104                 log_debug("Virtualization found, CPUID=%s", sig.text);
105
106                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
107                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
108                                 return cpuid_vendor_table[j].id;
109
110                 return VIRTUALIZATION_VM_OTHER;
111         }
112 #endif
113         log_debug("No virtualization found in CPUID");
114
115         return VIRTUALIZATION_NONE;
116 }
117
118 static int detect_vm_device_tree(void) {
119 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
120         _cleanup_free_ char *hvtype = NULL;
121         int r;
122
123         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
124         if (r == -ENOENT) {
125                 _cleanup_closedir_ DIR *dir = NULL;
126                 struct dirent *dent;
127
128                 dir = opendir("/proc/device-tree");
129                 if (!dir) {
130                         if (errno == ENOENT) {
131                                 log_debug_errno(errno, "/proc/device-tree: %m");
132                                 return VIRTUALIZATION_NONE;
133                         }
134                         return -errno;
135                 }
136
137                 FOREACH_DIRENT(dent, dir, return -errno)
138                         if (strstr(dent->d_name, "fw-cfg")) {
139                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
140                                 return VIRTUALIZATION_QEMU;
141                         }
142
143                 log_debug("No virtualization found in /proc/device-tree/*");
144                 return VIRTUALIZATION_NONE;
145         } else if (r < 0)
146                 return r;
147
148         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
149         if (streq(hvtype, "linux,kvm"))
150                 return VIRTUALIZATION_KVM;
151         else if (strstr(hvtype, "xen"))
152                 return VIRTUALIZATION_XEN;
153         else
154                 return VIRTUALIZATION_VM_OTHER;
155 #else
156         log_debug("This platform does not support /proc/device-tree");
157         return VIRTUALIZATION_NONE;
158 #endif
159 }
160
161 static int detect_vm_dmi(void) {
162 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
163
164         static const char *const dmi_vendors[] = {
165                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
166                 "/sys/class/dmi/id/sys_vendor",
167                 "/sys/class/dmi/id/board_vendor",
168                 "/sys/class/dmi/id/bios_vendor"
169         };
170
171         static const struct {
172                 const char *vendor;
173                 int id;
174         } dmi_vendor_table[] = {
175                 { "KVM",           VIRTUALIZATION_KVM       },
176                 { "QEMU",          VIRTUALIZATION_QEMU      },
177                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
178                 { "VMware",        VIRTUALIZATION_VMWARE    },
179                 { "VMW",           VIRTUALIZATION_VMWARE    },
180                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
181                 { "Xen",           VIRTUALIZATION_XEN       },
182                 { "Bochs",         VIRTUALIZATION_BOCHS     },
183                 { "Parallels",     VIRTUALIZATION_PARALLELS },
184                 /* https://wiki.freebsd.org/bhyve */
185                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
186         };
187         unsigned i;
188         int r;
189
190         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
191                 _cleanup_free_ char *s = NULL;
192                 unsigned j;
193
194                 r = read_one_line_file(dmi_vendors[i], &s);
195                 if (r < 0) {
196                         if (r == -ENOENT)
197                                 continue;
198
199                         return r;
200                 }
201
202
203
204                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
205                         if (startswith(s, dmi_vendor_table[j].vendor)) {
206                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
207                                 return dmi_vendor_table[j].id;
208                         }
209         }
210 #endif
211
212         log_debug("No virtualization found in DMI");
213
214         return VIRTUALIZATION_NONE;
215 }
216
217 static int detect_vm_xen(void) {
218         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
219            Thats why we dont check the content of /proc/xen/capabilities here. */
220         if (access("/proc/xen/capabilities", F_OK) < 0) {
221                 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
222                 return VIRTUALIZATION_NONE;
223         }
224
225         log_debug("Virtualization XEN found (/proc/xen/capabilities exists)");
226         return  VIRTUALIZATION_XEN;
227
228 }
229
230 static bool detect_vm_xen_dom0(void) {
231         _cleanup_free_ char *domcap = NULL;
232         char *cap, *i;
233         int r;
234
235         r = read_one_line_file("/proc/xen/capabilities", &domcap);
236         if (r == -ENOENT) {
237                 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
238                 return false;
239         }
240         if (r < 0)
241                 return r;
242
243         i = domcap;
244         while ((cap = strsep(&i, ",")))
245                 if (streq(cap, "control_d"))
246                         break;
247         if (!cap) {
248                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
249                 return false;
250         }
251
252         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
253         return true;
254 }
255
256 static int detect_vm_hypervisor(void) {
257         _cleanup_free_ char *hvtype = NULL;
258         int r;
259
260         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
261         if (r == -ENOENT)
262                 return VIRTUALIZATION_NONE;
263         if (r < 0)
264                 return r;
265
266         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
267
268         if (streq(hvtype, "xen"))
269                 return VIRTUALIZATION_XEN;
270         else
271                 return VIRTUALIZATION_VM_OTHER;
272 }
273
274 static int detect_vm_uml(void) {
275         _cleanup_free_ char *cpuinfo_contents = NULL;
276         int r;
277
278         /* Detect User-Mode Linux by reading /proc/cpuinfo */
279         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
280         if (r < 0)
281                 return r;
282
283         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
284                 log_debug("UML virtualization found in /proc/cpuinfo");
285                 return VIRTUALIZATION_UML;
286         }
287
288         log_debug("No virtualization found in /proc/cpuinfo.");
289         return VIRTUALIZATION_NONE;
290 }
291
292 static int detect_vm_zvm(void) {
293
294 #if defined(__s390__)
295         _cleanup_free_ char *t = NULL;
296         int r;
297
298         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
299         if (r == -ENOENT)
300                 return VIRTUALIZATION_NONE;
301         if (r < 0)
302                 return r;
303
304         log_debug("Virtualization %s found in /proc/sysinfo", t);
305         if (streq(t, "z/VM"))
306                 return VIRTUALIZATION_ZVM;
307         else
308                 return VIRTUALIZATION_KVM;
309 #else
310         log_debug("This platform does not support /proc/sysinfo");
311         return VIRTUALIZATION_NONE;
312 #endif
313 }
314
315 /* Returns a short identifier for the various VM implementations */
316 int detect_vm(void) {
317         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
318         int r, dmi;
319
320         if (cached_found >= 0)
321                 return cached_found;
322
323         /* We have to use the correct order here:
324          *
325          * -> First try to detect Oracle Virtualbox, even if it uses KVM.
326          * -> Second try to detect from cpuid, this will report KVM for
327          *    whatever software is used even if info in dmi is overwritten.
328          * -> Third try to detect from dmi. */
329
330         dmi = detect_vm_dmi();
331         if (dmi == VIRTUALIZATION_ORACLE)
332                 return dmi;
333
334         r = detect_vm_cpuid();
335         if (r < 0)
336                 return r;
337         if (r != VIRTUALIZATION_NONE)
338                 goto finish;
339
340         r = dmi;
341         if (r < 0)
342                 return r;
343         if (r != VIRTUALIZATION_NONE)
344                 goto finish;
345
346         /* x86 xen will most likely be detected by cpuid. If not (most likely
347          * because we're not an x86 guest), then we should try the xen capabilities
348          * file next. If that's not found, then we check for the high-level
349          * hypervisor sysfs file:
350          *
351          * https://bugs.freedesktop.org/show_bug.cgi?id=77271 */
352
353         r = detect_vm_xen();
354         if (r < 0)
355                 return r;
356         if (r != VIRTUALIZATION_NONE)
357                 goto finish;
358
359         r = detect_vm_hypervisor();
360         if (r < 0)
361                 return r;
362         if (r != VIRTUALIZATION_NONE)
363                 goto finish;
364
365         r = detect_vm_device_tree();
366         if (r < 0)
367                 return r;
368         if (r != VIRTUALIZATION_NONE)
369                 goto finish;
370
371         r = detect_vm_uml();
372         if (r < 0)
373                 return r;
374         if (r != VIRTUALIZATION_NONE)
375                 goto finish;
376
377         r = detect_vm_zvm();
378         if (r < 0)
379                 return r;
380
381 finish:
382         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
383          * In order to detect the Dom0 as not virtualization we need to
384          * double-check it */
385         if (r == VIRTUALIZATION_XEN && detect_vm_xen_dom0())
386                 r = VIRTUALIZATION_NONE;
387
388         cached_found = r;
389         log_debug("Found VM virtualization %s", virtualization_to_string(r));
390         return r;
391 }
392 #endif // 0
393
394 int detect_container(void) {
395
396         static const struct {
397                 const char *value;
398                 int id;
399         } value_table[] = {
400                 { "lxc",            VIRTUALIZATION_LXC            },
401                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
402                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
403                 { "docker",         VIRTUALIZATION_DOCKER         },
404                 { "rkt",            VIRTUALIZATION_RKT            },
405         };
406
407         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
408         _cleanup_free_ char *m = NULL;
409         const char *e = NULL;
410         unsigned j;
411         int r;
412
413         if (cached_found >= 0)
414                 return cached_found;
415
416         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
417         if (access("/proc/vz", F_OK) >= 0 &&
418             access("/proc/bc", F_OK) < 0) {
419                 r = VIRTUALIZATION_OPENVZ;
420                 goto finish;
421         }
422
423         if (getpid() == 1) {
424                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
425
426                 e = getenv("container");
427                 if (isempty(e)) {
428                         r = VIRTUALIZATION_NONE;
429                         goto finish;
430                 }
431
432                 goto translate_name;
433         }
434
435         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
436          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
437         r = read_one_line_file("/run/systemd/container", &m);
438         if (r >= 0) {
439                 e = m;
440                 goto translate_name;
441         }
442         if (r != -ENOENT)
443                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
444
445         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
446         r = getenv_for_pid(1, "container", &m);
447         if (r > 0) {
448                 e = m;
449                 goto translate_name;
450         }
451         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
452                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
453
454         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
455          * there is not 1, we know we are in a PID namespace. and hence a container. */
456         r = read_one_line_file("/proc/1/sched", &m);
457         if (r >= 0) {
458                 const char *t;
459
460                 t = strrchr(m, '(');
461                 if (!t)
462                         return -EIO;
463
464                 if (!startswith(t, "(1,")) {
465                         r = VIRTUALIZATION_CONTAINER_OTHER;
466                         goto finish;
467                 }
468         } else if (r != -ENOENT)
469                 return r;
470
471         /* If that didn't work, give up, assume no container manager. */
472         r = VIRTUALIZATION_NONE;
473         goto finish;
474
475 translate_name:
476         for (j = 0; j < ELEMENTSOF(value_table); j++)
477                 if (streq(e, value_table[j].value)) {
478                         r = value_table[j].id;
479                         goto finish;
480                 }
481
482         r = VIRTUALIZATION_CONTAINER_OTHER;
483
484 finish:
485         log_debug("Found container virtualization %s.", virtualization_to_string(r));
486         cached_found = r;
487         return r;
488 }
489
490 #if 0 /// UNNEEDED by elogind
/* Runs container detection first and falls back to VM detection only if that returned 0.
 * Any non-zero container result (a detected container or a negative error) is returned as is. */
int detect_virtualization(void) {
        int r = detect_container();

        if (r != 0)
                return r;

        return detect_vm();
}
500
501 static int userns_has_mapping(const char *name) {
502         _cleanup_fclose_ FILE *f = NULL;
503         _cleanup_free_ char *buf = NULL;
504         size_t n_allocated = 0;
505         ssize_t n;
506         uint32_t a, b, c;
507         int r;
508
509         f = fopen(name, "re");
510         if (!f) {
511                 log_debug_errno(errno, "Failed to open %s: %m", name);
512                 return errno == ENOENT ? false : -errno;
513         }
514
515         n = getline(&buf, &n_allocated, f);
516         if (n < 0) {
517                 if (feof(f)) {
518                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
519                         return true;
520                 }
521
522                 return log_debug_errno(errno, "Failed to read %s: %m", name);
523         }
524
525         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
526         if (r < 3)
527                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
528
529         if (a == 0 && b == 0 && c == UINT32_MAX) {
530                 /* The kernel calls mappings_overlap() and does not allow overlaps */
531                 log_debug("%s has a full 1:1 mapping", name);
532                 return false;
533         }
534
535         /* Anything else implies that we are in a user namespace */
536         log_debug("Mapping found in %s, we're in a user namespace", name);
537         return true;
538 }
539
540 int running_in_userns(void) {
541         _cleanup_free_ char *line = NULL;
542         int r;
543
544         r = userns_has_mapping("/proc/self/uid_map");
545         if (r != 0)
546                 return r;
547
548         r = userns_has_mapping("/proc/self/gid_map");
549         if (r != 0)
550                 return r;
551
552         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
553          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
554          * also does not exist. We cannot distinguish those two cases, so assume that
555          * we're running on a stripped-down recent kernel, rather than on an old one,
556          * and if the file is not found, return false.
557          */
558         r = read_one_line_file("/proc/self/setgroups", &line);
559         if (r < 0) {
560                 log_debug_errno(r, "/proc/self/setgroups: %m");
561                 return r == -ENOENT ? false : r;
562         }
563
564         truncate_nl(line);
565         r = streq(line, "deny");
566         /* See user_namespaces(7) for a description of this "setgroups" contents. */
567         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
568         return r;
569 }
570 #endif // 0
571
572 int running_in_chroot(void) {
573         _cleanup_free_ char *self_mnt = NULL, *pid1_mnt = NULL;
574         int r;
575
576         /* Try to detect whether we are running in a chroot() environment. Specifically, check whether we have a
577          * different root directory than PID 1, even though we live in the same mount namespace as it. */
578
579 #if 0 /// elogind does not allow to ignore chroots, we are never init!
580         if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
581                 return 0;
582 #endif // 0
583
584         r = files_same("/proc/1/root", "/");
585         if (r < 0)
586                 return r;
587         if (r > 0)
588                 return 0;
589
590         r = readlink_malloc("/proc/self/ns/mnt", &self_mnt);
591         if (r < 0)
592                 return r;
593
594         r = readlink_malloc("/proc/1/ns/mnt", &pid1_mnt);
595         if (r < 0)
596                 return r;
597
598         return streq(self_mnt, pid1_mnt); /* Only if we live in the same namespace! */
599 }
600
601 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
602         [VIRTUALIZATION_NONE] = "none",
603         [VIRTUALIZATION_KVM] = "kvm",
604         [VIRTUALIZATION_QEMU] = "qemu",
605         [VIRTUALIZATION_BOCHS] = "bochs",
606         [VIRTUALIZATION_XEN] = "xen",
607         [VIRTUALIZATION_UML] = "uml",
608         [VIRTUALIZATION_VMWARE] = "vmware",
609         [VIRTUALIZATION_ORACLE] = "oracle",
610         [VIRTUALIZATION_MICROSOFT] = "microsoft",
611         [VIRTUALIZATION_ZVM] = "zvm",
612         [VIRTUALIZATION_PARALLELS] = "parallels",
613         [VIRTUALIZATION_BHYVE] = "bhyve",
614         [VIRTUALIZATION_VM_OTHER] = "vm-other",
615
616         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
617         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
618         [VIRTUALIZATION_LXC] = "lxc",
619         [VIRTUALIZATION_OPENVZ] = "openvz",
620         [VIRTUALIZATION_DOCKER] = "docker",
621         [VIRTUALIZATION_RKT] = "rkt",
622         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
623 };
624
625 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);