chiark / gitweb /
virt: if we detect Xen by DMI, trust that over CPUID
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3   This file is part of systemd.
4
5   Copyright 2011 Lennart Poettering
6
7   systemd is free software; you can redistribute it and/or modify it
8   under the terms of the GNU Lesser General Public License as published by
9   the Free Software Foundation; either version 2.1 of the License, or
10   (at your option) any later version.
11
12   systemd is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public License
18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #if defined(__i386__) || defined(__x86_64__)
22 //#include <cpuid.h>
23 #endif
24 #include <errno.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29
30 #include "alloc-util.h"
31 #include "dirent-util.h"
32 #include "env-util.h"
33 #include "fd-util.h"
34 #include "fileio.h"
35 #include "macro.h"
36 #include "process-util.h"
37 #include "stat-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "virt.h"
41
42 #if 0 /// UNNEEDED by elogind
43 static int detect_vm_cpuid(void) {
44
45         /* CPUID is an x86 specific interface. */
46 #if defined(__i386__) || defined(__x86_64__)
47
48         static const struct {
49                 const char *cpuid;
50                 int id;
51         } cpuid_vendor_table[] = {
52                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
53                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
54                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
55                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
56                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
57                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
58                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
59                 /* https://wiki.freebsd.org/bhyve */
60                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
61                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
62         };
63
64         uint32_t eax, ebx, ecx, edx;
65         bool hypervisor;
66
67         /* http://lwn.net/Articles/301888/ */
68
69         /* First detect whether there is a hypervisor */
70         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
71                 return VIRTUALIZATION_NONE;
72
73         hypervisor = !!(ecx & 0x80000000U);
74
75         if (hypervisor) {
76                 union {
77                         uint32_t sig32[3];
78                         char text[13];
79                 } sig = {};
80                 unsigned j;
81
82                 /* There is a hypervisor, see what it is */
83                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
84
85                 sig.sig32[0] = ebx;
86                 sig.sig32[1] = ecx;
87                 sig.sig32[2] = edx;
88
89                 log_debug("Virtualization found, CPUID=%s", sig.text);
90
91                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
92                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
93                                 return cpuid_vendor_table[j].id;
94
95                 return VIRTUALIZATION_VM_OTHER;
96         }
97 #endif
98         log_debug("No virtualization found in CPUID");
99
100         return VIRTUALIZATION_NONE;
101 }
102
103 static int detect_vm_device_tree(void) {
104 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
105         _cleanup_free_ char *hvtype = NULL;
106         int r;
107
108         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
109         if (r == -ENOENT) {
110                 _cleanup_closedir_ DIR *dir = NULL;
111                 struct dirent *dent;
112
113                 dir = opendir("/proc/device-tree");
114                 if (!dir) {
115                         if (errno == ENOENT) {
116                                 log_debug_errno(errno, "/proc/device-tree: %m");
117                                 return VIRTUALIZATION_NONE;
118                         }
119                         return -errno;
120                 }
121
122                 FOREACH_DIRENT(dent, dir, return -errno)
123                         if (strstr(dent->d_name, "fw-cfg")) {
124                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
125                                 return VIRTUALIZATION_QEMU;
126                         }
127
128                 log_debug("No virtualization found in /proc/device-tree/*");
129                 return VIRTUALIZATION_NONE;
130         } else if (r < 0)
131                 return r;
132
133         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
134         if (streq(hvtype, "linux,kvm"))
135                 return VIRTUALIZATION_KVM;
136         else if (strstr(hvtype, "xen"))
137                 return VIRTUALIZATION_XEN;
138         else
139                 return VIRTUALIZATION_VM_OTHER;
140 #else
141         log_debug("This platform does not support /proc/device-tree");
142         return VIRTUALIZATION_NONE;
143 #endif
144 }
145
146 static int detect_vm_dmi(void) {
147 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
148
149         static const char *const dmi_vendors[] = {
150                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
151                 "/sys/class/dmi/id/sys_vendor",
152                 "/sys/class/dmi/id/board_vendor",
153                 "/sys/class/dmi/id/bios_vendor"
154         };
155
156         static const struct {
157                 const char *vendor;
158                 int id;
159         } dmi_vendor_table[] = {
160                 { "KVM",           VIRTUALIZATION_KVM       },
161                 { "QEMU",          VIRTUALIZATION_QEMU      },
162                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
163                 { "VMware",        VIRTUALIZATION_VMWARE    },
164                 { "VMW",           VIRTUALIZATION_VMWARE    },
165                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
166                 { "Xen",           VIRTUALIZATION_XEN       },
167                 { "Bochs",         VIRTUALIZATION_BOCHS     },
168                 { "Parallels",     VIRTUALIZATION_PARALLELS },
169                 /* https://wiki.freebsd.org/bhyve */
170                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
171         };
172         unsigned i;
173         int r;
174
175         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
176                 _cleanup_free_ char *s = NULL;
177                 unsigned j;
178
179                 r = read_one_line_file(dmi_vendors[i], &s);
180                 if (r < 0) {
181                         if (r == -ENOENT)
182                                 continue;
183
184                         return r;
185                 }
186
187                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
188                         if (startswith(s, dmi_vendor_table[j].vendor)) {
189                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
190                                 return dmi_vendor_table[j].id;
191                         }
192         }
193 #endif
194
195         log_debug("No virtualization found in DMI");
196
197         return VIRTUALIZATION_NONE;
198 }
199
200 static int detect_vm_xen(void) {
201
202         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
203            The presence of /proc/xen indicates some form of a Xen domain */
204         if (access("/proc/xen", F_OK) < 0) {
205                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
206                 return VIRTUALIZATION_NONE;
207         }
208
209         log_debug("Virtualization XEN found (/proc/xen exists)");
210         return VIRTUALIZATION_XEN;
211 }
212
213 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
214 #define PATH_FEATURES "/sys/hypervisor/properties/features"
215 /* Returns -errno, or 0 for domU, or 1 for dom0 */
216 static int detect_vm_xen_dom0(void) {
217         _cleanup_free_ char *domcap = NULL;
218         char *cap, *i;
219         int r;
220
221         r = read_one_line_file(PATH_FEATURES, &domcap);
222         if (r < 0 && r != -ENOENT)
223                 return r;
224         if (r == 0) {
225                 unsigned long features;
226
227                 /* Here, we need to use sscanf() instead of safe_atoul()
228                  * as the string lacks the leading "0x". */
229                 r = sscanf(domcap, "%lx", &features);
230                 if (r == 1) {
231                         r = !!(features & (1U << XENFEAT_dom0));
232                         log_debug("Virtualization XEN, found %s with value %08lx, "
233                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
234                                   PATH_FEATURES, features, r ? "" : " not");
235                         return r;
236                 }
237                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
238                           PATH_FEATURES, domcap);
239         }
240
241         r = read_one_line_file("/proc/xen/capabilities", &domcap);
242         if (r == -ENOENT) {
243                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
244                 return 0;
245         }
246         if (r < 0)
247                 return r;
248
249         i = domcap;
250         while ((cap = strsep(&i, ",")))
251                 if (streq(cap, "control_d"))
252                         break;
253         if (!cap) {
254                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
255                 return 0;
256         }
257
258         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
259         return 1;
260 }
261
262 static int detect_vm_hypervisor(void) {
263         _cleanup_free_ char *hvtype = NULL;
264         int r;
265
266         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
267         if (r == -ENOENT)
268                 return VIRTUALIZATION_NONE;
269         if (r < 0)
270                 return r;
271
272         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
273
274         if (streq(hvtype, "xen"))
275                 return VIRTUALIZATION_XEN;
276         else
277                 return VIRTUALIZATION_VM_OTHER;
278 }
279
280 static int detect_vm_uml(void) {
281         _cleanup_free_ char *cpuinfo_contents = NULL;
282         int r;
283
284         /* Detect User-Mode Linux by reading /proc/cpuinfo */
285         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
286         if (r == -ENOENT) {
287                 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
288                 return VIRTUALIZATION_NONE;
289         }
290         if (r < 0)
291                 return r;
292
293         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
294                 log_debug("UML virtualization found in /proc/cpuinfo");
295                 return VIRTUALIZATION_UML;
296         }
297
298         log_debug("UML virtualization not found in /proc/cpuinfo.");
299         return VIRTUALIZATION_NONE;
300 }
301
302 static int detect_vm_zvm(void) {
303
304 #if defined(__s390__)
305         _cleanup_free_ char *t = NULL;
306         int r;
307
308         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
309         if (r == -ENOENT)
310                 return VIRTUALIZATION_NONE;
311         if (r < 0)
312                 return r;
313
314         log_debug("Virtualization %s found in /proc/sysinfo", t);
315         if (streq(t, "z/VM"))
316                 return VIRTUALIZATION_ZVM;
317         else
318                 return VIRTUALIZATION_KVM;
319 #else
320         log_debug("This platform does not support /proc/sysinfo");
321         return VIRTUALIZATION_NONE;
322 #endif
323 }
324
/* Returns a short identifier for the various VM implementations.
 *
 * Runs the individual detect_vm_*() probes in a fixed order and returns the
 * first specific VIRTUALIZATION_* id found. A probe that only reports
 * VIRTUALIZATION_VM_OTHER is remembered and used as the answer only if no
 * later probe is more specific. A Xen result is downgraded to
 * VIRTUALIZATION_NONE when detect_vm_xen_dom0() says we are Dom0.
 *
 * The result is cached per thread once the "finish" label is reached; a
 * negative errno from any probe is returned immediately and is not cached. */
int detect_vm(void) {
        static thread_local int cached_found = _VIRTUALIZATION_INVALID;
        int r, dmi;
        bool other = false; /* set once any probe reported VIRTUALIZATION_VM_OTHER */

        /* NOTE(review): relies on _VIRTUALIZATION_INVALID being negative — confirm in virt.h */
        if (cached_found >= 0)
                return cached_found;

        /* We have to use the correct order here:
         *
         * → First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
         *   Hyper-V.
         *
         * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
         *   overwritten.
         *
         * → Third, try to detect from DMI. */

        /* The DMI probe runs first, but only an Oracle or Xen result is acted
         * upon right away; anything else (including an error) is kept in
         * "dmi" and evaluated after CPUID. */
        dmi = detect_vm_dmi();
        if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
                r = dmi;
                goto finish;
        }

        r = detect_vm_cpuid();
        if (r < 0)
                return r;
        if (r != VIRTUALIZATION_NONE) {
                if (r == VIRTUALIZATION_VM_OTHER)
                        other = true;
                else
                        goto finish;
        }

        /* Now evaluate the DMI result obtained above. */
        r = dmi;
        if (r < 0)
                return r;
        if (r != VIRTUALIZATION_NONE) {
                if (r == VIRTUALIZATION_VM_OTHER)
                        other = true;
                else
                        goto finish;
        }

        /* x86 xen will most likely be detected by cpuid. If not (most likely
         * because we're not an x86 guest), then we should try the /proc/xen
         * directory next. If that's not found, then we check for the high-level
         * hypervisor sysfs file.
         */

        r = detect_vm_xen();
        if (r < 0)
                return r;
        if (r != VIRTUALIZATION_NONE) {
                if (r == VIRTUALIZATION_VM_OTHER)
                        other = true;
                else
                        goto finish;
        }

        r = detect_vm_hypervisor();
        if (r < 0)
                return r;
        if (r != VIRTUALIZATION_NONE) {
                if (r == VIRTUALIZATION_VM_OTHER)
                        other = true;
                else
                        goto finish;
        }

        r = detect_vm_device_tree();
        if (r < 0)
                return r;
        if (r != VIRTUALIZATION_NONE) {
                if (r == VIRTUALIZATION_VM_OTHER)
                        other = true;
                else
                        goto finish;
        }

        r = detect_vm_uml();
        if (r < 0)
                return r;
        if (r != VIRTUALIZATION_NONE) {
                if (r == VIRTUALIZATION_VM_OTHER)
                        other = true;
                else
                        goto finish;
        }

        /* Last probe: its result is used as-is, falling through to "finish". */
        r = detect_vm_zvm();
        if (r < 0)
                return r;

finish:
        /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
         * In order to detect the Dom0 as not virtualization we need to
         * double-check it */
        if (r == VIRTUALIZATION_XEN) {
                int ret = detect_vm_xen_dom0();
                if (ret < 0)
                        return ret;
                if (ret > 0)
                        r = VIRTUALIZATION_NONE;
        } else if (r == VIRTUALIZATION_NONE && other)
                /* Nothing specific was found, but some probe saw an unknown VM. */
                r = VIRTUALIZATION_VM_OTHER;

        cached_found = r;
        log_debug("Found VM virtualization %s", virtualization_to_string(r));
        return r;
}
437 #endif // 0
438
439 int detect_container(void) {
440
441         static const struct {
442                 const char *value;
443                 int id;
444         } value_table[] = {
445                 { "lxc",            VIRTUALIZATION_LXC            },
446                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
447                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
448                 { "docker",         VIRTUALIZATION_DOCKER         },
449                 { "rkt",            VIRTUALIZATION_RKT            },
450         };
451
452         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
453         _cleanup_free_ char *m = NULL;
454         const char *e = NULL;
455         unsigned j;
456         int r;
457
458         if (cached_found >= 0)
459                 return cached_found;
460
461         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
462         if (access("/proc/vz", F_OK) >= 0 &&
463             access("/proc/bc", F_OK) < 0) {
464                 r = VIRTUALIZATION_OPENVZ;
465                 goto finish;
466         }
467
468         if (getpid_cached() == 1) {
469                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
470
471                 e = getenv("container");
472                 if (isempty(e)) {
473                         r = VIRTUALIZATION_NONE;
474                         goto finish;
475                 }
476
477                 goto translate_name;
478         }
479
480         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
481          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
482         r = read_one_line_file("/run/systemd/container", &m);
483         if (r >= 0) {
484                 e = m;
485                 goto translate_name;
486         }
487         if (r != -ENOENT)
488                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
489
490         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
491         r = getenv_for_pid(1, "container", &m);
492         if (r > 0) {
493                 e = m;
494                 goto translate_name;
495         }
496         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
497                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
498
499         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
500          * there is not 1, we know we are in a PID namespace. and hence a container. */
501         r = read_one_line_file("/proc/1/sched", &m);
502         if (r >= 0) {
503                 const char *t;
504
505                 t = strrchr(m, '(');
506                 if (!t)
507                         return -EIO;
508
509                 if (!startswith(t, "(1,")) {
510                         r = VIRTUALIZATION_CONTAINER_OTHER;
511                         goto finish;
512                 }
513         } else if (r != -ENOENT)
514                 return r;
515
516         /* If that didn't work, give up, assume no container manager. */
517         r = VIRTUALIZATION_NONE;
518         goto finish;
519
520 translate_name:
521         for (j = 0; j < ELEMENTSOF(value_table); j++)
522                 if (streq(e, value_table[j].value)) {
523                         r = value_table[j].id;
524                         goto finish;
525                 }
526
527         r = VIRTUALIZATION_CONTAINER_OTHER;
528
529 finish:
530         log_debug("Found container virtualization %s.", virtualization_to_string(r));
531         cached_found = r;
532         return r;
533 }
534
535 #if 0 /// UNNEEDED by elogind
/* Combined detection: container first, then VM.
 *
 * Any non-zero result of detect_container() (a container id or a negative
 * errno) is returned as-is; only a result of 0 lets detect_vm() decide. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
545
546 static int userns_has_mapping(const char *name) {
547         _cleanup_fclose_ FILE *f = NULL;
548         _cleanup_free_ char *buf = NULL;
549         size_t n_allocated = 0;
550         ssize_t n;
551         uint32_t a, b, c;
552         int r;
553
554         f = fopen(name, "re");
555         if (!f) {
556                 log_debug_errno(errno, "Failed to open %s: %m", name);
557                 return errno == ENOENT ? false : -errno;
558         }
559
560         n = getline(&buf, &n_allocated, f);
561         if (n < 0) {
562                 if (feof(f)) {
563                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
564                         return true;
565                 }
566
567                 return log_debug_errno(errno, "Failed to read %s: %m", name);
568         }
569
570         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
571         if (r < 3)
572                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
573
574         if (a == 0 && b == 0 && c == UINT32_MAX) {
575                 /* The kernel calls mappings_overlap() and does not allow overlaps */
576                 log_debug("%s has a full 1:1 mapping", name);
577                 return false;
578         }
579
580         /* Anything else implies that we are in a user namespace */
581         log_debug("Mapping found in %s, we're in a user namespace", name);
582         return true;
583 }
584
585 int running_in_userns(void) {
586         _cleanup_free_ char *line = NULL;
587         int r;
588
589         r = userns_has_mapping("/proc/self/uid_map");
590         if (r != 0)
591                 return r;
592
593         r = userns_has_mapping("/proc/self/gid_map");
594         if (r != 0)
595                 return r;
596
597         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
598          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
599          * also does not exist. We cannot distinguish those two cases, so assume that
600          * we're running on a stripped-down recent kernel, rather than on an old one,
601          * and if the file is not found, return false.
602          */
603         r = read_one_line_file("/proc/self/setgroups", &line);
604         if (r < 0) {
605                 log_debug_errno(r, "/proc/self/setgroups: %m");
606                 return r == -ENOENT ? false : r;
607         }
608
609         truncate_nl(line);
610         r = streq(line, "deny");
611         /* See user_namespaces(7) for a description of this "setgroups" contents. */
612         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
613         return r;
614 }
615 #endif // 0
616
/* Tells whether our root directory differs from that of PID 1.
 *
 * Returns 1 if "/proc/1/root" and "/" are not the same file, 0 if they are,
 * or the negative error reported by files_same(). */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        /* Identical roots mean we are not chrooted. */
        return same > 0 ? 0 : 1;
}
631
632 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
633         [VIRTUALIZATION_NONE] = "none",
634         [VIRTUALIZATION_KVM] = "kvm",
635         [VIRTUALIZATION_QEMU] = "qemu",
636         [VIRTUALIZATION_BOCHS] = "bochs",
637         [VIRTUALIZATION_XEN] = "xen",
638         [VIRTUALIZATION_UML] = "uml",
639         [VIRTUALIZATION_VMWARE] = "vmware",
640         [VIRTUALIZATION_ORACLE] = "oracle",
641         [VIRTUALIZATION_MICROSOFT] = "microsoft",
642         [VIRTUALIZATION_ZVM] = "zvm",
643         [VIRTUALIZATION_PARALLELS] = "parallels",
644         [VIRTUALIZATION_BHYVE] = "bhyve",
645         [VIRTUALIZATION_QNX] = "qnx",
646         [VIRTUALIZATION_VM_OTHER] = "vm-other",
647
648         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
649         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
650         [VIRTUALIZATION_LXC] = "lxc",
651         [VIRTUALIZATION_OPENVZ] = "openvz",
652         [VIRTUALIZATION_DOCKER] = "docker",
653         [VIRTUALIZATION_RKT] = "rkt",
654         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
655 };
656
657 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);