chiark / gitweb /
virt: detect QNX hypervisor Detect QNX hypervisor based on the CPUID.
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3   This file is part of systemd.
4
5   Copyright 2011 Lennart Poettering
6
7   systemd is free software; you can redistribute it and/or modify it
8   under the terms of the GNU Lesser General Public License as published by
9   the Free Software Foundation; either version 2.1 of the License, or
10   (at your option) any later version.
11
12   systemd is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public License
18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #if defined(__i386__) || defined(__x86_64__)
22 //#include <cpuid.h>
23 #endif
24 #include <errno.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29
30 #include "alloc-util.h"
31 #include "dirent-util.h"
32 #include "env-util.h"
33 #include "fd-util.h"
34 #include "fileio.h"
35 #include "macro.h"
36 #include "process-util.h"
37 #include "stat-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "virt.h"
41
42 #if 0 /// UNNEEDED by elogind
43 static int detect_vm_cpuid(void) {
44
45         /* CPUID is an x86 specific interface. */
46 #if defined(__i386__) || defined(__x86_64__)
47
48         static const struct {
49                 const char *cpuid;
50                 int id;
51         } cpuid_vendor_table[] = {
52                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
53                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
54                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
55                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
56                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
57                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
58                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
59                 /* https://wiki.freebsd.org/bhyve */
60                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
61                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
62         };
63
64         uint32_t eax, ebx, ecx, edx;
65         bool hypervisor;
66
67         /* http://lwn.net/Articles/301888/ */
68
69         /* First detect whether there is a hypervisor */
70         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
71                 return VIRTUALIZATION_NONE;
72
73         hypervisor = !!(ecx & 0x80000000U);
74
75         if (hypervisor) {
76                 union {
77                         uint32_t sig32[3];
78                         char text[13];
79                 } sig = {};
80                 unsigned j;
81
82                 /* There is a hypervisor, see what it is */
83                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
84
85                 sig.sig32[0] = ebx;
86                 sig.sig32[1] = ecx;
87                 sig.sig32[2] = edx;
88
89                 log_debug("Virtualization found, CPUID=%s", sig.text);
90
91                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
92                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
93                                 return cpuid_vendor_table[j].id;
94
95                 return VIRTUALIZATION_VM_OTHER;
96         }
97 #endif
98         log_debug("No virtualization found in CPUID");
99
100         return VIRTUALIZATION_NONE;
101 }
102
103 static int detect_vm_device_tree(void) {
104 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
105         _cleanup_free_ char *hvtype = NULL;
106         int r;
107
108         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
109         if (r == -ENOENT) {
110                 _cleanup_closedir_ DIR *dir = NULL;
111                 struct dirent *dent;
112
113                 dir = opendir("/proc/device-tree");
114                 if (!dir) {
115                         if (errno == ENOENT) {
116                                 log_debug_errno(errno, "/proc/device-tree: %m");
117                                 return VIRTUALIZATION_NONE;
118                         }
119                         return -errno;
120                 }
121
122                 FOREACH_DIRENT(dent, dir, return -errno)
123                         if (strstr(dent->d_name, "fw-cfg")) {
124                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
125                                 return VIRTUALIZATION_QEMU;
126                         }
127
128                 log_debug("No virtualization found in /proc/device-tree/*");
129                 return VIRTUALIZATION_NONE;
130         } else if (r < 0)
131                 return r;
132
133         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
134         if (streq(hvtype, "linux,kvm"))
135                 return VIRTUALIZATION_KVM;
136         else if (strstr(hvtype, "xen"))
137                 return VIRTUALIZATION_XEN;
138         else
139                 return VIRTUALIZATION_VM_OTHER;
140 #else
141         log_debug("This platform does not support /proc/device-tree");
142         return VIRTUALIZATION_NONE;
143 #endif
144 }
145
146 static int detect_vm_dmi(void) {
147 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
148
149         static const char *const dmi_vendors[] = {
150                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
151                 "/sys/class/dmi/id/sys_vendor",
152                 "/sys/class/dmi/id/board_vendor",
153                 "/sys/class/dmi/id/bios_vendor"
154         };
155
156         static const struct {
157                 const char *vendor;
158                 int id;
159         } dmi_vendor_table[] = {
160                 { "KVM",           VIRTUALIZATION_KVM       },
161                 { "QEMU",          VIRTUALIZATION_QEMU      },
162                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
163                 { "VMware",        VIRTUALIZATION_VMWARE    },
164                 { "VMW",           VIRTUALIZATION_VMWARE    },
165                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
166                 { "Xen",           VIRTUALIZATION_XEN       },
167                 { "Bochs",         VIRTUALIZATION_BOCHS     },
168                 { "Parallels",     VIRTUALIZATION_PARALLELS },
169                 /* https://wiki.freebsd.org/bhyve */
170                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
171         };
172         unsigned i;
173         int r;
174
175         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
176                 _cleanup_free_ char *s = NULL;
177                 unsigned j;
178
179                 r = read_one_line_file(dmi_vendors[i], &s);
180                 if (r < 0) {
181                         if (r == -ENOENT)
182                                 continue;
183
184                         return r;
185                 }
186
187                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
188                         if (startswith(s, dmi_vendor_table[j].vendor)) {
189                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
190                                 return dmi_vendor_table[j].id;
191                         }
192         }
193 #endif
194
195         log_debug("No virtualization found in DMI");
196
197         return VIRTUALIZATION_NONE;
198 }
199
200 static int detect_vm_xen(void) {
201
202         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
203            The presence of /proc/xen indicates some form of a Xen domain */
204         if (access("/proc/xen", F_OK) < 0) {
205                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
206                 return VIRTUALIZATION_NONE;
207         }
208
209         log_debug("Virtualization XEN found (/proc/xen exists)");
210         return VIRTUALIZATION_XEN;
211 }
212
213 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
214 #define PATH_FEATURES "/sys/hypervisor/properties/features"
215 /* Returns -errno, or 0 for domU, or 1 for dom0 */
216 static int detect_vm_xen_dom0(void) {
217         _cleanup_free_ char *domcap = NULL;
218         char *cap, *i;
219         int r;
220
221         r = read_one_line_file(PATH_FEATURES, &domcap);
222         if (r < 0 && r != -ENOENT)
223                 return r;
224         if (r == 0) {
225                 unsigned long features;
226
227                 /* Here, we need to use sscanf() instead of safe_atoul()
228                  * as the string lacks the leading "0x". */
229                 r = sscanf(domcap, "%lx", &features);
230                 if (r == 1) {
231                         r = !!(features & (1U << XENFEAT_dom0));
232                         log_debug("Virtualization XEN, found %s with value %08lx, "
233                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
234                                   PATH_FEATURES, features, r ? "" : " not");
235                         return r;
236                 }
237                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
238                           PATH_FEATURES, domcap);
239         }
240
241         r = read_one_line_file("/proc/xen/capabilities", &domcap);
242         if (r == -ENOENT) {
243                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
244                 return 0;
245         }
246         if (r < 0)
247                 return r;
248
249         i = domcap;
250         while ((cap = strsep(&i, ",")))
251                 if (streq(cap, "control_d"))
252                         break;
253         if (!cap) {
254                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
255                 return 0;
256         }
257
258         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
259         return 1;
260 }
261
262 static int detect_vm_hypervisor(void) {
263         _cleanup_free_ char *hvtype = NULL;
264         int r;
265
266         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
267         if (r == -ENOENT)
268                 return VIRTUALIZATION_NONE;
269         if (r < 0)
270                 return r;
271
272         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
273
274         if (streq(hvtype, "xen"))
275                 return VIRTUALIZATION_XEN;
276         else
277                 return VIRTUALIZATION_VM_OTHER;
278 }
279
280 static int detect_vm_uml(void) {
281         _cleanup_free_ char *cpuinfo_contents = NULL;
282         int r;
283
284         /* Detect User-Mode Linux by reading /proc/cpuinfo */
285         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
286         if (r < 0)
287                 return r;
288
289         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
290                 log_debug("UML virtualization found in /proc/cpuinfo");
291                 return VIRTUALIZATION_UML;
292         }
293
294         log_debug("No virtualization found in /proc/cpuinfo.");
295         return VIRTUALIZATION_NONE;
296 }
297
298 static int detect_vm_zvm(void) {
299
300 #if defined(__s390__)
301         _cleanup_free_ char *t = NULL;
302         int r;
303
304         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
305         if (r == -ENOENT)
306                 return VIRTUALIZATION_NONE;
307         if (r < 0)
308                 return r;
309
310         log_debug("Virtualization %s found in /proc/sysinfo", t);
311         if (streq(t, "z/VM"))
312                 return VIRTUALIZATION_ZVM;
313         else
314                 return VIRTUALIZATION_KVM;
315 #else
316         log_debug("This platform does not support /proc/sysinfo");
317         return VIRTUALIZATION_NONE;
318 #endif
319 }
320
321 /* Returns a short identifier for the various VM implementations */
322 int detect_vm(void) {
323         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
324         int r, dmi;
325         bool other = false;
326
327         if (cached_found >= 0)
328                 return cached_found;
329
330         /* We have to use the correct order here:
331          *
332          * -> First try to detect Oracle Virtualbox, even if it uses KVM.
333          * -> Second try to detect from cpuid, this will report KVM for
334          *    whatever software is used even if info in dmi is overwritten.
335          * -> Third try to detect from dmi. */
336
337         dmi = detect_vm_dmi();
338         if (dmi == VIRTUALIZATION_ORACLE) {
339                 r = dmi;
340                 goto finish;
341         }
342
343         r = detect_vm_cpuid();
344         if (r < 0)
345                 return r;
346         if (r != VIRTUALIZATION_NONE) {
347                 if (r == VIRTUALIZATION_VM_OTHER)
348                         other = true;
349                 else
350                         goto finish;
351         }
352
353         r = dmi;
354         if (r < 0)
355                 return r;
356         if (r != VIRTUALIZATION_NONE) {
357                 if (r == VIRTUALIZATION_VM_OTHER)
358                         other = true;
359                 else
360                         goto finish;
361         }
362
363         /* x86 xen will most likely be detected by cpuid. If not (most likely
364          * because we're not an x86 guest), then we should try the /proc/xen
365          * directory next. If that's not found, then we check for the high-level
366          * hypervisor sysfs file.
367          */
368
369         r = detect_vm_xen();
370         if (r < 0)
371                 return r;
372         if (r != VIRTUALIZATION_NONE) {
373                 if (r == VIRTUALIZATION_VM_OTHER)
374                         other = true;
375                 else
376                         goto finish;
377         }
378
379         r = detect_vm_hypervisor();
380         if (r < 0)
381                 return r;
382         if (r != VIRTUALIZATION_NONE) {
383                 if (r == VIRTUALIZATION_VM_OTHER)
384                         other = true;
385                 else
386                         goto finish;
387         }
388
389         r = detect_vm_device_tree();
390         if (r < 0)
391                 return r;
392         if (r != VIRTUALIZATION_NONE) {
393                 if (r == VIRTUALIZATION_VM_OTHER)
394                         other = true;
395                 else
396                         goto finish;
397         }
398
399         r = detect_vm_uml();
400         if (r < 0)
401                 return r;
402         if (r != VIRTUALIZATION_NONE) {
403                 if (r == VIRTUALIZATION_VM_OTHER)
404                         other = true;
405                 else
406                         goto finish;
407         }
408
409         r = detect_vm_zvm();
410         if (r < 0)
411                 return r;
412
413 finish:
414         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
415          * In order to detect the Dom0 as not virtualization we need to
416          * double-check it */
417         if (r == VIRTUALIZATION_XEN) {
418                 int ret = detect_vm_xen_dom0();
419                 if (ret < 0)
420                         return ret;
421                 if (ret > 0)
422                         r = VIRTUALIZATION_NONE;
423         } else if (r == VIRTUALIZATION_NONE && other)
424                 r = VIRTUALIZATION_VM_OTHER;
425
426         cached_found = r;
427         log_debug("Found VM virtualization %s", virtualization_to_string(r));
428         return r;
429 }
430 #endif // 0
431
432 int detect_container(void) {
433
434         static const struct {
435                 const char *value;
436                 int id;
437         } value_table[] = {
438                 { "lxc",            VIRTUALIZATION_LXC            },
439                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
440                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
441                 { "docker",         VIRTUALIZATION_DOCKER         },
442                 { "rkt",            VIRTUALIZATION_RKT            },
443         };
444
445         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
446         _cleanup_free_ char *m = NULL;
447         const char *e = NULL;
448         unsigned j;
449         int r;
450
451         if (cached_found >= 0)
452                 return cached_found;
453
454         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
455         if (access("/proc/vz", F_OK) >= 0 &&
456             access("/proc/bc", F_OK) < 0) {
457                 r = VIRTUALIZATION_OPENVZ;
458                 goto finish;
459         }
460
461         if (getpid_cached() == 1) {
462                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
463
464                 e = getenv("container");
465                 if (isempty(e)) {
466                         r = VIRTUALIZATION_NONE;
467                         goto finish;
468                 }
469
470                 goto translate_name;
471         }
472
473         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
474          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
475         r = read_one_line_file("/run/systemd/container", &m);
476         if (r >= 0) {
477                 e = m;
478                 goto translate_name;
479         }
480         if (r != -ENOENT)
481                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
482
483         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
484         r = getenv_for_pid(1, "container", &m);
485         if (r > 0) {
486                 e = m;
487                 goto translate_name;
488         }
489         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
490                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
491
492         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
493          * there is not 1, we know we are in a PID namespace. and hence a container. */
494         r = read_one_line_file("/proc/1/sched", &m);
495         if (r >= 0) {
496                 const char *t;
497
498                 t = strrchr(m, '(');
499                 if (!t)
500                         return -EIO;
501
502                 if (!startswith(t, "(1,")) {
503                         r = VIRTUALIZATION_CONTAINER_OTHER;
504                         goto finish;
505                 }
506         } else if (r != -ENOENT)
507                 return r;
508
509         /* If that didn't work, give up, assume no container manager. */
510         r = VIRTUALIZATION_NONE;
511         goto finish;
512
513 translate_name:
514         for (j = 0; j < ELEMENTSOF(value_table); j++)
515                 if (streq(e, value_table[j].value)) {
516                         r = value_table[j].id;
517                         goto finish;
518                 }
519
520         r = VIRTUALIZATION_CONTAINER_OTHER;
521
522 finish:
523         log_debug("Found container virtualization %s.", virtualization_to_string(r));
524         cached_found = r;
525         return r;
526 }
527
528 #if 0 /// UNNEEDED by elogind
/* Combined detection: container detection takes precedence.
 *
 * Any non-zero answer of detect_container(), be it a container id or a
 * negative error, is returned unchanged. Only when it returns 0 is the answer
 * of detect_vm() returned instead. */
int detect_virtualization(void) {
        int r = detect_container();

        if (r != 0)
                return r;

        return detect_vm();
}
538
539 static int userns_has_mapping(const char *name) {
540         _cleanup_fclose_ FILE *f = NULL;
541         _cleanup_free_ char *buf = NULL;
542         size_t n_allocated = 0;
543         ssize_t n;
544         uint32_t a, b, c;
545         int r;
546
547         f = fopen(name, "re");
548         if (!f) {
549                 log_debug_errno(errno, "Failed to open %s: %m", name);
550                 return errno == ENOENT ? false : -errno;
551         }
552
553         n = getline(&buf, &n_allocated, f);
554         if (n < 0) {
555                 if (feof(f)) {
556                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
557                         return true;
558                 }
559
560                 return log_debug_errno(errno, "Failed to read %s: %m", name);
561         }
562
563         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
564         if (r < 3)
565                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
566
567         if (a == 0 && b == 0 && c == UINT32_MAX) {
568                 /* The kernel calls mappings_overlap() and does not allow overlaps */
569                 log_debug("%s has a full 1:1 mapping", name);
570                 return false;
571         }
572
573         /* Anything else implies that we are in a user namespace */
574         log_debug("Mapping found in %s, we're in a user namespace", name);
575         return true;
576 }
577
578 int running_in_userns(void) {
579         _cleanup_free_ char *line = NULL;
580         int r;
581
582         r = userns_has_mapping("/proc/self/uid_map");
583         if (r != 0)
584                 return r;
585
586         r = userns_has_mapping("/proc/self/gid_map");
587         if (r != 0)
588                 return r;
589
590         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
591          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
592          * also does not exist. We cannot distinguish those two cases, so assume that
593          * we're running on a stripped-down recent kernel, rather than on an old one,
594          * and if the file is not found, return false.
595          */
596         r = read_one_line_file("/proc/self/setgroups", &line);
597         if (r < 0) {
598                 log_debug_errno(r, "/proc/self/setgroups: %m");
599                 return r == -ENOENT ? false : r;
600         }
601
602         truncate_nl(line);
603         r = streq(line, "deny");
604         /* See user_namespaces(7) for a description of this "setgroups" contents. */
605         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
606         return r;
607 }
608 #endif // 0
609
/* Compares the root directory of PID 1 with our own root directory.
 *
 * Returns 1 if files_same() reports 0 for "/proc/1/root" and "/" (presumably
 * meaning the two differ, i.e. we are chrooted), 0 if it reports a positive
 * value, and the negative value itself if files_same() fails. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);

        return same < 0 ? same : same == 0;
}
624
625 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
626         [VIRTUALIZATION_NONE] = "none",
627         [VIRTUALIZATION_KVM] = "kvm",
628         [VIRTUALIZATION_QEMU] = "qemu",
629         [VIRTUALIZATION_BOCHS] = "bochs",
630         [VIRTUALIZATION_XEN] = "xen",
631         [VIRTUALIZATION_UML] = "uml",
632         [VIRTUALIZATION_VMWARE] = "vmware",
633         [VIRTUALIZATION_ORACLE] = "oracle",
634         [VIRTUALIZATION_MICROSOFT] = "microsoft",
635         [VIRTUALIZATION_ZVM] = "zvm",
636         [VIRTUALIZATION_PARALLELS] = "parallels",
637         [VIRTUALIZATION_BHYVE] = "bhyve",
638         [VIRTUALIZATION_QNX] = "qnx",
639         [VIRTUALIZATION_VM_OTHER] = "vm-other",
640
641         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
642         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
643         [VIRTUALIZATION_LXC] = "lxc",
644         [VIRTUALIZATION_OPENVZ] = "openvz",
645         [VIRTUALIZATION_DOCKER] = "docker",
646         [VIRTUALIZATION_RKT] = "rkt",
647         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
648 };
649
650 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);