chiark / gitweb /
3b2f1b9172d1b944b2d56175711c770867374abe
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3   This file is part of systemd.
4
5   Copyright 2011 Lennart Poettering
6 ***/
7
8 #if defined(__i386__) || defined(__x86_64__)
9 //#include <cpuid.h>
10 #endif
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <unistd.h>
16
17 #include "alloc-util.h"
18 #include "dirent-util.h"
19 #include "env-util.h"
20 #include "fd-util.h"
21 #include "fileio.h"
22 #include "macro.h"
23 #include "process-util.h"
24 #include "stat-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "virt.h"
28
29 #if 0 /// UNNEEDED by elogind
30 static int detect_vm_cpuid(void) {
31
32         /* CPUID is an x86 specific interface. */
33 #if defined(__i386__) || defined(__x86_64__)
34
35         static const struct {
36                 const char *cpuid;
37                 int id;
38         } cpuid_vendor_table[] = {
39                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
40                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
41                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
42                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
43                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
44                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
45                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
46                 /* https://wiki.freebsd.org/bhyve */
47                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
48                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
49         };
50
51         uint32_t eax, ebx, ecx, edx;
52         bool hypervisor;
53
54         /* http://lwn.net/Articles/301888/ */
55
56         /* First detect whether there is a hypervisor */
57         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
58                 return VIRTUALIZATION_NONE;
59
60         hypervisor = !!(ecx & 0x80000000U);
61
62         if (hypervisor) {
63                 union {
64                         uint32_t sig32[3];
65                         char text[13];
66                 } sig = {};
67                 unsigned j;
68
69                 /* There is a hypervisor, see what it is */
70                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
71
72                 sig.sig32[0] = ebx;
73                 sig.sig32[1] = ecx;
74                 sig.sig32[2] = edx;
75
76                 log_debug("Virtualization found, CPUID=%s", sig.text);
77
78                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
79                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
80                                 return cpuid_vendor_table[j].id;
81
82                 return VIRTUALIZATION_VM_OTHER;
83         }
84 #endif
85         log_debug("No virtualization found in CPUID");
86
87         return VIRTUALIZATION_NONE;
88 }
89
90 static int detect_vm_device_tree(void) {
91 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
92         _cleanup_free_ char *hvtype = NULL;
93         int r;
94
95         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
96         if (r == -ENOENT) {
97                 _cleanup_closedir_ DIR *dir = NULL;
98                 struct dirent *dent;
99
100                 dir = opendir("/proc/device-tree");
101                 if (!dir) {
102                         if (errno == ENOENT) {
103                                 log_debug_errno(errno, "/proc/device-tree: %m");
104                                 return VIRTUALIZATION_NONE;
105                         }
106                         return -errno;
107                 }
108
109                 FOREACH_DIRENT(dent, dir, return -errno)
110                         if (strstr(dent->d_name, "fw-cfg")) {
111                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
112                                 return VIRTUALIZATION_QEMU;
113                         }
114
115                 log_debug("No virtualization found in /proc/device-tree/*");
116                 return VIRTUALIZATION_NONE;
117         } else if (r < 0)
118                 return r;
119
120         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
121         if (streq(hvtype, "linux,kvm"))
122                 return VIRTUALIZATION_KVM;
123         else if (strstr(hvtype, "xen"))
124                 return VIRTUALIZATION_XEN;
125         else
126                 return VIRTUALIZATION_VM_OTHER;
127 #else
128         log_debug("This platform does not support /proc/device-tree");
129         return VIRTUALIZATION_NONE;
130 #endif
131 }
132
133 static int detect_vm_dmi(void) {
134 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
135
136         static const char *const dmi_vendors[] = {
137                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
138                 "/sys/class/dmi/id/sys_vendor",
139                 "/sys/class/dmi/id/board_vendor",
140                 "/sys/class/dmi/id/bios_vendor"
141         };
142
143         static const struct {
144                 const char *vendor;
145                 int id;
146         } dmi_vendor_table[] = {
147                 { "KVM",           VIRTUALIZATION_KVM       },
148                 { "QEMU",          VIRTUALIZATION_QEMU      },
149                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
150                 { "VMware",        VIRTUALIZATION_VMWARE    },
151                 { "VMW",           VIRTUALIZATION_VMWARE    },
152                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
153                 { "Xen",           VIRTUALIZATION_XEN       },
154                 { "Bochs",         VIRTUALIZATION_BOCHS     },
155                 { "Parallels",     VIRTUALIZATION_PARALLELS },
156                 /* https://wiki.freebsd.org/bhyve */
157                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
158         };
159         unsigned i;
160         int r;
161
162         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
163                 _cleanup_free_ char *s = NULL;
164                 unsigned j;
165
166                 r = read_one_line_file(dmi_vendors[i], &s);
167                 if (r < 0) {
168                         if (r == -ENOENT)
169                                 continue;
170
171                         return r;
172                 }
173
174                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
175                         if (startswith(s, dmi_vendor_table[j].vendor)) {
176                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
177                                 return dmi_vendor_table[j].id;
178                         }
179         }
180 #endif
181
182         log_debug("No virtualization found in DMI");
183
184         return VIRTUALIZATION_NONE;
185 }
186
187 static int detect_vm_xen(void) {
188
189         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
190            The presence of /proc/xen indicates some form of a Xen domain */
191         if (access("/proc/xen", F_OK) < 0) {
192                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
193                 return VIRTUALIZATION_NONE;
194         }
195
196         log_debug("Virtualization XEN found (/proc/xen exists)");
197         return VIRTUALIZATION_XEN;
198 }
199
200 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
201 #define PATH_FEATURES "/sys/hypervisor/properties/features"
202 /* Returns -errno, or 0 for domU, or 1 for dom0 */
203 static int detect_vm_xen_dom0(void) {
204         _cleanup_free_ char *domcap = NULL;
205         char *cap, *i;
206         int r;
207
208         r = read_one_line_file(PATH_FEATURES, &domcap);
209         if (r < 0 && r != -ENOENT)
210                 return r;
211         if (r == 0) {
212                 unsigned long features;
213
214                 /* Here, we need to use sscanf() instead of safe_atoul()
215                  * as the string lacks the leading "0x". */
216                 r = sscanf(domcap, "%lx", &features);
217                 if (r == 1) {
218                         r = !!(features & (1U << XENFEAT_dom0));
219                         log_debug("Virtualization XEN, found %s with value %08lx, "
220                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
221                                   PATH_FEATURES, features, r ? "" : " not");
222                         return r;
223                 }
224                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
225                           PATH_FEATURES, domcap);
226         }
227
228         r = read_one_line_file("/proc/xen/capabilities", &domcap);
229         if (r == -ENOENT) {
230                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
231                 return 0;
232         }
233         if (r < 0)
234                 return r;
235
236         i = domcap;
237         while ((cap = strsep(&i, ",")))
238                 if (streq(cap, "control_d"))
239                         break;
240         if (!cap) {
241                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
242                 return 0;
243         }
244
245         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
246         return 1;
247 }
248
249 static int detect_vm_hypervisor(void) {
250         _cleanup_free_ char *hvtype = NULL;
251         int r;
252
253         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
254         if (r == -ENOENT)
255                 return VIRTUALIZATION_NONE;
256         if (r < 0)
257                 return r;
258
259         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
260
261         if (streq(hvtype, "xen"))
262                 return VIRTUALIZATION_XEN;
263         else
264                 return VIRTUALIZATION_VM_OTHER;
265 }
266
267 static int detect_vm_uml(void) {
268         _cleanup_free_ char *cpuinfo_contents = NULL;
269         int r;
270
271         /* Detect User-Mode Linux by reading /proc/cpuinfo */
272         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
273         if (r == -ENOENT) {
274                 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
275                 return VIRTUALIZATION_NONE;
276         }
277         if (r < 0)
278                 return r;
279
280         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
281                 log_debug("UML virtualization found in /proc/cpuinfo");
282                 return VIRTUALIZATION_UML;
283         }
284
285         log_debug("UML virtualization not found in /proc/cpuinfo.");
286         return VIRTUALIZATION_NONE;
287 }
288
289 static int detect_vm_zvm(void) {
290
291 #if defined(__s390__)
292         _cleanup_free_ char *t = NULL;
293         int r;
294
295         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
296         if (r == -ENOENT)
297                 return VIRTUALIZATION_NONE;
298         if (r < 0)
299                 return r;
300
301         log_debug("Virtualization %s found in /proc/sysinfo", t);
302         if (streq(t, "z/VM"))
303                 return VIRTUALIZATION_ZVM;
304         else
305                 return VIRTUALIZATION_KVM;
306 #else
307         log_debug("This platform does not support /proc/sysinfo");
308         return VIRTUALIZATION_NONE;
309 #endif
310 }
311
312 /* Returns a short identifier for the various VM implementations */
313 int detect_vm(void) {
314         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
315         bool other = false;
316         int r, dmi;
317
318         if (cached_found >= 0)
319                 return cached_found;
320
321         /* We have to use the correct order here:
322          *
323          * â†’ First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
324          *   Hyper-V.
325          *
326          * â†’ Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
327          *   overwritten.
328          *
329          * â†’ Third, try to detect from DMI. */
330
331         dmi = detect_vm_dmi();
332         if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
333                 r = dmi;
334                 goto finish;
335         }
336
337         r = detect_vm_cpuid();
338         if (r < 0)
339                 return r;
340         if (r == VIRTUALIZATION_VM_OTHER)
341                 other = true;
342         else if (r != VIRTUALIZATION_NONE)
343                 goto finish;
344
345         /* Now, let's get back to DMI */
346         if (dmi < 0)
347                 return dmi;
348         if (dmi == VIRTUALIZATION_VM_OTHER)
349                 other = true;
350         else if (dmi != VIRTUALIZATION_NONE) {
351                 r = dmi;
352                 goto finish;
353         }
354
355         /* x86 xen will most likely be detected by cpuid. If not (most likely
356          * because we're not an x86 guest), then we should try the /proc/xen
357          * directory next. If that's not found, then we check for the high-level
358          * hypervisor sysfs file.
359          */
360
361         r = detect_vm_xen();
362         if (r < 0)
363                 return r;
364         if (r == VIRTUALIZATION_VM_OTHER)
365                 other = true;
366         else if (r != VIRTUALIZATION_NONE)
367                 goto finish;
368
369         r = detect_vm_hypervisor();
370         if (r < 0)
371                 return r;
372         if (r == VIRTUALIZATION_VM_OTHER)
373                 other = true;
374         else if (r != VIRTUALIZATION_NONE)
375                 goto finish;
376
377         r = detect_vm_device_tree();
378         if (r < 0)
379                 return r;
380         if (r == VIRTUALIZATION_VM_OTHER)
381                 other = true;
382         else if (r != VIRTUALIZATION_NONE)
383                 goto finish;
384
385         r = detect_vm_uml();
386         if (r < 0)
387                 return r;
388         if (r == VIRTUALIZATION_VM_OTHER)
389                 other = true;
390         else if (r != VIRTUALIZATION_NONE)
391                 goto finish;
392
393         r = detect_vm_zvm();
394         if (r < 0)
395                 return r;
396
397 finish:
398         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
399          * In order to detect the Dom0 as not virtualization we need to
400          * double-check it */
401         if (r == VIRTUALIZATION_XEN) {
402                 int dom0;
403
404                 dom0 = detect_vm_xen_dom0();
405                 if (dom0 < 0)
406                         return dom0;
407                 if (dom0 > 0)
408                         r = VIRTUALIZATION_NONE;
409         } else if (r == VIRTUALIZATION_NONE && other)
410                 r = VIRTUALIZATION_VM_OTHER;
411
412         cached_found = r;
413         log_debug("Found VM virtualization %s", virtualization_to_string(r));
414         return r;
415 }
416 #endif // 0
417
418 int detect_container(void) {
419
420         static const struct {
421                 const char *value;
422                 int id;
423         } value_table[] = {
424                 { "lxc",            VIRTUALIZATION_LXC            },
425                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
426                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
427                 { "docker",         VIRTUALIZATION_DOCKER         },
428                 { "rkt",            VIRTUALIZATION_RKT            },
429         };
430
431         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
432         _cleanup_free_ char *m = NULL;
433         const char *e = NULL;
434         unsigned j;
435         int r;
436
437         if (cached_found >= 0)
438                 return cached_found;
439
440         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
441         if (access("/proc/vz", F_OK) >= 0 &&
442             access("/proc/bc", F_OK) < 0) {
443                 r = VIRTUALIZATION_OPENVZ;
444                 goto finish;
445         }
446
447         if (getpid_cached() == 1) {
448                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
449
450                 e = getenv("container");
451                 if (isempty(e)) {
452                         r = VIRTUALIZATION_NONE;
453                         goto finish;
454                 }
455
456                 goto translate_name;
457         }
458
459         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
460          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
461         r = read_one_line_file("/run/systemd/container", &m);
462         if (r >= 0) {
463                 e = m;
464                 goto translate_name;
465         }
466         if (r != -ENOENT)
467                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
468
469         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
470         r = getenv_for_pid(1, "container", &m);
471         if (r > 0) {
472                 e = m;
473                 goto translate_name;
474         }
475         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
476                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
477
478         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
479          * there is not 1, we know we are in a PID namespace. and hence a container. */
480         r = read_one_line_file("/proc/1/sched", &m);
481         if (r >= 0) {
482                 const char *t;
483
484                 t = strrchr(m, '(');
485                 if (!t)
486                         return -EIO;
487
488                 if (!startswith(t, "(1,")) {
489                         r = VIRTUALIZATION_CONTAINER_OTHER;
490                         goto finish;
491                 }
492         } else if (r != -ENOENT)
493                 return r;
494
495         /* If that didn't work, give up, assume no container manager. */
496         r = VIRTUALIZATION_NONE;
497         goto finish;
498
499 translate_name:
500         for (j = 0; j < ELEMENTSOF(value_table); j++)
501                 if (streq(e, value_table[j].value)) {
502                         r = value_table[j].id;
503                         goto finish;
504                 }
505
506         r = VIRTUALIZATION_CONTAINER_OTHER;
507
508 finish:
509         log_debug("Found container virtualization %s.", virtualization_to_string(r));
510         cached_found = r;
511         return r;
512 }
513
514 #if 0 /// UNNEEDED by elogind
/* Combined check: container detection first, VM detection only if that returned 0.
 * Any non-zero container result, including a negative error, is returned as is. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
524
525 static int userns_has_mapping(const char *name) {
526         _cleanup_fclose_ FILE *f = NULL;
527         _cleanup_free_ char *buf = NULL;
528         size_t n_allocated = 0;
529         ssize_t n;
530         uint32_t a, b, c;
531         int r;
532
533         f = fopen(name, "re");
534         if (!f) {
535                 log_debug_errno(errno, "Failed to open %s: %m", name);
536                 return errno == ENOENT ? false : -errno;
537         }
538
539         n = getline(&buf, &n_allocated, f);
540         if (n < 0) {
541                 if (feof(f)) {
542                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
543                         return true;
544                 }
545
546                 return log_debug_errno(errno, "Failed to read %s: %m", name);
547         }
548
549         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
550         if (r < 3)
551                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
552
553         if (a == 0 && b == 0 && c == UINT32_MAX) {
554                 /* The kernel calls mappings_overlap() and does not allow overlaps */
555                 log_debug("%s has a full 1:1 mapping", name);
556                 return false;
557         }
558
559         /* Anything else implies that we are in a user namespace */
560         log_debug("Mapping found in %s, we're in a user namespace", name);
561         return true;
562 }
563
564 int running_in_userns(void) {
565         _cleanup_free_ char *line = NULL;
566         int r;
567
568         r = userns_has_mapping("/proc/self/uid_map");
569         if (r != 0)
570                 return r;
571
572         r = userns_has_mapping("/proc/self/gid_map");
573         if (r != 0)
574                 return r;
575
576         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
577          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
578          * also does not exist. We cannot distinguish those two cases, so assume that
579          * we're running on a stripped-down recent kernel, rather than on an old one,
580          * and if the file is not found, return false.
581          */
582         r = read_one_line_file("/proc/self/setgroups", &line);
583         if (r < 0) {
584                 log_debug_errno(r, "/proc/self/setgroups: %m");
585                 return r == -ENOENT ? false : r;
586         }
587
588         truncate_nl(line);
589         r = streq(line, "deny");
590         /* See user_namespaces(7) for a description of this "setgroups" contents. */
591         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
592         return r;
593 }
594 #endif // 0
595
/* Compares the root directory of PID 1 with our own via files_same().
 *
 * Returns 1 if they differ (we are chrooted), 0 if they are the same, and a negative
 * value if the comparison itself failed. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        /* files_same() reported "not the same" (0) → chroot; "same" (> 0) → no chroot */
        return !same;
}
610
611 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
612         [VIRTUALIZATION_NONE] = "none",
613         [VIRTUALIZATION_KVM] = "kvm",
614         [VIRTUALIZATION_QEMU] = "qemu",
615         [VIRTUALIZATION_BOCHS] = "bochs",
616         [VIRTUALIZATION_XEN] = "xen",
617         [VIRTUALIZATION_UML] = "uml",
618         [VIRTUALIZATION_VMWARE] = "vmware",
619         [VIRTUALIZATION_ORACLE] = "oracle",
620         [VIRTUALIZATION_MICROSOFT] = "microsoft",
621         [VIRTUALIZATION_ZVM] = "zvm",
622         [VIRTUALIZATION_PARALLELS] = "parallels",
623         [VIRTUALIZATION_BHYVE] = "bhyve",
624         [VIRTUALIZATION_QNX] = "qnx",
625         [VIRTUALIZATION_VM_OTHER] = "vm-other",
626
627         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
628         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
629         [VIRTUALIZATION_LXC] = "lxc",
630         [VIRTUALIZATION_OPENVZ] = "openvz",
631         [VIRTUALIZATION_DOCKER] = "docker",
632         [VIRTUALIZATION_RKT] = "rkt",
633         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
634 };
635
636 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);