chiark / gitweb /
529722a2adc2ca9d87bb90d781314cd4a5084efc
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3   Copyright 2011 Lennart Poettering
4 ***/
5
6 #if defined(__i386__) || defined(__x86_64__)
7 //#include <cpuid.h>
8 #endif
9 #include <errno.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <unistd.h>
14
15 #include "alloc-util.h"
16 #include "dirent-util.h"
17 #include "env-util.h"
18 #include "fd-util.h"
19 #include "fileio.h"
20 #include "macro.h"
21 #include "process-util.h"
22 #include "stat-util.h"
23 #include "string-table.h"
24 #include "string-util.h"
25 #include "virt.h"
26
27 #if 0 /// UNNEEDED by elogind
28 static int detect_vm_cpuid(void) {
29
30         /* CPUID is an x86 specific interface. */
31 #if defined(__i386__) || defined(__x86_64__)
32
33         static const struct {
34                 const char *cpuid;
35                 int id;
36         } cpuid_vendor_table[] = {
37                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
38                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
39                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
40                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
41                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
42                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
43                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
44                 /* https://wiki.freebsd.org/bhyve */
45                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
46                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
47         };
48
49         uint32_t eax, ebx, ecx, edx;
50         bool hypervisor;
51
52         /* http://lwn.net/Articles/301888/ */
53
54         /* First detect whether there is a hypervisor */
55         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
56                 return VIRTUALIZATION_NONE;
57
58         hypervisor = ecx & 0x80000000U;
59
60         if (hypervisor) {
61                 union {
62                         uint32_t sig32[3];
63                         char text[13];
64                 } sig = {};
65                 unsigned j;
66
67                 /* There is a hypervisor, see what it is */
68                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
69
70                 sig.sig32[0] = ebx;
71                 sig.sig32[1] = ecx;
72                 sig.sig32[2] = edx;
73
74                 log_debug("Virtualization found, CPUID=%s", sig.text);
75
76                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
77                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
78                                 return cpuid_vendor_table[j].id;
79
80                 return VIRTUALIZATION_VM_OTHER;
81         }
82 #endif
83         log_debug("No virtualization found in CPUID");
84
85         return VIRTUALIZATION_NONE;
86 }
87
88 static int detect_vm_device_tree(void) {
89 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
90         _cleanup_free_ char *hvtype = NULL;
91         int r;
92
93         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
94         if (r == -ENOENT) {
95                 _cleanup_closedir_ DIR *dir = NULL;
96                 struct dirent *dent;
97
98                 dir = opendir("/proc/device-tree");
99                 if (!dir) {
100                         if (errno == ENOENT) {
101                                 log_debug_errno(errno, "/proc/device-tree: %m");
102                                 return VIRTUALIZATION_NONE;
103                         }
104                         return -errno;
105                 }
106
107                 FOREACH_DIRENT(dent, dir, return -errno)
108                         if (strstr(dent->d_name, "fw-cfg")) {
109                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
110                                 return VIRTUALIZATION_QEMU;
111                         }
112
113                 log_debug("No virtualization found in /proc/device-tree/*");
114                 return VIRTUALIZATION_NONE;
115         } else if (r < 0)
116                 return r;
117
118         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
119         if (streq(hvtype, "linux,kvm"))
120                 return VIRTUALIZATION_KVM;
121         else if (strstr(hvtype, "xen"))
122                 return VIRTUALIZATION_XEN;
123         else
124                 return VIRTUALIZATION_VM_OTHER;
125 #else
126         log_debug("This platform does not support /proc/device-tree");
127         return VIRTUALIZATION_NONE;
128 #endif
129 }
130
131 static int detect_vm_dmi(void) {
132 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
133
134         static const char *const dmi_vendors[] = {
135                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
136                 "/sys/class/dmi/id/sys_vendor",
137                 "/sys/class/dmi/id/board_vendor",
138                 "/sys/class/dmi/id/bios_vendor"
139         };
140
141         static const struct {
142                 const char *vendor;
143                 int id;
144         } dmi_vendor_table[] = {
145                 { "KVM",           VIRTUALIZATION_KVM       },
146                 { "QEMU",          VIRTUALIZATION_QEMU      },
147                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
148                 { "VMware",        VIRTUALIZATION_VMWARE    },
149                 { "VMW",           VIRTUALIZATION_VMWARE    },
150                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
151                 { "Xen",           VIRTUALIZATION_XEN       },
152                 { "Bochs",         VIRTUALIZATION_BOCHS     },
153                 { "Parallels",     VIRTUALIZATION_PARALLELS },
154                 /* https://wiki.freebsd.org/bhyve */
155                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
156         };
157         unsigned i;
158         int r;
159
160         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
161                 _cleanup_free_ char *s = NULL;
162                 unsigned j;
163
164                 r = read_one_line_file(dmi_vendors[i], &s);
165                 if (r < 0) {
166                         if (r == -ENOENT)
167                                 continue;
168
169                         return r;
170                 }
171
172                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
173                         if (startswith(s, dmi_vendor_table[j].vendor)) {
174                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
175                                 return dmi_vendor_table[j].id;
176                         }
177         }
178 #endif
179
180         log_debug("No virtualization found in DMI");
181
182         return VIRTUALIZATION_NONE;
183 }
184
185 static int detect_vm_xen(void) {
186
187         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
188            The presence of /proc/xen indicates some form of a Xen domain */
189         if (access("/proc/xen", F_OK) < 0) {
190                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
191                 return VIRTUALIZATION_NONE;
192         }
193
194         log_debug("Virtualization XEN found (/proc/xen exists)");
195         return VIRTUALIZATION_XEN;
196 }
197
198 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
199 #define PATH_FEATURES "/sys/hypervisor/properties/features"
200 /* Returns -errno, or 0 for domU, or 1 for dom0 */
201 static int detect_vm_xen_dom0(void) {
202         _cleanup_free_ char *domcap = NULL;
203         char *cap, *i;
204         int r;
205
206         r = read_one_line_file(PATH_FEATURES, &domcap);
207         if (r < 0 && r != -ENOENT)
208                 return r;
209         if (r == 0) {
210                 unsigned long features;
211
212                 /* Here, we need to use sscanf() instead of safe_atoul()
213                  * as the string lacks the leading "0x". */
214                 r = sscanf(domcap, "%lx", &features);
215                 if (r == 1) {
216                         r = !!(features & (1U << XENFEAT_dom0));
217                         log_debug("Virtualization XEN, found %s with value %08lx, "
218                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
219                                   PATH_FEATURES, features, r ? "" : " not");
220                         return r;
221                 }
222                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
223                           PATH_FEATURES, domcap);
224         }
225
226         r = read_one_line_file("/proc/xen/capabilities", &domcap);
227         if (r == -ENOENT) {
228                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
229                 return 0;
230         }
231         if (r < 0)
232                 return r;
233
234         i = domcap;
235         while ((cap = strsep(&i, ",")))
236                 if (streq(cap, "control_d"))
237                         break;
238         if (!cap) {
239                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
240                 return 0;
241         }
242
243         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
244         return 1;
245 }
246
247 static int detect_vm_hypervisor(void) {
248         _cleanup_free_ char *hvtype = NULL;
249         int r;
250
251         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
252         if (r == -ENOENT)
253                 return VIRTUALIZATION_NONE;
254         if (r < 0)
255                 return r;
256
257         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
258
259         if (streq(hvtype, "xen"))
260                 return VIRTUALIZATION_XEN;
261         else
262                 return VIRTUALIZATION_VM_OTHER;
263 }
264
265 static int detect_vm_uml(void) {
266         _cleanup_free_ char *cpuinfo_contents = NULL;
267         int r;
268
269         /* Detect User-Mode Linux by reading /proc/cpuinfo */
270         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
271         if (r == -ENOENT) {
272                 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
273                 return VIRTUALIZATION_NONE;
274         }
275         if (r < 0)
276                 return r;
277
278         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
279                 log_debug("UML virtualization found in /proc/cpuinfo");
280                 return VIRTUALIZATION_UML;
281         }
282
283         log_debug("UML virtualization not found in /proc/cpuinfo.");
284         return VIRTUALIZATION_NONE;
285 }
286
287 static int detect_vm_zvm(void) {
288
289 #if defined(__s390__)
290         _cleanup_free_ char *t = NULL;
291         int r;
292
293         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
294         if (r == -ENOENT)
295                 return VIRTUALIZATION_NONE;
296         if (r < 0)
297                 return r;
298
299         log_debug("Virtualization %s found in /proc/sysinfo", t);
300         if (streq(t, "z/VM"))
301                 return VIRTUALIZATION_ZVM;
302         else
303                 return VIRTUALIZATION_KVM;
304 #else
305         log_debug("This platform does not support /proc/sysinfo");
306         return VIRTUALIZATION_NONE;
307 #endif
308 }
309
310 /* Returns a short identifier for the various VM implementations */
311 int detect_vm(void) {
312         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
313         bool other = false;
314         int r, dmi;
315
316         if (cached_found >= 0)
317                 return cached_found;
318
319         /* We have to use the correct order here:
320          *
321          * â†’ First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
322          *   Hyper-V.
323          *
324          * â†’ Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
325          *   overwritten.
326          *
327          * â†’ Third, try to detect from DMI. */
328
329         dmi = detect_vm_dmi();
330         if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
331                 r = dmi;
332                 goto finish;
333         }
334
335         r = detect_vm_cpuid();
336         if (r < 0)
337                 return r;
338         if (r == VIRTUALIZATION_VM_OTHER)
339                 other = true;
340         else if (r != VIRTUALIZATION_NONE)
341                 goto finish;
342
343         /* Now, let's get back to DMI */
344         if (dmi < 0)
345                 return dmi;
346         if (dmi == VIRTUALIZATION_VM_OTHER)
347                 other = true;
348         else if (dmi != VIRTUALIZATION_NONE) {
349                 r = dmi;
350                 goto finish;
351         }
352
353         /* x86 xen will most likely be detected by cpuid. If not (most likely
354          * because we're not an x86 guest), then we should try the /proc/xen
355          * directory next. If that's not found, then we check for the high-level
356          * hypervisor sysfs file.
357          */
358
359         r = detect_vm_xen();
360         if (r < 0)
361                 return r;
362         if (r == VIRTUALIZATION_VM_OTHER)
363                 other = true;
364         else if (r != VIRTUALIZATION_NONE)
365                 goto finish;
366
367         r = detect_vm_hypervisor();
368         if (r < 0)
369                 return r;
370         if (r == VIRTUALIZATION_VM_OTHER)
371                 other = true;
372         else if (r != VIRTUALIZATION_NONE)
373                 goto finish;
374
375         r = detect_vm_device_tree();
376         if (r < 0)
377                 return r;
378         if (r == VIRTUALIZATION_VM_OTHER)
379                 other = true;
380         else if (r != VIRTUALIZATION_NONE)
381                 goto finish;
382
383         r = detect_vm_uml();
384         if (r < 0)
385                 return r;
386         if (r == VIRTUALIZATION_VM_OTHER)
387                 other = true;
388         else if (r != VIRTUALIZATION_NONE)
389                 goto finish;
390
391         r = detect_vm_zvm();
392         if (r < 0)
393                 return r;
394
395 finish:
396         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
397          * In order to detect the Dom0 as not virtualization we need to
398          * double-check it */
399         if (r == VIRTUALIZATION_XEN) {
400                 int dom0;
401
402                 dom0 = detect_vm_xen_dom0();
403                 if (dom0 < 0)
404                         return dom0;
405                 if (dom0 > 0)
406                         r = VIRTUALIZATION_NONE;
407         } else if (r == VIRTUALIZATION_NONE && other)
408                 r = VIRTUALIZATION_VM_OTHER;
409
410         cached_found = r;
411         log_debug("Found VM virtualization %s", virtualization_to_string(r));
412         return r;
413 }
414 #endif // 0
415
416 int detect_container(void) {
417
418         static const struct {
419                 const char *value;
420                 int id;
421         } value_table[] = {
422                 { "lxc",            VIRTUALIZATION_LXC            },
423                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
424                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
425                 { "docker",         VIRTUALIZATION_DOCKER         },
426                 { "rkt",            VIRTUALIZATION_RKT            },
427         };
428
429         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
430         _cleanup_free_ char *m = NULL;
431         const char *e = NULL;
432         unsigned j;
433         int r;
434
435         if (cached_found >= 0)
436                 return cached_found;
437
438         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
439         if (access("/proc/vz", F_OK) >= 0 &&
440             access("/proc/bc", F_OK) < 0) {
441                 r = VIRTUALIZATION_OPENVZ;
442                 goto finish;
443         }
444
445         if (getpid_cached() == 1) {
446                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
447
448                 e = getenv("container");
449                 if (isempty(e)) {
450                         r = VIRTUALIZATION_NONE;
451                         goto finish;
452                 }
453
454                 goto translate_name;
455         }
456
457         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
458          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
459         r = read_one_line_file("/run/systemd/container", &m);
460         if (r >= 0) {
461                 e = m;
462                 goto translate_name;
463         }
464         if (r != -ENOENT)
465                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
466
467         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
468         r = getenv_for_pid(1, "container", &m);
469         if (r > 0) {
470                 e = m;
471                 goto translate_name;
472         }
473         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
474                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
475
476         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
477          * there is not 1, we know we are in a PID namespace. and hence a container. */
478         r = read_one_line_file("/proc/1/sched", &m);
479         if (r >= 0) {
480                 const char *t;
481
482                 t = strrchr(m, '(');
483                 if (!t)
484                         return -EIO;
485
486                 if (!startswith(t, "(1,")) {
487                         r = VIRTUALIZATION_CONTAINER_OTHER;
488                         goto finish;
489                 }
490         } else if (r != -ENOENT)
491                 return r;
492
493         /* If that didn't work, give up, assume no container manager. */
494         r = VIRTUALIZATION_NONE;
495         goto finish;
496
497 translate_name:
498         for (j = 0; j < ELEMENTSOF(value_table); j++)
499                 if (streq(e, value_table[j].value)) {
500                         r = value_table[j].id;
501                         goto finish;
502                 }
503
504         r = VIRTUALIZATION_CONTAINER_OTHER;
505
506 finish:
507         log_debug("Found container virtualization %s.", virtualization_to_string(r));
508         cached_found = r;
509         return r;
510 }
511
512 #if 0 /// UNNEEDED by elogind
/* Combined detection: a non-zero result of detect_container() (a container id or an error) is
 * returned as is; only when it returns 0 is detect_vm() consulted and its result returned. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
522
523 static int userns_has_mapping(const char *name) {
524         _cleanup_fclose_ FILE *f = NULL;
525         _cleanup_free_ char *buf = NULL;
526         size_t n_allocated = 0;
527         ssize_t n;
528         uint32_t a, b, c;
529         int r;
530
531         f = fopen(name, "re");
532         if (!f) {
533                 log_debug_errno(errno, "Failed to open %s: %m", name);
534                 return errno == ENOENT ? false : -errno;
535         }
536
537         n = getline(&buf, &n_allocated, f);
538         if (n < 0) {
539                 if (feof(f)) {
540                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
541                         return true;
542                 }
543
544                 return log_debug_errno(errno, "Failed to read %s: %m", name);
545         }
546
547         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
548         if (r < 3)
549                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
550
551         if (a == 0 && b == 0 && c == UINT32_MAX) {
552                 /* The kernel calls mappings_overlap() and does not allow overlaps */
553                 log_debug("%s has a full 1:1 mapping", name);
554                 return false;
555         }
556
557         /* Anything else implies that we are in a user namespace */
558         log_debug("Mapping found in %s, we're in a user namespace", name);
559         return true;
560 }
561
562 int running_in_userns(void) {
563         _cleanup_free_ char *line = NULL;
564         int r;
565
566         r = userns_has_mapping("/proc/self/uid_map");
567         if (r != 0)
568                 return r;
569
570         r = userns_has_mapping("/proc/self/gid_map");
571         if (r != 0)
572                 return r;
573
574         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
575          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
576          * also does not exist. We cannot distinguish those two cases, so assume that
577          * we're running on a stripped-down recent kernel, rather than on an old one,
578          * and if the file is not found, return false.
579          */
580         r = read_one_line_file("/proc/self/setgroups", &line);
581         if (r < 0) {
582                 log_debug_errno(r, "/proc/self/setgroups: %m");
583                 return r == -ENOENT ? false : r;
584         }
585
586         truncate_nl(line);
587         r = streq(line, "deny");
588         /* See user_namespaces(7) for a description of this "setgroups" contents. */
589         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
590         return r;
591 }
592 #endif // 0
593
/* Checks whether we run in a chroot by comparing "/proc/1/root" with "/" via files_same().
 *
 * Returns 1 when files_same() reports them as different, 0 when it reports them as the same,
 * and the negative value from files_same() on failure. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        /* A differing root means we are chrooted. */
        return !same;
}
608
609 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
610         [VIRTUALIZATION_NONE] = "none",
611         [VIRTUALIZATION_KVM] = "kvm",
612         [VIRTUALIZATION_QEMU] = "qemu",
613         [VIRTUALIZATION_BOCHS] = "bochs",
614         [VIRTUALIZATION_XEN] = "xen",
615         [VIRTUALIZATION_UML] = "uml",
616         [VIRTUALIZATION_VMWARE] = "vmware",
617         [VIRTUALIZATION_ORACLE] = "oracle",
618         [VIRTUALIZATION_MICROSOFT] = "microsoft",
619         [VIRTUALIZATION_ZVM] = "zvm",
620         [VIRTUALIZATION_PARALLELS] = "parallels",
621         [VIRTUALIZATION_BHYVE] = "bhyve",
622         [VIRTUALIZATION_QNX] = "qnx",
623         [VIRTUALIZATION_VM_OTHER] = "vm-other",
624
625         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
626         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
627         [VIRTUALIZATION_LXC] = "lxc",
628         [VIRTUALIZATION_OPENVZ] = "openvz",
629         [VIRTUALIZATION_DOCKER] = "docker",
630         [VIRTUALIZATION_RKT] = "rkt",
631         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
632 };
633
634 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);