chiark / gitweb /
tree-wide: remove Lennart's copyright lines
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #if defined(__i386__) || defined(__x86_64__)
4 //#include <cpuid.h>
5 #endif
6 #include <errno.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11
12 #include "alloc-util.h"
13 #include "dirent-util.h"
14 #include "env-util.h"
15 #include "fd-util.h"
16 #include "fileio.h"
17 #include "macro.h"
18 #include "process-util.h"
19 #include "stat-util.h"
20 #include "string-table.h"
21 #include "string-util.h"
22 #include "virt.h"
23
24 #if 0 /// UNNEEDED by elogind
25 static int detect_vm_cpuid(void) {
26
27         /* CPUID is an x86 specific interface. */
28 #if defined(__i386__) || defined(__x86_64__)
29
30         static const struct {
31                 const char *cpuid;
32                 int id;
33         } cpuid_vendor_table[] = {
34                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
35                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
36                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
37                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
38                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
39                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
40                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
41                 /* https://wiki.freebsd.org/bhyve */
42                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
43                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
44         };
45
46         uint32_t eax, ebx, ecx, edx;
47         bool hypervisor;
48
49         /* http://lwn.net/Articles/301888/ */
50
51         /* First detect whether there is a hypervisor */
52         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
53                 return VIRTUALIZATION_NONE;
54
55         hypervisor = ecx & 0x80000000U;
56
57         if (hypervisor) {
58                 union {
59                         uint32_t sig32[3];
60                         char text[13];
61                 } sig = {};
62                 unsigned j;
63
64                 /* There is a hypervisor, see what it is */
65                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
66
67                 sig.sig32[0] = ebx;
68                 sig.sig32[1] = ecx;
69                 sig.sig32[2] = edx;
70
71                 log_debug("Virtualization found, CPUID=%s", sig.text);
72
73                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
74                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
75                                 return cpuid_vendor_table[j].id;
76
77                 return VIRTUALIZATION_VM_OTHER;
78         }
79 #endif
80         log_debug("No virtualization found in CPUID");
81
82         return VIRTUALIZATION_NONE;
83 }
84
85 static int detect_vm_device_tree(void) {
86 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
87         _cleanup_free_ char *hvtype = NULL;
88         int r;
89
90         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
91         if (r == -ENOENT) {
92                 _cleanup_closedir_ DIR *dir = NULL;
93                 struct dirent *dent;
94
95                 dir = opendir("/proc/device-tree");
96                 if (!dir) {
97                         if (errno == ENOENT) {
98                                 log_debug_errno(errno, "/proc/device-tree: %m");
99                                 return VIRTUALIZATION_NONE;
100                         }
101                         return -errno;
102                 }
103
104                 FOREACH_DIRENT(dent, dir, return -errno)
105                         if (strstr(dent->d_name, "fw-cfg")) {
106                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
107                                 return VIRTUALIZATION_QEMU;
108                         }
109
110                 log_debug("No virtualization found in /proc/device-tree/*");
111                 return VIRTUALIZATION_NONE;
112         } else if (r < 0)
113                 return r;
114
115         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
116         if (streq(hvtype, "linux,kvm"))
117                 return VIRTUALIZATION_KVM;
118         else if (strstr(hvtype, "xen"))
119                 return VIRTUALIZATION_XEN;
120         else
121                 return VIRTUALIZATION_VM_OTHER;
122 #else
123         log_debug("This platform does not support /proc/device-tree");
124         return VIRTUALIZATION_NONE;
125 #endif
126 }
127
128 static int detect_vm_dmi(void) {
129 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
130
131         static const char *const dmi_vendors[] = {
132                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
133                 "/sys/class/dmi/id/sys_vendor",
134                 "/sys/class/dmi/id/board_vendor",
135                 "/sys/class/dmi/id/bios_vendor"
136         };
137
138         static const struct {
139                 const char *vendor;
140                 int id;
141         } dmi_vendor_table[] = {
142                 { "KVM",           VIRTUALIZATION_KVM       },
143                 { "QEMU",          VIRTUALIZATION_QEMU      },
144                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
145                 { "VMware",        VIRTUALIZATION_VMWARE    },
146                 { "VMW",           VIRTUALIZATION_VMWARE    },
147                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
148                 { "Xen",           VIRTUALIZATION_XEN       },
149                 { "Bochs",         VIRTUALIZATION_BOCHS     },
150                 { "Parallels",     VIRTUALIZATION_PARALLELS },
151                 /* https://wiki.freebsd.org/bhyve */
152                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
153         };
154         unsigned i;
155         int r;
156
157         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
158                 _cleanup_free_ char *s = NULL;
159                 unsigned j;
160
161                 r = read_one_line_file(dmi_vendors[i], &s);
162                 if (r < 0) {
163                         if (r == -ENOENT)
164                                 continue;
165
166                         return r;
167                 }
168
169                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
170                         if (startswith(s, dmi_vendor_table[j].vendor)) {
171                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
172                                 return dmi_vendor_table[j].id;
173                         }
174         }
175 #endif
176
177         log_debug("No virtualization found in DMI");
178
179         return VIRTUALIZATION_NONE;
180 }
181
182 static int detect_vm_xen(void) {
183
184         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
185            The presence of /proc/xen indicates some form of a Xen domain */
186         if (access("/proc/xen", F_OK) < 0) {
187                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
188                 return VIRTUALIZATION_NONE;
189         }
190
191         log_debug("Virtualization XEN found (/proc/xen exists)");
192         return VIRTUALIZATION_XEN;
193 }
194
195 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
196 #define PATH_FEATURES "/sys/hypervisor/properties/features"
197 /* Returns -errno, or 0 for domU, or 1 for dom0 */
198 static int detect_vm_xen_dom0(void) {
199         _cleanup_free_ char *domcap = NULL;
200         char *cap, *i;
201         int r;
202
203         r = read_one_line_file(PATH_FEATURES, &domcap);
204         if (r < 0 && r != -ENOENT)
205                 return r;
206         if (r == 0) {
207                 unsigned long features;
208
209                 /* Here, we need to use sscanf() instead of safe_atoul()
210                  * as the string lacks the leading "0x". */
211                 r = sscanf(domcap, "%lx", &features);
212                 if (r == 1) {
213                         r = !!(features & (1U << XENFEAT_dom0));
214                         log_debug("Virtualization XEN, found %s with value %08lx, "
215                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
216                                   PATH_FEATURES, features, r ? "" : " not");
217                         return r;
218                 }
219                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
220                           PATH_FEATURES, domcap);
221         }
222
223         r = read_one_line_file("/proc/xen/capabilities", &domcap);
224         if (r == -ENOENT) {
225                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
226                 return 0;
227         }
228         if (r < 0)
229                 return r;
230
231         i = domcap;
232         while ((cap = strsep(&i, ",")))
233                 if (streq(cap, "control_d"))
234                         break;
235         if (!cap) {
236                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
237                 return 0;
238         }
239
240         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
241         return 1;
242 }
243
244 static int detect_vm_hypervisor(void) {
245         _cleanup_free_ char *hvtype = NULL;
246         int r;
247
248         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
249         if (r == -ENOENT)
250                 return VIRTUALIZATION_NONE;
251         if (r < 0)
252                 return r;
253
254         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
255
256         if (streq(hvtype, "xen"))
257                 return VIRTUALIZATION_XEN;
258         else
259                 return VIRTUALIZATION_VM_OTHER;
260 }
261
262 static int detect_vm_uml(void) {
263         _cleanup_free_ char *cpuinfo_contents = NULL;
264         int r;
265
266         /* Detect User-Mode Linux by reading /proc/cpuinfo */
267         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
268         if (r == -ENOENT) {
269                 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
270                 return VIRTUALIZATION_NONE;
271         }
272         if (r < 0)
273                 return r;
274
275         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
276                 log_debug("UML virtualization found in /proc/cpuinfo");
277                 return VIRTUALIZATION_UML;
278         }
279
280         log_debug("UML virtualization not found in /proc/cpuinfo.");
281         return VIRTUALIZATION_NONE;
282 }
283
284 static int detect_vm_zvm(void) {
285
286 #if defined(__s390__)
287         _cleanup_free_ char *t = NULL;
288         int r;
289
290         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
291         if (r == -ENOENT)
292                 return VIRTUALIZATION_NONE;
293         if (r < 0)
294                 return r;
295
296         log_debug("Virtualization %s found in /proc/sysinfo", t);
297         if (streq(t, "z/VM"))
298                 return VIRTUALIZATION_ZVM;
299         else
300                 return VIRTUALIZATION_KVM;
301 #else
302         log_debug("This platform does not support /proc/sysinfo");
303         return VIRTUALIZATION_NONE;
304 #endif
305 }
306
307 /* Returns a short identifier for the various VM implementations */
308 int detect_vm(void) {
309         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
310         bool other = false;
311         int r, dmi;
312
313         if (cached_found >= 0)
314                 return cached_found;
315
316         /* We have to use the correct order here:
317          *
318          * â†’ First, try to detect Oracle Virtualbox, even if it uses KVM, as well as Xen even if it cloaks as Microsoft
319          *   Hyper-V.
320          *
321          * â†’ Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
322          *   overwritten.
323          *
324          * â†’ Third, try to detect from DMI. */
325
326         dmi = detect_vm_dmi();
327         if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN)) {
328                 r = dmi;
329                 goto finish;
330         }
331
332         r = detect_vm_cpuid();
333         if (r < 0)
334                 return r;
335         if (r == VIRTUALIZATION_VM_OTHER)
336                 other = true;
337         else if (r != VIRTUALIZATION_NONE)
338                 goto finish;
339
340         /* Now, let's get back to DMI */
341         if (dmi < 0)
342                 return dmi;
343         if (dmi == VIRTUALIZATION_VM_OTHER)
344                 other = true;
345         else if (dmi != VIRTUALIZATION_NONE) {
346                 r = dmi;
347                 goto finish;
348         }
349
350         /* x86 xen will most likely be detected by cpuid. If not (most likely
351          * because we're not an x86 guest), then we should try the /proc/xen
352          * directory next. If that's not found, then we check for the high-level
353          * hypervisor sysfs file.
354          */
355
356         r = detect_vm_xen();
357         if (r < 0)
358                 return r;
359         if (r == VIRTUALIZATION_VM_OTHER)
360                 other = true;
361         else if (r != VIRTUALIZATION_NONE)
362                 goto finish;
363
364         r = detect_vm_hypervisor();
365         if (r < 0)
366                 return r;
367         if (r == VIRTUALIZATION_VM_OTHER)
368                 other = true;
369         else if (r != VIRTUALIZATION_NONE)
370                 goto finish;
371
372         r = detect_vm_device_tree();
373         if (r < 0)
374                 return r;
375         if (r == VIRTUALIZATION_VM_OTHER)
376                 other = true;
377         else if (r != VIRTUALIZATION_NONE)
378                 goto finish;
379
380         r = detect_vm_uml();
381         if (r < 0)
382                 return r;
383         if (r == VIRTUALIZATION_VM_OTHER)
384                 other = true;
385         else if (r != VIRTUALIZATION_NONE)
386                 goto finish;
387
388         r = detect_vm_zvm();
389         if (r < 0)
390                 return r;
391
392 finish:
393         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
394          * In order to detect the Dom0 as not virtualization we need to
395          * double-check it */
396         if (r == VIRTUALIZATION_XEN) {
397                 int dom0;
398
399                 dom0 = detect_vm_xen_dom0();
400                 if (dom0 < 0)
401                         return dom0;
402                 if (dom0 > 0)
403                         r = VIRTUALIZATION_NONE;
404         } else if (r == VIRTUALIZATION_NONE && other)
405                 r = VIRTUALIZATION_VM_OTHER;
406
407         cached_found = r;
408         log_debug("Found VM virtualization %s", virtualization_to_string(r));
409         return r;
410 }
411 #endif // 0
412
413 int detect_container(void) {
414
415         static const struct {
416                 const char *value;
417                 int id;
418         } value_table[] = {
419                 { "lxc",            VIRTUALIZATION_LXC            },
420                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
421                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
422                 { "docker",         VIRTUALIZATION_DOCKER         },
423                 { "rkt",            VIRTUALIZATION_RKT            },
424         };
425
426         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
427         _cleanup_free_ char *m = NULL;
428         const char *e = NULL;
429         unsigned j;
430         int r;
431
432         if (cached_found >= 0)
433                 return cached_found;
434
435         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
436         if (access("/proc/vz", F_OK) >= 0 &&
437             access("/proc/bc", F_OK) < 0) {
438                 r = VIRTUALIZATION_OPENVZ;
439                 goto finish;
440         }
441
442         if (getpid_cached() == 1) {
443                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
444
445                 e = getenv("container");
446                 if (isempty(e)) {
447                         r = VIRTUALIZATION_NONE;
448                         goto finish;
449                 }
450
451                 goto translate_name;
452         }
453
454         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
455          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
456         r = read_one_line_file("/run/systemd/container", &m);
457         if (r >= 0) {
458                 e = m;
459                 goto translate_name;
460         }
461         if (r != -ENOENT)
462                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
463
464         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
465         r = getenv_for_pid(1, "container", &m);
466         if (r > 0) {
467                 e = m;
468                 goto translate_name;
469         }
470         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
471                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
472
473         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
474          * there is not 1, we know we are in a PID namespace. and hence a container. */
475         r = read_one_line_file("/proc/1/sched", &m);
476         if (r >= 0) {
477                 const char *t;
478
479                 t = strrchr(m, '(');
480                 if (!t)
481                         return -EIO;
482
483                 if (!startswith(t, "(1,")) {
484                         r = VIRTUALIZATION_CONTAINER_OTHER;
485                         goto finish;
486                 }
487         } else if (r != -ENOENT)
488                 return r;
489
490         /* If that didn't work, give up, assume no container manager. */
491         r = VIRTUALIZATION_NONE;
492         goto finish;
493
494 translate_name:
495         for (j = 0; j < ELEMENTSOF(value_table); j++)
496                 if (streq(e, value_table[j].value)) {
497                         r = value_table[j].id;
498                         goto finish;
499                 }
500
501         r = VIRTUALIZATION_CONTAINER_OTHER;
502
503 finish:
504         log_debug("Found container virtualization %s.", virtualization_to_string(r));
505         cached_found = r;
506         return r;
507 }
508
509 #if 0 /// UNNEEDED by elogind
/* Combined check: asks detect_container() first and only falls back to detect_vm() when that returned 0.
 * Any other container result, a negative error included, is passed through unchanged. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
519
520 static int userns_has_mapping(const char *name) {
521         _cleanup_fclose_ FILE *f = NULL;
522         _cleanup_free_ char *buf = NULL;
523         size_t n_allocated = 0;
524         ssize_t n;
525         uint32_t a, b, c;
526         int r;
527
528         f = fopen(name, "re");
529         if (!f) {
530                 log_debug_errno(errno, "Failed to open %s: %m", name);
531                 return errno == ENOENT ? false : -errno;
532         }
533
534         n = getline(&buf, &n_allocated, f);
535         if (n < 0) {
536                 if (feof(f)) {
537                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
538                         return true;
539                 }
540
541                 return log_debug_errno(errno, "Failed to read %s: %m", name);
542         }
543
544         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
545         if (r < 3)
546                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
547
548         if (a == 0 && b == 0 && c == UINT32_MAX) {
549                 /* The kernel calls mappings_overlap() and does not allow overlaps */
550                 log_debug("%s has a full 1:1 mapping", name);
551                 return false;
552         }
553
554         /* Anything else implies that we are in a user namespace */
555         log_debug("Mapping found in %s, we're in a user namespace", name);
556         return true;
557 }
558
559 int running_in_userns(void) {
560         _cleanup_free_ char *line = NULL;
561         int r;
562
563         r = userns_has_mapping("/proc/self/uid_map");
564         if (r != 0)
565                 return r;
566
567         r = userns_has_mapping("/proc/self/gid_map");
568         if (r != 0)
569                 return r;
570
571         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
572          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
573          * also does not exist. We cannot distinguish those two cases, so assume that
574          * we're running on a stripped-down recent kernel, rather than on an old one,
575          * and if the file is not found, return false.
576          */
577         r = read_one_line_file("/proc/self/setgroups", &line);
578         if (r < 0) {
579                 log_debug_errno(r, "/proc/self/setgroups: %m");
580                 return r == -ENOENT ? false : r;
581         }
582
583         truncate_nl(line);
584         r = streq(line, "deny");
585         /* See user_namespaces(7) for a description of this "setgroups" contents. */
586         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
587         return r;
588 }
589 #endif // 0
590
/* Checks whether we run in a chroot by comparing the root directory of PID 1 with our own via
 * files_same().
 *
 * Returns 1 if files_same() returned 0 for the two, 0 if it returned a positive value, or its negative
 * errno-style value on failure. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        return !same;
}
605
606 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
607         [VIRTUALIZATION_NONE] = "none",
608         [VIRTUALIZATION_KVM] = "kvm",
609         [VIRTUALIZATION_QEMU] = "qemu",
610         [VIRTUALIZATION_BOCHS] = "bochs",
611         [VIRTUALIZATION_XEN] = "xen",
612         [VIRTUALIZATION_UML] = "uml",
613         [VIRTUALIZATION_VMWARE] = "vmware",
614         [VIRTUALIZATION_ORACLE] = "oracle",
615         [VIRTUALIZATION_MICROSOFT] = "microsoft",
616         [VIRTUALIZATION_ZVM] = "zvm",
617         [VIRTUALIZATION_PARALLELS] = "parallels",
618         [VIRTUALIZATION_BHYVE] = "bhyve",
619         [VIRTUALIZATION_QNX] = "qnx",
620         [VIRTUALIZATION_VM_OTHER] = "vm-other",
621
622         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
623         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
624         [VIRTUALIZATION_LXC] = "lxc",
625         [VIRTUALIZATION_OPENVZ] = "openvz",
626         [VIRTUALIZATION_DOCKER] = "docker",
627         [VIRTUALIZATION_RKT] = "rkt",
628         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
629 };
630
631 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);