chiark / gitweb /
tree-wide: drop license boilerplate
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3   This file is part of systemd.
4
5   Copyright 2011 Lennart Poettering
6 ***/
7
8 #if defined(__i386__) || defined(__x86_64__)
9 //#include <cpuid.h>
10 #endif
11 #include <errno.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <unistd.h>
16
17 #include "alloc-util.h"
18 #include "dirent-util.h"
19 #include "env-util.h"
20 #include "fd-util.h"
21 #include "fileio.h"
22 #include "macro.h"
23 #include "process-util.h"
24 #include "stat-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "virt.h"
28
29 #if 0 /// UNNEEDED by elogind
30 static int detect_vm_cpuid(void) {
31
32         /* CPUID is an x86 specific interface. */
33 #if defined(__i386__) || defined(__x86_64__)
34
35         static const struct {
36                 const char *cpuid;
37                 int id;
38         } cpuid_vendor_table[] = {
39                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
40                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
41                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
42                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
43                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
44                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
45                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
46                 /* https://wiki.freebsd.org/bhyve */
47                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
48                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
49         };
50
51         uint32_t eax, ebx, ecx, edx;
52         bool hypervisor;
53
54         /* http://lwn.net/Articles/301888/ */
55
56         /* First detect whether there is a hypervisor */
57         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
58                 return VIRTUALIZATION_NONE;
59
60         hypervisor = !!(ecx & 0x80000000U);
61
62         if (hypervisor) {
63                 union {
64                         uint32_t sig32[3];
65                         char text[13];
66                 } sig = {};
67                 unsigned j;
68
69                 /* There is a hypervisor, see what it is */
70                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
71
72                 sig.sig32[0] = ebx;
73                 sig.sig32[1] = ecx;
74                 sig.sig32[2] = edx;
75
76                 log_debug("Virtualization found, CPUID=%s", sig.text);
77
78                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
79                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
80                                 return cpuid_vendor_table[j].id;
81
82                 return VIRTUALIZATION_VM_OTHER;
83         }
84 #endif
85         log_debug("No virtualization found in CPUID");
86
87         return VIRTUALIZATION_NONE;
88 }
89
90 static int detect_vm_device_tree(void) {
91 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
92         _cleanup_free_ char *hvtype = NULL;
93         int r;
94
95         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
96         if (r == -ENOENT) {
97                 _cleanup_closedir_ DIR *dir = NULL;
98                 struct dirent *dent;
99
100                 dir = opendir("/proc/device-tree");
101                 if (!dir) {
102                         if (errno == ENOENT) {
103                                 log_debug_errno(errno, "/proc/device-tree: %m");
104                                 return VIRTUALIZATION_NONE;
105                         }
106                         return -errno;
107                 }
108
109                 FOREACH_DIRENT(dent, dir, return -errno)
110                         if (strstr(dent->d_name, "fw-cfg")) {
111                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
112                                 return VIRTUALIZATION_QEMU;
113                         }
114
115                 log_debug("No virtualization found in /proc/device-tree/*");
116                 return VIRTUALIZATION_NONE;
117         } else if (r < 0)
118                 return r;
119
120         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
121         if (streq(hvtype, "linux,kvm"))
122                 return VIRTUALIZATION_KVM;
123         else if (strstr(hvtype, "xen"))
124                 return VIRTUALIZATION_XEN;
125         else
126                 return VIRTUALIZATION_VM_OTHER;
127 #else
128         log_debug("This platform does not support /proc/device-tree");
129         return VIRTUALIZATION_NONE;
130 #endif
131 }
132
133 static int detect_vm_dmi(void) {
134 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
135
136         static const char *const dmi_vendors[] = {
137                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
138                 "/sys/class/dmi/id/sys_vendor",
139                 "/sys/class/dmi/id/board_vendor",
140                 "/sys/class/dmi/id/bios_vendor"
141         };
142
143         static const struct {
144                 const char *vendor;
145                 int id;
146         } dmi_vendor_table[] = {
147                 { "KVM",           VIRTUALIZATION_KVM       },
148                 { "QEMU",          VIRTUALIZATION_QEMU      },
149                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
150                 { "VMware",        VIRTUALIZATION_VMWARE    },
151                 { "VMW",           VIRTUALIZATION_VMWARE    },
152                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
153                 { "Xen",           VIRTUALIZATION_XEN       },
154                 { "Bochs",         VIRTUALIZATION_BOCHS     },
155                 { "Parallels",     VIRTUALIZATION_PARALLELS },
156                 /* https://wiki.freebsd.org/bhyve */
157                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
158         };
159         unsigned i;
160         int r;
161
162         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
163                 _cleanup_free_ char *s = NULL;
164                 unsigned j;
165
166                 r = read_one_line_file(dmi_vendors[i], &s);
167                 if (r < 0) {
168                         if (r == -ENOENT)
169                                 continue;
170
171                         return r;
172                 }
173
174                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
175                         if (startswith(s, dmi_vendor_table[j].vendor)) {
176                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
177                                 return dmi_vendor_table[j].id;
178                         }
179         }
180 #endif
181
182         log_debug("No virtualization found in DMI");
183
184         return VIRTUALIZATION_NONE;
185 }
186
187 static int detect_vm_xen(void) {
188
189         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
190            The presence of /proc/xen indicates some form of a Xen domain */
191         if (access("/proc/xen", F_OK) < 0) {
192                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
193                 return VIRTUALIZATION_NONE;
194         }
195
196         log_debug("Virtualization XEN found (/proc/xen exists)");
197         return VIRTUALIZATION_XEN;
198 }
199
200 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
201 #define PATH_FEATURES "/sys/hypervisor/properties/features"
202 /* Returns -errno, or 0 for domU, or 1 for dom0 */
203 static int detect_vm_xen_dom0(void) {
204         _cleanup_free_ char *domcap = NULL;
205         char *cap, *i;
206         int r;
207
208         r = read_one_line_file(PATH_FEATURES, &domcap);
209         if (r < 0 && r != -ENOENT)
210                 return r;
211         if (r == 0) {
212                 unsigned long features;
213
214                 /* Here, we need to use sscanf() instead of safe_atoul()
215                  * as the string lacks the leading "0x". */
216                 r = sscanf(domcap, "%lx", &features);
217                 if (r == 1) {
218                         r = !!(features & (1U << XENFEAT_dom0));
219                         log_debug("Virtualization XEN, found %s with value %08lx, "
220                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
221                                   PATH_FEATURES, features, r ? "" : " not");
222                         return r;
223                 }
224                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
225                           PATH_FEATURES, domcap);
226         }
227
228         r = read_one_line_file("/proc/xen/capabilities", &domcap);
229         if (r == -ENOENT) {
230                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
231                 return 0;
232         }
233         if (r < 0)
234                 return r;
235
236         i = domcap;
237         while ((cap = strsep(&i, ",")))
238                 if (streq(cap, "control_d"))
239                         break;
240         if (!cap) {
241                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
242                 return 0;
243         }
244
245         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
246         return 1;
247 }
248
249 static int detect_vm_hypervisor(void) {
250         _cleanup_free_ char *hvtype = NULL;
251         int r;
252
253         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
254         if (r == -ENOENT)
255                 return VIRTUALIZATION_NONE;
256         if (r < 0)
257                 return r;
258
259         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
260
261         if (streq(hvtype, "xen"))
262                 return VIRTUALIZATION_XEN;
263         else
264                 return VIRTUALIZATION_VM_OTHER;
265 }
266
267 static int detect_vm_uml(void) {
268         _cleanup_free_ char *cpuinfo_contents = NULL;
269         int r;
270
271         /* Detect User-Mode Linux by reading /proc/cpuinfo */
272         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
273         if (r == -ENOENT) {
274                 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
275                 return VIRTUALIZATION_NONE;
276         }
277         if (r < 0)
278                 return r;
279
280         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
281                 log_debug("UML virtualization found in /proc/cpuinfo");
282                 return VIRTUALIZATION_UML;
283         }
284
285         log_debug("UML virtualization not found in /proc/cpuinfo.");
286         return VIRTUALIZATION_NONE;
287 }
288
289 static int detect_vm_zvm(void) {
290
291 #if defined(__s390__)
292         _cleanup_free_ char *t = NULL;
293         int r;
294
295         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
296         if (r == -ENOENT)
297                 return VIRTUALIZATION_NONE;
298         if (r < 0)
299                 return r;
300
301         log_debug("Virtualization %s found in /proc/sysinfo", t);
302         if (streq(t, "z/VM"))
303                 return VIRTUALIZATION_ZVM;
304         else
305                 return VIRTUALIZATION_KVM;
306 #else
307         log_debug("This platform does not support /proc/sysinfo");
308         return VIRTUALIZATION_NONE;
309 #endif
310 }
311
312 /* Returns a short identifier for the various VM implementations */
313 int detect_vm(void) {
314         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
315         int r, dmi;
316         bool other = false;
317
318         if (cached_found >= 0)
319                 return cached_found;
320
321         /* We have to use the correct order here:
322          *
323          * -> First try to detect Oracle Virtualbox, even if it uses KVM.
324          * -> Second try to detect from cpuid, this will report KVM for
325          *    whatever software is used even if info in dmi is overwritten.
326          * -> Third try to detect from dmi. */
327
328         dmi = detect_vm_dmi();
329         if (dmi == VIRTUALIZATION_ORACLE) {
330                 r = dmi;
331                 goto finish;
332         }
333
334         r = detect_vm_cpuid();
335         if (r < 0)
336                 return r;
337         if (r != VIRTUALIZATION_NONE) {
338                 if (r == VIRTUALIZATION_VM_OTHER)
339                         other = true;
340                 else
341                         goto finish;
342         }
343
344         r = dmi;
345         if (r < 0)
346                 return r;
347         if (r != VIRTUALIZATION_NONE) {
348                 if (r == VIRTUALIZATION_VM_OTHER)
349                         other = true;
350                 else
351                         goto finish;
352         }
353
354         /* x86 xen will most likely be detected by cpuid. If not (most likely
355          * because we're not an x86 guest), then we should try the /proc/xen
356          * directory next. If that's not found, then we check for the high-level
357          * hypervisor sysfs file.
358          */
359
360         r = detect_vm_xen();
361         if (r < 0)
362                 return r;
363         if (r != VIRTUALIZATION_NONE) {
364                 if (r == VIRTUALIZATION_VM_OTHER)
365                         other = true;
366                 else
367                         goto finish;
368         }
369
370         r = detect_vm_hypervisor();
371         if (r < 0)
372                 return r;
373         if (r != VIRTUALIZATION_NONE) {
374                 if (r == VIRTUALIZATION_VM_OTHER)
375                         other = true;
376                 else
377                         goto finish;
378         }
379
380         r = detect_vm_device_tree();
381         if (r < 0)
382                 return r;
383         if (r != VIRTUALIZATION_NONE) {
384                 if (r == VIRTUALIZATION_VM_OTHER)
385                         other = true;
386                 else
387                         goto finish;
388         }
389
390         r = detect_vm_uml();
391         if (r < 0)
392                 return r;
393         if (r != VIRTUALIZATION_NONE) {
394                 if (r == VIRTUALIZATION_VM_OTHER)
395                         other = true;
396                 else
397                         goto finish;
398         }
399
400         r = detect_vm_zvm();
401         if (r < 0)
402                 return r;
403
404 finish:
405         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
406          * In order to detect the Dom0 as not virtualization we need to
407          * double-check it */
408         if (r == VIRTUALIZATION_XEN) {
409                 int ret = detect_vm_xen_dom0();
410                 if (ret < 0)
411                         return ret;
412                 if (ret > 0)
413                         r = VIRTUALIZATION_NONE;
414         } else if (r == VIRTUALIZATION_NONE && other)
415                 r = VIRTUALIZATION_VM_OTHER;
416
417         cached_found = r;
418         log_debug("Found VM virtualization %s", virtualization_to_string(r));
419         return r;
420 }
421 #endif // 0
422
423 int detect_container(void) {
424
425         static const struct {
426                 const char *value;
427                 int id;
428         } value_table[] = {
429                 { "lxc",            VIRTUALIZATION_LXC            },
430                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
431                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
432                 { "docker",         VIRTUALIZATION_DOCKER         },
433                 { "rkt",            VIRTUALIZATION_RKT            },
434         };
435
436         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
437         _cleanup_free_ char *m = NULL;
438         const char *e = NULL;
439         unsigned j;
440         int r;
441
442         if (cached_found >= 0)
443                 return cached_found;
444
445         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
446         if (access("/proc/vz", F_OK) >= 0 &&
447             access("/proc/bc", F_OK) < 0) {
448                 r = VIRTUALIZATION_OPENVZ;
449                 goto finish;
450         }
451
452         if (getpid_cached() == 1) {
453                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
454
455                 e = getenv("container");
456                 if (isempty(e)) {
457                         r = VIRTUALIZATION_NONE;
458                         goto finish;
459                 }
460
461                 goto translate_name;
462         }
463
464         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
465          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
466         r = read_one_line_file("/run/systemd/container", &m);
467         if (r >= 0) {
468                 e = m;
469                 goto translate_name;
470         }
471         if (r != -ENOENT)
472                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
473
474         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
475         r = getenv_for_pid(1, "container", &m);
476         if (r > 0) {
477                 e = m;
478                 goto translate_name;
479         }
480         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
481                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
482
483         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
484          * there is not 1, we know we are in a PID namespace. and hence a container. */
485         r = read_one_line_file("/proc/1/sched", &m);
486         if (r >= 0) {
487                 const char *t;
488
489                 t = strrchr(m, '(');
490                 if (!t)
491                         return -EIO;
492
493                 if (!startswith(t, "(1,")) {
494                         r = VIRTUALIZATION_CONTAINER_OTHER;
495                         goto finish;
496                 }
497         } else if (r != -ENOENT)
498                 return r;
499
500         /* If that didn't work, give up, assume no container manager. */
501         r = VIRTUALIZATION_NONE;
502         goto finish;
503
504 translate_name:
505         for (j = 0; j < ELEMENTSOF(value_table); j++)
506                 if (streq(e, value_table[j].value)) {
507                         r = value_table[j].id;
508                         goto finish;
509                 }
510
511         r = VIRTUALIZATION_CONTAINER_OTHER;
512
513 finish:
514         log_debug("Found container virtualization %s.", virtualization_to_string(r));
515         cached_found = r;
516         return r;
517 }
518
519 #if 0 /// UNNEEDED by elogind
/* Combined detection: the container check comes first, and the VM check is only consulted when
 * detect_container() returned 0. Any other container result, including a negative error, is
 * returned as is. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
529
530 static int userns_has_mapping(const char *name) {
531         _cleanup_fclose_ FILE *f = NULL;
532         _cleanup_free_ char *buf = NULL;
533         size_t n_allocated = 0;
534         ssize_t n;
535         uint32_t a, b, c;
536         int r;
537
538         f = fopen(name, "re");
539         if (!f) {
540                 log_debug_errno(errno, "Failed to open %s: %m", name);
541                 return errno == ENOENT ? false : -errno;
542         }
543
544         n = getline(&buf, &n_allocated, f);
545         if (n < 0) {
546                 if (feof(f)) {
547                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
548                         return true;
549                 }
550
551                 return log_debug_errno(errno, "Failed to read %s: %m", name);
552         }
553
554         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
555         if (r < 3)
556                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
557
558         if (a == 0 && b == 0 && c == UINT32_MAX) {
559                 /* The kernel calls mappings_overlap() and does not allow overlaps */
560                 log_debug("%s has a full 1:1 mapping", name);
561                 return false;
562         }
563
564         /* Anything else implies that we are in a user namespace */
565         log_debug("Mapping found in %s, we're in a user namespace", name);
566         return true;
567 }
568
569 int running_in_userns(void) {
570         _cleanup_free_ char *line = NULL;
571         int r;
572
573         r = userns_has_mapping("/proc/self/uid_map");
574         if (r != 0)
575                 return r;
576
577         r = userns_has_mapping("/proc/self/gid_map");
578         if (r != 0)
579                 return r;
580
581         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
582          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
583          * also does not exist. We cannot distinguish those two cases, so assume that
584          * we're running on a stripped-down recent kernel, rather than on an old one,
585          * and if the file is not found, return false.
586          */
587         r = read_one_line_file("/proc/self/setgroups", &line);
588         if (r < 0) {
589                 log_debug_errno(r, "/proc/self/setgroups: %m");
590                 return r == -ENOENT ? false : r;
591         }
592
593         truncate_nl(line);
594         r = streq(line, "deny");
595         /* See user_namespaces(7) for a description of this "setgroups" contents. */
596         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
597         return r;
598 }
599 #endif // 0
600
/* Compares the root directory of PID 1 with our own via files_same().
 *
 * Returns 1 if files_same() reports 0 for the two (i.e. we are in a chroot), 0 if it reports a
 * positive value, or its negative value unchanged on failure. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);

        return same < 0 ? same : !same;
}
615
616 static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
617         [VIRTUALIZATION_NONE] = "none",
618         [VIRTUALIZATION_KVM] = "kvm",
619         [VIRTUALIZATION_QEMU] = "qemu",
620         [VIRTUALIZATION_BOCHS] = "bochs",
621         [VIRTUALIZATION_XEN] = "xen",
622         [VIRTUALIZATION_UML] = "uml",
623         [VIRTUALIZATION_VMWARE] = "vmware",
624         [VIRTUALIZATION_ORACLE] = "oracle",
625         [VIRTUALIZATION_MICROSOFT] = "microsoft",
626         [VIRTUALIZATION_ZVM] = "zvm",
627         [VIRTUALIZATION_PARALLELS] = "parallels",
628         [VIRTUALIZATION_BHYVE] = "bhyve",
629         [VIRTUALIZATION_QNX] = "qnx",
630         [VIRTUALIZATION_VM_OTHER] = "vm-other",
631
632         [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
633         [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
634         [VIRTUALIZATION_LXC] = "lxc",
635         [VIRTUALIZATION_OPENVZ] = "openvz",
636         [VIRTUALIZATION_DOCKER] = "docker",
637         [VIRTUALIZATION_RKT] = "rkt",
638         [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
639 };
640
641 DEFINE_STRING_TABLE_LOOKUP(virtualization, int);