chiark / gitweb /
Fix parsing of features in detect_vm_xen_dom0 (#7890)
[elogind.git] / src / basic / virt.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3   This file is part of systemd.
4
5   Copyright 2011 Lennart Poettering
6
7   systemd is free software; you can redistribute it and/or modify it
8   under the terms of the GNU Lesser General Public License as published by
9   the Free Software Foundation; either version 2.1 of the License, or
10   (at your option) any later version.
11
12   systemd is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public License
18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #if defined(__i386__) || defined(__x86_64__)
22 //#include <cpuid.h>
23 #endif
24 #include <errno.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29
30 #include "alloc-util.h"
31 #include "dirent-util.h"
32 #include "env-util.h"
33 #include "fd-util.h"
34 #include "fileio.h"
35 #include "macro.h"
36 #include "process-util.h"
37 #include "stat-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "virt.h"
41
42 #if 0 /// UNNEEDED by elogind
43 static int detect_vm_cpuid(void) {
44
45         /* CPUID is an x86 specific interface. */
46 #if defined(__i386__) || defined(__x86_64__)
47
48         static const struct {
49                 const char *cpuid;
50                 int id;
51         } cpuid_vendor_table[] = {
52                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
53                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
54                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
55                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
56                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
57                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
58                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
59                 /* https://wiki.freebsd.org/bhyve */
60                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
61         };
62
63         uint32_t eax, ebx, ecx, edx;
64         bool hypervisor;
65
66         /* http://lwn.net/Articles/301888/ */
67
68         /* First detect whether there is a hypervisor */
69         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
70                 return VIRTUALIZATION_NONE;
71
72         hypervisor = !!(ecx & 0x80000000U);
73
74         if (hypervisor) {
75                 union {
76                         uint32_t sig32[3];
77                         char text[13];
78                 } sig = {};
79                 unsigned j;
80
81                 /* There is a hypervisor, see what it is */
82                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
83
84                 sig.sig32[0] = ebx;
85                 sig.sig32[1] = ecx;
86                 sig.sig32[2] = edx;
87
88                 log_debug("Virtualization found, CPUID=%s", sig.text);
89
90                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
91                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
92                                 return cpuid_vendor_table[j].id;
93
94                 return VIRTUALIZATION_VM_OTHER;
95         }
96 #endif
97         log_debug("No virtualization found in CPUID");
98
99         return VIRTUALIZATION_NONE;
100 }
101
102 static int detect_vm_device_tree(void) {
103 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
104         _cleanup_free_ char *hvtype = NULL;
105         int r;
106
107         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
108         if (r == -ENOENT) {
109                 _cleanup_closedir_ DIR *dir = NULL;
110                 struct dirent *dent;
111
112                 dir = opendir("/proc/device-tree");
113                 if (!dir) {
114                         if (errno == ENOENT) {
115                                 log_debug_errno(errno, "/proc/device-tree: %m");
116                                 return VIRTUALIZATION_NONE;
117                         }
118                         return -errno;
119                 }
120
121                 FOREACH_DIRENT(dent, dir, return -errno)
122                         if (strstr(dent->d_name, "fw-cfg")) {
123                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
124                                 return VIRTUALIZATION_QEMU;
125                         }
126
127                 log_debug("No virtualization found in /proc/device-tree/*");
128                 return VIRTUALIZATION_NONE;
129         } else if (r < 0)
130                 return r;
131
132         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
133         if (streq(hvtype, "linux,kvm"))
134                 return VIRTUALIZATION_KVM;
135         else if (strstr(hvtype, "xen"))
136                 return VIRTUALIZATION_XEN;
137         else
138                 return VIRTUALIZATION_VM_OTHER;
139 #else
140         log_debug("This platform does not support /proc/device-tree");
141         return VIRTUALIZATION_NONE;
142 #endif
143 }
144
145 static int detect_vm_dmi(void) {
146 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
147
148         static const char *const dmi_vendors[] = {
149                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
150                 "/sys/class/dmi/id/sys_vendor",
151                 "/sys/class/dmi/id/board_vendor",
152                 "/sys/class/dmi/id/bios_vendor"
153         };
154
155         static const struct {
156                 const char *vendor;
157                 int id;
158         } dmi_vendor_table[] = {
159                 { "KVM",           VIRTUALIZATION_KVM       },
160                 { "QEMU",          VIRTUALIZATION_QEMU      },
161                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
162                 { "VMware",        VIRTUALIZATION_VMWARE    },
163                 { "VMW",           VIRTUALIZATION_VMWARE    },
164                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
165                 { "Xen",           VIRTUALIZATION_XEN       },
166                 { "Bochs",         VIRTUALIZATION_BOCHS     },
167                 { "Parallels",     VIRTUALIZATION_PARALLELS },
168                 /* https://wiki.freebsd.org/bhyve */
169                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
170         };
171         unsigned i;
172         int r;
173
174         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
175                 _cleanup_free_ char *s = NULL;
176                 unsigned j;
177
178                 r = read_one_line_file(dmi_vendors[i], &s);
179                 if (r < 0) {
180                         if (r == -ENOENT)
181                                 continue;
182
183                         return r;
184                 }
185
186                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
187                         if (startswith(s, dmi_vendor_table[j].vendor)) {
188                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
189                                 return dmi_vendor_table[j].id;
190                         }
191         }
192 #endif
193
194         log_debug("No virtualization found in DMI");
195
196         return VIRTUALIZATION_NONE;
197 }
198
199 static int detect_vm_xen(void) {
200
201         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
202            The presence of /proc/xen indicates some form of a Xen domain */
203         if (access("/proc/xen", F_OK) < 0) {
204                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
205                 return VIRTUALIZATION_NONE;
206         }
207
208         log_debug("Virtualization XEN found (/proc/xen exists)");
209         return VIRTUALIZATION_XEN;
210 }
211
212 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
213 #define PATH_FEATURES "/sys/hypervisor/properties/features"
214 /* Returns -errno, or 0 for domU, or 1 for dom0 */
215 static int detect_vm_xen_dom0(void) {
216         _cleanup_free_ char *domcap = NULL;
217         char *cap, *i;
218         int r;
219
220         r = read_one_line_file(PATH_FEATURES, &domcap);
221         if (r < 0 && r != -ENOENT)
222                 return r;
223         if (r == 0) {
224                 unsigned long features;
225
226                 r = sscanf(domcap, "%lx", &features);
227                 if (r == 1) {
228                         r = !!(features & (1U << XENFEAT_dom0));
229                         log_debug("Virtualization XEN, found %s with value %08lx, "
230                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
231                                   PATH_FEATURES, features, r ? "" : " not");
232                         return r;
233                 }
234                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
235                           PATH_FEATURES, domcap);
236         }
237
238         r = read_one_line_file("/proc/xen/capabilities", &domcap);
239         if (r == -ENOENT) {
240                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
241                 return 0;
242         }
243         if (r < 0)
244                 return r;
245
246         i = domcap;
247         while ((cap = strsep(&i, ",")))
248                 if (streq(cap, "control_d"))
249                         break;
250         if (!cap) {
251                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
252                 return 0;
253         }
254
255         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
256         return 1;
257 }
258
259 static int detect_vm_hypervisor(void) {
260         _cleanup_free_ char *hvtype = NULL;
261         int r;
262
263         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
264         if (r == -ENOENT)
265                 return VIRTUALIZATION_NONE;
266         if (r < 0)
267                 return r;
268
269         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
270
271         if (streq(hvtype, "xen"))
272                 return VIRTUALIZATION_XEN;
273         else
274                 return VIRTUALIZATION_VM_OTHER;
275 }
276
277 static int detect_vm_uml(void) {
278         _cleanup_free_ char *cpuinfo_contents = NULL;
279         int r;
280
281         /* Detect User-Mode Linux by reading /proc/cpuinfo */
282         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
283         if (r < 0)
284                 return r;
285
286         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
287                 log_debug("UML virtualization found in /proc/cpuinfo");
288                 return VIRTUALIZATION_UML;
289         }
290
291         log_debug("No virtualization found in /proc/cpuinfo.");
292         return VIRTUALIZATION_NONE;
293 }
294
295 static int detect_vm_zvm(void) {
296
297 #if defined(__s390__)
298         _cleanup_free_ char *t = NULL;
299         int r;
300
301         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
302         if (r == -ENOENT)
303                 return VIRTUALIZATION_NONE;
304         if (r < 0)
305                 return r;
306
307         log_debug("Virtualization %s found in /proc/sysinfo", t);
308         if (streq(t, "z/VM"))
309                 return VIRTUALIZATION_ZVM;
310         else
311                 return VIRTUALIZATION_KVM;
312 #else
313         log_debug("This platform does not support /proc/sysinfo");
314         return VIRTUALIZATION_NONE;
315 #endif
316 }
317
318 /* Returns a short identifier for the various VM implementations */
319 int detect_vm(void) {
320         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
321         int r, dmi;
322         bool other = false;
323
324         if (cached_found >= 0)
325                 return cached_found;
326
327         /* We have to use the correct order here:
328          *
329          * -> First try to detect Oracle Virtualbox, even if it uses KVM.
330          * -> Second try to detect from cpuid, this will report KVM for
331          *    whatever software is used even if info in dmi is overwritten.
332          * -> Third try to detect from dmi. */
333
334         dmi = detect_vm_dmi();
335         if (dmi == VIRTUALIZATION_ORACLE) {
336                 r = dmi;
337                 goto finish;
338         }
339
340         r = detect_vm_cpuid();
341         if (r < 0)
342                 return r;
343         if (r != VIRTUALIZATION_NONE) {
344                 if (r == VIRTUALIZATION_VM_OTHER)
345                         other = true;
346                 else
347                         goto finish;
348         }
349
350         r = dmi;
351         if (r < 0)
352                 return r;
353         if (r != VIRTUALIZATION_NONE) {
354                 if (r == VIRTUALIZATION_VM_OTHER)
355                         other = true;
356                 else
357                         goto finish;
358         }
359
360         /* x86 xen will most likely be detected by cpuid. If not (most likely
361          * because we're not an x86 guest), then we should try the /proc/xen
362          * directory next. If that's not found, then we check for the high-level
363          * hypervisor sysfs file.
364          */
365
366         r = detect_vm_xen();
367         if (r < 0)
368                 return r;
369         if (r != VIRTUALIZATION_NONE) {
370                 if (r == VIRTUALIZATION_VM_OTHER)
371                         other = true;
372                 else
373                         goto finish;
374         }
375
376         r = detect_vm_hypervisor();
377         if (r < 0)
378                 return r;
379         if (r != VIRTUALIZATION_NONE) {
380                 if (r == VIRTUALIZATION_VM_OTHER)
381                         other = true;
382                 else
383                         goto finish;
384         }
385
386         r = detect_vm_device_tree();
387         if (r < 0)
388                 return r;
389         if (r != VIRTUALIZATION_NONE) {
390                 if (r == VIRTUALIZATION_VM_OTHER)
391                         other = true;
392                 else
393                         goto finish;
394         }
395
396         r = detect_vm_uml();
397         if (r < 0)
398                 return r;
399         if (r != VIRTUALIZATION_NONE) {
400                 if (r == VIRTUALIZATION_VM_OTHER)
401                         other = true;
402                 else
403                         goto finish;
404         }
405
406         r = detect_vm_zvm();
407         if (r < 0)
408                 return r;
409
410 finish:
411         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
412          * In order to detect the Dom0 as not virtualization we need to
413          * double-check it */
414         if (r == VIRTUALIZATION_XEN) {
415                 int ret = detect_vm_xen_dom0();
416                 if (ret < 0)
417                         return ret;
418                 if (ret > 0)
419                         r = VIRTUALIZATION_NONE;
420         } else if (r == VIRTUALIZATION_NONE && other)
421                 r = VIRTUALIZATION_VM_OTHER;
422
423         cached_found = r;
424         log_debug("Found VM virtualization %s", virtualization_to_string(r));
425         return r;
426 }
427 #endif // 0
428
429 int detect_container(void) {
430
431         static const struct {
432                 const char *value;
433                 int id;
434         } value_table[] = {
435                 { "lxc",            VIRTUALIZATION_LXC            },
436                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
437                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
438                 { "docker",         VIRTUALIZATION_DOCKER         },
439                 { "rkt",            VIRTUALIZATION_RKT            },
440         };
441
442         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
443         _cleanup_free_ char *m = NULL;
444         const char *e = NULL;
445         unsigned j;
446         int r;
447
448         if (cached_found >= 0)
449                 return cached_found;
450
451         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
452         if (access("/proc/vz", F_OK) >= 0 &&
453             access("/proc/bc", F_OK) < 0) {
454                 r = VIRTUALIZATION_OPENVZ;
455                 goto finish;
456         }
457
458         if (getpid_cached() == 1) {
459                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
460
461                 e = getenv("container");
462                 if (isempty(e)) {
463                         r = VIRTUALIZATION_NONE;
464                         goto finish;
465                 }
466
467                 goto translate_name;
468         }
469
470         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
471          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
472         r = read_one_line_file("/run/systemd/container", &m);
473         if (r >= 0) {
474                 e = m;
475                 goto translate_name;
476         }
477         if (r != -ENOENT)
478                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
479
480         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
481         r = getenv_for_pid(1, "container", &m);
482         if (r > 0) {
483                 e = m;
484                 goto translate_name;
485         }
486         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
487                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
488
489         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
490          * there is not 1, we know we are in a PID namespace. and hence a container. */
491         r = read_one_line_file("/proc/1/sched", &m);
492         if (r >= 0) {
493                 const char *t;
494
495                 t = strrchr(m, '(');
496                 if (!t)
497                         return -EIO;
498
499                 if (!startswith(t, "(1,")) {
500                         r = VIRTUALIZATION_CONTAINER_OTHER;
501                         goto finish;
502                 }
503         } else if (r != -ENOENT)
504                 return r;
505
506         /* If that didn't work, give up, assume no container manager. */
507         r = VIRTUALIZATION_NONE;
508         goto finish;
509
510 translate_name:
511         for (j = 0; j < ELEMENTSOF(value_table); j++)
512                 if (streq(e, value_table[j].value)) {
513                         r = value_table[j].id;
514                         goto finish;
515                 }
516
517         r = VIRTUALIZATION_CONTAINER_OTHER;
518
519 finish:
520         log_debug("Found container virtualization %s.", virtualization_to_string(r));
521         cached_found = r;
522         return r;
523 }
524
525 #if 0 /// UNNEEDED by elogind
/* Detects any kind of virtualization, containers first.
 *
 * If detect_container() returns anything other than 0 (a container
 * identifier or an error), that value is returned as is. Otherwise the
 * result of detect_vm() is returned. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
535
536 static int userns_has_mapping(const char *name) {
537         _cleanup_fclose_ FILE *f = NULL;
538         _cleanup_free_ char *buf = NULL;
539         size_t n_allocated = 0;
540         ssize_t n;
541         uint32_t a, b, c;
542         int r;
543
544         f = fopen(name, "re");
545         if (!f) {
546                 log_debug_errno(errno, "Failed to open %s: %m", name);
547                 return errno == ENOENT ? false : -errno;
548         }
549
550         n = getline(&buf, &n_allocated, f);
551         if (n < 0) {
552                 if (feof(f)) {
553                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
554                         return true;
555                 }
556
557                 return log_debug_errno(errno, "Failed to read %s: %m", name);
558         }
559
560         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
561         if (r < 3)
562                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
563
564         if (a == 0 && b == 0 && c == UINT32_MAX) {
565                 /* The kernel calls mappings_overlap() and does not allow overlaps */
566                 log_debug("%s has a full 1:1 mapping", name);
567                 return false;
568         }
569
570         /* Anything else implies that we are in a user namespace */
571         log_debug("Mapping found in %s, we're in a user namespace", name);
572         return true;
573 }
574
575 int running_in_userns(void) {
576         _cleanup_free_ char *line = NULL;
577         int r;
578
579         r = userns_has_mapping("/proc/self/uid_map");
580         if (r != 0)
581                 return r;
582
583         r = userns_has_mapping("/proc/self/gid_map");
584         if (r != 0)
585                 return r;
586
587         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
588          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
589          * also does not exist. We cannot distinguish those two cases, so assume that
590          * we're running on a stripped-down recent kernel, rather than on an old one,
591          * and if the file is not found, return false.
592          */
593         r = read_one_line_file("/proc/self/setgroups", &line);
594         if (r < 0) {
595                 log_debug_errno(r, "/proc/self/setgroups: %m");
596                 return r == -ENOENT ? false : r;
597         }
598
599         truncate_nl(line);
600         r = streq(line, "deny");
601         /* See user_namespaces(7) for a description of this "setgroups" contents. */
602         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
603         return r;
604 }
605 #endif // 0
606
/* Checks whether the calling process runs in a chroot() environment by
 * comparing "/proc/1/root" with "/" through files_same().
 *
 * Returns 1 if files_same() reports 0 for the two paths, 0 if it reports a
 * positive value, and passes a negative result through unchanged. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        return same > 0 ? 0 : 1;
}
621
/* Maps each VIRTUALIZATION_* identifier to the short name used for it in
 * log messages and lookups. Every name that detect_vm() or
 * detect_container() can return needs an entry here. */
static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_NONE] = "none",

        /* Virtual machines */
        [VIRTUALIZATION_KVM] = "kvm",
        [VIRTUALIZATION_QEMU] = "qemu",
        [VIRTUALIZATION_BOCHS] = "bochs",
        [VIRTUALIZATION_XEN] = "xen",
        [VIRTUALIZATION_UML] = "uml",
        [VIRTUALIZATION_VMWARE] = "vmware",
        [VIRTUALIZATION_ORACLE] = "oracle",
        [VIRTUALIZATION_MICROSOFT] = "microsoft",
        [VIRTUALIZATION_ZVM] = "zvm",
        [VIRTUALIZATION_PARALLELS] = "parallels",
        [VIRTUALIZATION_BHYVE] = "bhyve",
        [VIRTUALIZATION_VM_OTHER] = "vm-other",

        /* Containers */
        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
        [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
        [VIRTUALIZATION_LXC] = "lxc",
        [VIRTUALIZATION_OPENVZ] = "openvz",
        [VIRTUALIZATION_DOCKER] = "docker",
        [VIRTUALIZATION_RKT] = "rkt",
        [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
};

/* Presumably generates the lookup helpers for the table above, including the
 * virtualization_to_string() used in this file -- see string-table.h to
 * confirm exactly what is emitted. */
DEFINE_STRING_TABLE_LOOKUP(virtualization, int);