chiark / gitweb /
virt: use /proc/xen as indicator for a Xen domain (#6442, #6662) (#7555)
[elogind.git] / src / basic / virt.c
1 /***
2   This file is part of systemd.
3
4   Copyright 2011 Lennart Poettering
5
6   systemd is free software; you can redistribute it and/or modify it
7   under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or
9   (at your option) any later version.
10
11   systemd is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25
26 #include "alloc-util.h"
27 #include "dirent-util.h"
28 #include "env-util.h"
29 #include "fd-util.h"
30 #include "fileio.h"
31 #include "macro.h"
32 #include "process-util.h"
33 #include "stat-util.h"
34 #include "string-table.h"
35 #include "string-util.h"
36 #include "virt.h"
37
38 #if 0 /// UNNEEDED by elogind
/* Asks the CPU, via the CPUID instruction, whether a hypervisor is present and which one.
 *
 * Returns one of the VIRTUALIZATION_* ids from cpuid_vendor_table when the hypervisor's
 * signature is recognised, VIRTUALIZATION_VM_OTHER when a hypervisor is reported but its
 * signature is not in the table, and VIRTUALIZATION_NONE when no hypervisor is reported or
 * when this is not an x86 build (the whole probe is compiled out then). */
static int detect_vm_cpuid(void) {

        /* CPUID is an x86 specific interface. */
#if defined(__i386__) || defined(__x86_64__)

        /* Signature strings as returned by the hypervisor leaf, mapped to our ids. */
        static const struct {
                const char *cpuid;
                int id;
        } cpuid_vendor_table[] = {
                { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
                { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
                { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
                /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
                { "VMwareVMware", VIRTUALIZATION_VMWARE    },
                /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
                { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
                /* https://wiki.freebsd.org/bhyve */
                { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
        };

        uint32_t eax, ecx;
        bool hypervisor;

        /* http://lwn.net/Articles/301888/ */

        /* Name of the native-width b register, needed for the push/pop below.
         * NOTE(review): REG_a is defined but not used in this function, and the #elif tests
         * __amd64__ while the enclosing #if tests __x86_64__ - confirm both are always defined
         * together on the supported compilers. */
#if defined (__i386__)
#define REG_a "eax"
#define REG_b "ebx"
#elif defined (__amd64__)
#define REG_a "rax"
#define REG_b "rbx"
#endif

        /* First detect whether there is a hypervisor */
        eax = 1;
        __asm__ __volatile__ (
                /* ebx/rbx is being used for PIC! */
                "  push %%"REG_b"         \n\t"
                "  cpuid                  \n\t"
                "  pop %%"REG_b"          \n\t"

                : "=a" (eax), "=c" (ecx)
                : "0" (eax)
        );

        /* The top bit of ECX from leaf 1 is what this code treats as "hypervisor present". */
        hypervisor = !!(ecx & 0x80000000U);

        if (hypervisor) {
                /* Three 32 bit registers make up a 12 character signature. The union is
                 * zero-initialised and text[] is one byte longer than the registers, so
                 * text[12] stays NUL and sig.text can be used as a C string. */
                union {
                        uint32_t sig32[3];
                        char text[13];
                } sig = {};
                unsigned j;

                /* There is a hypervisor, see what it is */
                eax = 0x40000000U;
                __asm__ __volatile__ (
                        /* ebx/rbx is being used for PIC! */
                        "  push %%"REG_b"         \n\t"
                        "  cpuid                  \n\t"
                        /* Copy EBX out to operand %1 (sig32[0]) before the saved value is restored. */
                        "  mov %%ebx, %1          \n\t"
                        "  pop %%"REG_b"          \n\t"

                        : "=a" (eax), "=r" (sig.sig32[0]), "=c" (sig.sig32[1]), "=d" (sig.sig32[2])
                        : "0" (eax)
                );

                log_debug("Virtualization found, CPUID=%s", sig.text);

                /* Exact match against the known signatures. */
                for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
                        if (streq(sig.text, cpuid_vendor_table[j].cpuid))
                                return cpuid_vendor_table[j].id;

                /* A hypervisor answered, but with a signature we do not know. */
                return VIRTUALIZATION_VM_OTHER;
        }
#endif
        log_debug("No virtualization found in CPUID");

        return VIRTUALIZATION_NONE;
}
119
120 static int detect_vm_device_tree(void) {
121 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
122         _cleanup_free_ char *hvtype = NULL;
123         int r;
124
125         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
126         if (r == -ENOENT) {
127                 _cleanup_closedir_ DIR *dir = NULL;
128                 struct dirent *dent;
129
130                 dir = opendir("/proc/device-tree");
131                 if (!dir) {
132                         if (errno == ENOENT) {
133                                 log_debug_errno(errno, "/proc/device-tree: %m");
134                                 return VIRTUALIZATION_NONE;
135                         }
136                         return -errno;
137                 }
138
139                 FOREACH_DIRENT(dent, dir, return -errno)
140                         if (strstr(dent->d_name, "fw-cfg")) {
141                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
142                                 return VIRTUALIZATION_QEMU;
143                         }
144
145                 log_debug("No virtualization found in /proc/device-tree/*");
146                 return VIRTUALIZATION_NONE;
147         } else if (r < 0)
148                 return r;
149
150         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
151         if (streq(hvtype, "linux,kvm"))
152                 return VIRTUALIZATION_KVM;
153         else if (strstr(hvtype, "xen"))
154                 return VIRTUALIZATION_XEN;
155         else
156                 return VIRTUALIZATION_VM_OTHER;
157 #else
158         log_debug("This platform does not support /proc/device-tree");
159         return VIRTUALIZATION_NONE;
160 #endif
161 }
162
163 static int detect_vm_dmi(void) {
164 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
165
166         static const char *const dmi_vendors[] = {
167                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
168                 "/sys/class/dmi/id/sys_vendor",
169                 "/sys/class/dmi/id/board_vendor",
170                 "/sys/class/dmi/id/bios_vendor"
171         };
172
173         static const struct {
174                 const char *vendor;
175                 int id;
176         } dmi_vendor_table[] = {
177                 { "KVM",           VIRTUALIZATION_KVM       },
178                 { "QEMU",          VIRTUALIZATION_QEMU      },
179                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
180                 { "VMware",        VIRTUALIZATION_VMWARE    },
181                 { "VMW",           VIRTUALIZATION_VMWARE    },
182                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
183                 { "Xen",           VIRTUALIZATION_XEN       },
184                 { "Bochs",         VIRTUALIZATION_BOCHS     },
185                 { "Parallels",     VIRTUALIZATION_PARALLELS },
186                 /* https://wiki.freebsd.org/bhyve */
187                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
188         };
189         unsigned i;
190         int r;
191
192         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
193                 _cleanup_free_ char *s = NULL;
194                 unsigned j;
195
196                 r = read_one_line_file(dmi_vendors[i], &s);
197                 if (r < 0) {
198                         if (r == -ENOENT)
199                                 continue;
200
201                         return r;
202                 }
203
204
205
206                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
207                         if (startswith(s, dmi_vendor_table[j].vendor)) {
208                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
209                                 return dmi_vendor_table[j].id;
210                         }
211         }
212 #endif
213
214         log_debug("No virtualization found in DMI");
215
216         return VIRTUALIZATION_NONE;
217 }
218
219 static int detect_vm_xen(void) {
220
221         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
222            The presence of /proc/xen indicates some form of a Xen domain */
223         if (access("/proc/xen", F_OK) < 0) {
224                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
225                 return VIRTUALIZATION_NONE;
226         }
227
228         log_debug("Virtualization XEN found (/proc/xen exists)");
229         return VIRTUALIZATION_XEN;
230 }
231
232 static bool detect_vm_xen_dom0(void) {
233         _cleanup_free_ char *domcap = NULL;
234         char *cap, *i;
235         int r;
236
237         r = read_one_line_file("/proc/xen/capabilities", &domcap);
238         if (r == -ENOENT) {
239                 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
240                 return false;
241         }
242         if (r < 0)
243                 return r;
244
245         i = domcap;
246         while ((cap = strsep(&i, ",")))
247                 if (streq(cap, "control_d"))
248                         break;
249         if (!cap) {
250                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
251                 return false;
252         }
253
254         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
255         return true;
256 }
257
258 static int detect_vm_hypervisor(void) {
259         _cleanup_free_ char *hvtype = NULL;
260         int r;
261
262         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
263         if (r == -ENOENT)
264                 return VIRTUALIZATION_NONE;
265         if (r < 0)
266                 return r;
267
268         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
269
270         if (streq(hvtype, "xen"))
271                 return VIRTUALIZATION_XEN;
272         else
273                 return VIRTUALIZATION_VM_OTHER;
274 }
275
276 static int detect_vm_uml(void) {
277         _cleanup_free_ char *cpuinfo_contents = NULL;
278         int r;
279
280         /* Detect User-Mode Linux by reading /proc/cpuinfo */
281         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
282         if (r < 0)
283                 return r;
284
285         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
286                 log_debug("UML virtualization found in /proc/cpuinfo");
287                 return VIRTUALIZATION_UML;
288         }
289
290         log_debug("No virtualization found in /proc/cpuinfo.");
291         return VIRTUALIZATION_NONE;
292 }
293
294 static int detect_vm_zvm(void) {
295
296 #if defined(__s390__)
297         _cleanup_free_ char *t = NULL;
298         int r;
299
300         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
301         if (r == -ENOENT)
302                 return VIRTUALIZATION_NONE;
303         if (r < 0)
304                 return r;
305
306         log_debug("Virtualization %s found in /proc/sysinfo", t);
307         if (streq(t, "z/VM"))
308                 return VIRTUALIZATION_ZVM;
309         else
310                 return VIRTUALIZATION_KVM;
311 #else
312         log_debug("This platform does not support /proc/sysinfo");
313         return VIRTUALIZATION_NONE;
314 #endif
315 }
316
317 /* Returns a short identifier for the various VM implementations */
318 int detect_vm(void) {
319         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
320         int r, dmi;
321         bool other = false;
322
323         if (cached_found >= 0)
324                 return cached_found;
325
326         /* We have to use the correct order here:
327          *
328          * -> First try to detect Oracle Virtualbox, even if it uses KVM.
329          * -> Second try to detect from cpuid, this will report KVM for
330          *    whatever software is used even if info in dmi is overwritten.
331          * -> Third try to detect from dmi. */
332
333         dmi = detect_vm_dmi();
334         if (dmi == VIRTUALIZATION_ORACLE) {
335                 r = dmi;
336                 goto finish;
337         }
338
339         r = detect_vm_cpuid();
340         if (r < 0)
341                 return r;
342         if (r != VIRTUALIZATION_NONE) {
343                 if (r == VIRTUALIZATION_VM_OTHER)
344                         other = true;
345                 else
346                         goto finish;
347         }
348
349         r = dmi;
350         if (r < 0)
351                 return r;
352         if (r != VIRTUALIZATION_NONE) {
353                 if (r == VIRTUALIZATION_VM_OTHER)
354                         other = true;
355                 else
356                         goto finish;
357         }
358
359         /* x86 xen will most likely be detected by cpuid. If not (most likely
360          * because we're not an x86 guest), then we should try the /proc/xen
361          * directory next. If that's not found, then we check for the high-level
362          * hypervisor sysfs file.
363          */
364
365         r = detect_vm_xen();
366         if (r < 0)
367                 return r;
368         if (r != VIRTUALIZATION_NONE) {
369                 if (r == VIRTUALIZATION_VM_OTHER)
370                         other = true;
371                 else
372                         goto finish;
373         }
374
375         r = detect_vm_hypervisor();
376         if (r < 0)
377                 return r;
378         if (r != VIRTUALIZATION_NONE) {
379                 if (r == VIRTUALIZATION_VM_OTHER)
380                         other = true;
381                 else
382                         goto finish;
383         }
384
385         r = detect_vm_device_tree();
386         if (r < 0)
387                 return r;
388         if (r != VIRTUALIZATION_NONE) {
389                 if (r == VIRTUALIZATION_VM_OTHER)
390                         other = true;
391                 else
392                         goto finish;
393         }
394
395         r = detect_vm_uml();
396         if (r < 0)
397                 return r;
398         if (r != VIRTUALIZATION_NONE) {
399                 if (r == VIRTUALIZATION_VM_OTHER)
400                         other = true;
401                 else
402                         goto finish;
403         }
404
405         r = detect_vm_zvm();
406         if (r < 0)
407                 return r;
408
409 finish:
410         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
411          * In order to detect the Dom0 as not virtualization we need to
412          * double-check it */
413         if (r == VIRTUALIZATION_XEN && detect_vm_xen_dom0())
414                 r = VIRTUALIZATION_NONE;
415         else if (r == VIRTUALIZATION_NONE && other)
416                 r = VIRTUALIZATION_VM_OTHER;
417
418         cached_found = r;
419         log_debug("Found VM virtualization %s", virtualization_to_string(r));
420         return r;
421 }
422 #endif // 0
423
424 int detect_container(void) {
425
426         static const struct {
427                 const char *value;
428                 int id;
429         } value_table[] = {
430                 { "lxc",            VIRTUALIZATION_LXC            },
431                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
432                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
433                 { "docker",         VIRTUALIZATION_DOCKER         },
434                 { "rkt",            VIRTUALIZATION_RKT            },
435         };
436
437         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
438         _cleanup_free_ char *m = NULL;
439         const char *e = NULL;
440         unsigned j;
441         int r;
442
443         if (cached_found >= 0)
444                 return cached_found;
445
446         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
447         if (access("/proc/vz", F_OK) >= 0 &&
448             access("/proc/bc", F_OK) < 0) {
449                 r = VIRTUALIZATION_OPENVZ;
450                 goto finish;
451         }
452
453         if (getpid_cached() == 1) {
454                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
455
456                 e = getenv("container");
457                 if (isempty(e)) {
458                         r = VIRTUALIZATION_NONE;
459                         goto finish;
460                 }
461
462                 goto translate_name;
463         }
464
465         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
466          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
467         r = read_one_line_file("/run/systemd/container", &m);
468         if (r >= 0) {
469                 e = m;
470                 goto translate_name;
471         }
472         if (r != -ENOENT)
473                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
474
475         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
476         r = getenv_for_pid(1, "container", &m);
477         if (r > 0) {
478                 e = m;
479                 goto translate_name;
480         }
481         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
482                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
483
484         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
485          * there is not 1, we know we are in a PID namespace. and hence a container. */
486         r = read_one_line_file("/proc/1/sched", &m);
487         if (r >= 0) {
488                 const char *t;
489
490                 t = strrchr(m, '(');
491                 if (!t)
492                         return -EIO;
493
494                 if (!startswith(t, "(1,")) {
495                         r = VIRTUALIZATION_CONTAINER_OTHER;
496                         goto finish;
497                 }
498         } else if (r != -ENOENT)
499                 return r;
500
501         /* If that didn't work, give up, assume no container manager. */
502         r = VIRTUALIZATION_NONE;
503         goto finish;
504
505 translate_name:
506         for (j = 0; j < ELEMENTSOF(value_table); j++)
507                 if (streq(e, value_table[j].value)) {
508                         r = value_table[j].id;
509                         goto finish;
510                 }
511
512         r = VIRTUALIZATION_CONTAINER_OTHER;
513
514 finish:
515         log_debug("Found container virtualization %s.", virtualization_to_string(r));
516         cached_found = r;
517         return r;
518 }
519
520 #if 0 /// UNNEEDED by elogind
/* Detects any kind of virtualization: containers are checked first, and only if
 * detect_container() returns 0 is the VM detection consulted. Any non-zero container result,
 * including a negative error, is returned as is. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
530
531 static int userns_has_mapping(const char *name) {
532         _cleanup_fclose_ FILE *f = NULL;
533         _cleanup_free_ char *buf = NULL;
534         size_t n_allocated = 0;
535         ssize_t n;
536         uint32_t a, b, c;
537         int r;
538
539         f = fopen(name, "re");
540         if (!f) {
541                 log_debug_errno(errno, "Failed to open %s: %m", name);
542                 return errno == ENOENT ? false : -errno;
543         }
544
545         n = getline(&buf, &n_allocated, f);
546         if (n < 0) {
547                 if (feof(f)) {
548                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
549                         return true;
550                 }
551
552                 return log_debug_errno(errno, "Failed to read %s: %m", name);
553         }
554
555         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
556         if (r < 3)
557                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
558
559         if (a == 0 && b == 0 && c == UINT32_MAX) {
560                 /* The kernel calls mappings_overlap() and does not allow overlaps */
561                 log_debug("%s has a full 1:1 mapping", name);
562                 return false;
563         }
564
565         /* Anything else implies that we are in a user namespace */
566         log_debug("Mapping found in %s, we're in a user namespace", name);
567         return true;
568 }
569
570 int running_in_userns(void) {
571         _cleanup_free_ char *line = NULL;
572         int r;
573
574         r = userns_has_mapping("/proc/self/uid_map");
575         if (r != 0)
576                 return r;
577
578         r = userns_has_mapping("/proc/self/gid_map");
579         if (r != 0)
580                 return r;
581
582         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
583          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
584          * also does not exist. We cannot distinguish those two cases, so assume that
585          * we're running on a stripped-down recent kernel, rather than on an old one,
586          * and if the file is not found, return false.
587          */
588         r = read_one_line_file("/proc/self/setgroups", &line);
589         if (r < 0) {
590                 log_debug_errno(r, "/proc/self/setgroups: %m");
591                 return r == -ENOENT ? false : r;
592         }
593
594         truncate_nl(line);
595         r = streq(line, "deny");
596         /* See user_namespaces(7) for a description of this "setgroups" contents. */
597         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
598         return r;
599 }
600 #endif // 0
601
/* Tells whether we run in a chroot() environment, by comparing our root directory with the
 * one of PID 1.
 *
 * Returns 1 if files_same() reports the two as different (0), 0 if it reports them as the
 * same, and passes a negative result of files_same() through unchanged. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);

        return same < 0 ? same : !same;
}
616
/* String names for the VIRTUALIZATION_* ids, indexed by id. The first group holds the ids
 * returned by the VM detection code in this file, the second group the ids returned by the
 * container detection code. */
static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_NONE] = "none",
        [VIRTUALIZATION_KVM] = "kvm",
        [VIRTUALIZATION_QEMU] = "qemu",
        [VIRTUALIZATION_BOCHS] = "bochs",
        [VIRTUALIZATION_XEN] = "xen",
        [VIRTUALIZATION_UML] = "uml",
        [VIRTUALIZATION_VMWARE] = "vmware",
        [VIRTUALIZATION_ORACLE] = "oracle",
        [VIRTUALIZATION_MICROSOFT] = "microsoft",
        [VIRTUALIZATION_ZVM] = "zvm",
        [VIRTUALIZATION_PARALLELS] = "parallels",
        [VIRTUALIZATION_BHYVE] = "bhyve",
        [VIRTUALIZATION_VM_OTHER] = "vm-other",

        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
        [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
        [VIRTUALIZATION_LXC] = "lxc",
        [VIRTUALIZATION_OPENVZ] = "openvz",
        [VIRTUALIZATION_DOCKER] = "docker",
        [VIRTUALIZATION_RKT] = "rkt",
        [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
};

/* NOTE(review): presumably this expands to the lookup helpers for the table above, including
 * the virtualization_to_string() used by the detection functions in this file - confirm
 * against the macro definition in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(virtualization, int);