chiark / gitweb /
Prep v235: Apply upstream fixes (3/10) [src/basic]
[elogind.git] / src / basic / virt.c
1 /***
2   This file is part of systemd.
3
4   Copyright 2011 Lennart Poettering
5
6   systemd is free software; you can redistribute it and/or modify it
7   under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or
9   (at your option) any later version.
10
11   systemd is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <errno.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25
26 #include "alloc-util.h"
27 #include "dirent-util.h"
28 #include "fd-util.h"
29 #include "fileio.h"
30 #include "macro.h"
31 #include "process-util.h"
32 #include "stat-util.h"
33 #include "string-table.h"
34 #include "string-util.h"
35 #include "virt.h"
36
37 #if 0 /// UNNEEDED by elogind
38 static int detect_vm_cpuid(void) {
39
40         /* CPUID is an x86 specific interface. */
41 #if defined(__i386__) || defined(__x86_64__)
42
43         static const struct {
44                 const char *cpuid;
45                 int id;
46         } cpuid_vendor_table[] = {
47                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
48                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       },
49                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      },
50                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
51                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
52                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
53                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
54                 /* https://wiki.freebsd.org/bhyve */
55                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
56         };
57
58         uint32_t eax, ecx;
59         bool hypervisor;
60
61         /* http://lwn.net/Articles/301888/ */
62
63 #if defined (__i386__)
64 #define REG_a "eax"
65 #define REG_b "ebx"
66 #elif defined (__amd64__)
67 #define REG_a "rax"
68 #define REG_b "rbx"
69 #endif
70
71         /* First detect whether there is a hypervisor */
72         eax = 1;
73         __asm__ __volatile__ (
74                 /* ebx/rbx is being used for PIC! */
75                 "  push %%"REG_b"         \n\t"
76                 "  cpuid                  \n\t"
77                 "  pop %%"REG_b"          \n\t"
78
79                 : "=a" (eax), "=c" (ecx)
80                 : "0" (eax)
81         );
82
83         hypervisor = !!(ecx & 0x80000000U);
84
85         if (hypervisor) {
86                 union {
87                         uint32_t sig32[3];
88                         char text[13];
89                 } sig = {};
90                 unsigned j;
91
92                 /* There is a hypervisor, see what it is */
93                 eax = 0x40000000U;
94                 __asm__ __volatile__ (
95                         /* ebx/rbx is being used for PIC! */
96                         "  push %%"REG_b"         \n\t"
97                         "  cpuid                  \n\t"
98                         "  mov %%ebx, %1          \n\t"
99                         "  pop %%"REG_b"          \n\t"
100
101                         : "=a" (eax), "=r" (sig.sig32[0]), "=c" (sig.sig32[1]), "=d" (sig.sig32[2])
102                         : "0" (eax)
103                 );
104
105                 log_debug("Virtualization found, CPUID=%s", sig.text);
106
107                 for (j = 0; j < ELEMENTSOF(cpuid_vendor_table); j ++)
108                         if (streq(sig.text, cpuid_vendor_table[j].cpuid))
109                                 return cpuid_vendor_table[j].id;
110
111                 return VIRTUALIZATION_VM_OTHER;
112         }
113 #endif
114         log_debug("No virtualization found in CPUID");
115
116         return VIRTUALIZATION_NONE;
117 }
118
119 static int detect_vm_device_tree(void) {
120 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
121         _cleanup_free_ char *hvtype = NULL;
122         int r;
123
124         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
125         if (r == -ENOENT) {
126                 _cleanup_closedir_ DIR *dir = NULL;
127                 struct dirent *dent;
128
129                 dir = opendir("/proc/device-tree");
130                 if (!dir) {
131                         if (errno == ENOENT) {
132                                 log_debug_errno(errno, "/proc/device-tree: %m");
133                                 return VIRTUALIZATION_NONE;
134                         }
135                         return -errno;
136                 }
137
138                 FOREACH_DIRENT(dent, dir, return -errno)
139                         if (strstr(dent->d_name, "fw-cfg")) {
140                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", dent->d_name);
141                                 return VIRTUALIZATION_QEMU;
142                         }
143
144                 log_debug("No virtualization found in /proc/device-tree/*");
145                 return VIRTUALIZATION_NONE;
146         } else if (r < 0)
147                 return r;
148
149         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
150         if (streq(hvtype, "linux,kvm"))
151                 return VIRTUALIZATION_KVM;
152         else if (strstr(hvtype, "xen"))
153                 return VIRTUALIZATION_XEN;
154         else
155                 return VIRTUALIZATION_VM_OTHER;
156 #else
157         log_debug("This platform does not support /proc/device-tree");
158         return VIRTUALIZATION_NONE;
159 #endif
160 }
161
162 static int detect_vm_dmi(void) {
163 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
164
165         static const char *const dmi_vendors[] = {
166                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
167                 "/sys/class/dmi/id/sys_vendor",
168                 "/sys/class/dmi/id/board_vendor",
169                 "/sys/class/dmi/id/bios_vendor"
170         };
171
172         static const struct {
173                 const char *vendor;
174                 int id;
175         } dmi_vendor_table[] = {
176                 { "KVM",           VIRTUALIZATION_KVM       },
177                 { "QEMU",          VIRTUALIZATION_QEMU      },
178                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
179                 { "VMware",        VIRTUALIZATION_VMWARE    },
180                 { "VMW",           VIRTUALIZATION_VMWARE    },
181                 { "innotek GmbH",  VIRTUALIZATION_ORACLE    },
182                 { "Xen",           VIRTUALIZATION_XEN       },
183                 { "Bochs",         VIRTUALIZATION_BOCHS     },
184                 { "Parallels",     VIRTUALIZATION_PARALLELS },
185                 /* https://wiki.freebsd.org/bhyve */
186                 { "BHYVE",         VIRTUALIZATION_BHYVE     },
187         };
188         unsigned i;
189         int r;
190
191         for (i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
192                 _cleanup_free_ char *s = NULL;
193                 unsigned j;
194
195                 r = read_one_line_file(dmi_vendors[i], &s);
196                 if (r < 0) {
197                         if (r == -ENOENT)
198                                 continue;
199
200                         return r;
201                 }
202
203
204
205                 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
206                         if (startswith(s, dmi_vendor_table[j].vendor)) {
207                                 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
208                                 return dmi_vendor_table[j].id;
209                         }
210         }
211 #endif
212
213         log_debug("No virtualization found in DMI");
214
215         return VIRTUALIZATION_NONE;
216 }
217
218 static int detect_vm_xen(void) {
219         /* Check for Dom0 will be executed later in detect_vm_xen_dom0
220            Thats why we dont check the content of /proc/xen/capabilities here. */
221         if (access("/proc/xen/capabilities", F_OK) < 0) {
222                 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
223                 return VIRTUALIZATION_NONE;
224         }
225
226         log_debug("Virtualization XEN found (/proc/xen/capabilities exists)");
227         return  VIRTUALIZATION_XEN;
228
229 }
230
231 static bool detect_vm_xen_dom0(void) {
232         _cleanup_free_ char *domcap = NULL;
233         char *cap, *i;
234         int r;
235
236         r = read_one_line_file("/proc/xen/capabilities", &domcap);
237         if (r == -ENOENT) {
238                 log_debug("Virtualization XEN not found, /proc/xen/capabilities does not exist");
239                 return false;
240         }
241         if (r < 0)
242                 return r;
243
244         i = domcap;
245         while ((cap = strsep(&i, ",")))
246                 if (streq(cap, "control_d"))
247                         break;
248         if (!cap) {
249                 log_debug("Virtualization XEN DomU found (/proc/xen/capabilites)");
250                 return false;
251         }
252
253         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
254         return true;
255 }
256
257 static int detect_vm_hypervisor(void) {
258         _cleanup_free_ char *hvtype = NULL;
259         int r;
260
261         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
262         if (r == -ENOENT)
263                 return VIRTUALIZATION_NONE;
264         if (r < 0)
265                 return r;
266
267         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
268
269         if (streq(hvtype, "xen"))
270                 return VIRTUALIZATION_XEN;
271         else
272                 return VIRTUALIZATION_VM_OTHER;
273 }
274
275 static int detect_vm_uml(void) {
276         _cleanup_free_ char *cpuinfo_contents = NULL;
277         int r;
278
279         /* Detect User-Mode Linux by reading /proc/cpuinfo */
280         r = read_full_file("/proc/cpuinfo", &cpuinfo_contents, NULL);
281         if (r < 0)
282                 return r;
283
284         if (strstr(cpuinfo_contents, "\nvendor_id\t: User Mode Linux\n")) {
285                 log_debug("UML virtualization found in /proc/cpuinfo");
286                 return VIRTUALIZATION_UML;
287         }
288
289         log_debug("No virtualization found in /proc/cpuinfo.");
290         return VIRTUALIZATION_NONE;
291 }
292
293 static int detect_vm_zvm(void) {
294
295 #if defined(__s390__)
296         _cleanup_free_ char *t = NULL;
297         int r;
298
299         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
300         if (r == -ENOENT)
301                 return VIRTUALIZATION_NONE;
302         if (r < 0)
303                 return r;
304
305         log_debug("Virtualization %s found in /proc/sysinfo", t);
306         if (streq(t, "z/VM"))
307                 return VIRTUALIZATION_ZVM;
308         else
309                 return VIRTUALIZATION_KVM;
310 #else
311         log_debug("This platform does not support /proc/sysinfo");
312         return VIRTUALIZATION_NONE;
313 #endif
314 }
315
316 /* Returns a short identifier for the various VM implementations */
317 int detect_vm(void) {
318         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
319         int r, dmi;
320
321         if (cached_found >= 0)
322                 return cached_found;
323
324         /* We have to use the correct order here:
325          *
326          * -> First try to detect Oracle Virtualbox, even if it uses KVM.
327          * -> Second try to detect from cpuid, this will report KVM for
328          *    whatever software is used even if info in dmi is overwritten.
329          * -> Third try to detect from dmi. */
330
331         dmi = detect_vm_dmi();
332         if (dmi == VIRTUALIZATION_ORACLE) {
333                 r = dmi;
334                 goto finish;
335         }
336
337         r = detect_vm_cpuid();
338         if (r < 0)
339                 return r;
340         if (r != VIRTUALIZATION_NONE)
341                 goto finish;
342
343         r = dmi;
344         if (r < 0)
345                 return r;
346         if (r != VIRTUALIZATION_NONE)
347                 goto finish;
348
349         /* x86 xen will most likely be detected by cpuid. If not (most likely
350          * because we're not an x86 guest), then we should try the xen capabilities
351          * file next. If that's not found, then we check for the high-level
352          * hypervisor sysfs file:
353          *
354          * https://bugs.freedesktop.org/show_bug.cgi?id=77271 */
355
356         r = detect_vm_xen();
357         if (r < 0)
358                 return r;
359         if (r != VIRTUALIZATION_NONE)
360                 goto finish;
361
362         r = detect_vm_hypervisor();
363         if (r < 0)
364                 return r;
365         if (r != VIRTUALIZATION_NONE)
366                 goto finish;
367
368         r = detect_vm_device_tree();
369         if (r < 0)
370                 return r;
371         if (r != VIRTUALIZATION_NONE)
372                 goto finish;
373
374         r = detect_vm_uml();
375         if (r < 0)
376                 return r;
377         if (r != VIRTUALIZATION_NONE)
378                 goto finish;
379
380         r = detect_vm_zvm();
381         if (r < 0)
382                 return r;
383
384 finish:
385         /* x86 xen Dom0 is detected as XEN in hypervisor and maybe others.
386          * In order to detect the Dom0 as not virtualization we need to
387          * double-check it */
388         if (r == VIRTUALIZATION_XEN && detect_vm_xen_dom0())
389                 r = VIRTUALIZATION_NONE;
390
391         cached_found = r;
392         log_debug("Found VM virtualization %s", virtualization_to_string(r));
393         return r;
394 }
395 #endif // 0
396
397 int detect_container(void) {
398
399         static const struct {
400                 const char *value;
401                 int id;
402         } value_table[] = {
403                 { "lxc",            VIRTUALIZATION_LXC            },
404                 { "lxc-libvirt",    VIRTUALIZATION_LXC_LIBVIRT    },
405                 { "systemd-nspawn", VIRTUALIZATION_SYSTEMD_NSPAWN },
406                 { "docker",         VIRTUALIZATION_DOCKER         },
407                 { "rkt",            VIRTUALIZATION_RKT            },
408         };
409
410         static thread_local int cached_found = _VIRTUALIZATION_INVALID;
411         _cleanup_free_ char *m = NULL;
412         const char *e = NULL;
413         unsigned j;
414         int r;
415
416         if (cached_found >= 0)
417                 return cached_found;
418
419         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
420         if (access("/proc/vz", F_OK) >= 0 &&
421             access("/proc/bc", F_OK) < 0) {
422                 r = VIRTUALIZATION_OPENVZ;
423                 goto finish;
424         }
425
426         if (getpid_cached() == 1) {
427                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative. */
428
429                 e = getenv("container");
430                 if (isempty(e)) {
431                         r = VIRTUALIZATION_NONE;
432                         goto finish;
433                 }
434
435                 goto translate_name;
436         }
437
438         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
439          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
440         r = read_one_line_file("/run/systemd/container", &m);
441         if (r >= 0) {
442                 e = m;
443                 goto translate_name;
444         }
445         if (r != -ENOENT)
446                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
447
448         /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
449         r = getenv_for_pid(1, "container", &m);
450         if (r > 0) {
451                 e = m;
452                 goto translate_name;
453         }
454         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
455                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
456
457         /* Interestingly /proc/1/sched actually shows the host's PID for what we see as PID 1. Hence, if the PID shown
458          * there is not 1, we know we are in a PID namespace. and hence a container. */
459         r = read_one_line_file("/proc/1/sched", &m);
460         if (r >= 0) {
461                 const char *t;
462
463                 t = strrchr(m, '(');
464                 if (!t)
465                         return -EIO;
466
467                 if (!startswith(t, "(1,")) {
468                         r = VIRTUALIZATION_CONTAINER_OTHER;
469                         goto finish;
470                 }
471         } else if (r != -ENOENT)
472                 return r;
473
474         /* If that didn't work, give up, assume no container manager. */
475         r = VIRTUALIZATION_NONE;
476         goto finish;
477
478 translate_name:
479         for (j = 0; j < ELEMENTSOF(value_table); j++)
480                 if (streq(e, value_table[j].value)) {
481                         r = value_table[j].id;
482                         goto finish;
483                 }
484
485         r = VIRTUALIZATION_CONTAINER_OTHER;
486
487 finish:
488         log_debug("Found container virtualization %s.", virtualization_to_string(r));
489         cached_found = r;
490         return r;
491 }
492
493 #if 0 /// UNNEEDED by elogind
/* Detects any kind of virtualization, containers first.
 *
 * Whatever detect_container() returns is passed on unless it is 0; only then is the result of
 * detect_vm() returned instead. */
int detect_virtualization(void) {
        int r;

        r = detect_container();
        if (r != 0)
                return r;

        return detect_vm();
}
503
504 static int userns_has_mapping(const char *name) {
505         _cleanup_fclose_ FILE *f = NULL;
506         _cleanup_free_ char *buf = NULL;
507         size_t n_allocated = 0;
508         ssize_t n;
509         uint32_t a, b, c;
510         int r;
511
512         f = fopen(name, "re");
513         if (!f) {
514                 log_debug_errno(errno, "Failed to open %s: %m", name);
515                 return errno == ENOENT ? false : -errno;
516         }
517
518         n = getline(&buf, &n_allocated, f);
519         if (n < 0) {
520                 if (feof(f)) {
521                         log_debug("%s is empty, we're in an uninitialized user namespace", name);
522                         return true;
523                 }
524
525                 return log_debug_errno(errno, "Failed to read %s: %m", name);
526         }
527
528         r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
529         if (r < 3)
530                 return log_debug_errno(errno, "Failed to parse %s: %m", name);
531
532         if (a == 0 && b == 0 && c == UINT32_MAX) {
533                 /* The kernel calls mappings_overlap() and does not allow overlaps */
534                 log_debug("%s has a full 1:1 mapping", name);
535                 return false;
536         }
537
538         /* Anything else implies that we are in a user namespace */
539         log_debug("Mapping found in %s, we're in a user namespace", name);
540         return true;
541 }
542
543 int running_in_userns(void) {
544         _cleanup_free_ char *line = NULL;
545         int r;
546
547         r = userns_has_mapping("/proc/self/uid_map");
548         if (r != 0)
549                 return r;
550
551         r = userns_has_mapping("/proc/self/gid_map");
552         if (r != 0)
553                 return r;
554
555         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
556          * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
557          * also does not exist. We cannot distinguish those two cases, so assume that
558          * we're running on a stripped-down recent kernel, rather than on an old one,
559          * and if the file is not found, return false.
560          */
561         r = read_one_line_file("/proc/self/setgroups", &line);
562         if (r < 0) {
563                 log_debug_errno(r, "/proc/self/setgroups: %m");
564                 return r == -ENOENT ? false : r;
565         }
566
567         truncate_nl(line);
568         r = streq(line, "deny");
569         /* See user_namespaces(7) for a description of this "setgroups" contents. */
570         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
571         return r;
572 }
573 #endif // 0
574
/* Tells whether the calling process runs in a chroot() environment, by comparing its own root
 * directory with the one of PID 1.
 *
 * Returns 1 if files_same() reports 0 for "/proc/1/root" versus "/", 0 if it reports a positive
 * value, and its negative result unchanged on failure. */
int running_in_chroot(void) {
        int same;

#if 0 /// elogind does not allow to ignore chroots, we are never init!
        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;
#endif // 0

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        return !same;
}
589
/* Names of the VIRTUALIZATION_* ids, indexed by id. The first group are the VM types, the second
 * group the container types. */
static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_NONE] = "none",
        [VIRTUALIZATION_KVM] = "kvm",
        [VIRTUALIZATION_QEMU] = "qemu",
        [VIRTUALIZATION_BOCHS] = "bochs",
        [VIRTUALIZATION_XEN] = "xen",
        [VIRTUALIZATION_UML] = "uml",
        [VIRTUALIZATION_VMWARE] = "vmware",
        [VIRTUALIZATION_ORACLE] = "oracle",
        [VIRTUALIZATION_MICROSOFT] = "microsoft",
        [VIRTUALIZATION_ZVM] = "zvm",
        [VIRTUALIZATION_PARALLELS] = "parallels",
        [VIRTUALIZATION_BHYVE] = "bhyve",
        [VIRTUALIZATION_VM_OTHER] = "vm-other",

        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
        [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
        [VIRTUALIZATION_LXC] = "lxc",
        [VIRTUALIZATION_OPENVZ] = "openvz",
        [VIRTUALIZATION_DOCKER] = "docker",
        [VIRTUALIZATION_RKT] = "rkt",
        [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
};

/* NOTE(review): presumably this expands to the lookup helpers over the table above, among them the
 * virtualization_to_string() used by the detection functions - confirm in string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(virtualization, int);