1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
37 #include <sys/signalfd.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
45 #include <linux/veth.h>
46 #include <sys/personality.h>
47 #include <linux/loop.h>
50 #include <selinux/selinux.h>
58 #include <blkid/blkid.h>
61 #include "sd-daemon.h"
71 #include "cgroup-util.h"
73 #include "path-util.h"
74 #include "loopback-setup.h"
75 #include "dev-setup.h"
80 #include "bus-error.h"
82 #include "bus-kernel.h"
85 #include "rtnl-util.h"
86 #include "udev-util.h"
87 #include "eventfd-util.h"
88 #include "blkid-util.h"
90 #include "siphash24.h"
92 #include "base-filesystem.h"
95 #include "seccomp-util.h"
98 typedef enum ContainerStatus {
103 typedef enum LinkJournal {
/* Command line settings. Every variable starts out with its default value
 * and is overwritten by parse_argv(). arg_bind, arg_bind_ro and arg_tmpfs
 * are flat lists of pairs: parse_argv() always appends two strings per
 * option and the mount helpers walk them with STRV_FOREACH_PAIR(). */
110 static char *arg_directory = NULL;
111 static char *arg_user = NULL;
112 static sd_id128_t arg_uuid = {};
113 static char *arg_machine = NULL;
114 static const char *arg_selinux_context = NULL;
115 static const char *arg_selinux_apifs_context = NULL;
116 static const char *arg_slice = NULL;
117 static bool arg_private_network = false;
118 static bool arg_read_only = false;
119 static bool arg_boot = false;
120 static LinkJournal arg_link_journal = LINK_AUTO;
/* Bitmask of capabilities the container keeps (bit n stands for capability
 * number n). parse_argv() adjusts it for --capability=/--drop-capability=,
 * and drop_capabilities() hands the complement of this mask to
 * capability_bounding_set_drop(). */
121 static uint64_t arg_retain =
122 (1ULL << CAP_CHOWN) |
123 (1ULL << CAP_DAC_OVERRIDE) |
124 (1ULL << CAP_DAC_READ_SEARCH) |
125 (1ULL << CAP_FOWNER) |
126 (1ULL << CAP_FSETID) |
127 (1ULL << CAP_IPC_OWNER) |
129 (1ULL << CAP_LEASE) |
130 (1ULL << CAP_LINUX_IMMUTABLE) |
131 (1ULL << CAP_NET_BIND_SERVICE) |
132 (1ULL << CAP_NET_BROADCAST) |
133 (1ULL << CAP_NET_RAW) |
134 (1ULL << CAP_SETGID) |
135 (1ULL << CAP_SETFCAP) |
136 (1ULL << CAP_SETPCAP) |
137 (1ULL << CAP_SETUID) |
138 (1ULL << CAP_SYS_ADMIN) |
139 (1ULL << CAP_SYS_CHROOT) |
140 (1ULL << CAP_SYS_NICE) |
141 (1ULL << CAP_SYS_PTRACE) |
142 (1ULL << CAP_SYS_TTY_CONFIG) |
143 (1ULL << CAP_SYS_RESOURCE) |
144 (1ULL << CAP_SYS_BOOT) |
145 (1ULL << CAP_AUDIT_WRITE) |
146 (1ULL << CAP_AUDIT_CONTROL) |
148 static char **arg_bind = NULL;
149 static char **arg_bind_ro = NULL;
150 static char **arg_tmpfs = NULL;
151 static char **arg_setenv = NULL;
152 static bool arg_quiet = false;
153 static bool arg_share_system = false;
154 static bool arg_register = true;
155 static bool arg_keep_unit = false;
156 static char **arg_network_interfaces = NULL;
157 static char **arg_network_macvlan = NULL;
158 static bool arg_network_veth = false;
159 static const char *arg_network_bridge = NULL;
/* 0xffffffffLU is the same value parse_argv() compares the result of
 * personality_from_string() against to reject an unknown personality. */
160 static unsigned long arg_personality = 0xffffffffLU;
161 static const char *arg_image = NULL;
/* Print the usage summary with one entry per command line option to stdout.
 * The program name shown in the first line is taken from
 * program_invocation_short_name. */
163 static int help(void) {
165 printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
166 "Spawn a minimal namespace container for debugging, testing and building.\n\n"
167 " -h --help Show this help\n"
168 " --version Print version string\n"
169 " -q --quiet Do not show status information\n"
170 " -D --directory=PATH Root directory for the container\n"
171 " -i --image=PATH File system device or image for the container\n"
172 " -b --boot Boot up full system (i.e. invoke init)\n"
173 " -u --user=USER Run the command under specified user or uid\n"
174 " -M --machine=NAME Set the machine name for the container\n"
175 " --uuid=UUID Set a specific machine UUID for the container\n"
176 " -S --slice=SLICE Place the container in the specified slice\n"
177 " --private-network Disable network in container\n"
178 " --network-interface=INTERFACE\n"
179 " Assign an existing network interface to the\n"
181 " --network-macvlan=INTERFACE\n"
182 " Create a macvlan network interface based on an\n"
183 " existing network interface to the container\n"
184 " --network-veth Add a virtual ethernet connection between host\n"
186 " --network-bridge=INTERFACE\n"
187 " Add a virtual ethernet connection between host\n"
188 " and container and add it to an existing bridge on\n"
190 " -Z --selinux-context=SECLABEL\n"
191 " Set the SELinux security context to be used by\n"
192 " processes in the container\n"
193 " -L --selinux-apifs-context=SECLABEL\n"
194 " Set the SELinux security context to be used by\n"
195 " API/tmpfs file systems in the container\n"
196 " --capability=CAP In addition to the default, retain specified\n"
198 " --drop-capability=CAP Drop the specified capability from the default set\n"
199 " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
200 " -j Equivalent to --link-journal=host\n"
201 " --read-only Mount the root directory read-only\n"
202 " --bind=PATH[:PATH] Bind mount a file or directory from the host into\n"
204 " --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n"
205 " --tmpfs=PATH:[OPTIONS] Mount an empty tmpfs to the specified directory\n"
206 " --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
207 " --share-system Share system namespaces with host\n"
208 " --register=BOOLEAN Register container as machine\n"
209 " --keep-unit Do not register a scope for the machine, reuse\n"
210 " the service unit nspawn is running in\n",
211 program_invocation_short_name);
/* Parse the command line with getopt_long() and store the results in the
 * arg_* globals.
 *
 * Capabilities named by --capability= and --drop-capability= (comma
 * separated, "all" selects every bit) are collected in the local masks
 * "plus" and "minus". Once all options are processed they are folded into
 * arg_retain; CAP_NET_ADMIN is added when a private network was requested,
 * and "minus" is applied last, so a dropped capability always wins.
 *
 * --network-veth and --network-interface= imply arg_private_network, and
 * --share-system turns off machine registration. Conflicting combinations
 * (--boot with --share-system, --directory= with --image=, --keep-unit from
 * a user session) are reported with log_error(). */
216 static int parse_argv(int argc, char *argv[]) {
233 ARG_NETWORK_INTERFACE,
240 static const struct option options[] = {
241 { "help", no_argument, NULL, 'h' },
242 { "version", no_argument, NULL, ARG_VERSION },
243 { "directory", required_argument, NULL, 'D' },
244 { "user", required_argument, NULL, 'u' },
245 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
246 { "boot", no_argument, NULL, 'b' },
247 { "uuid", required_argument, NULL, ARG_UUID },
248 { "read-only", no_argument, NULL, ARG_READ_ONLY },
249 { "capability", required_argument, NULL, ARG_CAPABILITY },
250 { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
251 { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
252 { "bind", required_argument, NULL, ARG_BIND },
253 { "bind-ro", required_argument, NULL, ARG_BIND_RO },
254 { "tmpfs", required_argument, NULL, ARG_TMPFS },
255 { "machine", required_argument, NULL, 'M' },
256 { "slice", required_argument, NULL, 'S' },
257 { "setenv", required_argument, NULL, ARG_SETENV },
258 { "selinux-context", required_argument, NULL, 'Z' },
259 { "selinux-apifs-context", required_argument, NULL, 'L' },
260 { "quiet", no_argument, NULL, 'q' },
261 { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM },
262 { "register", required_argument, NULL, ARG_REGISTER },
263 { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
264 { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
265 { "network-macvlan", required_argument, NULL, ARG_NETWORK_MACVLAN },
266 { "network-veth", no_argument, NULL, ARG_NETWORK_VETH },
267 { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
268 { "personality", required_argument, NULL, ARG_PERSONALITY },
269 { "image", required_argument, NULL, 'i' },
274 uint64_t plus = 0, minus = 0;
279 while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:", options, NULL)) >= 0) {
287 puts(PACKAGE_STRING);
288 puts(SYSTEMD_FEATURES);
293 arg_directory = canonicalize_file_name(optarg);
294 if (!arg_directory) {
295 log_error("Invalid root directory: %m");
307 arg_user = strdup(optarg);
313 case ARG_NETWORK_BRIDGE:
314 arg_network_bridge = optarg;
318 case ARG_NETWORK_VETH:
319 arg_network_veth = true;
320 arg_private_network = true;
323 case ARG_NETWORK_INTERFACE:
324 if (strv_extend(&arg_network_interfaces, optarg) < 0)
327 arg_private_network = true;
330 case ARG_NETWORK_MACVLAN:
331 if (strv_extend(&arg_network_macvlan, optarg) < 0)
336 case ARG_PRIVATE_NETWORK:
337 arg_private_network = true;
345 r = sd_id128_from_string(optarg, &arg_uuid);
347 log_error("Invalid UUID: %s", optarg);
357 if (isempty(optarg)) {
362 if (!hostname_is_valid(optarg)) {
363 log_error("Invalid machine name: %s", optarg);
368 arg_machine = strdup(optarg);
376 arg_selinux_context = optarg;
380 arg_selinux_apifs_context = optarg;
384 arg_read_only = true;
388 case ARG_DROP_CAPABILITY: {
392 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
393 _cleanup_free_ char *t;
396 t = strndup(word, length);
400 if (streq(t, "all")) {
401 if (c == ARG_CAPABILITY)
402 plus = (uint64_t) -1;
404 minus = (uint64_t) -1;
406 if (cap_from_name(t, &cap) < 0) {
407 log_error("Failed to parse capability %s.", t);
411 if (c == ARG_CAPABILITY)
412 plus |= 1ULL << (uint64_t) cap;
414 minus |= 1ULL << (uint64_t) cap;
422 arg_link_journal = LINK_GUEST;
425 case ARG_LINK_JOURNAL:
426 if (streq(optarg, "auto"))
427 arg_link_journal = LINK_AUTO;
428 else if (streq(optarg, "no"))
429 arg_link_journal = LINK_NO;
430 else if (streq(optarg, "guest"))
431 arg_link_journal = LINK_GUEST;
432 else if (streq(optarg, "host"))
433 arg_link_journal = LINK_HOST;
435 log_error("Failed to parse link journal mode %s", optarg);
443 _cleanup_free_ char *a = NULL, *b = NULL;
447 x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
449 e = strchr(optarg, ':');
451 a = strndup(optarg, e - optarg);
461 if (!path_is_absolute(a) || !path_is_absolute(b)) {
462 log_error("Invalid bind mount specification: %s", optarg);
466 r = strv_extend(x, a);
470 r = strv_extend(x, b);
478 _cleanup_free_ char *a = NULL, *b = NULL;
481 e = strchr(optarg, ':');
483 a = strndup(optarg, e - optarg);
487 b = strdup("mode=0755");
493 if (!path_is_absolute(a)) {
494 log_error("Invalid tmpfs specification: %s", optarg);
498 r = strv_push(&arg_tmpfs, a);
504 r = strv_push(&arg_tmpfs, b);
516 if (!env_assignment_is_valid(optarg)) {
517 log_error("Environment variable assignment '%s' is not valid.", optarg);
521 n = strv_env_set(arg_setenv, optarg);
525 strv_free(arg_setenv);
534 case ARG_SHARE_SYSTEM:
535 arg_share_system = true;
539 r = parse_boolean(optarg);
541 log_error("Failed to parse --register= argument: %s", optarg);
549 arg_keep_unit = true;
552 case ARG_PERSONALITY:
554 arg_personality = personality_from_string(optarg);
555 if (arg_personality == 0xffffffffLU) {
556 log_error("Unknown or unsupported personality '%s'.", optarg);
566 assert_not_reached("Unhandled option");
570 if (arg_share_system)
571 arg_register = false;
573 if (arg_boot && arg_share_system) {
574 log_error("--boot and --share-system may not be combined.");
578 if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
579 log_error("--keep-unit may not be used when invoked from a user session.");
583 if (arg_directory && arg_image) {
584 log_error("--directory= and --image= may not be combined.");
588 arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
/* Mount the API file systems listed in mount_table below the container root
 * "dest".
 *
 * Each table entry is mounted at dest + "/" + where. Entries that have a
 * source ("what") are skipped when the target already is a mount point;
 * entries without a source are remounts of the preceding entry. When
 * arg_selinux_apifs_context is set, a context="..." option is appended to
 * the mount options of tmpfs and devpts entries.
 *
 * The last field of each entry ("fatal") takes part in the mount() failure
 * check, so only failures of fatal entries reach the log_error() below. */
593 static int mount_all(const char *dest) {
595 typedef struct MountPoint {
604 static const MountPoint mount_table[] = {
605 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
606 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */
607 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
608 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
609 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
610 { "devpts", "/dev/pts", "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
611 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
612 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
614 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */
615 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
622 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
623 _cleanup_free_ char *where = NULL;
625 _cleanup_free_ char *options = NULL;
630 where = strjoin(dest, "/", mount_table[k].where, NULL);
634 t = path_is_mount_point(where, true);
636 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
644 /* Skip this entry if it is not a remount. */
645 if (mount_table[k].what && t > 0)
648 mkdir_p(where, 0755);
651 if (arg_selinux_apifs_context &&
652 (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
653 options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
660 o = mount_table[k].options;
663 if (mount(mount_table[k].what,
666 mount_table[k].flags,
668 mount_table[k].fatal) {
670 log_error("mount(%s) failed: %m", where);
/* Bind mount every (source, destination) pair of the list "l" into the
 * container: source *x is mounted on dest + *y.
 *
 * If the destination already exists its file type must match the source.
 * If it does not exist (ENOENT), the parent directories are created and a
 * mount point of the matching type is made for directories, FIFOs, sockets
 * and regular files; any other source type is refused.
 *
 * NOTE(review): bind_remount_recursive(where, true) is presumably only
 * reached when "ro" is set -- confirm against the guarding condition. */
680 static int mount_binds(const char *dest, char **l, bool ro) {
683 STRV_FOREACH_PAIR(x, y, l) {
684 _cleanup_free_ char *where = NULL;
685 struct stat source_st, dest_st;
688 if (stat(*x, &source_st) < 0) {
689 log_error("Failed to stat %s: %m", *x);
693 where = strappend(dest, *y);
697 r = stat(where, &dest_st);
699 if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
700 log_error("The file types of %s and %s do not match. Refusing bind mount", *x, where);
703 } else if (errno == ENOENT) {
704 r = mkdir_parents_label(where, 0755);
706 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
710 log_error("Failed to bind mount %s: %m", *x);
714 /* Create the mount point, but be conservative -- refuse to create block
715 * and char devices. */
716 if (S_ISDIR(source_st.st_mode))
717 mkdir_label(where, 0755);
718 else if (S_ISFIFO(source_st.st_mode))
720 else if (S_ISSOCK(source_st.st_mode))
721 mknod(where, 0644 | S_IFSOCK, 0);
722 else if (S_ISREG(source_st.st_mode))
725 log_error("Refusing to create mountpoint for file: %s", *x);
729 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
730 log_error("mount(%s) failed: %m", where);
735 r = bind_remount_recursive(where, true);
737 log_error("Read-Only bind mount failed: %s", strerror(-r));
/* Mount a tmpfs for every (path, options) pair in arg_tmpfs. The mount
 * point is dest + path; it is created with mkdir_label() first (the result
 * of that call is not checked) and the options string is passed to mount()
 * as file system data. */
746 static int mount_tmpfs(const char *dest) {
749 STRV_FOREACH_PAIR(i, o, arg_tmpfs) {
750 _cleanup_free_ char *where = NULL;
752 where = strappend(dest, *i);
756 mkdir_label(where, 0755);
758 if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, *o) < 0) {
759 log_error("tmpfs mount to %s failed: %m", where);
/* Make the container use the same timezone as the host.
 *
 * The host's /etc/localtime must be a symlink into /usr/share/zoneinfo/
 * (relative or absolute); otherwise a warning is logged and the container
 * is left alone. If dest/etc/localtime already points at the same zone
 * nothing needs to change. The zone file must exist below
 * dest/usr/share/zoneinfo/, and the new link is created as the relative
 * path "../usr/share/zoneinfo/<zone>". */
767 static int setup_timezone(const char *dest) {
768 _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
774 /* Fix the timezone, if possible */
775 r = readlink_malloc("/etc/localtime", &p);
777 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
781 z = path_startswith(p, "../usr/share/zoneinfo/");
783 z = path_startswith(p, "/usr/share/zoneinfo/");
785 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
789 where = strappend(dest, "/etc/localtime");
793 r = readlink_malloc(where, &q);
795 y = path_startswith(q, "../usr/share/zoneinfo/");
797 y = path_startswith(q, "/usr/share/zoneinfo/");
800 /* Already pointing to the right place? Then do nothing .. */
801 if (y && streq(y, z))
805 check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
809 if (access(check, F_OK) < 0) {
810 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
814 what = strappend("../usr/share/zoneinfo/", z);
819 if (symlink(what, where) < 0) {
820 log_error("Failed to correct timezone of container: %m");
/* Copy the host's /etc/resolv.conf to dest/etc/resolv.conf on a best-effort
 * basis: the result of copy_file() is deliberately ignored. The
 * arg_private_network check comes first.
 * NOTE(review): presumably the copy is skipped for private networking --
 * confirm against the statement following that check. */
827 static int setup_resolv_conf(const char *dest) {
828 _cleanup_free_ char *where = NULL;
832 if (arg_private_network)
835 /* Fix resolv.conf, if possible */
836 where = strappend(dest, "/etc/resolv.conf");
840 /* We don't really care for the results of this really. If it
841 * fails, it fails, but meh... */
842 copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW, 0644);
/* Format the 128 bit ID "id" as lower-case hex in the usual UUID grouping
 * of 4-2-2-2-6 bytes separated by dashes. The caller supplies the buffer
 * "s"; its 37 bytes hold the 36 characters of that form plus the
 * terminating NUL. */
847 static char* id128_format_as_uuid(sd_id128_t id, char s[37]) {
850 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
851 SD_ID128_FORMAT_VAL(id));
/* Give the container its own random boot ID.
 *
 * A freshly randomized ID is formatted as UUID and written to
 * dest/dev/proc-sys-kernel-random-boot-id, which is then bind mounted over
 * dest/proc/sys/kernel/random/boot_id. Failing to remount that bind mount
 * read-only is only a warning. The arg_share_system check comes first.
 * NOTE(review): presumably nothing is done when the system is shared with
 * the host -- confirm against the statement following that check. */
856 static int setup_boot_id(const char *dest) {
857 _cleanup_free_ char *from = NULL, *to = NULL;
864 if (arg_share_system)
867 /* Generate a new randomized boot ID, so that each boot-up of
868 * the container gets a new one */
870 from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
871 to = strappend(dest, "/proc/sys/kernel/random/boot_id");
875 r = sd_id128_randomize(&rnd);
877 log_error("Failed to generate random boot id: %s", strerror(-r));
881 id128_format_as_uuid(rnd, as_uuid);
883 r = write_string_file(from, as_uuid);
885 log_error("Failed to write boot id: %s", strerror(-r));
889 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
890 log_error("Failed to bind mount boot id: %m");
892 } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
893 log_warning("Failed to make boot id read-only: %m");
/* Recreate the device nodes named in the NUL-separated list "devnodes"
 * inside the container: each /dev/<name> of the host is stat()ed and
 * created again with mknod() at dest/dev/<name> using the same mode and
 * device number. Nodes missing on the host (ENOENT) are not reported as a
 * stat failure; anything that is neither a char nor a block device is
 * rejected.
 *
 * NOTE(review): the mknod() failure message prints "dest" rather than the
 * node path "to" -- looks unintended, confirm. */
899 static int copy_devnodes(const char *dest) {
901 static const char devnodes[] =
911 _cleanup_umask_ mode_t u;
917 NULSTR_FOREACH(d, devnodes) {
918 _cleanup_free_ char *from = NULL, *to = NULL;
921 from = strappend("/dev/", d);
922 to = strjoin(dest, "/dev/", d, NULL);
926 if (stat(from, &st) < 0) {
928 if (errno != ENOENT) {
929 log_error("Failed to stat %s: %m", from);
933 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
935 log_error("%s is not a char or block device, cannot copy", from);
938 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
940 log_error("mknod(%s) failed: %m", dest);
/* Create dest/dev/ptmx as a relative symlink to "pts/ptmx", i.e. to the
 * ptmx node of the devpts instance mounted at /dev/pts. */
948 static int setup_ptmx(const char *dest) {
949 _cleanup_free_ char *p = NULL;
951 p = strappend(dest, "/dev/ptmx");
955 if (symlink("pts/ptmx", p) < 0) {
956 log_error("Failed to create /dev/ptmx symlink: %m");
/* Make the TTY "console" available as dest/dev/console.
 *
 * The TTY is first set to mode 0600 and owner 0:0. Then a placeholder
 * device node carrying the device number of the host's /dev/null is created
 * at dest/dev/console, and the TTY is bind mounted on top of it. */
963 static int setup_dev_console(const char *dest, const char *console) {
964 _cleanup_umask_ mode_t u;
974 if (stat("/dev/null", &st) < 0) {
975 log_error("Failed to stat /dev/null: %m");
979 r = chmod_and_chown(console, 0600, 0, 0);
981 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
985 /* We need to bind mount the right tty to /dev/console since
986 * ptys can only exist on pts file systems. To have something
987 * to bind mount things on we create a device node first, and
988 * use /dev/null for that since the cgroups device policy
989 * allows us to create that freely, while we cannot create
990 * /dev/console. (Note that the major minor doesn't actually
991 * matter here, since we mount it over anyway). */
993 to = strappenda(dest, "/dev/console");
994 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
995 log_error("mknod() for /dev/console failed: %m");
999 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
1000 log_error("Bind mount for /dev/console failed: %m");
/* Provide a stand-in for /proc/kmsg inside the container.
 *
 * A FIFO is created at dest/dev/kmsg (mode 0600, owner 0:0) and bind
 * mounted on dest/proc/kmsg. The FIFO is then opened and the resulting file
 * descriptor is sent over "kmsg_socket" as an SCM_RIGHTS control message,
 * so that it stays open for as long as the child runs. "kmsg_socket" must
 * be a valid descriptor (asserted). */
1007 static int setup_kmsg(const char *dest, int kmsg_socket) {
1008 _cleanup_free_ char *from = NULL, *to = NULL;
1010 _cleanup_umask_ mode_t u;
1012 struct cmsghdr cmsghdr;
1013 uint8_t buf[CMSG_SPACE(sizeof(int))];
1015 struct msghdr mh = {
1016 .msg_control = &control,
1017 .msg_controllen = sizeof(control),
1019 struct cmsghdr *cmsg;
1022 assert(kmsg_socket >= 0);
1026 /* We create the kmsg FIFO as /dev/kmsg, but immediately
1027 * delete it after bind mounting it to /proc/kmsg. While FIFOs
1028 * on the reading side behave very similar to /proc/kmsg,
1029 * their writing side behaves differently from /dev/kmsg in
1030 * that writing blocks when nothing is reading. In order to
1031 * avoid any problems with containers deadlocking due to this
1032 * we simply make /dev/kmsg unavailable to the container. */
1033 if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
1034 asprintf(&to, "%s/proc/kmsg", dest) < 0)
1037 if (mkfifo(from, 0600) < 0) {
1038 log_error("mkfifo() for /dev/kmsg failed: %m");
1042 r = chmod_and_chown(from, 0600, 0, 0);
1044 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
1048 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
1049 log_error("Bind mount for /proc/kmsg failed: %m");
1053 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
1055 log_error("Failed to open fifo: %m");
1059 cmsg = CMSG_FIRSTHDR(&mh);
1060 cmsg->cmsg_level = SOL_SOCKET;
1061 cmsg->cmsg_type = SCM_RIGHTS;
1062 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
1063 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
1065 mh.msg_controllen = cmsg->cmsg_len;
1067 /* Store away the fd in the socket, so that it stays open as
1068 * long as we run the child */
1069 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
1073 log_error("Failed to send FIFO fd: %m");
1077 /* And now make the FIFO unavailable as /dev/kmsg... */
/* Set the host name to arg_machine with sethostname(). The arg_share_system
 * check comes first.
 * NOTE(review): presumably the host name is left untouched when the system
 * is shared with the host -- confirm against the statement following that
 * check. */
1082 static int setup_hostname(void) {
1084 if (arg_share_system)
1087 if (sethostname(arg_machine, strlen(arg_machine)) < 0)
/* Connect the journal directory of the container with the one of the host,
 * as selected by arg_link_journal.
 *
 * The container's machine ID is read from directory/etc/machine-id and must
 * parse as a 128 bit ID; linking is refused when it equals the host's
 * machine ID. With that ID, "p" is the host side path
 * /var/log/journal/<id> and "q" the guest side path
 * directory/var/log/journal/<id>. Paths that already are mount points are
 * an error unless the mode is LINK_AUTO.
 *
 * For LINK_GUEST the host path p becomes a symlink to the guest directory
 * q. Otherwise p is bind mounted onto q; for LINK_HOST p is created first,
 * while the non-host case tests for p with access().
 *
 * NOTE(review): several messages below print %m after mkdir_p() or
 * readlink_and_make_absolute(), whose result is kept in "r", while other
 * call sites in this file print strerror(-r) for such helpers -- confirm
 * that errno is meaningful at those points. */
1093 static int setup_journal(const char *directory) {
1094 sd_id128_t machine_id, this_id;
1095 _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
1099 p = strappend(directory, "/etc/machine-id");
1103 r = read_one_line_file(p, &b);
1104 if (r == -ENOENT && arg_link_journal == LINK_AUTO)
1107 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
1112 if (isempty(id) && arg_link_journal == LINK_AUTO)
1115 /* Verify validity */
1116 r = sd_id128_from_string(id, &machine_id);
1118 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1122 r = sd_id128_get_machine(&this_id);
1124 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1128 if (sd_id128_equal(machine_id, this_id)) {
1129 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1130 "Host and machine ids are equal (%s): refusing to link journals", id);
1131 if (arg_link_journal == LINK_AUTO)
1137 if (arg_link_journal == LINK_NO)
1141 p = strappend("/var/log/journal/", id);
1142 q = strjoin(directory, "/var/log/journal/", id, NULL);
1146 if (path_is_mount_point(p, false) > 0) {
1147 if (arg_link_journal != LINK_AUTO) {
1148 log_error("%s: already a mount point, refusing to use for journal", p);
1155 if (path_is_mount_point(q, false) > 0) {
1156 if (arg_link_journal != LINK_AUTO) {
1157 log_error("%s: already a mount point, refusing to use for journal", q);
1164 r = readlink_and_make_absolute(p, &d);
1166 if ((arg_link_journal == LINK_GUEST ||
1167 arg_link_journal == LINK_AUTO) &&
1170 r = mkdir_p(q, 0755);
1172 log_warning("failed to create directory %s: %m", q);
1176 if (unlink(p) < 0) {
1177 log_error("Failed to remove symlink %s: %m", p);
1180 } else if (r == -EINVAL) {
1182 if (arg_link_journal == LINK_GUEST &&
1185 if (errno == ENOTDIR) {
1186 log_error("%s already exists and is neither a symlink nor a directory", p);
1189 log_error("Failed to remove %s: %m", p);
1193 } else if (r != -ENOENT) {
1194 log_error("readlink(%s) failed: %m", p);
1198 if (arg_link_journal == LINK_GUEST) {
1200 if (symlink(q, p) < 0) {
1201 log_error("Failed to symlink %s to %s: %m", q, p);
1205 r = mkdir_p(q, 0755);
1207 log_warning("failed to create directory %s: %m", q);
1211 if (arg_link_journal == LINK_HOST) {
1212 r = mkdir_p(p, 0755);
1214 log_error("Failed to create %s: %m", p);
1218 } else if (access(p, F_OK) < 0)
1221 if (dir_is_empty(q) == 0)
1222 log_warning("%s is not empty, proceeding anyway.", q);
1224 r = mkdir_p(q, 0755);
1226 log_error("Failed to create %s: %m", q);
1230 if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1231 log_error("Failed to bind mount journal from host into guest: %m");
/* Expose a kdbus domain inside the container: create the directory
 * dest/dev/kdbus and bind mount "path" onto it. */
1238 static int setup_kdbus(const char *dest, const char *path) {
1244 p = strappenda(dest, "/dev/kdbus");
1245 if (mkdir(p, 0755) < 0) {
1246 log_error("Failed to create kdbus path: %m");
1250 if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1251 log_error("Failed to mount kdbus domain path: %m");
/* Drop every capability that is not part of arg_retain from the capability
 * bounding set; returns whatever capability_bounding_set_drop() returns. */
1258 static int drop_capabilities(void) {
1259 return capability_bounding_set_drop(~arg_retain, false);
/* Announce the container to the org.freedesktop.machine1 service on the
 * system bus.
 *
 * With arg_keep_unit a single sd_bus_call_method() call is issued.
 * Otherwise the method call message is assembled by hand, because it
 * additionally carries an array of (name, variant) properties: "Slice"
 * when arg_slice is non-empty, "DevicePolicy" set to "strict", and a
 * "DeviceAllow" list of device nodes the container may use. Both variants
 * pass arg_uuid and arg_directory (or the empty string) as arguments. */
1262 static int register_machine(pid_t pid) {
1263 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1264 _cleanup_bus_unref_ sd_bus *bus = NULL;
1270 r = sd_bus_default_system(&bus);
1272 log_error("Failed to open system bus: %s", strerror(-r));
1276 if (arg_keep_unit) {
1277 r = sd_bus_call_method(
1279 "org.freedesktop.machine1",
1280 "/org/freedesktop/machine1",
1281 "org.freedesktop.machine1.Manager",
1287 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1291 strempty(arg_directory));
1293 _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
1295 r = sd_bus_message_new_method_call(
1298 "org.freedesktop.machine1",
1299 "/org/freedesktop/machine1",
1300 "org.freedesktop.machine1.Manager",
1303 log_error("Failed to create message: %s", strerror(-r));
1307 r = sd_bus_message_append(
1311 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1315 strempty(arg_directory));
1317 log_error("Failed to append message arguments: %s", strerror(-r));
1321 r = sd_bus_message_open_container(m, 'a', "(sv)");
1323 log_error("Failed to open container: %s", strerror(-r));
1327 if (!isempty(arg_slice)) {
1328 r = sd_bus_message_append(m, "(sv)", "Slice", "s", arg_slice);
1330 log_error("Failed to append slice: %s", strerror(-r));
1335 r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
1337 log_error("Failed to add device policy: %s", strerror(-r));
1341 r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 10,
1342 /* Allow the container to
1343 * access and create the API
1344 * device nodes, so that
1345 * PrivateDevices= in the
1346 * container can work
1351 "/dev/random", "rwm",
1352 "/dev/urandom", "rwm",
1354 /* Allow the container
1355 * access to ptys. However,
1357 * container to ever create
1358 * these device nodes. */
1359 "/dev/pts/ptmx", "rw",
1361 /* Allow the container
1362 * access to all kdbus
1363 * devices. Again, the
1364 * container cannot create
1365 * these nodes, only use
1366 * them. We use a pretty
1367 * open match here, so that
1368 * the kernel API can still
1371 "char-kdbus/*", "rw");
1373 log_error("Failed to add device whitelist: %s", strerror(-r));
1377 r = sd_bus_message_close_container(m);
1379 log_error("Failed to close container: %s", strerror(-r));
1383 r = sd_bus_call(bus, m, 0, &error, NULL);
1387 log_error("Failed to register machine: %s", bus_error_message(&error, r));
/* Ask the org.freedesktop.machine1 service to terminate the machine.
 *
 * A first call on the Manager interface yields a reply from which the
 * object path of the machine is read; a second call is then made on the
 * org.freedesktop.machine1.Machine interface. A failure of either call is
 * only logged at debug level, since the machine may already be gone. */
1394 static int terminate_machine(pid_t pid) {
1395 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1396 _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1397 _cleanup_bus_unref_ sd_bus *bus = NULL;
1404 r = sd_bus_default_system(&bus);
1406 log_error("Failed to open system bus: %s", strerror(-r));
1410 r = sd_bus_call_method(
1412 "org.freedesktop.machine1",
1413 "/org/freedesktop/machine1",
1414 "org.freedesktop.machine1.Manager",
1421 /* Note that the machine might already have been
1422 * cleaned up automatically, hence don't consider it a
1423 * failure if we cannot get the machine object. */
1424 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1428 r = sd_bus_message_read(reply, "o", &path);
1430 return bus_log_parse_error(r);
1432 r = sd_bus_call_method(
1434 "org.freedesktop.machine1",
1436 "org.freedesktop.machine1.Machine",
1442 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
/* Reset the audit login UID of this process to "4294967295" via
 * /proc/self/loginuid.
 *
 * Nothing is written when the file already holds that value. A failed
 * write is reported with a long explanation about auditing on old kernels;
 * that message also announces a 5s pause. The arg_share_system check comes
 * first.
 * NOTE(review): presumably the reset is skipped when the system is shared
 * with the host -- confirm against the statement following that check. */
1449 static int reset_audit_loginuid(void) {
1450 _cleanup_free_ char *p = NULL;
1453 if (arg_share_system)
1456 r = read_one_line_file("/proc/self/loginuid", &p);
1460 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1464 /* Already reset? */
1465 if (streq(p, "4294967295"))
1468 r = write_string_file("/proc/self/loginuid", "4294967295");
1470 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1471 "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1472 "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1473 "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1474 "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1482 #define HASH_KEY SD_ID128_MAKE(c3,c4,f9,19,b5,57,b2,1c,e6,cf,14,27,03,9c,ee,a2)
/* Derive a MAC address for the container that stays the same across runs.
 *
 * The host's machine ID and the container name (arg_machine) are
 * concatenated in the buffer "v" and hashed with siphash24() under the
 * fixed key HASH_KEY. The first ETH_ALEN bytes of the hash become the
 * address written to "mac"; the multicast bit is then cleared and the
 * locally-assigned bit set in the first octet. */
1484 static int get_mac(struct ether_addr *mac) {
1491 l = strlen(arg_machine);
1492 sz = sizeof(sd_id128_t) + l;
1495 /* fetch some persistent data unique to the host */
1496 r = sd_id128_get_machine((sd_id128_t*) v);
1500 /* combine with some data unique (on this host) to this
1501 * container instance */
1502 memcpy(v + sizeof(sd_id128_t), arg_machine, l);
1504 /* Let's hash the host machine ID plus the container name. We
1505 * use a fixed, but originally randomly created hash key here. */
1506 siphash24(result, v, sz, HASH_KEY.bytes);
1508 assert_cc(ETH_ALEN <= sizeof(result));
1509 memcpy(mac->ether_addr_octet, result, ETH_ALEN);
1511 /* see eth_random_addr in the kernel */
1512 mac->ether_addr_octet[0] &= 0xfe; /* clear multicast bit */
1513 mac->ether_addr_octet[0] |= 0x02; /* set local assignment bit (IEEE802) */
/* Create a veth pair connecting host and container through rtnetlink.
 *
 * The host side interface name is written to "iface_name": the prefix
 * "vb-" (bridge mode) or "ve-" followed by arg_machine. The peer is named
 * "host0", gets the address held in "mac" and is assigned to the network
 * namespace of process "pid" via IFLA_NET_NS_PID. The function checks
 * arg_private_network and arg_network_veth before doing any of this.
 *
 * NOTE(review): strncpy() does not NUL-terminate when arg_machine has
 * IFNAMSIZ - 3 or more characters -- confirm that the caller's buffer is
 * terminated or that the name is bounded elsewhere. */
1518 static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ]) {
1519 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1520 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1521 struct ether_addr mac;
1524 if (!arg_private_network)
1527 if (!arg_network_veth)
1530 /* Use two different interface name prefixes depending whether
1531 * we are in bridge mode or not. */
1532 if (arg_network_bridge)
1533 memcpy(iface_name, "vb-", 3);
1535 memcpy(iface_name, "ve-", 3);
1536 strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1540 log_error("Failed to generate predictable MAC address for host0");
1544 r = sd_rtnl_open(&rtnl, 0);
1546 log_error("Failed to connect to netlink: %s", strerror(-r));
1550 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1552 log_error("Failed to allocate netlink message: %s", strerror(-r));
1556 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1558 log_error("Failed to add netlink interface name: %s", strerror(-r));
1562 r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1564 log_error("Failed to open netlink container: %s", strerror(-r));
1568 r = sd_rtnl_message_open_container_union(m, IFLA_INFO_DATA, "veth");
1570 log_error("Failed to open netlink container: %s", strerror(-r));
1574 r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1576 log_error("Failed to open netlink container: %s", strerror(-r));
1580 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1582 log_error("Failed to add netlink interface name: %s", strerror(-r));
1586 r = sd_rtnl_message_append_ether_addr(m, IFLA_ADDRESS, &mac);
1588 log_error("Failed to add netlink MAC address: %s", strerror(-r));
1592 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1594 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1598 r = sd_rtnl_message_close_container(m);
1600 log_error("Failed to close netlink container: %s", strerror(-r));
1604 r = sd_rtnl_message_close_container(m);
1606 log_error("Failed to close netlink container: %s", strerror(-r));
1610 r = sd_rtnl_message_close_container(m);
1612 log_error("Failed to close netlink container: %s", strerror(-r));
1616 r = sd_rtnl_call(rtnl, m, 0, NULL);
1618 log_error("Failed to add new veth interfaces: %s", strerror(-r));
/* Attach the host side veth interface "veth_name" to the bridge named by
 * arg_network_bridge.
 *
 * The bridge name is resolved to an interface index, then one RTM_SETLINK
 * message both sets IFF_UP on the veth interface and sets its IFLA_MASTER
 * to that index. The function checks arg_private_network, arg_network_veth
 * and arg_network_bridge before doing any of this. */
1625 static int setup_bridge(const char veth_name[]) {
1626 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1627 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1630 if (!arg_private_network)
1633 if (!arg_network_veth)
1636 if (!arg_network_bridge)
1639 bridge = (int) if_nametoindex(arg_network_bridge);
1641 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1645 r = sd_rtnl_open(&rtnl, 0);
1647 log_error("Failed to connect to netlink: %s", strerror(-r));
1651 r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
1653 log_error("Failed to allocate netlink message: %s", strerror(-r));
1657 r = sd_rtnl_message_link_set_flags(m, IFF_UP, IFF_UP);
1659 log_error("Failed to set IFF_UP flag: %s", strerror(-r));
1663 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1665 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1669 r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1671 log_error("Failed to add netlink master field: %s", strerror(-r));
1675 r = sd_rtnl_call(rtnl, m, 0, NULL);
1677 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
/* Resolve the network interface called name and check it with udev.
 *
 * The name is turned into an interface index via if_nametoindex(); the udev
 * device is then looked up through the device id "n<index>" and an error is
 * logged if udev does not report the device as initialized.
 *
 * Callers in this file store the result in a variable named ifi and pass it
 * on as a link index, so the return value is presumably the interface index
 * on success and a negative value on failure -- the return statements are
 * not visible here, confirm. */
1684 static int parse_interface(struct udev *udev, const char *name) {
1685 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
/* Sized for the format written by the sprintf() below: 'n', the decimal
 * index and the terminating NUL. */
1686 char ifi_str[2 + DECIMAL_STR_MAX(int)];
1689 ifi = (int) if_nametoindex(name);
1691 log_error("Failed to resolve interface %s: %m", name);
1695 sprintf(ifi_str, "n%i", ifi);
1696 d = udev_device_new_from_device_id(udev, ifi_str);
1698 log_error("Failed to get udev device for interface %s: %m", name);
1702 if (udev_device_get_is_initialized(d) <= 0) {
1703 log_error("Network interface %s is not initialized yet.", name);
/* Move every interface listed in arg_network_interfaces into the network
 * namespace of process pid.
 *
 * For each name the interface is resolved with parse_interface(), and an
 * RTM_NEWLINK request addressed to that index is sent carrying an
 * IFLA_NET_NS_PID attribute set to pid.
 *
 * NOTE(review): the bodies of the two guards at the top are not visible;
 * presumably the function returns early when private networking is off or
 * the list is empty -- confirm. */
1710 static int move_network_interfaces(pid_t pid) {
1711 _cleanup_udev_unref_ struct udev *udev = NULL;
1712 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1716 if (!arg_private_network)
1719 if (strv_isempty(arg_network_interfaces))
1722 r = sd_rtnl_open(&rtnl, 0);
1724 log_error("Failed to connect to netlink: %s", strerror(-r));
1730 log_error("Failed to connect to udev.");
1734 STRV_FOREACH(i, arg_network_interfaces) {
/* Declared inside the loop so each iteration gets its own message, released
 * by the cleanup attribute. */
1735 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1738 ifi = parse_interface(udev, *i);
1742 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
1744 log_error("Failed to allocate netlink message: %s", strerror(-r));
1748 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1750 log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1754 r = sd_rtnl_call(rtnl, m, 0, NULL);
1756 log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
/* Create one macvlan interface per entry of arg_network_macvlan and place it
 * in the network namespace of process pid.
 *
 * For every listed host interface a new link is requested with:
 *   - IFLA_LINK set to the host interface's index,
 *   - IFLA_IFNAME set to "mv-" followed by the host interface name, shortened
 *     to IFNAMSIZ-1 characters,
 *   - IFLA_NET_NS_PID set to pid,
 *   - link info of kind "macvlan" with IFLA_MACVLAN_MODE = MACVLAN_MODE_BRIDGE.
 *
 * NOTE(review): the bodies of the two guards at the top are not visible;
 * presumably the function returns early when private networking is off or
 * no macvlan was requested -- confirm. */
1764 static int setup_macvlan(pid_t pid) {
1765 _cleanup_udev_unref_ struct udev *udev = NULL;
1766 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1770 if (!arg_private_network)
1773 if (strv_isempty(arg_network_macvlan))
1776 r = sd_rtnl_open(&rtnl, 0);
1778 log_error("Failed to connect to netlink: %s", strerror(-r));
1784 log_error("Failed to connect to udev.");
1788 STRV_FOREACH(i, arg_network_macvlan) {
1789 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1790 _cleanup_free_ char *n = NULL;
1793 ifi = parse_interface(udev, *i);
/* Index 0: this request creates a new link; the parent device is given
 * separately through IFLA_LINK. */
1797 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1799 log_error("Failed to allocate netlink message: %s", strerror(-r));
1803 r = sd_rtnl_message_append_u32(m, IFLA_LINK, ifi);
1805 log_error("Failed to add netlink interface index: %s", strerror(-r));
1809 n = strappend("mv-", *i);
/* Truncate the generated name so it fits the IFNAMSIZ limit. */
1813 strshorten(n, IFNAMSIZ-1);
1815 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, n);
1817 log_error("Failed to add netlink interface name: %s", strerror(-r));
1821 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1823 log_error("Failed to add netlink namespace field: %s", strerror(-r));
/* Two nested containers are opened here and closed again in reverse order
 * below. */
1827 r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1829 log_error("Failed to open netlink container: %s", strerror(-r));
1833 r = sd_rtnl_message_open_container_union(m, IFLA_INFO_DATA, "macvlan");
1835 log_error("Failed to open netlink container: %s", strerror(-r));
1839 r = sd_rtnl_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
1841 log_error("Failed to append macvlan mode: %s", strerror(-r));
1845 r = sd_rtnl_message_close_container(m);
1847 log_error("Failed to close netlink container: %s", strerror(-r));
1851 r = sd_rtnl_message_close_container(m);
1853 log_error("Failed to close netlink container: %s", strerror(-r));
1857 r = sd_rtnl_call(rtnl, m, 0, NULL);
1859 log_error("Failed to add new macvlan interfaces: %s", strerror(-r));
/* Install a seccomp filter that hides the audit netlink protocol from the
 * calling process.
 *
 * The filter allows everything by default (SCMP_ACT_ALLOW) and adds one rule
 * that fails with EAFNOSUPPORT when the first syscall argument equals
 * AF_NETLINK and the third equals NETLINK_AUDIT. The filter attribute
 * SCMP_FLTATR_CTL_NNP is set to 0 before loading, and the filter context is
 * released at the end.
 *
 * NOTE(review): the line naming the syscall the rule is attached to is not
 * visible here; per the description below it is socket() -- confirm. */
1867 static int audit_still_doesnt_work_in_containers(void) {
1870 scmp_filter_ctx seccomp;
1874 Audit is broken in containers, much of the userspace audit
1875 hookup will fail if running inside a container. We don't
1876 care and just turn off creation of audit sockets.
1878 This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
1879 with EAFNOSUPPORT which audit userspace uses as indication
1880 that audit is disabled in the kernel.
1883 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1887 r = seccomp_add_secondary_archs(seccomp);
1889 log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
1893 r = seccomp_rule_add(
1895 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1898 SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
1899 SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
1901 log_error("Failed to add audit seccomp rule: %s", strerror(-r));
/* Loading the filter should not set NO_NEW_PRIVS on the process (see the
 * log message below). */
1905 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1907 log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
1911 r = seccomp_load(seccomp);
1913 log_error("Failed to install seccomp audit filter: %s", strerror(-r));
1916 seccomp_release(seccomp);
/* Open the disk image named by arg_image and make it available as a block
 * device.
 *
 * arg_image is opened read-only or read-write depending on arg_read_only.
 * If it already is a block device, its path is duplicated with strdup().
 * Otherwise it must be a regular file: a free loop device number is obtained
 * from /dev/loop-control (LOOP_CTL_GET_FREE), /dev/loop<nr> is opened, the
 * image is attached with LOOP_SET_FD and configured with LOOP_SET_STATUS64
 * using LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN.
 *
 * device_path receives the loop device path on the loop path. main() stores
 * the return value in image_fd, so a file descriptor is returned on success.
 *
 * NOTE(review): which descriptor is returned, how loop_nr is filled in and
 * what the block-device branch hands back are on lines not visible here --
 * confirm. */
1924 static int setup_image(char **device_path, int *loop_nr) {
1925 struct loop_info64 info = {
1926 .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN
1928 _cleanup_close_ int fd = -1, control = -1, loop = -1;
1929 _cleanup_free_ char* loopdev = NULL;
1933 assert(device_path);
1936 fd = open(arg_image, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
1938 log_error("Failed to open %s: %m", arg_image);
1942 if (fstat(fd, &st) < 0) {
1943 log_error("Failed to stat %s: %m", arg_image);
/* A block device can be used directly; no loop device is needed. */
1947 if (S_ISBLK(st.st_mode)) {
1950 p = strdup(arg_image);
/* NOTE(review): the message below uses %m although the S_ISREG() test does
 * not set errno, so the appended error text is likely unrelated -- confirm
 * and consider dropping ": %m". */
1964 if (!S_ISREG(st.st_mode)) {
1965 log_error("%s is not a regular file or block device: %m", arg_image);
1969 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1971 log_error("Failed to open /dev/loop-control: %m");
1975 nr = ioctl(control, LOOP_CTL_GET_FREE);
1977 log_error("Failed to allocate loop device: %m");
1981 if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
1984 loop = open(loopdev, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
1986 log_error("Failed to open loop device %s: %m", loopdev);
1990 if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
1991 log_error("Failed to set loopback file descriptor on %s: %m", loopdev);
/* The condition guarding this flag is not visible; presumably it is
 * arg_read_only -- confirm. */
1996 info.lo_flags |= LO_FLAGS_READ_ONLY;
1998 if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
1999 log_error("Failed to set loopback settings on %s: %m", loopdev);
2003 *device_path = loopdev;
/* Locate the root, /home and /srv partitions inside a GPT disk image.
 *
 * The block device is probed with libblkid; anything other than a "gpt"
 * partition table is rejected with a pointer to the Discoverable Partitions
 * Specification. The device's child devices are then enumerated through udev
 * and each one is matched back to its blkid partition entry by device number.
 * Partitions carrying GPT_FLAG_NO_AUTO are tested for explicitly, and the
 * partition type string is parsed as a 128-bit ID and compared against
 * GPT_HOME, GPT_SRV, GPT_ROOT_NATIVE and GPT_ROOT_SECONDARY (the latter two
 * only when defined at build time).
 *
 * Outputs (grounded in the assignments at the end):
 *   *root_device / *root_device_rw  - the native root partition if one was
 *                                     found, otherwise the secondary root.
 *   *home_device / *home_device_rw  - the GPT_HOME partition.
 *   *srv_device_rw                  - rw state of the GPT_SRV partition.
 * The *_rw values are the inverse of GPT_FLAG_READ_ONLY on the partition.
 *
 * Without blkid support the function only logs that --image= is unsupported.
 *
 * NOTE(review): the remaining parameters (main() passes an fd first and a
 * bool pointer last), the statements under the "nr >= *_nr" checks and all
 * return paths are not visible here. The checks suggest that among several
 * partitions of the same type the lowest-numbered one is kept -- confirm. */
2014 static int dissect_image(
2016 char **root_device, bool *root_device_rw,
2017 char **home_device, bool *home_device_rw,
2018 char **srv_device, bool *srv_device_rw,
2022 int home_nr = -1, root_nr = -1, secondary_root_nr = -1, srv_nr = -1;
2023 _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL;
2024 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
2025 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
2026 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
2027 _cleanup_udev_unref_ struct udev *udev = NULL;
2028 struct udev_list_entry *first, *item;
2029 bool home_rw = true, root_rw = true, secondary_root_rw = true, srv_rw = true;
2030 const char *pttype = NULL;
2036 assert(root_device);
2037 assert(home_device);
2041 b = blkid_new_probe();
2046 r = blkid_probe_set_device(b, fd, 0, 0);
2051 log_error("Failed to set device on blkid probe: %m");
2055 blkid_probe_enable_partitions(b, 1);
2056 blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);
/* The results -2 and 1 are reported as "no partition table identified";
 * any other non-zero result is reported as a probe failure. */
2059 r = blkid_do_safeprobe(b);
2060 if (r == -2 || r == 1) {
2061 log_error("Failed to identify any partition table on %s.\n"
2062 "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
2064 } else if (r != 0) {
2067 log_error("Failed to probe: %m");
/* pttype stays NULL if the lookup finds nothing; streq_ptr() is used for the
 * comparison instead of streq(). */
2071 blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
2072 if (!streq_ptr(pttype, "gpt")) {
2073 log_error("Image %s does not carry a GUID Partition Table.\n"
2074 "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
2079 pl = blkid_probe_get_partitions(b);
2084 log_error("Failed to list partitions of %s", arg_image);
2092 if (fstat(fd, &st) < 0) {
2093 log_error("Failed to stat block device: %m");
/* Enumerate the udev devices whose parent is the image's block device. */
2097 d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
2101 e = udev_enumerate_new(udev);
2105 r = udev_enumerate_add_match_parent(e, d);
2109 r = udev_enumerate_scan_devices(e);
2111 log_error("Failed to scan for partition devices of %s: %s", arg_image, strerror(-r));
2115 first = udev_enumerate_get_list_entry(e);
2116 udev_list_entry_foreach(item, first) {
2117 _cleanup_udev_device_unref_ struct udev_device *q;
2118 const char *stype, *node;
2119 unsigned long long flags;
2126 q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
2131 log_error("Failed to get partition device of %s: %m", arg_image);
2135 qn = udev_device_get_devnum(q);
/* The enumerated device has the same device number as the whole disk here;
 * presumably it is skipped -- the guarded statement is not visible. */
2139 if (st.st_rdev == qn)
2142 node = udev_device_get_devnode(q);
2146 pp = blkid_partlist_devno_to_partition(pl, qn);
2150 flags = blkid_partition_get_flags(pp);
2151 if (flags & GPT_FLAG_NO_AUTO)
2154 nr = blkid_partition_get_partno(pp);
2158 stype = blkid_partition_get_type_string(pp);
2162 if (sd_id128_from_string(stype, &type_id) < 0)
2165 if (sd_id128_equal(type_id, GPT_HOME)) {
2167 if (home && nr >= home_nr)
2171 home_rw = !(flags & GPT_FLAG_READ_ONLY);
2174 home = strdup(node);
2177 } else if (sd_id128_equal(type_id, GPT_SRV)) {
2179 if (srv && nr >= srv_nr)
2183 srv_rw = !(flags & GPT_FLAG_READ_ONLY);
2190 #ifdef GPT_ROOT_NATIVE
2191 else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
2193 if (root && nr >= root_nr)
2197 root_rw = !(flags & GPT_FLAG_READ_ONLY);
2200 root = strdup(node);
2205 #ifdef GPT_ROOT_SECONDARY
2206 else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
2208 if (secondary_root && nr >= secondary_root_nr)
2211 secondary_root_nr = nr;
2212 secondary_root_rw = !(flags & GPT_FLAG_READ_ONLY);
2215 free(secondary_root);
2216 secondary_root = strdup(node);
2217 if (!secondary_root)
/* At least one kind of root partition is required. */
2223 if (!root && !secondary_root) {
2224 log_error("Failed to identify root partition in disk image %s.\n"
2225 "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
2230 *root_device = root;
2233 *root_device_rw = root_rw;
2235 } else if (secondary_root) {
2236 *root_device = secondary_root;
/* Ownership moved to the caller; clear the local so the cleanup attribute
 * does not free the string. */
2237 secondary_root = NULL;
2239 *root_device_rw = secondary_root_rw;
2244 *home_device = home;
2247 *home_device_rw = home_rw;
2254 *srv_device_rw = srv_rw;
2259 log_error("--image= is not supported, compiled without blkid support.");
/* Mount the block device what below the directory where.
 *
 * The mount point is the concatenation of where and directory. The file
 * system type is detected with a libblkid superblock probe ("TYPE" value);
 * devices detected as "crypto_LUKS" are refused. The mount uses MS_NODEV and
 * adds MS_RDONLY when rw is false.
 *
 * Without blkid support the function only logs that --image= is unsupported.
 *
 * NOTE(review): mount_devices() passes NULL as directory for the root file
 * system; the handling of that case and of a NULL what is not visible here
 * -- confirm. */
2264 static int mount_device(const char *what, const char *where, const char *directory, bool rw) {
2266 _cleanup_blkid_free_probe_ blkid_probe b = NULL;
2267 const char *fstype, *p;
2277 p = strappenda(where, directory);
2282 b = blkid_new_probe_from_filename(what);
2286 log_error("Failed to allocate prober for %s: %m", what);
2290 blkid_probe_enable_superblocks(b, 1);
2291 blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);
/* NOTE(review): dissect_image() in this file treats -2 (together with 1) as
 * the "could not identify" result of blkid_do_safeprobe(), while this check
 * uses -1. One of the two is likely wrong -- confirm against the libblkid
 * documentation. */
2294 r = blkid_do_safeprobe(b);
2295 if (r == -1 || r == 1) {
2296 log_error("Cannot determine file system type of %s", what);
2298 } else if (r != 0) {
2301 log_error("Failed to probe %s: %m", what);
2306 if (blkid_probe_lookup_value(b, "TYPE", &fstype, NULL) < 0) {
2309 log_error("Failed to determine file system type of %s", what);
2313 if (streq(fstype, "crypto_LUKS")) {
2314 log_error("nspawn currently does not support LUKS disk images.");
2318 if (mount(what, p, fstype, MS_NODEV|(rw ? 0 : MS_RDONLY), NULL) < 0) {
2319 log_error("Failed to mount %s: %m", what);
2325 log_error("--image= is not supported, compiled without blkid support.");
/* Mount the partitions found in the disk image into arg_directory.
 *
 * The root device is mounted on arg_directory itself, the home device on
 * "/home" and the srv device on "/srv" below it, each with its own
 * read-write flag, all through mount_device().
 *
 * NOTE(review): the first parameter (main() passes arg_directory) and the
 * conditions guarding each mount are not visible here; presumably a mount is
 * only attempted for devices that are set -- confirm. */
2330 static int mount_devices(
2332 const char *root_device, bool root_device_rw,
2333 const char *home_device, bool home_device_rw,
2334 const char *srv_device, bool srv_device_rw) {
2340 r = mount_device(root_device, arg_directory, NULL, root_device_rw);
2342 log_error("Failed to mount root directory: %s", strerror(-r));
2348 r = mount_device(home_device, arg_directory, "/home", home_device_rw);
2350 log_error("Failed to mount home directory: %s", strerror(-r));
2356 r = mount_device(srv_device, arg_directory, "/srv", srv_device_rw);
2358 log_error("Failed to mount server data directory: %s", strerror(-r));
/* Best-effort teardown of the loop device with number nr.
 *
 * If *image_fd holds an open descriptor, LOOP_CLR_FD is issued on it and the
 * descriptor is closed, with *image_fd updated to safe_close()'s return
 * value. The loop device is then removed through /dev/loop-control with
 * LOOP_CTL_REMOVE. The ioctl() results are not checked.
 *
 * NOTE(review): LOOP_CLR_FD is issued on a descriptor named image_fd; whether
 * that descriptor refers to the loop device rather than the backing image
 * depends on what setup_image() returns -- confirm. Any early return for a
 * negative nr is not visible here. */
2366 static void loop_remove(int nr, int *image_fd) {
2367 _cleanup_close_ int control = -1;
2372 if (image_fd && *image_fd >= 0) {
2373 ioctl(*image_fd, LOOP_CLR_FD);
2374 *image_fd = safe_close(*image_fd);
2377 control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
2381 ioctl(control, LOOP_CTL_REMOVE, nr);
/* Fork a child that runs "getent <database> <key>" with its standard output
 * connected to a pipe.
 *
 * In the child, the pipe's write end becomes stdout, /dev/null becomes stdin
 * and stderr, signal handlers are reset, all other descriptors are closed
 * and getent is executed with an empty environment -- first from
 * /usr/bin/getent, then from /bin/getent. The child exits with EXIT_FAILURE
 * if any step fails.
 *
 * In the parent the write end of the pipe is closed. change_uid_gid() passes
 * the return value to fdopen(), so on success the read end of the pipe is
 * presumably returned and the child's pid stored in *rpid -- those lines are
 * not visible here, confirm. */
2384 static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
2392 if (pipe2(pipe_fds, O_CLOEXEC) < 0) {
2393 log_error("Failed to allocate pipe: %m");
2399 log_error("Failed to fork getent child: %m");
2401 } else if (pid == 0) {
2403 char *empty_env = NULL;
2405 if (dup3(pipe_fds[1], STDOUT_FILENO, 0) < 0)
2406 _exit(EXIT_FAILURE);
/* Only close the pipe ends if they are not one of the standard descriptors
 * themselves. */
2408 if (pipe_fds[0] > 2)
2409 safe_close(pipe_fds[0]);
2410 if (pipe_fds[1] > 2)
2411 safe_close(pipe_fds[1]);
2413 nullfd = open("/dev/null", O_RDWR);
2415 _exit(EXIT_FAILURE);
2417 if (dup3(nullfd, STDIN_FILENO, 0) < 0)
2418 _exit(EXIT_FAILURE);
2420 if (dup3(nullfd, STDERR_FILENO, 0) < 0)
2421 _exit(EXIT_FAILURE);
2426 reset_all_signal_handlers();
2427 close_all_fds(NULL, 0);
/* The second execle() is only reached if the first one failed. */
2429 execle("/usr/bin/getent", "getent", database, key, NULL, &empty_env);
2430 execle("/bin/getent", "getent", database, key, NULL, &empty_env);
2431 _exit(EXIT_FAILURE);
2434 pipe_fds[1] = safe_close(pipe_fds[1]);
/* Switch the calling process to the user named by arg_user.
 *
 * When arg_user is unset, "root" or "0", supplementary groups are cleared
 * and the real, effective and saved IDs are all set to 0.
 *
 * Otherwise the user is resolved by running getent through spawn_getent():
 *   1. "getent passwd <user>" -- the first output line is split at ':' into
 *      the passwd fields, and the UID and GID fields are parsed with
 *      parse_uid() / parse_gid().
 *   2. "getent initgroups <user>" -- after skipping the leading user name,
 *      every whitespace-separated word is parsed as a numeric ID and
 *      collected as a supplementary group.
 * The home directory is then created (parents with mode 0775, the directory
 * itself with mode 0755 owned by uid:gid; -EEXIST is tolerated), stdin,
 * stdout and stderr are chown()ed to the user, and finally the supplementary
 * groups, the GID and the UID are applied in that order.
 *
 * NOTE(review): how the _home output parameter is filled in and all return
 * paths are not visible here -- confirm. */
2441 static int change_uid_gid(char **_home) {
2442 char line[LINE_MAX], *w, *x, *state, *u, *g, *h;
2443 _cleanup_free_ uid_t *uids = NULL;
2444 _cleanup_free_ char *home = NULL;
2445 _cleanup_fclose_ FILE *f = NULL;
2446 _cleanup_close_ int fd = -1;
2447 unsigned n_uids = 0;
2456 if (!arg_user || streq(arg_user, "root") || streq(arg_user, "0")) {
2457 /* Reset everything fully to 0, just in case */
2459 if (setgroups(0, NULL) < 0) {
2460 log_error("setgroups() failed: %m");
/* NOTE(review): the two messages below name setregid()/setreuid() although
 * the calls are setresgid()/setresuid(); the same mismatch appears at the
 * end of this function. */
2464 if (setresgid(0, 0, 0) < 0) {
2465 log_error("setregid() failed: %m");
2469 if (setresuid(0, 0, 0) < 0) {
2470 log_error("setreuid() failed: %m");
2478 /* First, get user credentials */
2479 fd = spawn_getent("passwd", arg_user, &pid);
2483 f = fdopen(fd, "r");
2488 if (!fgets(line, sizeof(line), f)) {
2491 log_error("Failed to resolve user %s.", arg_user);
2495 log_error("Failed to read from getent: %m");
2501 wait_for_terminate_and_warn("getent passwd", pid);
/* Walk the ':'-separated passwd fields; each error message names the field
 * whose terminating separator was expected. */
2503 x = strchr(line, ':');
2505 log_error("/etc/passwd entry has invalid user field.");
2509 u = strchr(x+1, ':');
2511 log_error("/etc/passwd entry has invalid password field.");
2518 log_error("/etc/passwd entry has invalid UID field.");
2526 log_error("/etc/passwd entry has invalid GID field.");
2531 h = strchr(x+1, ':');
2533 log_error("/etc/passwd entry has invalid GECOS field.");
2540 log_error("/etc/passwd entry has invalid home directory field.");
2546 r = parse_uid(u, &uid);
2548 log_error("Failed to parse UID of user.");
2552 r = parse_gid(g, &gid);
2554 log_error("Failed to parse GID of user.");
2562 /* Second, get group memberships */
2563 fd = spawn_getent("initgroups", arg_user, &pid);
2568 f = fdopen(fd, "r");
2573 if (!fgets(line, sizeof(line), f)) {
2575 log_error("Failed to resolve user %s.", arg_user);
2579 log_error("Failed to read from getent: %m");
2585 wait_for_terminate_and_warn("getent initgroups", pid);
2587 /* Skip over the username and subsequent separator whitespace */
2589 x += strcspn(x, WHITESPACE);
2590 x += strspn(x, WHITESPACE);
2592 FOREACH_WORD(w, l, x, state) {
/* Grow the array by one slot before storing the next parsed ID. */
2598 if (!GREEDY_REALLOC(uids, sz, n_uids+1))
2601 r = parse_uid(c, &uids[n_uids++]);
2603 log_error("Failed to parse group data from getent.");
2608 r = mkdir_parents(home, 0775);
2610 log_error("Failed to make home root directory: %s", strerror(-r));
2614 r = mkdir_safe(home, 0755, uid, gid);
2615 if (r < 0 && r != -EEXIST) {
2616 log_error("Failed to make home directory: %s", strerror(-r));
/* The results of these three fchown() calls are not checked. */
2620 fchown(STDIN_FILENO, uid, gid);
2621 fchown(STDOUT_FILENO, uid, gid);
2622 fchown(STDERR_FILENO, uid, gid);
2624 if (setgroups(n_uids, uids) < 0) {
2625 log_error("Failed to set auxiliary groups: %m");
2629 if (setresgid(gid, gid, gid) < 0) {
2630 log_error("setregid() failed: %m");
2634 if (setresuid(uid, uid, uid) < 0) {
2635 log_error("setreuid() failed: %m");
2648 * Return 0 in case the container is being rebooted, has been shut
2649 * down or exited successfully. On failures a negative value is
2652 * The status of the container "CONTAINER_TERMINATED" or
2653 * "CONTAINER_REBOOTED" will be saved in the container argument
/* Wait for the container's process pid to terminate and classify how it
 * ended, based on the siginfo filled in by wait_for_terminate():
 *   - exit with a status that is logged as "exited successfully":
 *     *container = CONTAINER_TERMINATED; other exit statuses are logged as
 *     a failure with the error code.
 *   - termination by SIGINT is logged as a shutdown
 *     (*container = CONTAINER_TERMINATED), by SIGHUP as a reboot
 *     (*container = CONTAINER_REBOOTED).
 *   - any other signal, or an unknown si_code, is logged as an error.
 *
 * NOTE(review): the case labels, the conditions between the branches and the
 * return statements are not visible here -- confirm the exact mapping. */
2655 static int wait_for_container(pid_t pid, ContainerStatus *container) {
2659 r = wait_for_terminate(pid, &status);
2663 switch (status.si_code) {
2665 r = status.si_status;
2668 log_debug("Container %s exited successfully.",
2671 *container = CONTAINER_TERMINATED;
2673 log_error("Container %s failed with error code %i.",
2674 arg_machine, status.si_status);
2680 if (status.si_status == SIGINT) {
2682 log_info("Container %s has been shut down.",
2685 *container = CONTAINER_TERMINATED;
2688 } else if (status.si_status == SIGHUP) {
2690 log_info("Container %s is being rebooted.",
2693 *container = CONTAINER_REBOOTED;
2697 /* CLD_KILLED fallthrough */
2700 log_error("Container %s terminated by signal %s.",
2701 arg_machine, signal_to_string(status.si_status));
2706 log_error("Container %s failed due to unknown reason.",
/* Signal handler that does nothing. main() installs it for SIGCHLD so that
 * the signal interrupts the parent's blocking calls instead of being
 * ignored. */
2715 static void nop_handler(int sig) {}
2717 int main(int argc, char *argv[]) {
2719 _cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
2720 bool root_device_rw = true, home_device_rw = true, srv_device_rw = true;
2721 _cleanup_close_ int master = -1, kdbus_fd = -1, image_fd = -1;
2722 _cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 };
2723 _cleanup_fdset_free_ FDSet *fds = NULL;
2724 int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
2725 const char *console = NULL;
2726 char veth_name[IFNAMSIZ];
2727 bool secondary = false;
2728 sigset_t mask, mask_chld;
2731 log_parse_environment();
2734 k = parse_argv(argc, argv);
2743 if (arg_directory) {
2746 p = path_make_absolute_cwd(arg_directory);
2747 free(arg_directory);
2750 arg_directory = get_current_dir_name();
2752 if (!arg_directory) {
2753 log_error("Failed to determine path, please use -D.");
2756 path_kill_slashes(arg_directory);
2760 arg_machine = strdup(basename(arg_image ? arg_image : arg_directory));
2766 hostname_cleanup(arg_machine, false);
2767 if (isempty(arg_machine)) {
2768 log_error("Failed to determine machine name automatically, please use -M.");
2773 if (geteuid() != 0) {
2774 log_error("Need to be root.");
2778 if (sd_booted() <= 0) {
2779 log_error("Not running on a systemd system.");
2784 n_fd_passed = sd_listen_fds(false);
2785 if (n_fd_passed > 0) {
2786 k = fdset_new_listen_fds(&fds, false);
2788 log_error("Failed to collect file descriptors: %s", strerror(-k));
2792 fdset_close_others(fds);
2795 if (arg_directory) {
2796 if (path_equal(arg_directory, "/")) {
2797 log_error("Spawning container on root directory not supported.");
2802 if (path_is_os_tree(arg_directory) <= 0) {
2803 log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", arg_directory);
2809 p = strappenda(arg_directory,
2810 argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
2811 if (access(p, F_OK) < 0) {
2812 log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
2818 char template[] = "/tmp/nspawn-root-XXXXXX";
2820 if (!mkdtemp(template)) {
2821 log_error("Failed to create temporary directory: %m");
2826 arg_directory = strdup(template);
2827 if (!arg_directory) {
2832 image_fd = setup_image(&device_path, &loop_nr);
2838 r = dissect_image(image_fd, &root_device, &root_device_rw, &home_device, &home_device_rw, &srv_device, &srv_device_rw, &secondary);
2843 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
2845 log_error("Failed to acquire pseudo tty: %m");
2849 console = ptsname(master);
2851 log_error("Failed to determine tty name: %m");
2856 log_info("Spawning container %s on %s.\nPress ^] three times within 1s to kill container.",
2857 arg_machine, arg_image ? arg_image : arg_directory);
2859 if (unlockpt(master) < 0) {
2860 log_error("Failed to unlock tty: %m");
2864 if (access("/dev/kdbus/control", F_OK) >= 0) {
2866 if (arg_share_system) {
2867 kdbus_domain = strdup("/dev/kdbus");
2868 if (!kdbus_domain) {
2875 ns = strappenda("machine-", arg_machine);
2876 kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
2878 log_debug("Failed to create kdbus domain: %s", strerror(-r));
2880 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
2884 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
2885 log_error("Failed to create kmsg socket pair: %m");
2889 sd_notify(0, "READY=1");
2891 assert_se(sigemptyset(&mask) == 0);
2892 assert_se(sigemptyset(&mask_chld) == 0);
2893 sigaddset(&mask_chld, SIGCHLD);
2894 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
2895 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
2898 ContainerStatus container_status;
2899 int eventfds[2] = { -1, -1 };
2900 struct sigaction sa = {
2901 .sa_handler = nop_handler,
2902 .sa_flags = SA_NOCLDSTOP,
2905 /* Child can be killed before execv(), so handle SIGCHLD
2906 * in order to interrupt parent's blocking calls and
2907 * give it a chance to call wait() and terminate. */
2908 r = sigprocmask(SIG_UNBLOCK, &mask_chld, NULL);
2910 log_error("Failed to change the signal mask: %m");
2914 r = sigaction(SIGCHLD, &sa, NULL);
2916 log_error("Failed to install SIGCHLD handler: %m");
2920 pid = clone_with_eventfd(SIGCHLD|CLONE_NEWNS|
2921 (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
2922 (arg_private_network ? CLONE_NEWNET : 0), eventfds);
2924 if (errno == EINVAL)
2925 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
2927 log_error("clone() failed: %m");
2935 _cleanup_free_ char *home = NULL;
2937 const char *envp[] = {
2938 "PATH=" DEFAULT_PATH_SPLIT_USR,
2939 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
2944 NULL, /* container_uuid */
2945 NULL, /* LISTEN_FDS */
2946 NULL, /* LISTEN_PID */
2951 envp[n_env] = strv_find_prefix(environ, "TERM=");
2955 master = safe_close(master);
2957 close_nointr(STDIN_FILENO);
2958 close_nointr(STDOUT_FILENO);
2959 close_nointr(STDERR_FILENO);
2961 kmsg_socket_pair[0] = safe_close(kmsg_socket_pair[0]);
2963 reset_all_signal_handlers();
2965 assert_se(sigemptyset(&mask) == 0);
2966 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2968 k = open_terminal(console, O_RDWR);
2969 if (k != STDIN_FILENO) {
2975 log_error("Failed to open console: %s", strerror(-k));
2979 if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
2980 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
2981 log_error("Failed to duplicate console: %m");
2986 log_error("setsid() failed: %m");
2990 if (reset_audit_loginuid() < 0)
2993 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
2994 log_error("PR_SET_PDEATHSIG failed: %m");
2998 /* Mark everything as slave, so that we still
2999 * receive mounts from the real root, but don't
3000 * propagate mounts to the real root. */
3001 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
3002 log_error("MS_SLAVE|MS_REC failed: %m");
3006 if (mount_devices(arg_directory,
3007 root_device, root_device_rw,
3008 home_device, home_device_rw,
3009 srv_device, srv_device_rw) < 0)
3012 r = base_filesystem_create(arg_directory);
3014 log_error("Failed to create the base filesystem: %s", strerror(-r));
3018 /* Turn directory into bind mount */
3019 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
3020 log_error("Failed to make bind mount: %m");
3024 if (arg_read_only) {
3025 k = bind_remount_recursive(arg_directory, true);
3027 log_error("Failed to make tree read-only: %s", strerror(-k));
3032 if (mount_all(arg_directory) < 0)
3035 if (copy_devnodes(arg_directory) < 0)
3038 if (setup_ptmx(arg_directory) < 0)
3041 dev_setup(arg_directory);
3043 if (audit_still_doesnt_work_in_containers() < 0)
3046 if (setup_dev_console(arg_directory, console) < 0)
3049 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
3052 kmsg_socket_pair[1] = safe_close(kmsg_socket_pair[1]);
3054 if (setup_boot_id(arg_directory) < 0)
3057 if (setup_timezone(arg_directory) < 0)
3060 if (setup_resolv_conf(arg_directory) < 0)
3063 if (setup_journal(arg_directory) < 0)
3066 if (mount_binds(arg_directory, arg_bind, false) < 0)
3069 if (mount_binds(arg_directory, arg_bind_ro, true) < 0)
3072 if (mount_tmpfs(arg_directory) < 0)
3075 if (setup_kdbus(arg_directory, kdbus_domain) < 0)
3078 /* Tell the parent that we are ready, and that
3079 * it can cgroupify us so that we lack access
3080 * to certain devices and resources. */
3081 r = eventfd_send_state(eventfds[1],
3082 EVENTFD_CHILD_SUCCEEDED);
3083 eventfds[1] = safe_close(eventfds[1]);
3087 if (chdir(arg_directory) < 0) {
3088 log_error("chdir(%s) failed: %m", arg_directory);
3092 if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
3093 log_error("mount(MS_MOVE) failed: %m");
3097 if (chroot(".") < 0) {
3098 log_error("chroot() failed: %m");
3102 if (chdir("/") < 0) {
3103 log_error("chdir() failed: %m");
3109 if (arg_private_network)
3112 if (drop_capabilities() < 0) {
3113 log_error("drop_capabilities() failed: %m");
3117 r = change_uid_gid(&home);
3121 if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
3122 (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
3123 (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
3128 if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
3131 if (asprintf((char**)(envp + n_env++), "container_uuid=%s", id128_format_as_uuid(arg_uuid, as_uuid)) < 0) {
3137 if (fdset_size(fds) > 0) {
3138 k = fdset_cloexec(fds, false);
3140 log_error("Failed to unset O_CLOEXEC for file descriptors.");
3144 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
3145 (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
3153 if (arg_personality != 0xffffffffLU) {
3154 if (personality(arg_personality) < 0) {
3155 log_error("personality() failed: %m");
3158 } else if (secondary) {
3159 if (personality(PER_LINUX32) < 0) {
3160 log_error("personality() failed: %m");
3166 if (arg_selinux_context)
3167 if (setexeccon((security_context_t) arg_selinux_context) < 0) {
3168 log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
3173 if (!strv_isempty(arg_setenv)) {
3176 n = strv_env_merge(2, envp, arg_setenv);
3184 env_use = (char**) envp;
3186 /* Wait until the parent is ready with the setup, too... */
3187 r = eventfd_parent_succeeded(eventfds[0]);
3188 eventfds[0] = safe_close(eventfds[0]);
3196 /* Automatically search for the init system */
3198 l = 1 + argc - optind;
3199 a = newa(char*, l + 1);
3200 memcpy(a + 1, argv + optind, l * sizeof(char*));
3202 a[0] = (char*) "/usr/lib/systemd/systemd";
3203 execve(a[0], a, env_use);
3205 a[0] = (char*) "/lib/systemd/systemd";
3206 execve(a[0], a, env_use);
3208 a[0] = (char*) "/sbin/init";
3209 execve(a[0], a, env_use);
3210 } else if (argc > optind)
3211 execvpe(argv[optind], argv + optind, env_use);
3213 chdir(home ? home : "/root");
3214 execle("/bin/bash", "-bash", NULL, env_use);
3215 execle("/bin/sh", "-sh", NULL, env_use);
3218 log_error("execv() failed: %m");
3221 /* Tell the parent that the setup failed, so he
3222 * can clean up resources and terminate. */
3223 if (eventfds[1] != -1)
3224 eventfd_send_state(eventfds[1],
3225 EVENTFD_CHILD_FAILED);
3226 _exit(EXIT_FAILURE);
3232 /* Wait for the child event:
3233 * If EVENTFD_CHILD_FAILED, the child will terminate soon.
3234 * If EVENTFD_CHILD_SUCCEEDED, the child is reporting that
3235 * it is ready with all it needs to do with privileges.
3236 * After we got the notification we can make the process
3237 * join its cgroup which might limit what it can do */
3238 r = eventfd_child_succeeded(eventfds[1]);
3239 eventfds[1] = safe_close(eventfds[1]);
3241 goto check_container_status;
3243 r = register_machine(pid);
3247 r = move_network_interfaces(pid);
3251 r = setup_veth(pid, veth_name);
3255 r = setup_bridge(veth_name);
3259 r = setup_macvlan(pid);
3263 /* Block SIGCHLD here, before notifying child.
3264 * process_pty() will handle it with the other signals. */
3265 r = sigprocmask(SIG_BLOCK, &mask_chld, NULL);
3269 /* Reset signal to default */
3270 r = default_signals(SIGCHLD, -1);
3274 /* Notify the child that the parent is ready with all
3275 * its setup, and that the child can now hand over
3276 * control to the code to run inside the container. */
3277 r = eventfd_send_state(eventfds[0],
3278 EVENTFD_PARENT_SUCCEEDED);
3279 eventfds[0] = safe_close(eventfds[0]);
3283 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
3292 /* Kill if it is not dead yet anyway */
3293 terminate_machine(pid);
3295 check_container_status:
3296 /* Redundant, but better safe than sorry */
3299 r = wait_for_container(pid, &container_status);
3305 } else if (container_status == CONTAINER_TERMINATED)
3308 /* CONTAINER_REBOOTED, loop again */
3312 loop_remove(loop_nr, &image_fd);
3317 free(arg_directory);
3320 strv_free(arg_setenv);
3321 strv_free(arg_network_interfaces);
3322 strv_free(arg_network_macvlan);
3323 strv_free(arg_bind);
3324 strv_free(arg_bind_ro);
3325 strv_free(arg_tmpfs);