1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
37 #include <sys/signalfd.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
45 #include <linux/veth.h>
46 #include <sys/personality.h>
49 #include <selinux/selinux.h>
56 #include "sd-daemon.h"
66 #include "cgroup-util.h"
68 #include "path-util.h"
69 #include "loopback-setup.h"
70 #include "dev-setup.h"
75 #include "bus-error.h"
77 #include "bus-kernel.h"
80 #include "rtnl-util.h"
81 #include "udev-util.h"
84 #include "seccomp-util.h"
/* How the journal of the container is linked with the one of the host. The
 * values used in this file are LINK_AUTO, LINK_NO, LINK_GUEST and LINK_HOST. */
87 typedef enum LinkJournal {
/* Settings taken from the command line. parse_argv() fills these in; the
 * initializers below are the defaults that apply when an option is absent. */
94 static char *arg_directory = NULL;
95 static char *arg_user = NULL;
96 static sd_id128_t arg_uuid = {};
97 static char *arg_machine = NULL;
98 static char *arg_selinux_context = NULL;
99 static char *arg_selinux_apifs_context = NULL;
100 static const char *arg_slice = NULL;
101 static bool arg_private_network = false;
102 static bool arg_read_only = false;
103 static bool arg_boot = false;
104 static LinkJournal arg_link_journal = LINK_AUTO;
/* Bit mask (one bit per CAP_* number) of the capabilities kept for the
 * container. parse_argv() adjusts it for --capability= and
 * --drop-capability=, and drop_capabilities() drops everything that is not
 * set here from the bounding set. */
105 static uint64_t arg_retain =
106 (1ULL << CAP_CHOWN) |
107 (1ULL << CAP_DAC_OVERRIDE) |
108 (1ULL << CAP_DAC_READ_SEARCH) |
109 (1ULL << CAP_FOWNER) |
110 (1ULL << CAP_FSETID) |
111 (1ULL << CAP_IPC_OWNER) |
113 (1ULL << CAP_LEASE) |
114 (1ULL << CAP_LINUX_IMMUTABLE) |
115 (1ULL << CAP_NET_BIND_SERVICE) |
116 (1ULL << CAP_NET_BROADCAST) |
117 (1ULL << CAP_NET_RAW) |
118 (1ULL << CAP_SETGID) |
119 (1ULL << CAP_SETFCAP) |
120 (1ULL << CAP_SETPCAP) |
121 (1ULL << CAP_SETUID) |
122 (1ULL << CAP_SYS_ADMIN) |
123 (1ULL << CAP_SYS_CHROOT) |
124 (1ULL << CAP_SYS_NICE) |
125 (1ULL << CAP_SYS_PTRACE) |
126 (1ULL << CAP_SYS_TTY_CONFIG) |
127 (1ULL << CAP_SYS_RESOURCE) |
128 (1ULL << CAP_SYS_BOOT) |
129 (1ULL << CAP_AUDIT_WRITE) |
130 (1ULL << CAP_AUDIT_CONTROL) |
/* arg_bind and arg_bind_ro hold flat lists of (source, destination) path
 * pairs, appended two entries at a time by --bind= and --bind-ro=. */
132 static char **arg_bind = NULL;
133 static char **arg_bind_ro = NULL;
134 static char **arg_setenv = NULL;
135 static bool arg_quiet = false;
136 static bool arg_share_system = false;
137 static bool arg_register = true;
138 static bool arg_keep_unit = false;
139 static char **arg_network_interfaces = NULL;
140 static bool arg_network_veth = false;
141 static char *arg_network_bridge = NULL;
/* 0xffffffffLU is also the value parse_argv() treats as "unknown" when it
 * comes back from personality_from_string(). */
142 static unsigned long arg_personality = 0xffffffffLU;
/* Prints the usage summary for the program to stdout. The program name shown
 * in the first line is program_invocation_short_name.
 *
 * The text for -j names the "guest" mode, because that is the mode the short
 * option selects in parse_argv() (LINK_GUEST). */
144 static int help(void) {
146 printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
147 "Spawn a minimal namespace container for debugging, testing and building.\n\n"
148 " -h --help Show this help\n"
149 " --version Print version string\n"
150 " -q --quiet Do not show status information\n"
151 " -D --directory=NAME Root directory for the container\n"
152 " -b --boot Boot up full system (i.e. invoke init)\n"
153 " -u --user=USER Run the command under specified user or uid\n"
154 " -M --machine=NAME Set the machine name for the container\n"
155 " --uuid=UUID Set a specific machine UUID for the container\n"
156 " -S --slice=SLICE Place the container in the specified slice\n"
157 " --private-network Disable network in container\n"
158 " --network-interface=INTERFACE\n"
159 " Assign an existing network interface to the\n"
161 " --network-veth Add a virtual ethernet connection between host\n"
163 " --network-bridge=INTERFACE\n"
164 " Add a virtual ethernet connection between host\n"
165 " and container and add it to an existing bridge on\n"
167 " -Z --selinux-context=SECLABEL\n"
168 " Set the SELinux security context to be used by\n"
169 " processes in the container\n"
170 " -L --selinux-apifs-context=SECLABEL\n"
171 " Set the SELinux security context to be used by\n"
172 " API/tmpfs file systems in the container\n"
173 " --capability=CAP In addition to the default, retain specified\n"
175 " --drop-capability=CAP Drop the specified capability from the default set\n"
176 " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
177 " -j Equivalent to --link-journal=guest\n"
178 " --read-only Mount the root directory read-only\n"
179 " --bind=PATH[:PATH] Bind mount a file or directory from the host into\n"
181 " --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n"
182 " --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
183 " --share-system Share system namespaces with host\n"
184 " --register=BOOLEAN Register container as machine\n"
185 " --keep-unit Do not register a scope for the machine, reuse\n"
186 " the service unit nspawn is running in\n",
187 program_invocation_short_name);
/* Parses the command line with getopt_long() and stores the result in the
 * file-scope arg_* variables.
 *
 * argc, argv: the argument vector as received by main().
 *
 * Option values are validated as they are read and a message is logged with
 * log_error() for each invalid one (root directory, UUID, machine name,
 * capability name, journal link mode, bind mount specification, environment
 * assignment, --register= boolean, personality).
 *
 * Capabilities named by --capability= and --drop-capability= are collected
 * in the local masks "plus" and "minus" ("all" selects every bit) and are
 * folded into arg_retain once all options have been read.
 *
 * Checks made after the option loop:
 *  - --share-system switches machine registration off,
 *  - --boot together with --share-system is rejected,
 *  - --keep-unit is rejected when cg_pid_get_owner_uid(0, NULL) succeeds,
 *  - CAP_NET_ADMIN is added to arg_retain when a private network was
 *    requested, before the "minus" mask is applied. */
192 static int parse_argv(int argc, char *argv[]) {
208 ARG_NETWORK_INTERFACE,
214 static const struct option options[] = {
215 { "help", no_argument, NULL, 'h' },
216 { "version", no_argument, NULL, ARG_VERSION },
217 { "directory", required_argument, NULL, 'D' },
218 { "user", required_argument, NULL, 'u' },
219 { "private-network", no_argument, NULL, ARG_PRIVATE_NETWORK },
220 { "boot", no_argument, NULL, 'b' },
221 { "uuid", required_argument, NULL, ARG_UUID },
222 { "read-only", no_argument, NULL, ARG_READ_ONLY },
223 { "capability", required_argument, NULL, ARG_CAPABILITY },
224 { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
225 { "link-journal", required_argument, NULL, ARG_LINK_JOURNAL },
226 { "bind", required_argument, NULL, ARG_BIND },
227 { "bind-ro", required_argument, NULL, ARG_BIND_RO },
228 { "machine", required_argument, NULL, 'M' },
229 { "slice", required_argument, NULL, 'S' },
230 { "setenv", required_argument, NULL, ARG_SETENV },
231 { "selinux-context", required_argument, NULL, 'Z' },
232 { "selinux-apifs-context", required_argument, NULL, 'L' },
233 { "quiet", no_argument, NULL, 'q' },
234 { "share-system", no_argument, NULL, ARG_SHARE_SYSTEM },
235 { "register", required_argument, NULL, ARG_REGISTER },
236 { "keep-unit", no_argument, NULL, ARG_KEEP_UNIT },
237 { "network-interface", required_argument, NULL, ARG_NETWORK_INTERFACE },
238 { "network-veth", no_argument, NULL, ARG_NETWORK_VETH },
239 { "network-bridge", required_argument, NULL, ARG_NETWORK_BRIDGE },
240 { "personality", required_argument, NULL, ARG_PERSONALITY },
245 uint64_t plus = 0, minus = 0;
/* The leading "+" makes getopt_long() stop at the first non-option
 * argument, so options of the command to run are left alone. */
250 while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
258 puts(PACKAGE_STRING);
259 puts(SYSTEMD_FEATURES);
264 arg_directory = canonicalize_file_name(optarg);
265 if (!arg_directory) {
266 log_error("Invalid root directory: %m");
274 arg_user = strdup(optarg);
280 case ARG_NETWORK_BRIDGE:
281 arg_network_bridge = strdup(optarg);
282 if (!arg_network_bridge)
287 case ARG_NETWORK_VETH:
/* A veth link only makes sense with a private network. */
288 arg_network_veth = true;
289 arg_private_network = true;
292 case ARG_NETWORK_INTERFACE:
/* NOTE(review): optarg itself is pushed here, not a copy, while the bind
 * options further down use strv_extend(). Confirm who owns the entries of
 * arg_network_interfaces before freeing that list. */
293 if (strv_push(&arg_network_interfaces, optarg) < 0)
298 case ARG_PRIVATE_NETWORK:
299 arg_private_network = true;
307 r = sd_id128_from_string(optarg, &arg_uuid);
309 log_error("Invalid UUID: %s", optarg);
315 arg_slice = strdup(optarg);
322 if (isempty(optarg)) {
327 if (!hostname_is_valid(optarg)) {
328 log_error("Invalid machine name: %s", optarg);
333 arg_machine = strdup(optarg);
/* The two SELinux contexts point straight into argv, no copy is made. */
341 arg_selinux_context = optarg;
345 arg_selinux_apifs_context = optarg;
349 arg_read_only = true;
353 case ARG_DROP_CAPABILITY: {
/* The argument is a comma separated list of capability names. */
357 FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
358 _cleanup_free_ char *t;
361 t = strndup(word, length);
365 if (streq(t, "all")) {
366 if (c == ARG_CAPABILITY)
367 plus = (uint64_t) -1;
369 minus = (uint64_t) -1;
371 if (cap_from_name(t, &cap) < 0) {
372 log_error("Failed to parse capability %s.", t);
376 if (c == ARG_CAPABILITY)
377 plus |= 1ULL << (uint64_t) cap;
379 minus |= 1ULL << (uint64_t) cap;
387 arg_link_journal = LINK_GUEST;
390 case ARG_LINK_JOURNAL:
391 if (streq(optarg, "auto"))
392 arg_link_journal = LINK_AUTO;
393 else if (streq(optarg, "no"))
394 arg_link_journal = LINK_NO;
395 else if (streq(optarg, "guest"))
396 arg_link_journal = LINK_GUEST;
397 else if (streq(optarg, "host"))
398 arg_link_journal = LINK_HOST;
400 log_error("Failed to parse link journal mode %s", optarg);
408 _cleanup_free_ char *a = NULL, *b = NULL;
/* --bind= and --bind-ro= share this code and differ only in the list
 * that receives the pair. */
412 x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
/* An optional ":" separates the source path from the destination. */
414 e = strchr(optarg, ':');
416 a = strndup(optarg, e - optarg);
426 if (!path_is_absolute(a) || !path_is_absolute(b)) {
427 log_error("Invalid bind mount specification: %s", optarg);
431 r = strv_extend(x, a);
435 r = strv_extend(x, b);
445 if (!env_assignment_is_valid(optarg)) {
446 log_error("Environment variable assignment '%s' is not valid.", optarg);
/* strv_env_set() hands back a new list in n; the old one is freed below. */
450 n = strv_env_set(arg_setenv, optarg);
454 strv_free(arg_setenv);
463 case ARG_SHARE_SYSTEM:
464 arg_share_system = true;
468 r = parse_boolean(optarg);
470 log_error("Failed to parse --register= argument: %s", optarg);
478 arg_keep_unit = true;
481 case ARG_PERSONALITY:
483 arg_personality = personality_from_string(optarg);
484 if (arg_personality == 0xffffffffLU) {
485 log_error("Unknown or unsupported personality '%s'.", optarg);
495 assert_not_reached("Unhandled option");
499 if (arg_share_system)
500 arg_register = false;
502 if (arg_boot && arg_share_system) {
503 log_error("--boot and --share-system may not be combined.");
507 if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
508 log_error("--keep-unit may not be used when invoked from a user session.");
/* Final capability set: defaults plus requested additions, plus
 * CAP_NET_ADMIN for a private network, minus everything dropped. */
512 arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
/* Mounts the entries of mount_table below the container root.
 *
 * dest: path of the container root; every "where" column of the table is
 *       appended to it.
 *
 * Columns of a table row, in order: source ("what"), mount point ("where"),
 * file system type, mount options, mount flags and a boolean "fatal" that
 * takes part in the failure check of mount().
 *
 * For every row the target is probed with path_is_mount_point(). Rows that
 * name a source are not mounted again when the target already is a mount
 * point; rows with a NULL source are the read-only remounts of the row
 * before them. The target directory is created with mkdir_p() first. When
 * arg_selinux_apifs_context is set, rows whose source is "tmpfs" or
 * "devpts" get a context="..." option appended to their option string. */
517 static int mount_all(const char *dest) {
519 typedef struct MountPoint {
528 static const MountPoint mount_table[] = {
529 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
530 { "/proc/sys", "/proc/sys", NULL, NULL, MS_BIND, true }, /* Bind mount first */
531 { NULL, "/proc/sys", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, true }, /* Then, make it r/o */
532 { "sysfs", "/sys", "sysfs", NULL, MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
533 { "tmpfs", "/dev", "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
534 { "devpts", "/dev/pts", "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
535 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
536 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
538 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND, false }, /* Bind mount first */
539 { NULL, "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false }, /* Then, make it r/o */
546 for (k = 0; k < ELEMENTSOF(mount_table); k++) {
547 _cleanup_free_ char *where = NULL;
549 _cleanup_free_ char *options = NULL;
554 where = strjoin(dest, "/", mount_table[k].where, NULL);
558 t = path_is_mount_point(where, true);
560 log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
568 /* Skip this entry if it is not a remount. */
569 if (mount_table[k].what && t > 0)
572 mkdir_p(where, 0755);
575 if (arg_selinux_apifs_context &&
576 (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
577 options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
584 o = mount_table[k].options;
587 if (mount(mount_table[k].what,
590 mount_table[k].flags,
592 mount_table[k].fatal) {
594 log_error("mount(%s) failed: %m", where);
/* Bind mounts host paths into the container tree.
 *
 * dest:  path of the container root.
 * l:     flat list of (source, destination) pairs; the destination is
 *        appended to dest to form the mount point.
 * flags: extra mount flags. When non-zero the fresh bind mount is remounted
 *        with MS_REMOUNT|MS_BIND|flags.
 *
 * The source must exist. If the mount point exists too, both must be of the
 * same file type. If it is missing (ENOENT), its parent directories are
 * created and a mount point matching the type of the source is made; this
 * is done for directories, FIFOs, sockets and regular files only. */
604 static int mount_binds(const char *dest, char **l, unsigned long flags) {
607 STRV_FOREACH_PAIR(x, y, l) {
609 struct stat source_st, dest_st;
612 if (stat(*x, &source_st) < 0) {
613 log_error("failed to stat %s: %m", *x);
617 where = strappenda(dest, *y);
618 r = stat(where, &dest_st);
620 if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
621 log_error("The file types of %s and %s do not match. Refusing bind mount",
625 } else if (errno == ENOENT) {
626 r = mkdir_parents_label(where, 0755);
628 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
632 log_error("Failed to bind mount %s: %s", *x, strerror(errno));
635 /* Create the mount point, but be conservative -- refuse to create block
636 * and char devices. */
637 if (S_ISDIR(source_st.st_mode))
638 mkdir_label(where, 0755);
639 else if (S_ISFIFO(source_st.st_mode))
641 else if (S_ISSOCK(source_st.st_mode))
642 mknod(where, 0644 | S_IFSOCK, 0);
643 else if (S_ISREG(source_st.st_mode))
646 log_error("Refusing to create mountpoint for file: %s", *x);
650 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
651 log_error("mount(%s) failed: %m", where);
655 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
656 log_error("mount(%s) failed: %m", where);
/* Makes /etc/localtime of the container name the same time zone as
 * /etc/localtime of the host.
 *
 * dest: path of the container root.
 *
 * The host link must be a symlink into /usr/share/zoneinfo/ (in its relative
 * "../usr/share/zoneinfo/" or its absolute form), otherwise a warning is
 * logged. The zone name z is what follows that prefix. If the link of the
 * container already names the same zone nothing needs to change. The zone
 * file must exist below dest, otherwise a warning is logged. The new link
 * always uses the relative form "../usr/share/zoneinfo/<zone>". */
664 static int setup_timezone(const char *dest) {
665 _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
671 /* Fix the timezone, if possible */
672 r = readlink_malloc("/etc/localtime", &p);
674 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
678 z = path_startswith(p, "../usr/share/zoneinfo/");
680 z = path_startswith(p, "/usr/share/zoneinfo/");
682 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
686 where = strappend(dest, "/etc/localtime");
690 r = readlink_malloc(where, &q);
692 y = path_startswith(q, "../usr/share/zoneinfo/");
694 y = path_startswith(q, "/usr/share/zoneinfo/");
697 /* Already pointing to the right place? Then do nothing .. */
698 if (y && streq(y, z))
702 check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
706 if (access(check, F_OK) < 0) {
707 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
711 what = strappend("../usr/share/zoneinfo/", z);
716 if (symlink(what, where) < 0) {
717 log_error("Failed to correct timezone of container: %m");
/* Copies /etc/resolv.conf of the host to <dest>/etc/resolv.conf.
 *
 * dest: path of the container root.
 *
 * arg_private_network is checked first. The copy is best effort: the result
 * of copy_file() is deliberately ignored. */
724 static int setup_resolv_conf(const char *dest) {
725 char _cleanup_free_ *where = NULL;
729 if (arg_private_network)
732 /* Fix resolv.conf, if possible */
733 where = strappend(dest, "/etc/resolv.conf");
737 /* We don't really care for the results of this really. If it
738 * fails, it fails, but meh... */
739 copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
/* Gives the container a boot ID of its own.
 *
 * dest: path of the container root.
 *
 * arg_share_system is checked first. A random 128 bit ID is generated,
 * formatted as a UUID string with dashes and written to the file
 * <dest>/dev/proc-sys-kernel-random-boot-id. That file is then bind mounted
 * over <dest>/proc/sys/kernel/random/boot_id and remounted read-only. A
 * failure of the read-only remount only produces a warning. */
744 static int setup_boot_id(const char *dest) {
745 _cleanup_free_ char *from = NULL, *to = NULL;
752 if (arg_share_system)
755 /* Generate a new randomized boot ID, so that each boot-up of
756 * the container gets a new one */
758 from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
759 to = strappend(dest, "/proc/sys/kernel/random/boot_id");
763 r = sd_id128_randomize(&rnd);
765 log_error("Failed to generate random boot id: %s", strerror(-r));
769 snprintf(as_uuid, sizeof(as_uuid),
770 "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
771 SD_ID128_FORMAT_VAL(rnd));
772 char_array_0(as_uuid);
774 r = write_string_file(from, as_uuid);
776 log_error("Failed to write boot id: %s", strerror(-r));
780 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
781 log_error("Failed to bind mount boot id: %m");
783 } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
784 log_warning("Failed to make boot id read-only: %m");
/* Recreates a fixed set of host device nodes below <dest>/dev.
 *
 * dest: path of the container root.
 *
 * For every name in devnodes the host node /dev/<name> is examined with
 * stat(). A node that does not exist on the host (ENOENT) is not treated
 * like other stat() failures. Anything that is neither a character nor a
 * block device is rejected. Otherwise a node with the same mode and device
 * number is created at <dest>/dev/<name>.
 *
 * The mknod() failure message names the node that could not be created
 * ("to"), not the container root. */
790 static int copy_devnodes(const char *dest) {
792 static const char devnodes[] =
802 _cleanup_umask_ mode_t u;
808 NULSTR_FOREACH(d, devnodes) {
809 _cleanup_free_ char *from = NULL, *to = NULL;
812 from = strappend("/dev/", d);
813 to = strjoin(dest, "/dev/", d, NULL);
817 if (stat(from, &st) < 0) {
819 if (errno != ENOENT) {
820 log_error("Failed to stat %s: %m", from);
824 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
826 log_error("%s is not a char or block device, cannot copy", from);
829 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
831 log_error("mknod(%s) failed: %m", to);
/* Creates <dest>/dev/ptmx as a symlink to "pts/ptmx".
 *
 * dest: path of the container root.
 *
 * A failure of symlink() is logged. */
839 static int setup_ptmx(const char *dest) {
840 _cleanup_free_ char *p = NULL;
842 p = strappend(dest, "/dev/ptmx");
846 if (symlink("pts/ptmx", p) < 0) {
847 log_error("Failed to create /dev/ptmx symlink: %m");
/* Makes the given terminal available as <dest>/dev/console.
 *
 * dest:    path of the container root.
 * console: path of the terminal device on the host; it must be a character
 *          device.
 *
 * The terminal is set to mode 0600 and owner 0:0. A device node with the
 * device number of the terminal and mode 0600 is created at
 * <dest>/dev/console, and the terminal is then bind mounted on top of that
 * node. */
854 static int setup_dev_console(const char *dest, const char *console) {
856 _cleanup_free_ char *to = NULL;
858 _cleanup_umask_ mode_t u;
865 if (stat(console, &st) < 0) {
866 log_error("Failed to stat %s: %m", console);
869 } else if (!S_ISCHR(st.st_mode)) {
870 log_error("/dev/console is not a char device");
874 r = chmod_and_chown(console, 0600, 0, 0);
876 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
880 if (asprintf(&to, "%s/dev/console", dest) < 0)
883 /* We need to bind mount the right tty to /dev/console since
884 * ptys can only exist on pts file systems. To have something
885 * to bind mount things on we create a device node first, that
886 * has the right major/minor (note that the major minor
887 * doesn't actually matter here, since we mount it over
890 if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
891 log_error("mknod() for /dev/console failed: %m");
895 if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
896 log_error("Bind mount for /dev/console failed: %m");
// Provides /proc/kmsg inside the container, backed by a FIFO.
//
// dest:        path of the container root.
// kmsg_socket: socket that receives the file descriptor of the FIFO as an
//              SCM_RIGHTS control message; it must be a valid descriptor.
//
// A FIFO is created at <dest>/dev/kmsg with mode 0600 and owner 0:0 and is
// bind mounted over <dest>/proc/kmsg. The FIFO is then opened read/write and
// non-blocking, and the descriptor is sent over kmsg_socket. The local copy
// of the descriptor is closed right after sendmsg().
903 static int setup_kmsg(const char *dest, int kmsg_socket) {
904 _cleanup_free_ char *from = NULL, *to = NULL;
906 _cleanup_umask_ mode_t u;
908 struct cmsghdr cmsghdr;
909 uint8_t buf[CMSG_SPACE(sizeof(int))];
912 .msg_control = &control,
913 .msg_controllen = sizeof(control),
915 struct cmsghdr *cmsg;
918 assert(kmsg_socket >= 0);
922 /* We create the kmsg FIFO as /dev/kmsg, but immediately
923 * delete it after bind mounting it to /proc/kmsg. While FIFOs
924 * on the reading side behave very similar to /proc/kmsg,
925 * their writing side behaves differently from /dev/kmsg in
926 * that writing blocks when nothing is reading. In order to
927 * avoid any problems with containers deadlocking due to this
928 * we simply make /dev/kmsg unavailable to the container. */
929 if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
930 asprintf(&to, "%s/proc/kmsg", dest) < 0)
933 if (mkfifo(from, 0600) < 0) {
934 log_error("mkfifo() for /dev/kmsg failed: %m");
938 r = chmod_and_chown(from, 0600, 0, 0);
940 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
944 if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
945 log_error("Bind mount for /proc/kmsg failed: %m");
949 fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
951 log_error("Failed to open fifo: %m");
955 cmsg = CMSG_FIRSTHDR(&mh);
956 cmsg->cmsg_level = SOL_SOCKET;
957 cmsg->cmsg_type = SCM_RIGHTS;
958 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
959 memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
961 mh.msg_controllen = cmsg->cmsg_len;
963 /* Store away the fd in the socket, so that it stays open as
964 * long as we run the child */
965 k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
966 close_nointr_nofail(fd);
969 log_error("Failed to send FIFO fd: %m");
973 /* And now make the FIFO unavailable as /dev/kmsg... */
/* Sets the host name to arg_machine with sethostname().
 *
 * arg_share_system is checked first. arg_machine must not be NULL here,
 * since its length is taken with strlen(). */
978 static int setup_hostname(void) {
980 if (arg_share_system)
983 if (sethostname(arg_machine, strlen(arg_machine)) < 0)
/* Links the journal directory of the container with the one of the host,
 * as selected by arg_link_journal.
 *
 * directory: path of the container root.
 *
 * Steps visible in this function:
 *  1. The machine ID is read from <directory>/etc/machine-id and parsed. In
 *     LINK_AUTO mode a missing file or an empty ID is tolerated.
 *  2. The ID is compared with the machine ID of the host. Equal IDs are
 *     refused; this is logged as a warning in LINK_AUTO mode and as an
 *     error otherwise.
 *  3. LINK_NO is checked before any path is touched.
 *  4. p is the host side path /var/log/journal/<id>, q is the same path
 *     below the container root. If either already is a mount point this is
 *     an error unless the mode is LINK_AUTO.
 *  5. What currently exists at p is examined with
 *     readlink_and_make_absolute(): an existing symlink or another kind of
 *     entry is handled depending on the mode, and a missing path (-ENOENT)
 *     is fine.
 *  6. LINK_GUEST: p is created as a symlink pointing to q, and q is
 *     created as a directory.
 *  7. LINK_HOST: p is created as a directory. q must be empty and is
 *     created if needed; then p is bind mounted on q. */
989 static int setup_journal(const char *directory) {
990 sd_id128_t machine_id, this_id;
991 _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
995 p = strappend(directory, "/etc/machine-id");
999 r = read_one_line_file(p, &b);
1000 if (r == -ENOENT && arg_link_journal == LINK_AUTO)
1003 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
1008 if (isempty(id) && arg_link_journal == LINK_AUTO)
1011 /* Verify validity */
1012 r = sd_id128_from_string(id, &machine_id);
1014 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1018 r = sd_id128_get_machine(&this_id);
1020 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1024 if (sd_id128_equal(machine_id, this_id)) {
1025 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1026 "Host and machine ids are equal (%s): refusing to link journals", id);
1027 if (arg_link_journal == LINK_AUTO)
1033 if (arg_link_journal == LINK_NO)
1037 p = strappend("/var/log/journal/", id);
1038 q = strjoin(directory, "/var/log/journal/", id, NULL);
1042 if (path_is_mount_point(p, false) > 0) {
1043 if (arg_link_journal != LINK_AUTO) {
1044 log_error("%s: already a mount point, refusing to use for journal", p);
1051 if (path_is_mount_point(q, false) > 0) {
1052 if (arg_link_journal != LINK_AUTO) {
1053 log_error("%s: already a mount point, refusing to use for journal", q);
1060 r = readlink_and_make_absolute(p, &d);
1062 if ((arg_link_journal == LINK_GUEST ||
1063 arg_link_journal == LINK_AUTO) &&
1066 r = mkdir_p(q, 0755);
1068 log_warning("failed to create directory %s: %m", q);
1072 if (unlink(p) < 0) {
1073 log_error("Failed to remove symlink %s: %m", p);
1076 } else if (r == -EINVAL) {
1078 if (arg_link_journal == LINK_GUEST &&
1081 if (errno == ENOTDIR) {
1082 log_error("%s already exists and is neither a symlink nor a directory", p);
1085 log_error("Failed to remove %s: %m", p);
1089 } else if (r != -ENOENT) {
1090 log_error("readlink(%s) failed: %m", p);
1094 if (arg_link_journal == LINK_GUEST) {
1096 if (symlink(q, p) < 0) {
1097 log_error("Failed to symlink %s to %s: %m", q, p);
1101 r = mkdir_p(q, 0755);
1103 log_warning("failed to create directory %s: %m", q);
1107 if (arg_link_journal == LINK_HOST) {
1108 r = mkdir_p(p, 0755);
1110 log_error("Failed to create %s: %m", p);
1114 } else if (access(p, F_OK) < 0)
1117 if (dir_is_empty(q) == 0) {
1118 log_error("%s not empty.", q);
1122 r = mkdir_p(q, 0755);
1124 log_error("Failed to create %s: %m", q);
1128 if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1129 log_error("Failed to bind mount journal from host into guest: %m");
/* Makes a kdbus domain visible inside the container.
 *
 * dest: path of the container root.
 * path: host path of the kdbus domain; it is bind mounted on
 *       <dest>/dev/kdbus, which is created as a directory with mode 0755
 *       first.
 *
 * Failures of mkdir() and mount() are logged. */
1136 static int setup_kdbus(const char *dest, const char *path) {
1142 p = strappenda(dest, "/dev/kdbus");
1143 if (mkdir(p, 0755) < 0) {
1144 log_error("Failed to create kdbus path: %m");
1148 if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1149 log_error("Failed to mount kdbus domain path: %m");
/* Drops every capability that is not set in arg_retain from the bounding
 * set, and returns the result of capability_bounding_set_drop(). The mask is
 * inverted because that helper takes the set of capabilities to drop. */
1156 static int drop_capabilities(void) {
1157 return capability_bounding_set_drop(~arg_retain, false);
/* Registers the container with the org.freedesktop.machine1 service on the
 * system bus.
 *
 * pid: process ID that belongs to the container. NOTE(review): the place
 *      where pid is put into the message is not among the lines shown
 *      here, so confirm which argument carries it.
 *
 * With arg_keep_unit set, a single sd_bus_call_method() on the Manager
 * object is made; arg_uuid and arg_directory are among its arguments.
 *
 * Otherwise the method call is assembled by hand so that an array of
 * "(sv)" unit properties can be attached:
 *  - "Slice", only when arg_slice is not empty,
 *  - "DevicePolicy" set to "strict",
 *  - "DeviceAllow", a list of (device path, access string) pairs that
 *    includes /dev/random and /dev/urandom with "rwm" and /dev/pts/ptmx
 *    with "rw".
 * The message is then sent with sd_bus_call().
 *
 * Every failure is logged with log_error(). */
1160 static int register_machine(pid_t pid) {
1161 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1162 _cleanup_bus_unref_ sd_bus *bus = NULL;
1168 r = sd_bus_default_system(&bus);
1170 log_error("Failed to open system bus: %s", strerror(-r));
1174 if (arg_keep_unit) {
1175 r = sd_bus_call_method(
1177 "org.freedesktop.machine1",
1178 "/org/freedesktop/machine1",
1179 "org.freedesktop.machine1.Manager",
1185 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1189 strempty(arg_directory));
1191 _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
1193 r = sd_bus_message_new_method_call(
1196 "org.freedesktop.machine1",
1197 "/org/freedesktop/machine1",
1198 "org.freedesktop.machine1.Manager",
1201 log_error("Failed to create message: %s", strerror(-r));
1205 r = sd_bus_message_append(
1209 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1213 strempty(arg_directory));
1215 log_error("Failed to append message arguments: %s", strerror(-r));
1219 r = sd_bus_message_open_container(m, 'a', "(sv)");
1221 log_error("Failed to open container: %s", strerror(-r));
1225 if (!isempty(arg_slice)) {
1226 r = sd_bus_message_append(m, "(sv)", "Slice", "s", arg_slice);
1228 log_error("Failed to append slice: %s", strerror(-r));
1233 r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
1235 log_error("Failed to add device policy: %s", strerror(-r));
1239 r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 8,
1240 /* Allow the container to
1241 * access and create the API
1242 * device nodes, so that
1243 * PrivateDevices= in the
1244 * container can work
1249 "/dev/random", "rwm",
1250 "/dev/urandom", "rwm",
1252 /* Allow the container
1253 * access to ptys. However,
1255 * container to ever create
1256 * these device nodes. */
1257 "/dev/pts/ptmx", "rw",
1260 log_error("Failed to add device whitelist: %s", strerror(-r));
1264 r = sd_bus_message_close_container(m);
1266 log_error("Failed to close container: %s", strerror(-r));
1270 r = sd_bus_call(bus, m, 0, &error, NULL);
1274 log_error("Failed to register machine: %s", bus_error_message(&error, r));
/* Asks the org.freedesktop.machine1 service to terminate the machine.
 *
 * pid: process ID used to identify the machine. NOTE(review): the lookup
 *      arguments are not among the lines shown here, so confirm.
 *
 * The machine is looked up through the Manager interface first. The reply
 * carries the object path of the machine, and a second call is made on the
 * org.freedesktop.machine1.Machine interface. Failures of the two method
 * calls are logged at debug level only, because the machine may already
 * have been cleaned up. */
1281 static int terminate_machine(pid_t pid) {
1282 _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1283 _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1284 _cleanup_bus_unref_ sd_bus *bus = NULL;
1291 r = sd_bus_default_system(&bus);
1293 log_error("Failed to open system bus: %s", strerror(-r));
1297 r = sd_bus_call_method(
1299 "org.freedesktop.machine1",
1300 "/org/freedesktop/machine1",
1301 "org.freedesktop.machine1.Manager",
1308 /* Note that the machine might already have been
1309 * cleaned up automatically, hence don't consider it a
1310 * failure if we cannot get the machine object. */
1311 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1315 r = sd_bus_message_read(reply, "o", &path);
1317 return bus_log_parse_error(r);
1319 r = sd_bus_call_method(
1321 "org.freedesktop.machine1",
1323 "org.freedesktop.machine1.Machine",
1329 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
/* Resets the audit login UID of the calling process to "unset".
 *
 * arg_share_system is checked first. The current value is read from
 * /proc/self/loginuid; if it already is 4294967295 (that is (uint32_t) -1)
 * nothing is written. Otherwise that value is written to the file. When the
 * write fails, a long explanation is logged that tells the user to upgrade
 * the kernel or to turn off auditing. */
1336 static int reset_audit_loginuid(void) {
1337 _cleanup_free_ char *p = NULL;
1340 if (arg_share_system)
1343 r = read_one_line_file("/proc/self/loginuid", &p);
1347 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1351 /* Already reset? */
1352 if (streq(p, "4294967295"))
1355 r = write_string_file("/proc/self/loginuid", "4294967295");
1357 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1358 "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1359 "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1360 "your kernel or to turn off auditing with 'audit=0' on the kernel command line before\n"
1361 "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
/* Creates a veth pair that connects the host with the container.
 *
 * pid:        process whose network namespace receives the peer end, which
 *             is named "host0".
 * iface_name: buffer of IFNAMSIZ bytes that receives the name of the host
 *             side interface: a "vb-" or "ve-" prefix followed by the
 *             machine name, cut off so that it fits into the buffer.
 *
 * arg_private_network and arg_network_veth are checked first. The request
 * is an RTM_NEWLINK netlink message of kind "veth" whose VETH_INFO_PEER
 * container carries the peer name and IFLA_NET_NS_PID. Every failure is
 * logged with log_error(). */
1369 static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ]) {
1370 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1371 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1374 if (!arg_private_network)
1377 if (!arg_network_veth)
1380 /* Use two different interface name prefixes depending whether
1381 * we are in bridge mode or not. */
1382 if (arg_network_bridge)
1383 memcpy(iface_name, "vb-", 3);
1385 memcpy(iface_name, "ve-", 3);
1387 strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
/* strncpy() writes no terminator when the machine name has IFNAMSIZ - 3
 * or more characters, so terminate explicitly before the name is used as
 * a string below. */
iface_name[IFNAMSIZ - 1] = '\0';
1389 r = sd_rtnl_open(&rtnl, 0);
1391 log_error("Failed to connect to netlink: %s", strerror(-r));
1395 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1397 log_error("Failed to allocate netlink message: %s", strerror(-r));
1401 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1403 log_error("Failed to add netlink interface name: %s", strerror(-r));
1407 r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1409 log_error("Failed to open netlink container: %s", strerror(-r));
1413 r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1415 log_error("Failed to append netlink kind: %s", strerror(-r));
1419 r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1421 log_error("Failed to open netlink container: %s", strerror(-r));
1425 r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1427 log_error("Failed to open netlink container: %s", strerror(-r));
1431 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1433 log_error("Failed to add netlink interface name: %s", strerror(-r));
1437 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1439 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1443 r = sd_rtnl_message_close_container(m);
1445 log_error("Failed to close netlink container: %s", strerror(-r));
1449 r = sd_rtnl_message_close_container(m);
1451 log_error("Failed to close netlink container: %s", strerror(-r));
1455 r = sd_rtnl_message_close_container(m);
1457 log_error("Failed to close netlink container: %s", strerror(-r));
1461 r = sd_rtnl_call(rtnl, m, 0, NULL);
1463 log_error("Failed to add new veth interfaces: %s", strerror(-r));
/* Attaches the host side veth interface to the bridge named by
 * arg_network_bridge.
 *
 * veth_name: name of the host side veth interface.
 *
 * arg_private_network, arg_network_veth and arg_network_bridge are checked
 * first. The bridge name is resolved to an interface index with
 * if_nametoindex(), and an RTM_SETLINK netlink message that carries the
 * veth name and the bridge index as IFLA_MASTER is sent. Every failure is
 * logged with log_error(). */
1470 static int setup_bridge(const char veth_name[]) {
1471 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1472 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1475 if (!arg_private_network)
1478 if (!arg_network_veth)
1481 if (!arg_network_bridge)
1484 bridge = (int) if_nametoindex(arg_network_bridge);
1486 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1490 r = sd_rtnl_open(&rtnl, 0);
1492 log_error("Failed to connect to netlink: %s", strerror(-r));
1496 r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
1498 log_error("Failed to allocate netlink message: %s", strerror(-r));
1502 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1504 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1508 r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1510 log_error("Failed to add netlink master field: %s", strerror(-r));
1514 r = sd_rtnl_call(rtnl, m, 0, NULL);
1516 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
/* Moves the interfaces listed in arg_network_interfaces into the network
 * namespace of the container.
 *
 * pid: process whose network namespace receives the interfaces.
 *
 * arg_private_network and an empty interface list are checked first. For
 * each interface name:
 *  - the name is resolved to an index with if_nametoindex(),
 *  - the udev device with the ID "n<index>" is looked up, and the
 *    interface is refused unless udev reports it as initialized,
 *  - an RTM_NEWLINK netlink message for that index with IFLA_NET_NS_PID
 *    set to pid is sent.
 * Every failure is logged with log_error(). */
1523 static int move_network_interfaces(pid_t pid) {
1524 _cleanup_udev_unref_ struct udev *udev = NULL;
1525 _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1529 if (!arg_private_network)
1532 if (strv_isempty(arg_network_interfaces))
1535 r = sd_rtnl_open(&rtnl, 0);
1537 log_error("Failed to connect to netlink: %s", strerror(-r));
1543 log_error("Failed to connect to udev.");
1547 STRV_FOREACH(i, arg_network_interfaces) {
1548 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1549 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1550 char ifi_str[2 + DECIMAL_STR_MAX(int)];
1553 ifi = (int) if_nametoindex(*i);
1555 log_error("Failed to resolve interface %s: %m", *i);
1559 sprintf(ifi_str, "n%i", ifi);
1560 d = udev_device_new_from_device_id(udev, ifi_str);
1562 log_error("Failed to get udev device for interface %s: %m", *i);
1566 if (udev_device_get_is_initialized(d) <= 0) {
1567 log_error("Network interface %s is not initialized yet.", *i);
1571 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
1573 log_error("Failed to allocate netlink message: %s", strerror(-r));
1577 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1579 log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1583 r = sd_rtnl_call(rtnl, m, 0, NULL);
1585 log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
/* Installs a seccomp filter that hides the audit netlink protocol from the
 * calling process.
 *
 * The filter allows everything by default (SCMP_ACT_ALLOW). One rule makes
 * a call fail with EAFNOSUPPORT when its first argument equals AF_NETLINK
 * and its third argument equals NETLINK_AUDIT. Secondary architectures are
 * added to the filter, and -EEXIST from that step is not an error. The
 * SCMP_FLTATR_CTL_NNP attribute is set to 0 before the filter is loaded.
 * The filter context is released with seccomp_release(). Every failure is
 * logged with log_error(). */
1593 static int audit_still_doesnt_work_in_containers(void) {
1596 scmp_filter_ctx seccomp;
1600 Audit is broken in containers, much of the userspace audit
1601 hookup will fail if running inside a container. We don't
1602 care and just turn off creation of audit sockets.
1604 This will make socket(AF_NETLINK, *, NETLINK_AUDIT) fail
1605 with EAFNOSUPPORT which audit userspace uses as indication
1606 that audit is disabled in the kernel.
1609 seccomp = seccomp_init(SCMP_ACT_ALLOW);
1613 r = seccomp_add_secondary_archs(seccomp);
1614 if (r < 0 && r != -EEXIST) {
1615 log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
1619 r = seccomp_rule_add(
1621 SCMP_ACT_ERRNO(EAFNOSUPPORT),
1624 SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
1625 SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
1627 log_error("Failed to add audit seccomp rule: %s", strerror(-r));
1631 r = seccomp_attr_set(seccomp, SCMP_FLTATR_CTL_NNP, 0);
1633 log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
1637 r = seccomp_load(seccomp);
1639 log_error("Failed to install seccomp audit filter: %s", strerror(-r));
1642 seccomp_release(seccomp);
1650 int main(int argc, char *argv[]) {
1652 _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
1653 _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1654 _cleanup_free_ char *kdbus_domain = NULL;
1655 _cleanup_fdset_free_ FDSet *fds = NULL;
1656 const char *console = NULL;
1657 int r = EXIT_FAILURE, k;
1661 char veth_name[IFNAMSIZ];
1663 log_parse_environment();
1666 k = parse_argv(argc, argv);
1674 if (arg_directory) {
1677 p = path_make_absolute_cwd(arg_directory);
1678 free(arg_directory);
1681 arg_directory = get_current_dir_name();
1683 if (!arg_directory) {
1684 log_error("Failed to determine path, please use -D.");
1688 path_kill_slashes(arg_directory);
1691 arg_machine = strdup(basename(arg_directory));
1697 hostname_cleanup(arg_machine, false);
1698 if (isempty(arg_machine)) {
1699 log_error("Failed to determine machine name automatically, please use -M.");
1704 if (geteuid() != 0) {
1705 log_error("Need to be root.");
1709 if (sd_booted() <= 0) {
1710 log_error("Not running on a systemd system.");
1714 if (path_equal(arg_directory, "/")) {
1715 log_error("Spawning container on root directory not supported.");
1720 if (path_is_os_tree(arg_directory) <= 0) {
1721 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1727 p = strappenda(arg_directory,
1728 argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
1729 if (access(p, F_OK) < 0) {
1730 log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
1737 n_fd_passed = sd_listen_fds(false);
1738 if (n_fd_passed > 0) {
1739 k = fdset_new_listen_fds(&fds, false);
1741 log_error("Failed to collect file descriptors: %s", strerror(-k));
1745 fdset_close_others(fds);
1748 master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1750 log_error("Failed to acquire pseudo tty: %m");
1754 console = ptsname(master);
1756 log_error("Failed to determine tty name: %m");
1761 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1763 if (unlockpt(master) < 0) {
1764 log_error("Failed to unlock tty: %m");
1768 if (access("/dev/kdbus/control", F_OK) >= 0) {
1770 if (arg_share_system) {
1771 kdbus_domain = strdup("/dev/kdbus");
1772 if (!kdbus_domain) {
1779 ns = strappenda("machine-", arg_machine);
1780 kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
1782 log_debug("Failed to create kdbus domain: %s", strerror(-r));
1784 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
1788 if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1789 log_error("Failed to create kmsg socket pair: %m");
1793 sd_notify(0, "READY=1");
1795 assert_se(sigemptyset(&mask) == 0);
1796 sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1797 assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1802 sync_fd = eventfd(0, EFD_CLOEXEC);
1804 log_error("Failed to create event fd: %m");
1808 pid = syscall(__NR_clone,
1809 SIGCHLD|CLONE_NEWNS|
1810 (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
1811 (arg_private_network ? CLONE_NEWNET : 0), NULL);
1813 if (errno == EINVAL)
1814 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1816 log_error("clone() failed: %m");
1823 const char *home = NULL;
1824 uid_t uid = (uid_t) -1;
1825 gid_t gid = (gid_t) -1;
1827 const char *envp[] = {
1828 "PATH=" DEFAULT_PATH_SPLIT_USR,
1829 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1834 NULL, /* container_uuid */
1835 NULL, /* LISTEN_FDS */
1836 NULL, /* LISTEN_PID */
1842 envp[n_env] = strv_find_prefix(environ, "TERM=");
1846 close_nointr_nofail(master);
1849 close_nointr(STDIN_FILENO);
1850 close_nointr(STDOUT_FILENO);
1851 close_nointr(STDERR_FILENO);
1853 close_nointr_nofail(kmsg_socket_pair[0]);
1854 kmsg_socket_pair[0] = -1;
1856 reset_all_signal_handlers();
1858 assert_se(sigemptyset(&mask) == 0);
1859 assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1861 k = open_terminal(console, O_RDWR);
1862 if (k != STDIN_FILENO) {
1864 close_nointr_nofail(k);
1868 log_error("Failed to open console: %s", strerror(-k));
1872 if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1873 dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1874 log_error("Failed to duplicate console: %m");
1879 log_error("setsid() failed: %m");
1883 if (reset_audit_loginuid() < 0)
1886 if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1887 log_error("PR_SET_PDEATHSIG failed: %m");
1891 /* Mark everything as slave, so that we still
1892 * receive mounts from the real root, but don't
1893 * propagate mounts to the real root. */
1894 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1895 log_error("MS_SLAVE|MS_REC failed: %m");
1899 /* Turn directory into bind mount */
1900 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1901 log_error("Failed to make bind mount.");
1906 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1907 log_error("Failed to make read-only.");
1911 if (mount_all(arg_directory) < 0)
1914 if (copy_devnodes(arg_directory) < 0)
1917 if (setup_ptmx(arg_directory) < 0)
1920 dev_setup(arg_directory);
1922 if (audit_still_doesnt_work_in_containers() < 0)
1925 if (setup_dev_console(arg_directory, console) < 0)
1928 if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1931 close_nointr_nofail(kmsg_socket_pair[1]);
1932 kmsg_socket_pair[1] = -1;
1934 if (setup_boot_id(arg_directory) < 0)
1937 if (setup_timezone(arg_directory) < 0)
1940 if (setup_resolv_conf(arg_directory) < 0)
1943 if (setup_journal(arg_directory) < 0)
1946 if (mount_binds(arg_directory, arg_bind, 0) < 0)
1949 if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1952 if (setup_kdbus(arg_directory, kdbus_domain) < 0)
1955 if (chdir(arg_directory) < 0) {
1956 log_error("chdir(%s) failed: %m", arg_directory);
1960 if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1961 log_error("mount(MS_MOVE) failed: %m");
1965 if (chroot(".") < 0) {
1966 log_error("chroot() failed: %m");
1970 if (chdir("/") < 0) {
1971 log_error("chdir() failed: %m");
1977 if (arg_private_network)
1980 if (drop_capabilities() < 0) {
1981 log_error("drop_capabilities() failed: %m");
1987 /* Note that this resolves user names
1988 * inside the container, and hence
1989 * accesses the NSS modules from the
1990 * container and not the host. This is
1993 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1994 log_error("get_user_creds() failed: %m");
1998 if (mkdir_parents_label(home, 0775) < 0) {
1999 log_error("mkdir_parents_label() failed: %m");
2003 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
2004 log_error("mkdir_safe_label() failed: %m");
2008 if (initgroups((const char*)arg_user, gid) < 0) {
2009 log_error("initgroups() failed: %m");
2013 if (setresgid(gid, gid, gid) < 0) {
2014 log_error("setregid() failed: %m");
2018 if (setresuid(uid, uid, uid) < 0) {
2019 log_error("setreuid() failed: %m");
2023 /* Reset everything fully to 0, just in case */
2025 if (setgroups(0, NULL) < 0) {
2026 log_error("setgroups() failed: %m");
2030 if (setresgid(0, 0, 0) < 0) {
2031 log_error("setregid() failed: %m");
2035 if (setresuid(0, 0, 0) < 0) {
2036 log_error("setreuid() failed: %m");
2041 if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
2042 (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
2043 (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
2048 if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
2049 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
2055 if (fdset_size(fds) > 0) {
2056 k = fdset_cloexec(fds, false);
2058 log_error("Failed to unset O_CLOEXEC for file descriptors.");
2062 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
2063 (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
2071 if (arg_personality != 0xffffffffLU) {
2072 if (personality(arg_personality) < 0) {
2073 log_error("personality() failed: %m");
2078 eventfd_read(sync_fd, &x);
2079 close_nointr_nofail(sync_fd);
2082 if (!strv_isempty(arg_setenv)) {
2085 n = strv_env_merge(2, envp, arg_setenv);
2093 env_use = (char**) envp;
2096 if (arg_selinux_context)
2097 if (setexeccon(arg_selinux_context) < 0)
2098 log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2104 /* Automatically search for the init system */
2106 l = 1 + argc - optind;
2107 a = newa(char*, l + 1);
2108 memcpy(a + 1, argv + optind, l * sizeof(char*));
2110 a[0] = (char*) "/usr/lib/systemd/systemd";
2111 execve(a[0], a, env_use);
2113 a[0] = (char*) "/lib/systemd/systemd";
2114 execve(a[0], a, env_use);
2116 a[0] = (char*) "/sbin/init";
2117 execve(a[0], a, env_use);
2118 } else if (argc > optind)
2119 execvpe(argv[optind], argv + optind, env_use);
2121 chdir(home ? home : "/root");
2122 execle("/bin/bash", "-bash", NULL, env_use);
2123 execle("/bin/sh", "-sh", NULL, env_use);
2126 log_error("execv() failed: %m");
2129 _exit(EXIT_FAILURE);
2135 r = register_machine(pid);
2139 r = move_network_interfaces(pid);
2143 r = setup_veth(pid, veth_name);
2147 r = setup_bridge(veth_name);
2151 eventfd_write(sync_fd, 1);
2152 close_nointr_nofail(sync_fd);
2155 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2164 /* Kill if it is not dead yet anyway */
2165 terminate_machine(pid);
2167 /* Redundant, but better safe than sorry */
2170 k = wait_for_terminate(pid, &status);
2178 if (status.si_code == CLD_EXITED) {
2179 r = status.si_status;
2180 if (status.si_status != 0) {
2181 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2186 log_debug("Container %s exited successfully.", arg_machine);
2188 } else if (status.si_code == CLD_KILLED &&
2189 status.si_status == SIGINT) {
2192 log_info("Container %s has been shut down.", arg_machine);
2195 } else if (status.si_code == CLD_KILLED &&
2196 status.si_status == SIGHUP) {
2199 log_info("Container %s is being rebooted.", arg_machine);
2201 } else if (status.si_code == CLD_KILLED ||
2202 status.si_code == CLD_DUMPED) {
2204 log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2208 log_error("Container %s failed due to unknown reason.", arg_machine);
2218 free(arg_directory);
2221 free(arg_network_interfaces);