chiark / gitweb /
nspawn: allow 32-bit chroots from 64-bit hosts
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
44 #include <net/if.h>
45 #include <linux/veth.h>
46
47 #ifdef HAVE_SELINUX
48 #include <selinux/selinux.h>
49 #endif
50
51 #ifdef HAVE_SECCOMP
52 #include <seccomp.h>
53 #endif
54
55 #include "sd-daemon.h"
56 #include "sd-bus.h"
57 #include "sd-id128.h"
58 #include "sd-rtnl.h"
59 #include "log.h"
60 #include "util.h"
61 #include "mkdir.h"
62 #include "macro.h"
63 #include "audit.h"
64 #include "missing.h"
65 #include "cgroup-util.h"
66 #include "strv.h"
67 #include "path-util.h"
68 #include "loopback-setup.h"
69 #include "dev-setup.h"
70 #include "fdset.h"
71 #include "build.h"
72 #include "fileio.h"
73 #include "bus-util.h"
74 #include "bus-error.h"
75 #include "ptyfwd.h"
76 #include "bus-kernel.h"
77 #include "env-util.h"
78 #include "def.h"
79 #include "rtnl-util.h"
80 #include "udev-util.h"
81
/* Journal linking mode, chosen with --link-journal= (or the -j shortcut). */
typedef enum LinkJournal {
        LINK_NO    = 0,   /* --link-journal=no */
        LINK_AUTO  = 1,   /* --link-journal=auto; also the built-in default */
        LINK_HOST  = 2,   /* --link-journal=host */
        LINK_GUEST = 3    /* --link-journal=guest, and -j */
} LinkJournal;
88
89 static char *arg_directory = NULL;
90 static char *arg_user = NULL;
91 static sd_id128_t arg_uuid = {};
92 static char *arg_machine = NULL;
93 static char *arg_selinux_context = NULL;
94 static char *arg_selinux_apifs_context = NULL;
95 static const char *arg_slice = NULL;
96 static bool arg_private_network = false;
97 static bool arg_read_only = false;
98 static bool arg_boot = false;
99 static LinkJournal arg_link_journal = LINK_AUTO;
100 static uint64_t arg_retain =
101         (1ULL << CAP_CHOWN) |
102         (1ULL << CAP_DAC_OVERRIDE) |
103         (1ULL << CAP_DAC_READ_SEARCH) |
104         (1ULL << CAP_FOWNER) |
105         (1ULL << CAP_FSETID) |
106         (1ULL << CAP_IPC_OWNER) |
107         (1ULL << CAP_KILL) |
108         (1ULL << CAP_LEASE) |
109         (1ULL << CAP_LINUX_IMMUTABLE) |
110         (1ULL << CAP_NET_BIND_SERVICE) |
111         (1ULL << CAP_NET_BROADCAST) |
112         (1ULL << CAP_NET_RAW) |
113         (1ULL << CAP_SETGID) |
114         (1ULL << CAP_SETFCAP) |
115         (1ULL << CAP_SETPCAP) |
116         (1ULL << CAP_SETUID) |
117         (1ULL << CAP_SYS_ADMIN) |
118         (1ULL << CAP_SYS_CHROOT) |
119         (1ULL << CAP_SYS_NICE) |
120         (1ULL << CAP_SYS_PTRACE) |
121         (1ULL << CAP_SYS_TTY_CONFIG) |
122         (1ULL << CAP_SYS_RESOURCE) |
123         (1ULL << CAP_SYS_BOOT) |
124         (1ULL << CAP_AUDIT_WRITE) |
125         (1ULL << CAP_AUDIT_CONTROL) |
126         (1ULL << CAP_MKNOD);
127 static char **arg_bind = NULL;
128 static char **arg_bind_ro = NULL;
129 static char **arg_setenv = NULL;
130 static bool arg_quiet = false;
131 static bool arg_share_system = false;
132 static bool arg_register = true;
133 static bool arg_keep_unit = false;
134 static char **arg_network_interfaces = NULL;
135 static bool arg_network_veth = false;
136 static char *arg_network_bridge = NULL;
137
138 static int help(void) {
139
140         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
141                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
142                "  -h --help                 Show this help\n"
143                "     --version              Print version string\n"
144                "  -q --quiet                Do not show status information\n"
145                "  -D --directory=NAME       Root directory for the container\n"
146                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
147                "  -u --user=USER            Run the command under specified user or uid\n"
148                "  -M --machine=NAME         Set the machine name for the container\n"
149                "     --uuid=UUID            Set a specific machine UUID for the container\n"
150                "  -S --slice=SLICE          Place the container in the specified slice\n"
151                "     --private-network      Disable network in container\n"
152                "     --network-interface=INTERFACE\n"
153                "                            Assign an existing network interface to the\n"
154                "                            container\n"
155                "     --network-veth         Add a virtual ethernet connection between host\n"
156                "                            and container\n"
157                "     --network-bridge=INTERFACE\n"
158                "                            Add a virtual ethernet connection between host\n"
159                "                            and container and add it to an existing bridge on\n"
160                "                            the host\n"
161                "  -Z --selinux-context=SECLABEL\n"
162                "                            Set the SELinux security context to be used by\n"
163                "                            processes in the container\n"
164                "  -L --selinux-apifs-context=SECLABEL\n"
165                "                            Set the SELinux security context to be used by\n"
166                "                            API/tmpfs file systems in the container\n"
167                "     --capability=CAP       In addition to the default, retain specified\n"
168                "                            capability\n"
169                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
170                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
171                "  -j                        Equivalent to --link-journal=host\n"
172                "     --read-only            Mount the root directory read-only\n"
173                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
174                "                            the container\n"
175                "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
176                "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
177                "     --share-system         Share system namespaces with host\n"
178                "     --register=BOOLEAN     Register container as machine\n"
179                "     --keep-unit            Do not register a scope for the machine, reuse\n"
180                "                            the service unit nspawn is running in\n",
181                program_invocation_short_name);
182
183         return 0;
184 }
185
186 static int parse_argv(int argc, char *argv[]) {
187
188         enum {
189                 ARG_VERSION = 0x100,
190                 ARG_PRIVATE_NETWORK,
191                 ARG_UUID,
192                 ARG_READ_ONLY,
193                 ARG_CAPABILITY,
194                 ARG_DROP_CAPABILITY,
195                 ARG_LINK_JOURNAL,
196                 ARG_BIND,
197                 ARG_BIND_RO,
198                 ARG_SETENV,
199                 ARG_SHARE_SYSTEM,
200                 ARG_REGISTER,
201                 ARG_KEEP_UNIT,
202                 ARG_NETWORK_INTERFACE,
203                 ARG_NETWORK_VETH,
204                 ARG_NETWORK_BRIDGE,
205         };
206
207         static const struct option options[] = {
208                 { "help",                  no_argument,       NULL, 'h'                   },
209                 { "version",               no_argument,       NULL, ARG_VERSION           },
210                 { "directory",             required_argument, NULL, 'D'                   },
211                 { "user",                  required_argument, NULL, 'u'                   },
212                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
213                 { "boot",                  no_argument,       NULL, 'b'                   },
214                 { "uuid",                  required_argument, NULL, ARG_UUID              },
215                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
216                 { "capability",            required_argument, NULL, ARG_CAPABILITY        },
217                 { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
218                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
219                 { "bind",                  required_argument, NULL, ARG_BIND              },
220                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
221                 { "machine",               required_argument, NULL, 'M'                   },
222                 { "slice",                 required_argument, NULL, 'S'                   },
223                 { "setenv",                required_argument, NULL, ARG_SETENV            },
224                 { "selinux-context",       required_argument, NULL, 'Z'                   },
225                 { "selinux-apifs-context", required_argument, NULL, 'L'                   },
226                 { "quiet",                 no_argument,       NULL, 'q'                   },
227                 { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
228                 { "register",              required_argument, NULL, ARG_REGISTER          },
229                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
230                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
231                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
232                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
233                 {}
234         };
235
236         int c, r;
237         uint64_t plus = 0, minus = 0;
238
239         assert(argc >= 0);
240         assert(argv);
241
242         while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
243
244                 switch (c) {
245
246                 case 'h':
247                         return help();
248
249                 case ARG_VERSION:
250                         puts(PACKAGE_STRING);
251                         puts(SYSTEMD_FEATURES);
252                         return 0;
253
254                 case 'D':
255                         free(arg_directory);
256                         arg_directory = canonicalize_file_name(optarg);
257                         if (!arg_directory) {
258                                 log_error("Invalid root directory: %m");
259                                 return -ENOMEM;
260                         }
261
262                         break;
263
264                 case 'u':
265                         free(arg_user);
266                         arg_user = strdup(optarg);
267                         if (!arg_user)
268                                 return log_oom();
269
270                         break;
271
272                 case ARG_NETWORK_BRIDGE:
273                         arg_network_bridge = strdup(optarg);
274                         if (!arg_network_bridge)
275                                 return log_oom();
276
277                         /* fall through */
278
279                 case ARG_NETWORK_VETH:
280                         arg_network_veth = true;
281                         arg_private_network = true;
282                         break;
283
284                 case ARG_NETWORK_INTERFACE:
285                         if (strv_push(&arg_network_interfaces, optarg) < 0)
286                                 return log_oom();
287
288                         /* fall through */
289
290                 case ARG_PRIVATE_NETWORK:
291                         arg_private_network = true;
292                         break;
293
294                 case 'b':
295                         arg_boot = true;
296                         break;
297
298                 case ARG_UUID:
299                         r = sd_id128_from_string(optarg, &arg_uuid);
300                         if (r < 0) {
301                                 log_error("Invalid UUID: %s", optarg);
302                                 return r;
303                         }
304                         break;
305
306                 case 'S':
307                         arg_slice = strdup(optarg);
308                         if (!arg_slice)
309                                 return log_oom();
310
311                         break;
312
313                 case 'M':
314                         if (isempty(optarg)) {
315                                 free(arg_machine);
316                                 arg_machine = NULL;
317                         } else {
318
319                                 if (!hostname_is_valid(optarg)) {
320                                         log_error("Invalid machine name: %s", optarg);
321                                         return -EINVAL;
322                                 }
323
324                                 free(arg_machine);
325                                 arg_machine = strdup(optarg);
326                                 if (!arg_machine)
327                                         return log_oom();
328
329                                 break;
330                         }
331
332                 case 'Z':
333                         arg_selinux_context = optarg;
334                         break;
335
336                 case 'L':
337                         arg_selinux_apifs_context = optarg;
338                         break;
339
340                 case ARG_READ_ONLY:
341                         arg_read_only = true;
342                         break;
343
344                 case ARG_CAPABILITY:
345                 case ARG_DROP_CAPABILITY: {
346                         char *state, *word;
347                         size_t length;
348
349                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
350                                 _cleanup_free_ char *t;
351                                 cap_value_t cap;
352
353                                 t = strndup(word, length);
354                                 if (!t)
355                                         return log_oom();
356
357                                 if (streq(t, "all")) {
358                                         if (c == ARG_CAPABILITY)
359                                                 plus = (uint64_t) -1;
360                                         else
361                                                 minus = (uint64_t) -1;
362                                 } else {
363                                         if (cap_from_name(t, &cap) < 0) {
364                                                 log_error("Failed to parse capability %s.", t);
365                                                 return -EINVAL;
366                                         }
367
368                                         if (c == ARG_CAPABILITY)
369                                                 plus |= 1ULL << (uint64_t) cap;
370                                         else
371                                                 minus |= 1ULL << (uint64_t) cap;
372                                 }
373                         }
374
375                         break;
376                 }
377
378                 case 'j':
379                         arg_link_journal = LINK_GUEST;
380                         break;
381
382                 case ARG_LINK_JOURNAL:
383                         if (streq(optarg, "auto"))
384                                 arg_link_journal = LINK_AUTO;
385                         else if (streq(optarg, "no"))
386                                 arg_link_journal = LINK_NO;
387                         else if (streq(optarg, "guest"))
388                                 arg_link_journal = LINK_GUEST;
389                         else if (streq(optarg, "host"))
390                                 arg_link_journal = LINK_HOST;
391                         else {
392                                 log_error("Failed to parse link journal mode %s", optarg);
393                                 return -EINVAL;
394                         }
395
396                         break;
397
398                 case ARG_BIND:
399                 case ARG_BIND_RO: {
400                         _cleanup_free_ char *a = NULL, *b = NULL;
401                         char *e;
402                         char ***x;
403
404                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
405
406                         e = strchr(optarg, ':');
407                         if (e) {
408                                 a = strndup(optarg, e - optarg);
409                                 b = strdup(e + 1);
410                         } else {
411                                 a = strdup(optarg);
412                                 b = strdup(optarg);
413                         }
414
415                         if (!a || !b)
416                                 return log_oom();
417
418                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
419                                 log_error("Invalid bind mount specification: %s", optarg);
420                                 return -EINVAL;
421                         }
422
423                         r = strv_extend(x, a);
424                         if (r < 0)
425                                 return log_oom();
426
427                         r = strv_extend(x, b);
428                         if (r < 0)
429                                 return log_oom();
430
431                         break;
432                 }
433
434                 case ARG_SETENV: {
435                         char **n;
436
437                         if (!env_assignment_is_valid(optarg)) {
438                                 log_error("Environment variable assignment '%s' is not valid.", optarg);
439                                 return -EINVAL;
440                         }
441
442                         n = strv_env_set(arg_setenv, optarg);
443                         if (!n)
444                                 return log_oom();
445
446                         strv_free(arg_setenv);
447                         arg_setenv = n;
448                         break;
449                 }
450
451                 case 'q':
452                         arg_quiet = true;
453                         break;
454
455                 case ARG_SHARE_SYSTEM:
456                         arg_share_system = true;
457                         break;
458
459                 case ARG_REGISTER:
460                         r = parse_boolean(optarg);
461                         if (r < 0) {
462                                 log_error("Failed to parse --register= argument: %s", optarg);
463                                 return r;
464                         }
465
466                         arg_register = r;
467                         break;
468
469                 case ARG_KEEP_UNIT:
470                         arg_keep_unit = true;
471                         break;
472
473                 case '?':
474                         return -EINVAL;
475
476                 default:
477                         assert_not_reached("Unhandled option");
478                 }
479         }
480
481         if (arg_share_system)
482                 arg_register = false;
483
484         if (arg_boot && arg_share_system) {
485                 log_error("--boot and --share-system may not be combined.");
486                 return -EINVAL;
487         }
488
489         if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
490                 log_error("--keep-unit may not be used when invoked from a user session.");
491                 return -EINVAL;
492         }
493
494         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
495
496         return 1;
497 }
498
499 static int mount_all(const char *dest) {
500
501         typedef struct MountPoint {
502                 const char *what;
503                 const char *where;
504                 const char *type;
505                 const char *options;
506                 unsigned long flags;
507                 bool fatal;
508         } MountPoint;
509
510         static const MountPoint mount_table[] = {
511                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
512                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
513                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
514                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
515                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
516                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
517                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
518                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
519 #ifdef HAVE_SELINUX
520                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
521                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
522 #endif
523         };
524
525         unsigned k;
526         int r = 0;
527
528         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
529                 _cleanup_free_ char *where = NULL;
530 #ifdef HAVE_SELINUX
531                 _cleanup_free_ char *options = NULL;
532 #endif
533                 const char *o;
534                 int t;
535
536                 where = strjoin(dest, "/", mount_table[k].where, NULL);
537                 if (!where)
538                         return log_oom();
539
540                 t = path_is_mount_point(where, true);
541                 if (t < 0) {
542                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
543
544                         if (r == 0)
545                                 r = t;
546
547                         continue;
548                 }
549
550                 /* Skip this entry if it is not a remount. */
551                 if (mount_table[k].what && t > 0)
552                         continue;
553
554                 mkdir_p(where, 0755);
555
556 #ifdef HAVE_SELINUX
557                 if (arg_selinux_apifs_context &&
558                     (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
559                         options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
560                         if (!options)
561                                 return log_oom();
562
563                         o = options;
564                 } else
565 #endif
566                         o = mount_table[k].options;
567
568
569                 if (mount(mount_table[k].what,
570                           where,
571                           mount_table[k].type,
572                           mount_table[k].flags,
573                           o) < 0 &&
574                     mount_table[k].fatal) {
575
576                         log_error("mount(%s) failed: %m", where);
577
578                         if (r == 0)
579                                 r = -errno;
580                 }
581         }
582
583         return r;
584 }
585
586 static int mount_binds(const char *dest, char **l, unsigned long flags) {
587         char **x, **y;
588
589         STRV_FOREACH_PAIR(x, y, l) {
590                 char *where;
591                 struct stat source_st, dest_st;
592                 int r;
593
594                 if (stat(*x, &source_st) < 0) {
595                         log_error("failed to stat %s: %m", *x);
596                         return -errno;
597                 }
598
599                 where = strappenda(dest, *y);
600                 r = stat(where, &dest_st);
601                 if (r == 0) {
602                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
603                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
604                                                 *x, where);
605                                 return -EINVAL;
606                         }
607                 } else if (errno == ENOENT) {
608                         r = mkdir_parents_label(where, 0755);
609                         if (r < 0) {
610                                 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
611                                 return r;
612                         }
613                 } else {
614                         log_error("Failed to bind mount %s: %s", *x, strerror(errno));
615                         return -errno;
616                 }
617                 /* Create the mount point, but be conservative -- refuse to create block
618                 * and char devices. */
619                 if (S_ISDIR(source_st.st_mode))
620                         mkdir_label(where, 0755);
621                 else if (S_ISFIFO(source_st.st_mode))
622                         mkfifo(where, 0644);
623                 else if (S_ISSOCK(source_st.st_mode))
624                         mknod(where, 0644 | S_IFSOCK, 0);
625                 else if (S_ISREG(source_st.st_mode))
626                         touch(where);
627                 else {
628                         log_error("Refusing to create mountpoint for file: %s", *x);
629                         return -ENOTSUP;
630                 }
631
632                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
633                         log_error("mount(%s) failed: %m", where);
634                         return -errno;
635                 }
636
637                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
638                         log_error("mount(%s) failed: %m", where);
639                         return -errno;
640                 }
641         }
642
643         return 0;
644 }
645
646 static int setup_timezone(const char *dest) {
647         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
648         char *z, *y;
649         int r;
650
651         assert(dest);
652
653         /* Fix the timezone, if possible */
654         r = readlink_malloc("/etc/localtime", &p);
655         if (r < 0) {
656                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
657                 return 0;
658         }
659
660         z = path_startswith(p, "../usr/share/zoneinfo/");
661         if (!z)
662                 z = path_startswith(p, "/usr/share/zoneinfo/");
663         if (!z) {
664                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
665                 return 0;
666         }
667
668         where = strappend(dest, "/etc/localtime");
669         if (!where)
670                 return log_oom();
671
672         r = readlink_malloc(where, &q);
673         if (r >= 0) {
674                 y = path_startswith(q, "../usr/share/zoneinfo/");
675                 if (!y)
676                         y = path_startswith(q, "/usr/share/zoneinfo/");
677
678
679                 /* Already pointing to the right place? Then do nothing .. */
680                 if (y && streq(y, z))
681                         return 0;
682         }
683
684         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
685         if (!check)
686                 return log_oom();
687
688         if (access(check, F_OK) < 0) {
689                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
690                 return 0;
691         }
692
693         what = strappend("../usr/share/zoneinfo/", z);
694         if (!what)
695                 return log_oom();
696
697         unlink(where);
698         if (symlink(what, where) < 0) {
699                 log_error("Failed to correct timezone of container: %m");
700                 return 0;
701         }
702
703         return 0;
704 }
705
706 static int setup_resolv_conf(const char *dest) {
707         char _cleanup_free_ *where = NULL;
708
709         assert(dest);
710
711         if (arg_private_network)
712                 return 0;
713
714         /* Fix resolv.conf, if possible */
715         where = strappend(dest, "/etc/resolv.conf");
716         if (!where)
717                 return log_oom();
718
719         /* We don't really care for the results of this really. If it
720          * fails, it fails, but meh... */
721         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
722
723         return 0;
724 }
725
726 static int setup_boot_id(const char *dest) {
727         _cleanup_free_ char *from = NULL, *to = NULL;
728         sd_id128_t rnd;
729         char as_uuid[37];
730         int r;
731
732         assert(dest);
733
734         if (arg_share_system)
735                 return 0;
736
737         /* Generate a new randomized boot ID, so that each boot-up of
738          * the container gets a new one */
739
740         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
741         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
742         if (!from || !to)
743                 return log_oom();
744
745         r = sd_id128_randomize(&rnd);
746         if (r < 0) {
747                 log_error("Failed to generate random boot id: %s", strerror(-r));
748                 return r;
749         }
750
751         snprintf(as_uuid, sizeof(as_uuid),
752                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
753                  SD_ID128_FORMAT_VAL(rnd));
754         char_array_0(as_uuid);
755
756         r = write_string_file(from, as_uuid);
757         if (r < 0) {
758                 log_error("Failed to write boot id: %s", strerror(-r));
759                 return r;
760         }
761
762         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
763                 log_error("Failed to bind mount boot id: %m");
764                 r = -errno;
765         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
766                 log_warning("Failed to make boot id read-only: %m");
767
768         unlink(from);
769         return r;
770 }
771
772 static int copy_devnodes(const char *dest) {
773
774         static const char devnodes[] =
775                 "null\0"
776                 "zero\0"
777                 "full\0"
778                 "random\0"
779                 "urandom\0"
780                 "tty\0";
781
782         const char *d;
783         int r = 0;
784         _cleanup_umask_ mode_t u;
785
786         assert(dest);
787
788         u = umask(0000);
789
790         NULSTR_FOREACH(d, devnodes) {
791                 _cleanup_free_ char *from = NULL, *to = NULL;
792                 struct stat st;
793
794                 from = strappend("/dev/", d);
795                 to = strjoin(dest, "/dev/", d, NULL);
796                 if (!from || !to)
797                         return log_oom();
798
799                 if (stat(from, &st) < 0) {
800
801                         if (errno != ENOENT) {
802                                 log_error("Failed to stat %s: %m", from);
803                                 return -errno;
804                         }
805
806                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
807
808                         log_error("%s is not a char or block device, cannot copy", from);
809                         return -EIO;
810
811                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
812
813                         log_error("mknod(%s) failed: %m", dest);
814                         return  -errno;
815                 }
816         }
817
818         return r;
819 }
820
821 static int setup_ptmx(const char *dest) {
822         _cleanup_free_ char *p = NULL;
823
824         p = strappend(dest, "/dev/ptmx");
825         if (!p)
826                 return log_oom();
827
828         if (symlink("pts/ptmx", p) < 0) {
829                 log_error("Failed to create /dev/ptmx symlink: %m");
830                 return -errno;
831         }
832
833         return 0;
834 }
835
836 static int setup_dev_console(const char *dest, const char *console) {
837         struct stat st;
838         _cleanup_free_ char *to = NULL;
839         int r;
840         _cleanup_umask_ mode_t u;
841
842         assert(dest);
843         assert(console);
844
845         u = umask(0000);
846
847         if (stat(console, &st) < 0) {
848                 log_error("Failed to stat %s: %m", console);
849                 return -errno;
850
851         } else if (!S_ISCHR(st.st_mode)) {
852                 log_error("/dev/console is not a char device");
853                 return -EIO;
854         }
855
856         r = chmod_and_chown(console, 0600, 0, 0);
857         if (r < 0) {
858                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
859                 return r;
860         }
861
862         if (asprintf(&to, "%s/dev/console", dest) < 0)
863                 return log_oom();
864
865         /* We need to bind mount the right tty to /dev/console since
866          * ptys can only exist on pts file systems. To have something
867          * to bind mount things on we create a device node first, that
868          * has the right major/minor (note that the major minor
869          * doesn't actually matter here, since we mount it over
870          * anyway). */
871
872         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
873                 log_error("mknod() for /dev/console failed: %m");
874                 return -errno;
875         }
876
877         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
878                 log_error("Bind mount for /dev/console failed: %m");
879                 return -errno;
880         }
881
882         return 0;
883 }
884
885 static int setup_kmsg(const char *dest, int kmsg_socket) {
886         _cleanup_free_ char *from = NULL, *to = NULL;
887         int r, fd, k;
888         _cleanup_umask_ mode_t u;
889         union {
890                 struct cmsghdr cmsghdr;
891                 uint8_t buf[CMSG_SPACE(sizeof(int))];
892         } control = {};
893         struct msghdr mh = {
894                 .msg_control = &control,
895                 .msg_controllen = sizeof(control),
896         };
897         struct cmsghdr *cmsg;
898
899         assert(dest);
900         assert(kmsg_socket >= 0);
901
902         u = umask(0000);
903
904         /* We create the kmsg FIFO as /dev/kmsg, but immediately
905          * delete it after bind mounting it to /proc/kmsg. While FIFOs
906          * on the reading side behave very similar to /proc/kmsg,
907          * their writing side behaves differently from /dev/kmsg in
908          * that writing blocks when nothing is reading. In order to
909          * avoid any problems with containers deadlocking due to this
910          * we simply make /dev/kmsg unavailable to the container. */
911         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
912             asprintf(&to, "%s/proc/kmsg", dest) < 0)
913                 return log_oom();
914
915         if (mkfifo(from, 0600) < 0) {
916                 log_error("mkfifo() for /dev/kmsg failed: %m");
917                 return -errno;
918         }
919
920         r = chmod_and_chown(from, 0600, 0, 0);
921         if (r < 0) {
922                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
923                 return r;
924         }
925
926         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
927                 log_error("Bind mount for /proc/kmsg failed: %m");
928                 return -errno;
929         }
930
931         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
932         if (fd < 0) {
933                 log_error("Failed to open fifo: %m");
934                 return -errno;
935         }
936
937         cmsg = CMSG_FIRSTHDR(&mh);
938         cmsg->cmsg_level = SOL_SOCKET;
939         cmsg->cmsg_type = SCM_RIGHTS;
940         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
941         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
942
943         mh.msg_controllen = cmsg->cmsg_len;
944
945         /* Store away the fd in the socket, so that it stays open as
946          * long as we run the child */
947         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
948         close_nointr_nofail(fd);
949
950         if (k < 0) {
951                 log_error("Failed to send FIFO fd: %m");
952                 return -errno;
953         }
954
955         /* And now make the FIFO unavailable as /dev/kmsg... */
956         unlink(from);
957         return 0;
958 }
959
960 static int setup_hostname(void) {
961
962         if (arg_share_system)
963                 return 0;
964
965         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
966                 return -errno;
967
968         return 0;
969 }
970
971 static int setup_journal(const char *directory) {
972         sd_id128_t machine_id, this_id;
973         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
974         char *id;
975         int r;
976
977         p = strappend(directory, "/etc/machine-id");
978         if (!p)
979                 return log_oom();
980
981         r = read_one_line_file(p, &b);
982         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
983                 return 0;
984         else if (r < 0) {
985                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
986                 return r;
987         }
988
989         id = strstrip(b);
990         if (isempty(id) && arg_link_journal == LINK_AUTO)
991                 return 0;
992
993         /* Verify validity */
994         r = sd_id128_from_string(id, &machine_id);
995         if (r < 0) {
996                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
997                 return r;
998         }
999
1000         r = sd_id128_get_machine(&this_id);
1001         if (r < 0) {
1002                 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1003                 return r;
1004         }
1005
1006         if (sd_id128_equal(machine_id, this_id)) {
1007                 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1008                          "Host and machine ids are equal (%s): refusing to link journals", id);
1009                 if (arg_link_journal == LINK_AUTO)
1010                         return 0;
1011                 return
1012                         -EEXIST;
1013         }
1014
1015         if (arg_link_journal == LINK_NO)
1016                 return 0;
1017
1018         free(p);
1019         p = strappend("/var/log/journal/", id);
1020         q = strjoin(directory, "/var/log/journal/", id, NULL);
1021         if (!p || !q)
1022                 return log_oom();
1023
1024         if (path_is_mount_point(p, false) > 0) {
1025                 if (arg_link_journal != LINK_AUTO) {
1026                         log_error("%s: already a mount point, refusing to use for journal", p);
1027                         return -EEXIST;
1028                 }
1029
1030                 return 0;
1031         }
1032
1033         if (path_is_mount_point(q, false) > 0) {
1034                 if (arg_link_journal != LINK_AUTO) {
1035                         log_error("%s: already a mount point, refusing to use for journal", q);
1036                         return -EEXIST;
1037                 }
1038
1039                 return 0;
1040         }
1041
1042         r = readlink_and_make_absolute(p, &d);
1043         if (r >= 0) {
1044                 if ((arg_link_journal == LINK_GUEST ||
1045                      arg_link_journal == LINK_AUTO) &&
1046                     path_equal(d, q)) {
1047
1048                         r = mkdir_p(q, 0755);
1049                         if (r < 0)
1050                                 log_warning("failed to create directory %s: %m", q);
1051                         return 0;
1052                 }
1053
1054                 if (unlink(p) < 0) {
1055                         log_error("Failed to remove symlink %s: %m", p);
1056                         return -errno;
1057                 }
1058         } else if (r == -EINVAL) {
1059
1060                 if (arg_link_journal == LINK_GUEST &&
1061                     rmdir(p) < 0) {
1062
1063                         if (errno == ENOTDIR) {
1064                                 log_error("%s already exists and is neither a symlink nor a directory", p);
1065                                 return r;
1066                         } else {
1067                                 log_error("Failed to remove %s: %m", p);
1068                                 return -errno;
1069                         }
1070                 }
1071         } else if (r != -ENOENT) {
1072                 log_error("readlink(%s) failed: %m", p);
1073                 return r;
1074         }
1075
1076         if (arg_link_journal == LINK_GUEST) {
1077
1078                 if (symlink(q, p) < 0) {
1079                         log_error("Failed to symlink %s to %s: %m", q, p);
1080                         return -errno;
1081                 }
1082
1083                 r = mkdir_p(q, 0755);
1084                 if (r < 0)
1085                         log_warning("failed to create directory %s: %m", q);
1086                 return 0;
1087         }
1088
1089         if (arg_link_journal == LINK_HOST) {
1090                 r = mkdir_p(p, 0755);
1091                 if (r < 0) {
1092                         log_error("Failed to create %s: %m", p);
1093                         return r;
1094                 }
1095
1096         } else if (access(p, F_OK) < 0)
1097                 return 0;
1098
1099         if (dir_is_empty(q) == 0) {
1100                 log_error("%s not empty.", q);
1101                 return -ENOTEMPTY;
1102         }
1103
1104         r = mkdir_p(q, 0755);
1105         if (r < 0) {
1106                 log_error("Failed to create %s: %m", q);
1107                 return r;
1108         }
1109
1110         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1111                 log_error("Failed to bind mount journal from host into guest: %m");
1112                 return -errno;
1113         }
1114
1115         return 0;
1116 }
1117
1118 static int setup_kdbus(const char *dest, const char *path) {
1119         const char *p;
1120
1121         if (!path)
1122                 return 0;
1123
1124         p = strappenda(dest, "/dev/kdbus");
1125         if (mkdir(p, 0755) < 0) {
1126                 log_error("Failed to create kdbus path: %m");
1127                 return  -errno;
1128         }
1129
1130         if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1131                 log_error("Failed to mount kdbus domain path: %m");
1132                 return -errno;
1133         }
1134
1135         return 0;
1136 }
1137
1138 static int drop_capabilities(void) {
1139         return capability_bounding_set_drop(~arg_retain, false);
1140 }
1141
1142 static int register_machine(pid_t pid) {
1143         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1144         _cleanup_bus_unref_ sd_bus *bus = NULL;
1145         int r;
1146
1147         if (!arg_register)
1148                 return 0;
1149
1150         r = sd_bus_default_system(&bus);
1151         if (r < 0) {
1152                 log_error("Failed to open system bus: %s", strerror(-r));
1153                 return r;
1154         }
1155
1156         if (arg_keep_unit) {
1157                 r = sd_bus_call_method(
1158                                 bus,
1159                                 "org.freedesktop.machine1",
1160                                 "/org/freedesktop/machine1",
1161                                 "org.freedesktop.machine1.Manager",
1162                                 "RegisterMachine",
1163                                 &error,
1164                                 NULL,
1165                                 "sayssus",
1166                                 arg_machine,
1167                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1168                                 "nspawn",
1169                                 "container",
1170                                 (uint32_t) pid,
1171                                 strempty(arg_directory));
1172         } else {
1173                 r = sd_bus_call_method(
1174                                 bus,
1175                                 "org.freedesktop.machine1",
1176                                 "/org/freedesktop/machine1",
1177                                 "org.freedesktop.machine1.Manager",
1178                                 "CreateMachine",
1179                                 &error,
1180                                 NULL,
1181                                 "sayssusa(sv)",
1182                                 arg_machine,
1183                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1184                                 "nspawn",
1185                                 "container",
1186                                 (uint32_t) pid,
1187                                 strempty(arg_directory),
1188                                 !isempty(arg_slice), "Slice", "s", arg_slice);
1189         }
1190
1191         if (r < 0) {
1192                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1193                 return r;
1194         }
1195
1196         return 0;
1197 }
1198
1199 static int terminate_machine(pid_t pid) {
1200         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1201         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1202         _cleanup_bus_unref_ sd_bus *bus = NULL;
1203         const char *path;
1204         int r;
1205
1206         if (!arg_register)
1207                 return 0;
1208
1209         r = sd_bus_default_system(&bus);
1210         if (r < 0) {
1211                 log_error("Failed to open system bus: %s", strerror(-r));
1212                 return r;
1213         }
1214
1215         r = sd_bus_call_method(
1216                         bus,
1217                         "org.freedesktop.machine1",
1218                         "/org/freedesktop/machine1",
1219                         "org.freedesktop.machine1.Manager",
1220                         "GetMachineByPID",
1221                         &error,
1222                         &reply,
1223                         "u",
1224                         (uint32_t) pid);
1225         if (r < 0) {
1226                 /* Note that the machine might already have been
1227                  * cleaned up automatically, hence don't consider it a
1228                  * failure if we cannot get the machine object. */
1229                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1230                 return 0;
1231         }
1232
1233         r = sd_bus_message_read(reply, "o", &path);
1234         if (r < 0)
1235                 return bus_log_parse_error(r);
1236
1237         r = sd_bus_call_method(
1238                         bus,
1239                         "org.freedesktop.machine1",
1240                         path,
1241                         "org.freedesktop.machine1.Machine",
1242                         "Terminate",
1243                         &error,
1244                         NULL,
1245                         NULL);
1246         if (r < 0) {
1247                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1248                 return 0;
1249         }
1250
1251         return 0;
1252 }
1253
1254 static int reset_audit_loginuid(void) {
1255         _cleanup_free_ char *p = NULL;
1256         int r;
1257
1258         if (arg_share_system)
1259                 return 0;
1260
1261         r = read_one_line_file("/proc/self/loginuid", &p);
1262         if (r == -EEXIST)
1263                 return 0;
1264         if (r < 0) {
1265                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1266                 return r;
1267         }
1268
1269         /* Already reset? */
1270         if (streq(p, "4294967295"))
1271                 return 0;
1272
1273         r = write_string_file("/proc/self/loginuid", "4294967295");
1274         if (r < 0) {
1275                 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1276                           "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1277                           "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1278                           "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1279                           "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1280
1281                 sleep(5);
1282         }
1283
1284         return 0;
1285 }
1286
1287 static int setup_veth(pid_t pid, char iface_name[]) {
1288         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1289         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1290         int r;
1291
1292         if (!arg_private_network)
1293                 return 0;
1294
1295         if (!arg_network_veth)
1296                 return 0;
1297
1298         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1299
1300         r = sd_rtnl_open(0, &rtnl);
1301         if (r < 0) {
1302                 log_error("Failed to connect to netlink: %s", strerror(-r));
1303                 return r;
1304         }
1305
1306         r = sd_rtnl_message_new_link(rtnl, RTM_NEWLINK, 0, &m);
1307         if (r < 0) {
1308                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1309                 return r;
1310         }
1311
1312         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1313         if (r < 0) {
1314                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1315                 return r;
1316         }
1317
1318         r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1319         if (r < 0) {
1320                 log_error("Failed to open netlink container: %s", strerror(-r));
1321                 return r;
1322         }
1323
1324         r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1325         if (r < 0) {
1326                 log_error("Failed to append netlink kind: %s", strerror(-r));
1327                 return r;
1328         }
1329
1330         r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1331         if (r < 0) {
1332                 log_error("Failed to open netlink container: %s", strerror(-r));
1333                 return r;
1334         }
1335
1336         r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1337         if (r < 0) {
1338                 log_error("Failed to open netlink container: %s", strerror(-r));
1339                 return r;
1340         }
1341
1342         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1343         if (r < 0) {
1344                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1345                 return r;
1346         }
1347
1348         r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1349         if (r < 0) {
1350                 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1351                 return r;
1352         }
1353
1354         r = sd_rtnl_message_close_container(m);
1355         if (r < 0) {
1356                 log_error("Failed to close netlink container: %s", strerror(-r));
1357                 return r;
1358         }
1359
1360         r = sd_rtnl_message_close_container(m);
1361         if (r < 0) {
1362                 log_error("Failed to close netlink container: %s", strerror(-r));
1363                 return r;
1364         }
1365
1366         r = sd_rtnl_message_close_container(m);
1367         if (r < 0) {
1368                 log_error("Failed to close netlink container: %s", strerror(-r));
1369                 return r;
1370         }
1371
1372         r = sd_rtnl_call(rtnl, m, 0, NULL);
1373         if (r < 0) {
1374                 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1375                 return r;
1376         }
1377
1378         return 0;
1379 }
1380
1381 static int setup_bridge(const char veth_name[]) {
1382         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1383         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1384         int r, bridge;
1385
1386         if (!arg_private_network)
1387                 return 0;
1388
1389         if (!arg_network_veth)
1390                 return 0;
1391
1392         if (!arg_network_bridge)
1393                 return 0;
1394
1395         bridge = (int) if_nametoindex(arg_network_bridge);
1396         if (bridge <= 0) {
1397                 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1398                 return -errno;
1399         }
1400
1401         r = sd_rtnl_open(0, &rtnl);
1402         if (r < 0) {
1403                 log_error("Failed to connect to netlink: %s", strerror(-r));
1404                 return r;
1405         }
1406
1407         r = sd_rtnl_message_new_link(rtnl, RTM_SETLINK, 0, &m);
1408         if (r < 0) {
1409                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1410                 return r;
1411         }
1412
1413         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1414         if (r < 0) {
1415                 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1416                 return r;
1417         }
1418
1419         r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1420         if (r < 0) {
1421                 log_error("Failed to add netlink master field: %s", strerror(-r));
1422                 return r;
1423         }
1424
1425         r = sd_rtnl_call(rtnl, m, 0, NULL);
1426         if (r < 0) {
1427                 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
1428                 return r;
1429         }
1430
1431         return 0;
1432 }
1433
1434 static int move_network_interfaces(pid_t pid) {
1435         _cleanup_udev_unref_ struct udev *udev = NULL;
1436         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1437         char **i;
1438         int r;
1439
1440         if (!arg_private_network)
1441                 return 0;
1442
1443         if (strv_isempty(arg_network_interfaces))
1444                 return 0;
1445
1446         r = sd_rtnl_open(0, &rtnl);
1447         if (r < 0) {
1448                 log_error("Failed to connect to netlink: %s", strerror(-r));
1449                 return r;
1450         }
1451
1452         udev = udev_new();
1453         if (!udev) {
1454                 log_error("Failed to connect to udev.");
1455                 return -ENOMEM;
1456         }
1457
1458         STRV_FOREACH(i, arg_network_interfaces) {
1459                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1460                 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1461                 char ifi_str[2 + DECIMAL_STR_MAX(int)];
1462                 int ifi;
1463
1464                 ifi = (int) if_nametoindex(*i);
1465                 if (ifi <= 0) {
1466                         log_error("Failed to resolve interface %s: %m", *i);
1467                         return -errno;
1468                 }
1469
1470                 sprintf(ifi_str, "n%i", ifi);
1471                 d = udev_device_new_from_device_id(udev, ifi_str);
1472                 if (!d) {
1473                         log_error("Failed to get udev device for interface %s: %m", *i);
1474                         return -errno;
1475                 }
1476
1477                 if (udev_device_get_is_initialized(d) <= 0) {
1478                         log_error("Network interface %s is not initialized yet.", *i);
1479                         return -EBUSY;
1480                 }
1481
1482                 r = sd_rtnl_message_new_link(rtnl, RTM_NEWLINK, ifi, &m);
1483                 if (r < 0) {
1484                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1485                         return r;
1486                 }
1487
1488                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1489                 if (r < 0) {
1490                         log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1491                         return r;
1492                 }
1493
1494                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1495                 if (r < 0) {
1496                         log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
1497                         return r;
1498                 }
1499         }
1500
1501         return 0;
1502 }
1503
/* Installs a seccomp filter that refuses the creation of audit
 * netlink sockets. Without HAVE_SECCOMP this is a no-op returning 0.
 *
 * Returns the result of the last seccomp operation: non-negative on
 * success, negative errno-style on failure. */
static int audit_still_doesnt_work_in_containers(void) {

#ifdef HAVE_SECCOMP
        scmp_filter_ctx filter;
        int r;

        /*
           Audit does not work in containers, and much of the
           userspace audit hookup fails when run inside of one. We
           do not care and simply turn off creation of audit
           sockets.

           As a result socket(AF_NETLINK, *, NETLINK_AUDIT) fails
           with EAFNOSUPPORT, which audit userspace takes as
           indication that audit is disabled in the kernel.
         */

        filter = seccomp_init(SCMP_ACT_ALLOW);
        if (!filter)
                return log_oom();

        r = seccomp_rule_add_exact(
                        filter,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0) {
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
                goto finish;
        }

        r = seccomp_attr_set(filter, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0) {
                log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                goto finish;
        }

#ifdef __x86_64__
        /* Also cover the 32-bit x86 architecture on 64-bit builds;
         * an already present architecture (EEXIST) is fine */
        r = seccomp_arch_add(filter, SCMP_ARCH_X86);
        if (r < 0 && r != -EEXIST) {
                log_error("Failed to add x86 to seccomp filter: %s", strerror(-r));
                goto finish;
        }
#endif

        r = seccomp_load(filter);
        if (r < 0)
                log_error("Failed to install seccomp audit filter: %s", strerror(-r));

finish:
        seccomp_release(filter);
        return r;
#else
        return 0;
#endif

}
1562
1563 int main(int argc, char *argv[]) {
1564
1565         _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
1566         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1567         _cleanup_free_ char *kdbus_domain = NULL;
1568         _cleanup_fdset_free_ FDSet *fds = NULL;
1569         const char *console = NULL;
1570         int r = EXIT_FAILURE, k;
1571         int n_fd_passed;
1572         pid_t pid = 0;
1573         sigset_t mask;
1574         char veth_name[IFNAMSIZ] = "ve-";
1575
1576         log_parse_environment();
1577         log_open();
1578
1579         k = parse_argv(argc, argv);
1580         if (k < 0)
1581                 goto finish;
1582         else if (k == 0) {
1583                 r = EXIT_SUCCESS;
1584                 goto finish;
1585         }
1586
1587         if (arg_directory) {
1588                 char *p;
1589
1590                 p = path_make_absolute_cwd(arg_directory);
1591                 free(arg_directory);
1592                 arg_directory = p;
1593         } else
1594                 arg_directory = get_current_dir_name();
1595
1596         if (!arg_directory) {
1597                 log_error("Failed to determine path, please use -D.");
1598                 goto finish;
1599         }
1600
1601         path_kill_slashes(arg_directory);
1602
1603         if (!arg_machine) {
1604                 arg_machine = strdup(basename(arg_directory));
1605                 if (!arg_machine) {
1606                         log_oom();
1607                         goto finish;
1608                 }
1609
1610                 hostname_cleanup(arg_machine, false);
1611                 if (isempty(arg_machine)) {
1612                         log_error("Failed to determine machine name automatically, please use -M.");
1613                         goto finish;
1614                 }
1615         }
1616
1617         if (geteuid() != 0) {
1618                 log_error("Need to be root.");
1619                 goto finish;
1620         }
1621
1622         if (sd_booted() <= 0) {
1623                 log_error("Not running on a systemd system.");
1624                 goto finish;
1625         }
1626
1627         if (path_equal(arg_directory, "/")) {
1628                 log_error("Spawning container on root directory not supported.");
1629                 goto finish;
1630         }
1631
1632         if (arg_boot) {
1633                 if (path_is_os_tree(arg_directory) <= 0) {
1634                         log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1635                         goto finish;
1636                 }
1637         } else {
1638                 const char *p;
1639
1640                 p = strappenda(arg_directory,
1641                                argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
1642                 if (access(p, F_OK) < 0) {
1643                         log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
1644                         goto finish;
1645
1646                 }
1647         }
1648
1649         log_close();
1650         n_fd_passed = sd_listen_fds(false);
1651         if (n_fd_passed > 0) {
1652                 k = fdset_new_listen_fds(&fds, false);
1653                 if (k < 0) {
1654                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1655                         goto finish;
1656                 }
1657         }
1658         fdset_close_others(fds);
1659         log_open();
1660
1661         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1662         if (master < 0) {
1663                 log_error("Failed to acquire pseudo tty: %m");
1664                 goto finish;
1665         }
1666
1667         console = ptsname(master);
1668         if (!console) {
1669                 log_error("Failed to determine tty name: %m");
1670                 goto finish;
1671         }
1672
1673         if (!arg_quiet)
1674                 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1675
1676         if (unlockpt(master) < 0) {
1677                 log_error("Failed to unlock tty: %m");
1678                 goto finish;
1679         }
1680
1681         if (access("/dev/kdbus/control", F_OK) >= 0) {
1682
1683                 if (arg_share_system) {
1684                         kdbus_domain = strdup("/dev/kdbus");
1685                         if (!kdbus_domain) {
1686                                 log_oom();
1687                                 goto finish;
1688                         }
1689                 } else {
1690                         const char *ns;
1691
1692                         ns = strappenda("machine-", arg_machine);
1693                         kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
1694                         if (r < 0)
1695                                 log_debug("Failed to create kdbus domain: %s", strerror(-r));
1696                         else
1697                                 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
1698                 }
1699         }
1700
1701         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1702                 log_error("Failed to create kmsg socket pair: %m");
1703                 goto finish;
1704         }
1705
1706         sd_notify(0, "READY=1");
1707
1708         assert_se(sigemptyset(&mask) == 0);
1709         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1710         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1711
1712         for (;;) {
1713                 siginfo_t status;
1714
1715                 sync_fd = eventfd(0, EFD_CLOEXEC);
1716                 if (sync_fd < 0) {
1717                         log_error("Failed to create event fd: %m");
1718                         goto finish;
1719                 }
1720
1721                 pid = syscall(__NR_clone,
1722                               SIGCHLD|CLONE_NEWNS|
1723                               (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
1724                               (arg_private_network ? CLONE_NEWNET : 0), NULL);
1725                 if (pid < 0) {
1726                         if (errno == EINVAL)
1727                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1728                         else
1729                                 log_error("clone() failed: %m");
1730
1731                         goto finish;
1732                 }
1733
1734                 if (pid == 0) {
1735                         /* child */
1736                         const char *home = NULL;
1737                         uid_t uid = (uid_t) -1;
1738                         gid_t gid = (gid_t) -1;
1739                         unsigned n_env = 2;
1740                         const char *envp[] = {
1741                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
1742                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1743                                 NULL, /* TERM */
1744                                 NULL, /* HOME */
1745                                 NULL, /* USER */
1746                                 NULL, /* LOGNAME */
1747                                 NULL, /* container_uuid */
1748                                 NULL, /* LISTEN_FDS */
1749                                 NULL, /* LISTEN_PID */
1750                                 NULL
1751                         };
1752                         char **env_use;
1753                         eventfd_t x;
1754
1755                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1756                         if (envp[n_env])
1757                                 n_env ++;
1758
1759                         close_nointr_nofail(master);
1760                         master = -1;
1761
1762                         close_nointr(STDIN_FILENO);
1763                         close_nointr(STDOUT_FILENO);
1764                         close_nointr(STDERR_FILENO);
1765
1766                         close_nointr_nofail(kmsg_socket_pair[0]);
1767                         kmsg_socket_pair[0] = -1;
1768
1769                         reset_all_signal_handlers();
1770
1771                         assert_se(sigemptyset(&mask) == 0);
1772                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1773
1774                         k = open_terminal(console, O_RDWR);
1775                         if (k != STDIN_FILENO) {
1776                                 if (k >= 0) {
1777                                         close_nointr_nofail(k);
1778                                         k = -EINVAL;
1779                                 }
1780
1781                                 log_error("Failed to open console: %s", strerror(-k));
1782                                 goto child_fail;
1783                         }
1784
1785                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1786                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1787                                 log_error("Failed to duplicate console: %m");
1788                                 goto child_fail;
1789                         }
1790
1791                         if (setsid() < 0) {
1792                                 log_error("setsid() failed: %m");
1793                                 goto child_fail;
1794                         }
1795
1796                         if (reset_audit_loginuid() < 0)
1797                                 goto child_fail;
1798
1799                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1800                                 log_error("PR_SET_PDEATHSIG failed: %m");
1801                                 goto child_fail;
1802                         }
1803
1804                         /* Mark everything as slave, so that we still
1805                          * receive mounts from the real root, but don't
1806                          * propagate mounts to the real root. */
1807                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1808                                 log_error("MS_SLAVE|MS_REC failed: %m");
1809                                 goto child_fail;
1810                         }
1811
1812                         /* Turn directory into bind mount */
1813                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1814                                 log_error("Failed to make bind mount.");
1815                                 goto child_fail;
1816                         }
1817
1818                         if (arg_read_only)
1819                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1820                                         log_error("Failed to make read-only.");
1821                                         goto child_fail;
1822                                 }
1823
1824                         if (mount_all(arg_directory) < 0)
1825                                 goto child_fail;
1826
1827                         if (copy_devnodes(arg_directory) < 0)
1828                                 goto child_fail;
1829
1830                         if (setup_ptmx(arg_directory) < 0)
1831                                 goto child_fail;
1832
1833                         dev_setup(arg_directory);
1834
1835                         if (audit_still_doesnt_work_in_containers() < 0)
1836                                 goto child_fail;
1837
1838                         if (setup_dev_console(arg_directory, console) < 0)
1839                                 goto child_fail;
1840
1841                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1842                                 goto child_fail;
1843
1844                         close_nointr_nofail(kmsg_socket_pair[1]);
1845                         kmsg_socket_pair[1] = -1;
1846
1847                         if (setup_boot_id(arg_directory) < 0)
1848                                 goto child_fail;
1849
1850                         if (setup_timezone(arg_directory) < 0)
1851                                 goto child_fail;
1852
1853                         if (setup_resolv_conf(arg_directory) < 0)
1854                                 goto child_fail;
1855
1856                         if (setup_journal(arg_directory) < 0)
1857                                 goto child_fail;
1858
1859                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1860                                 goto child_fail;
1861
1862                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1863                                 goto child_fail;
1864
1865                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
1866                                 goto child_fail;
1867
1868                         if (chdir(arg_directory) < 0) {
1869                                 log_error("chdir(%s) failed: %m", arg_directory);
1870                                 goto child_fail;
1871                         }
1872
1873                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1874                                 log_error("mount(MS_MOVE) failed: %m");
1875                                 goto child_fail;
1876                         }
1877
1878                         if (chroot(".") < 0) {
1879                                 log_error("chroot() failed: %m");
1880                                 goto child_fail;
1881                         }
1882
1883                         if (chdir("/") < 0) {
1884                                 log_error("chdir() failed: %m");
1885                                 goto child_fail;
1886                         }
1887
1888                         umask(0022);
1889
1890                         if (arg_private_network)
1891                                 loopback_setup();
1892
1893                         if (drop_capabilities() < 0) {
1894                                 log_error("drop_capabilities() failed: %m");
1895                                 goto child_fail;
1896                         }
1897
1898                         if (arg_user) {
1899
1900                                 /* Note that this resolves user names
1901                                  * inside the container, and hence
1902                                  * accesses the NSS modules from the
1903                                  * container and not the host. This is
1904                                  * a bit weird... */
1905
1906                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1907                                         log_error("get_user_creds() failed: %m");
1908                                         goto child_fail;
1909                                 }
1910
1911                                 if (mkdir_parents_label(home, 0775) < 0) {
1912                                         log_error("mkdir_parents_label() failed: %m");
1913                                         goto child_fail;
1914                                 }
1915
1916                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1917                                         log_error("mkdir_safe_label() failed: %m");
1918                                         goto child_fail;
1919                                 }
1920
1921                                 if (initgroups((const char*)arg_user, gid) < 0) {
1922                                         log_error("initgroups() failed: %m");
1923                                         goto child_fail;
1924                                 }
1925
1926                                 if (setresgid(gid, gid, gid) < 0) {
1927                                         log_error("setregid() failed: %m");
1928                                         goto child_fail;
1929                                 }
1930
1931                                 if (setresuid(uid, uid, uid) < 0) {
1932                                         log_error("setreuid() failed: %m");
1933                                         goto child_fail;
1934                                 }
1935                         } else {
1936                                 /* Reset everything fully to 0, just in case */
1937
1938                                 if (setgroups(0, NULL) < 0) {
1939                                         log_error("setgroups() failed: %m");
1940                                         goto child_fail;
1941                                 }
1942
1943                                 if (setresgid(0, 0, 0) < 0) {
1944                                         log_error("setregid() failed: %m");
1945                                         goto child_fail;
1946                                 }
1947
1948                                 if (setresuid(0, 0, 0) < 0) {
1949                                         log_error("setreuid() failed: %m");
1950                                         goto child_fail;
1951                                 }
1952                         }
1953
1954                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1955                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1956                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1957                                 log_oom();
1958                                 goto child_fail;
1959                         }
1960
1961                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1962                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1963                                         log_oom();
1964                                         goto child_fail;
1965                                 }
1966                         }
1967
1968                         if (fdset_size(fds) > 0) {
1969                                 k = fdset_cloexec(fds, false);
1970                                 if (k < 0) {
1971                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1972                                         goto child_fail;
1973                                 }
1974
1975                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1976                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1977                                         log_oom();
1978                                         goto child_fail;
1979                                 }
1980                         }
1981
1982                         setup_hostname();
1983
1984                         eventfd_read(sync_fd, &x);
1985                         close_nointr_nofail(sync_fd);
1986                         sync_fd = -1;
1987
1988                         if (!strv_isempty(arg_setenv)) {
1989                                 char **n;
1990
1991                                 n = strv_env_merge(2, envp, arg_setenv);
1992                                 if (!n) {
1993                                         log_oom();
1994                                         goto child_fail;
1995                                 }
1996
1997                                 env_use = n;
1998                         } else
1999                                 env_use = (char**) envp;
2000
2001 #ifdef HAVE_SELINUX
2002                         if (arg_selinux_context)
2003                                 if (setexeccon(arg_selinux_context) < 0)
2004                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2005 #endif
2006                         if (arg_boot) {
2007                                 char **a;
2008                                 size_t l;
2009
2010                                 /* Automatically search for the init system */
2011
2012                                 l = 1 + argc - optind;
2013                                 a = newa(char*, l + 1);
2014                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
2015
2016                                 a[0] = (char*) "/usr/lib/systemd/systemd";
2017                                 execve(a[0], a, env_use);
2018
2019                                 a[0] = (char*) "/lib/systemd/systemd";
2020                                 execve(a[0], a, env_use);
2021
2022                                 a[0] = (char*) "/sbin/init";
2023                                 execve(a[0], a, env_use);
2024                         } else if (argc > optind)
2025                                 execvpe(argv[optind], argv + optind, env_use);
2026                         else {
2027                                 chdir(home ? home : "/root");
2028                                 execle("/bin/bash", "-bash", NULL, env_use);
2029                                 execle("/bin/sh", "-sh", NULL, env_use);
2030                         }
2031
2032                         log_error("execv() failed: %m");
2033
2034                 child_fail:
2035                         _exit(EXIT_FAILURE);
2036                 }
2037
2038                 fdset_free(fds);
2039                 fds = NULL;
2040
2041                 r = register_machine(pid);
2042                 if (r < 0)
2043                         goto finish;
2044
2045                 r = move_network_interfaces(pid);
2046                 if (r < 0)
2047                         goto finish;
2048
2049                 r = setup_veth(pid, veth_name);
2050                 if (r < 0)
2051                         goto finish;
2052
2053                 r = setup_bridge(veth_name);
2054                 if (r < 0)
2055                         goto finish;
2056
2057                 eventfd_write(sync_fd, 1);
2058                 close_nointr_nofail(sync_fd);
2059                 sync_fd = -1;
2060
2061                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2062                 if (k < 0) {
2063                         r = EXIT_FAILURE;
2064                         break;
2065                 }
2066
2067                 if (!arg_quiet)
2068                         putc('\n', stdout);
2069
2070                 /* Kill if it is not dead yet anyway */
2071                 terminate_machine(pid);
2072
2073                 /* Redundant, but better safe than sorry */
2074                 kill(pid, SIGKILL);
2075
2076                 k = wait_for_terminate(pid, &status);
2077                 pid = 0;
2078
2079                 if (k < 0) {
2080                         r = EXIT_FAILURE;
2081                         break;
2082                 }
2083
2084                 if (status.si_code == CLD_EXITED) {
2085                         r = status.si_status;
2086                         if (status.si_status != 0) {
2087                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2088                                 break;
2089                         }
2090
2091                         if (!arg_quiet)
2092                                 log_debug("Container %s exited successfully.", arg_machine);
2093                         break;
2094                 } else if (status.si_code == CLD_KILLED &&
2095                            status.si_status == SIGINT) {
2096
2097                         if (!arg_quiet)
2098                                 log_info("Container %s has been shut down.", arg_machine);
2099                         r = 0;
2100                         break;
2101                 } else if (status.si_code == CLD_KILLED &&
2102                            status.si_status == SIGHUP) {
2103
2104                         if (!arg_quiet)
2105                                 log_info("Container %s is being rebooted.", arg_machine);
2106                         continue;
2107                 } else if (status.si_code == CLD_KILLED ||
2108                            status.si_code == CLD_DUMPED) {
2109
2110                         log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2111                         r = EXIT_FAILURE;
2112                         break;
2113                 } else {
2114                         log_error("Container %s failed due to unknown reason.", arg_machine);
2115                         r = EXIT_FAILURE;
2116                         break;
2117                 }
2118         }
2119
2120 finish:
2121         if (pid > 0)
2122                 kill(pid, SIGKILL);
2123
2124         free(arg_directory);
2125         free(arg_machine);
2126         free(arg_setenv);
2127         free(arg_network_interfaces);
2128
2129         return r;
2130 }