chiark / gitweb /
nspawn: x86 is special with its socketcall() semantics, be permissive in the seccomp...
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
44 #include <net/if.h>
45 #include <linux/veth.h>
46
47 #ifdef HAVE_SELINUX
48 #include <selinux/selinux.h>
49 #endif
50
51 #ifdef HAVE_SECCOMP
52 #include <seccomp.h>
53 #endif
54
55 #include "sd-daemon.h"
56 #include "sd-bus.h"
57 #include "sd-id128.h"
58 #include "sd-rtnl.h"
59 #include "log.h"
60 #include "util.h"
61 #include "mkdir.h"
62 #include "macro.h"
63 #include "audit.h"
64 #include "missing.h"
65 #include "cgroup-util.h"
66 #include "strv.h"
67 #include "path-util.h"
68 #include "loopback-setup.h"
69 #include "dev-setup.h"
70 #include "fdset.h"
71 #include "build.h"
72 #include "fileio.h"
73 #include "bus-util.h"
74 #include "bus-error.h"
75 #include "ptyfwd.h"
76 #include "bus-kernel.h"
77 #include "env-util.h"
78 #include "def.h"
79 #include "rtnl-util.h"
80 #include "udev-util.h"
81
82 #ifdef HAVE_SECCOMP
83 #include "seccomp-util.h"
84 #endif
85
/* Journal linking mode, as selected with --link-journal= (or -j). */
enum LinkJournal {
        LINK_NO,        /* --link-journal=no */
        LINK_AUTO,      /* --link-journal=auto; initial value of arg_link_journal */
        LINK_HOST,      /* --link-journal=host */
        LINK_GUEST      /* --link-journal=guest, also chosen by -j */
};

typedef enum LinkJournal LinkJournal;
92
93 static char *arg_directory = NULL;
94 static char *arg_user = NULL;
95 static sd_id128_t arg_uuid = {};
96 static char *arg_machine = NULL;
97 static char *arg_selinux_context = NULL;
98 static char *arg_selinux_apifs_context = NULL;
99 static const char *arg_slice = NULL;
100 static bool arg_private_network = false;
101 static bool arg_read_only = false;
102 static bool arg_boot = false;
103 static LinkJournal arg_link_journal = LINK_AUTO;
104 static uint64_t arg_retain =
105         (1ULL << CAP_CHOWN) |
106         (1ULL << CAP_DAC_OVERRIDE) |
107         (1ULL << CAP_DAC_READ_SEARCH) |
108         (1ULL << CAP_FOWNER) |
109         (1ULL << CAP_FSETID) |
110         (1ULL << CAP_IPC_OWNER) |
111         (1ULL << CAP_KILL) |
112         (1ULL << CAP_LEASE) |
113         (1ULL << CAP_LINUX_IMMUTABLE) |
114         (1ULL << CAP_NET_BIND_SERVICE) |
115         (1ULL << CAP_NET_BROADCAST) |
116         (1ULL << CAP_NET_RAW) |
117         (1ULL << CAP_SETGID) |
118         (1ULL << CAP_SETFCAP) |
119         (1ULL << CAP_SETPCAP) |
120         (1ULL << CAP_SETUID) |
121         (1ULL << CAP_SYS_ADMIN) |
122         (1ULL << CAP_SYS_CHROOT) |
123         (1ULL << CAP_SYS_NICE) |
124         (1ULL << CAP_SYS_PTRACE) |
125         (1ULL << CAP_SYS_TTY_CONFIG) |
126         (1ULL << CAP_SYS_RESOURCE) |
127         (1ULL << CAP_SYS_BOOT) |
128         (1ULL << CAP_AUDIT_WRITE) |
129         (1ULL << CAP_AUDIT_CONTROL) |
130         (1ULL << CAP_MKNOD);
131 static char **arg_bind = NULL;
132 static char **arg_bind_ro = NULL;
133 static char **arg_setenv = NULL;
134 static bool arg_quiet = false;
135 static bool arg_share_system = false;
136 static bool arg_register = true;
137 static bool arg_keep_unit = false;
138 static char **arg_network_interfaces = NULL;
139 static bool arg_network_veth = false;
140 static char *arg_network_bridge = NULL;
141
142 static int help(void) {
143
144         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
145                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
146                "  -h --help                 Show this help\n"
147                "     --version              Print version string\n"
148                "  -q --quiet                Do not show status information\n"
149                "  -D --directory=NAME       Root directory for the container\n"
150                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
151                "  -u --user=USER            Run the command under specified user or uid\n"
152                "  -M --machine=NAME         Set the machine name for the container\n"
153                "     --uuid=UUID            Set a specific machine UUID for the container\n"
154                "  -S --slice=SLICE          Place the container in the specified slice\n"
155                "     --private-network      Disable network in container\n"
156                "     --network-interface=INTERFACE\n"
157                "                            Assign an existing network interface to the\n"
158                "                            container\n"
159                "     --network-veth         Add a virtual ethernet connection between host\n"
160                "                            and container\n"
161                "     --network-bridge=INTERFACE\n"
162                "                            Add a virtual ethernet connection between host\n"
163                "                            and container and add it to an existing bridge on\n"
164                "                            the host\n"
165                "  -Z --selinux-context=SECLABEL\n"
166                "                            Set the SELinux security context to be used by\n"
167                "                            processes in the container\n"
168                "  -L --selinux-apifs-context=SECLABEL\n"
169                "                            Set the SELinux security context to be used by\n"
170                "                            API/tmpfs file systems in the container\n"
171                "     --capability=CAP       In addition to the default, retain specified\n"
172                "                            capability\n"
173                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
174                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
175                "  -j                        Equivalent to --link-journal=host\n"
176                "     --read-only            Mount the root directory read-only\n"
177                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
178                "                            the container\n"
179                "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
180                "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
181                "     --share-system         Share system namespaces with host\n"
182                "     --register=BOOLEAN     Register container as machine\n"
183                "     --keep-unit            Do not register a scope for the machine, reuse\n"
184                "                            the service unit nspawn is running in\n",
185                program_invocation_short_name);
186
187         return 0;
188 }
189
190 static int parse_argv(int argc, char *argv[]) {
191
192         enum {
193                 ARG_VERSION = 0x100,
194                 ARG_PRIVATE_NETWORK,
195                 ARG_UUID,
196                 ARG_READ_ONLY,
197                 ARG_CAPABILITY,
198                 ARG_DROP_CAPABILITY,
199                 ARG_LINK_JOURNAL,
200                 ARG_BIND,
201                 ARG_BIND_RO,
202                 ARG_SETENV,
203                 ARG_SHARE_SYSTEM,
204                 ARG_REGISTER,
205                 ARG_KEEP_UNIT,
206                 ARG_NETWORK_INTERFACE,
207                 ARG_NETWORK_VETH,
208                 ARG_NETWORK_BRIDGE,
209         };
210
211         static const struct option options[] = {
212                 { "help",                  no_argument,       NULL, 'h'                   },
213                 { "version",               no_argument,       NULL, ARG_VERSION           },
214                 { "directory",             required_argument, NULL, 'D'                   },
215                 { "user",                  required_argument, NULL, 'u'                   },
216                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
217                 { "boot",                  no_argument,       NULL, 'b'                   },
218                 { "uuid",                  required_argument, NULL, ARG_UUID              },
219                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
220                 { "capability",            required_argument, NULL, ARG_CAPABILITY        },
221                 { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
222                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
223                 { "bind",                  required_argument, NULL, ARG_BIND              },
224                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
225                 { "machine",               required_argument, NULL, 'M'                   },
226                 { "slice",                 required_argument, NULL, 'S'                   },
227                 { "setenv",                required_argument, NULL, ARG_SETENV            },
228                 { "selinux-context",       required_argument, NULL, 'Z'                   },
229                 { "selinux-apifs-context", required_argument, NULL, 'L'                   },
230                 { "quiet",                 no_argument,       NULL, 'q'                   },
231                 { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
232                 { "register",              required_argument, NULL, ARG_REGISTER          },
233                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
234                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
235                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
236                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
237                 {}
238         };
239
240         int c, r;
241         uint64_t plus = 0, minus = 0;
242
243         assert(argc >= 0);
244         assert(argv);
245
246         while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
247
248                 switch (c) {
249
250                 case 'h':
251                         return help();
252
253                 case ARG_VERSION:
254                         puts(PACKAGE_STRING);
255                         puts(SYSTEMD_FEATURES);
256                         return 0;
257
258                 case 'D':
259                         free(arg_directory);
260                         arg_directory = canonicalize_file_name(optarg);
261                         if (!arg_directory) {
262                                 log_error("Invalid root directory: %m");
263                                 return -ENOMEM;
264                         }
265
266                         break;
267
268                 case 'u':
269                         free(arg_user);
270                         arg_user = strdup(optarg);
271                         if (!arg_user)
272                                 return log_oom();
273
274                         break;
275
276                 case ARG_NETWORK_BRIDGE:
277                         arg_network_bridge = strdup(optarg);
278                         if (!arg_network_bridge)
279                                 return log_oom();
280
281                         /* fall through */
282
283                 case ARG_NETWORK_VETH:
284                         arg_network_veth = true;
285                         arg_private_network = true;
286                         break;
287
288                 case ARG_NETWORK_INTERFACE:
289                         if (strv_push(&arg_network_interfaces, optarg) < 0)
290                                 return log_oom();
291
292                         /* fall through */
293
294                 case ARG_PRIVATE_NETWORK:
295                         arg_private_network = true;
296                         break;
297
298                 case 'b':
299                         arg_boot = true;
300                         break;
301
302                 case ARG_UUID:
303                         r = sd_id128_from_string(optarg, &arg_uuid);
304                         if (r < 0) {
305                                 log_error("Invalid UUID: %s", optarg);
306                                 return r;
307                         }
308                         break;
309
310                 case 'S':
311                         arg_slice = strdup(optarg);
312                         if (!arg_slice)
313                                 return log_oom();
314
315                         break;
316
317                 case 'M':
318                         if (isempty(optarg)) {
319                                 free(arg_machine);
320                                 arg_machine = NULL;
321                         } else {
322
323                                 if (!hostname_is_valid(optarg)) {
324                                         log_error("Invalid machine name: %s", optarg);
325                                         return -EINVAL;
326                                 }
327
328                                 free(arg_machine);
329                                 arg_machine = strdup(optarg);
330                                 if (!arg_machine)
331                                         return log_oom();
332
333                                 break;
334                         }
335
336                 case 'Z':
337                         arg_selinux_context = optarg;
338                         break;
339
340                 case 'L':
341                         arg_selinux_apifs_context = optarg;
342                         break;
343
344                 case ARG_READ_ONLY:
345                         arg_read_only = true;
346                         break;
347
348                 case ARG_CAPABILITY:
349                 case ARG_DROP_CAPABILITY: {
350                         char *state, *word;
351                         size_t length;
352
353                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
354                                 _cleanup_free_ char *t;
355                                 cap_value_t cap;
356
357                                 t = strndup(word, length);
358                                 if (!t)
359                                         return log_oom();
360
361                                 if (streq(t, "all")) {
362                                         if (c == ARG_CAPABILITY)
363                                                 plus = (uint64_t) -1;
364                                         else
365                                                 minus = (uint64_t) -1;
366                                 } else {
367                                         if (cap_from_name(t, &cap) < 0) {
368                                                 log_error("Failed to parse capability %s.", t);
369                                                 return -EINVAL;
370                                         }
371
372                                         if (c == ARG_CAPABILITY)
373                                                 plus |= 1ULL << (uint64_t) cap;
374                                         else
375                                                 minus |= 1ULL << (uint64_t) cap;
376                                 }
377                         }
378
379                         break;
380                 }
381
382                 case 'j':
383                         arg_link_journal = LINK_GUEST;
384                         break;
385
386                 case ARG_LINK_JOURNAL:
387                         if (streq(optarg, "auto"))
388                                 arg_link_journal = LINK_AUTO;
389                         else if (streq(optarg, "no"))
390                                 arg_link_journal = LINK_NO;
391                         else if (streq(optarg, "guest"))
392                                 arg_link_journal = LINK_GUEST;
393                         else if (streq(optarg, "host"))
394                                 arg_link_journal = LINK_HOST;
395                         else {
396                                 log_error("Failed to parse link journal mode %s", optarg);
397                                 return -EINVAL;
398                         }
399
400                         break;
401
402                 case ARG_BIND:
403                 case ARG_BIND_RO: {
404                         _cleanup_free_ char *a = NULL, *b = NULL;
405                         char *e;
406                         char ***x;
407
408                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
409
410                         e = strchr(optarg, ':');
411                         if (e) {
412                                 a = strndup(optarg, e - optarg);
413                                 b = strdup(e + 1);
414                         } else {
415                                 a = strdup(optarg);
416                                 b = strdup(optarg);
417                         }
418
419                         if (!a || !b)
420                                 return log_oom();
421
422                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
423                                 log_error("Invalid bind mount specification: %s", optarg);
424                                 return -EINVAL;
425                         }
426
427                         r = strv_extend(x, a);
428                         if (r < 0)
429                                 return log_oom();
430
431                         r = strv_extend(x, b);
432                         if (r < 0)
433                                 return log_oom();
434
435                         break;
436                 }
437
438                 case ARG_SETENV: {
439                         char **n;
440
441                         if (!env_assignment_is_valid(optarg)) {
442                                 log_error("Environment variable assignment '%s' is not valid.", optarg);
443                                 return -EINVAL;
444                         }
445
446                         n = strv_env_set(arg_setenv, optarg);
447                         if (!n)
448                                 return log_oom();
449
450                         strv_free(arg_setenv);
451                         arg_setenv = n;
452                         break;
453                 }
454
455                 case 'q':
456                         arg_quiet = true;
457                         break;
458
459                 case ARG_SHARE_SYSTEM:
460                         arg_share_system = true;
461                         break;
462
463                 case ARG_REGISTER:
464                         r = parse_boolean(optarg);
465                         if (r < 0) {
466                                 log_error("Failed to parse --register= argument: %s", optarg);
467                                 return r;
468                         }
469
470                         arg_register = r;
471                         break;
472
473                 case ARG_KEEP_UNIT:
474                         arg_keep_unit = true;
475                         break;
476
477                 case '?':
478                         return -EINVAL;
479
480                 default:
481                         assert_not_reached("Unhandled option");
482                 }
483         }
484
485         if (arg_share_system)
486                 arg_register = false;
487
488         if (arg_boot && arg_share_system) {
489                 log_error("--boot and --share-system may not be combined.");
490                 return -EINVAL;
491         }
492
493         if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
494                 log_error("--keep-unit may not be used when invoked from a user session.");
495                 return -EINVAL;
496         }
497
498         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
499
500         return 1;
501 }
502
503 static int mount_all(const char *dest) {
504
505         typedef struct MountPoint {
506                 const char *what;
507                 const char *where;
508                 const char *type;
509                 const char *options;
510                 unsigned long flags;
511                 bool fatal;
512         } MountPoint;
513
514         static const MountPoint mount_table[] = {
515                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
516                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
517                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
518                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
519                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
520                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
521                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
522                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
523 #ifdef HAVE_SELINUX
524                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
525                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
526 #endif
527         };
528
529         unsigned k;
530         int r = 0;
531
532         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
533                 _cleanup_free_ char *where = NULL;
534 #ifdef HAVE_SELINUX
535                 _cleanup_free_ char *options = NULL;
536 #endif
537                 const char *o;
538                 int t;
539
540                 where = strjoin(dest, "/", mount_table[k].where, NULL);
541                 if (!where)
542                         return log_oom();
543
544                 t = path_is_mount_point(where, true);
545                 if (t < 0) {
546                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
547
548                         if (r == 0)
549                                 r = t;
550
551                         continue;
552                 }
553
554                 /* Skip this entry if it is not a remount. */
555                 if (mount_table[k].what && t > 0)
556                         continue;
557
558                 mkdir_p(where, 0755);
559
560 #ifdef HAVE_SELINUX
561                 if (arg_selinux_apifs_context &&
562                     (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
563                         options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
564                         if (!options)
565                                 return log_oom();
566
567                         o = options;
568                 } else
569 #endif
570                         o = mount_table[k].options;
571
572
573                 if (mount(mount_table[k].what,
574                           where,
575                           mount_table[k].type,
576                           mount_table[k].flags,
577                           o) < 0 &&
578                     mount_table[k].fatal) {
579
580                         log_error("mount(%s) failed: %m", where);
581
582                         if (r == 0)
583                                 r = -errno;
584                 }
585         }
586
587         return r;
588 }
589
590 static int mount_binds(const char *dest, char **l, unsigned long flags) {
591         char **x, **y;
592
593         STRV_FOREACH_PAIR(x, y, l) {
594                 char *where;
595                 struct stat source_st, dest_st;
596                 int r;
597
598                 if (stat(*x, &source_st) < 0) {
599                         log_error("failed to stat %s: %m", *x);
600                         return -errno;
601                 }
602
603                 where = strappenda(dest, *y);
604                 r = stat(where, &dest_st);
605                 if (r == 0) {
606                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
607                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
608                                                 *x, where);
609                                 return -EINVAL;
610                         }
611                 } else if (errno == ENOENT) {
612                         r = mkdir_parents_label(where, 0755);
613                         if (r < 0) {
614                                 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
615                                 return r;
616                         }
617                 } else {
618                         log_error("Failed to bind mount %s: %s", *x, strerror(errno));
619                         return -errno;
620                 }
621                 /* Create the mount point, but be conservative -- refuse to create block
622                 * and char devices. */
623                 if (S_ISDIR(source_st.st_mode))
624                         mkdir_label(where, 0755);
625                 else if (S_ISFIFO(source_st.st_mode))
626                         mkfifo(where, 0644);
627                 else if (S_ISSOCK(source_st.st_mode))
628                         mknod(where, 0644 | S_IFSOCK, 0);
629                 else if (S_ISREG(source_st.st_mode))
630                         touch(where);
631                 else {
632                         log_error("Refusing to create mountpoint for file: %s", *x);
633                         return -ENOTSUP;
634                 }
635
636                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
637                         log_error("mount(%s) failed: %m", where);
638                         return -errno;
639                 }
640
641                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
642                         log_error("mount(%s) failed: %m", where);
643                         return -errno;
644                 }
645         }
646
647         return 0;
648 }
649
650 static int setup_timezone(const char *dest) {
651         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
652         char *z, *y;
653         int r;
654
655         assert(dest);
656
657         /* Fix the timezone, if possible */
658         r = readlink_malloc("/etc/localtime", &p);
659         if (r < 0) {
660                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
661                 return 0;
662         }
663
664         z = path_startswith(p, "../usr/share/zoneinfo/");
665         if (!z)
666                 z = path_startswith(p, "/usr/share/zoneinfo/");
667         if (!z) {
668                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
669                 return 0;
670         }
671
672         where = strappend(dest, "/etc/localtime");
673         if (!where)
674                 return log_oom();
675
676         r = readlink_malloc(where, &q);
677         if (r >= 0) {
678                 y = path_startswith(q, "../usr/share/zoneinfo/");
679                 if (!y)
680                         y = path_startswith(q, "/usr/share/zoneinfo/");
681
682
683                 /* Already pointing to the right place? Then do nothing .. */
684                 if (y && streq(y, z))
685                         return 0;
686         }
687
688         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
689         if (!check)
690                 return log_oom();
691
692         if (access(check, F_OK) < 0) {
693                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
694                 return 0;
695         }
696
697         what = strappend("../usr/share/zoneinfo/", z);
698         if (!what)
699                 return log_oom();
700
701         unlink(where);
702         if (symlink(what, where) < 0) {
703                 log_error("Failed to correct timezone of container: %m");
704                 return 0;
705         }
706
707         return 0;
708 }
709
710 static int setup_resolv_conf(const char *dest) {
711         char _cleanup_free_ *where = NULL;
712
713         assert(dest);
714
715         if (arg_private_network)
716                 return 0;
717
718         /* Fix resolv.conf, if possible */
719         where = strappend(dest, "/etc/resolv.conf");
720         if (!where)
721                 return log_oom();
722
723         /* We don't really care for the results of this really. If it
724          * fails, it fails, but meh... */
725         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
726
727         return 0;
728 }
729
730 static int setup_boot_id(const char *dest) {
731         _cleanup_free_ char *from = NULL, *to = NULL;
732         sd_id128_t rnd;
733         char as_uuid[37];
734         int r;
735
736         assert(dest);
737
738         if (arg_share_system)
739                 return 0;
740
741         /* Generate a new randomized boot ID, so that each boot-up of
742          * the container gets a new one */
743
744         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
745         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
746         if (!from || !to)
747                 return log_oom();
748
749         r = sd_id128_randomize(&rnd);
750         if (r < 0) {
751                 log_error("Failed to generate random boot id: %s", strerror(-r));
752                 return r;
753         }
754
755         snprintf(as_uuid, sizeof(as_uuid),
756                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
757                  SD_ID128_FORMAT_VAL(rnd));
758         char_array_0(as_uuid);
759
760         r = write_string_file(from, as_uuid);
761         if (r < 0) {
762                 log_error("Failed to write boot id: %s", strerror(-r));
763                 return r;
764         }
765
766         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
767                 log_error("Failed to bind mount boot id: %m");
768                 r = -errno;
769         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
770                 log_warning("Failed to make boot id read-only: %m");
771
772         unlink(from);
773         return r;
774 }
775
776 static int copy_devnodes(const char *dest) {
777
778         static const char devnodes[] =
779                 "null\0"
780                 "zero\0"
781                 "full\0"
782                 "random\0"
783                 "urandom\0"
784                 "tty\0";
785
786         const char *d;
787         int r = 0;
788         _cleanup_umask_ mode_t u;
789
790         assert(dest);
791
792         u = umask(0000);
793
794         NULSTR_FOREACH(d, devnodes) {
795                 _cleanup_free_ char *from = NULL, *to = NULL;
796                 struct stat st;
797
798                 from = strappend("/dev/", d);
799                 to = strjoin(dest, "/dev/", d, NULL);
800                 if (!from || !to)
801                         return log_oom();
802
803                 if (stat(from, &st) < 0) {
804
805                         if (errno != ENOENT) {
806                                 log_error("Failed to stat %s: %m", from);
807                                 return -errno;
808                         }
809
810                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
811
812                         log_error("%s is not a char or block device, cannot copy", from);
813                         return -EIO;
814
815                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
816
817                         log_error("mknod(%s) failed: %m", dest);
818                         return  -errno;
819                 }
820         }
821
822         return r;
823 }
824
825 static int setup_ptmx(const char *dest) {
826         _cleanup_free_ char *p = NULL;
827
828         p = strappend(dest, "/dev/ptmx");
829         if (!p)
830                 return log_oom();
831
832         if (symlink("pts/ptmx", p) < 0) {
833                 log_error("Failed to create /dev/ptmx symlink: %m");
834                 return -errno;
835         }
836
837         return 0;
838 }
839
840 static int setup_dev_console(const char *dest, const char *console) {
841         struct stat st;
842         _cleanup_free_ char *to = NULL;
843         int r;
844         _cleanup_umask_ mode_t u;
845
846         assert(dest);
847         assert(console);
848
849         u = umask(0000);
850
851         if (stat(console, &st) < 0) {
852                 log_error("Failed to stat %s: %m", console);
853                 return -errno;
854
855         } else if (!S_ISCHR(st.st_mode)) {
856                 log_error("/dev/console is not a char device");
857                 return -EIO;
858         }
859
860         r = chmod_and_chown(console, 0600, 0, 0);
861         if (r < 0) {
862                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
863                 return r;
864         }
865
866         if (asprintf(&to, "%s/dev/console", dest) < 0)
867                 return log_oom();
868
869         /* We need to bind mount the right tty to /dev/console since
870          * ptys can only exist on pts file systems. To have something
871          * to bind mount things on we create a device node first, that
872          * has the right major/minor (note that the major minor
873          * doesn't actually matter here, since we mount it over
874          * anyway). */
875
876         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
877                 log_error("mknod() for /dev/console failed: %m");
878                 return -errno;
879         }
880
881         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
882                 log_error("Bind mount for /dev/console failed: %m");
883                 return -errno;
884         }
885
886         return 0;
887 }
888
889 static int setup_kmsg(const char *dest, int kmsg_socket) {
890         _cleanup_free_ char *from = NULL, *to = NULL;
891         int r, fd, k;
892         _cleanup_umask_ mode_t u;
893         union {
894                 struct cmsghdr cmsghdr;
895                 uint8_t buf[CMSG_SPACE(sizeof(int))];
896         } control = {};
897         struct msghdr mh = {
898                 .msg_control = &control,
899                 .msg_controllen = sizeof(control),
900         };
901         struct cmsghdr *cmsg;
902
903         assert(dest);
904         assert(kmsg_socket >= 0);
905
906         u = umask(0000);
907
908         /* We create the kmsg FIFO as /dev/kmsg, but immediately
909          * delete it after bind mounting it to /proc/kmsg. While FIFOs
910          * on the reading side behave very similar to /proc/kmsg,
911          * their writing side behaves differently from /dev/kmsg in
912          * that writing blocks when nothing is reading. In order to
913          * avoid any problems with containers deadlocking due to this
914          * we simply make /dev/kmsg unavailable to the container. */
915         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
916             asprintf(&to, "%s/proc/kmsg", dest) < 0)
917                 return log_oom();
918
919         if (mkfifo(from, 0600) < 0) {
920                 log_error("mkfifo() for /dev/kmsg failed: %m");
921                 return -errno;
922         }
923
924         r = chmod_and_chown(from, 0600, 0, 0);
925         if (r < 0) {
926                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
927                 return r;
928         }
929
930         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
931                 log_error("Bind mount for /proc/kmsg failed: %m");
932                 return -errno;
933         }
934
935         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
936         if (fd < 0) {
937                 log_error("Failed to open fifo: %m");
938                 return -errno;
939         }
940
941         cmsg = CMSG_FIRSTHDR(&mh);
942         cmsg->cmsg_level = SOL_SOCKET;
943         cmsg->cmsg_type = SCM_RIGHTS;
944         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
945         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
946
947         mh.msg_controllen = cmsg->cmsg_len;
948
949         /* Store away the fd in the socket, so that it stays open as
950          * long as we run the child */
951         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
952         close_nointr_nofail(fd);
953
954         if (k < 0) {
955                 log_error("Failed to send FIFO fd: %m");
956                 return -errno;
957         }
958
959         /* And now make the FIFO unavailable as /dev/kmsg... */
960         unlink(from);
961         return 0;
962 }
963
964 static int setup_hostname(void) {
965
966         if (arg_share_system)
967                 return 0;
968
969         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
970                 return -errno;
971
972         return 0;
973 }
974
975 static int setup_journal(const char *directory) {
976         sd_id128_t machine_id, this_id;
977         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
978         char *id;
979         int r;
980
981         p = strappend(directory, "/etc/machine-id");
982         if (!p)
983                 return log_oom();
984
985         r = read_one_line_file(p, &b);
986         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
987                 return 0;
988         else if (r < 0) {
989                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
990                 return r;
991         }
992
993         id = strstrip(b);
994         if (isempty(id) && arg_link_journal == LINK_AUTO)
995                 return 0;
996
997         /* Verify validity */
998         r = sd_id128_from_string(id, &machine_id);
999         if (r < 0) {
1000                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1001                 return r;
1002         }
1003
1004         r = sd_id128_get_machine(&this_id);
1005         if (r < 0) {
1006                 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1007                 return r;
1008         }
1009
1010         if (sd_id128_equal(machine_id, this_id)) {
1011                 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1012                          "Host and machine ids are equal (%s): refusing to link journals", id);
1013                 if (arg_link_journal == LINK_AUTO)
1014                         return 0;
1015                 return
1016                         -EEXIST;
1017         }
1018
1019         if (arg_link_journal == LINK_NO)
1020                 return 0;
1021
1022         free(p);
1023         p = strappend("/var/log/journal/", id);
1024         q = strjoin(directory, "/var/log/journal/", id, NULL);
1025         if (!p || !q)
1026                 return log_oom();
1027
1028         if (path_is_mount_point(p, false) > 0) {
1029                 if (arg_link_journal != LINK_AUTO) {
1030                         log_error("%s: already a mount point, refusing to use for journal", p);
1031                         return -EEXIST;
1032                 }
1033
1034                 return 0;
1035         }
1036
1037         if (path_is_mount_point(q, false) > 0) {
1038                 if (arg_link_journal != LINK_AUTO) {
1039                         log_error("%s: already a mount point, refusing to use for journal", q);
1040                         return -EEXIST;
1041                 }
1042
1043                 return 0;
1044         }
1045
1046         r = readlink_and_make_absolute(p, &d);
1047         if (r >= 0) {
1048                 if ((arg_link_journal == LINK_GUEST ||
1049                      arg_link_journal == LINK_AUTO) &&
1050                     path_equal(d, q)) {
1051
1052                         r = mkdir_p(q, 0755);
1053                         if (r < 0)
1054                                 log_warning("failed to create directory %s: %m", q);
1055                         return 0;
1056                 }
1057
1058                 if (unlink(p) < 0) {
1059                         log_error("Failed to remove symlink %s: %m", p);
1060                         return -errno;
1061                 }
1062         } else if (r == -EINVAL) {
1063
1064                 if (arg_link_journal == LINK_GUEST &&
1065                     rmdir(p) < 0) {
1066
1067                         if (errno == ENOTDIR) {
1068                                 log_error("%s already exists and is neither a symlink nor a directory", p);
1069                                 return r;
1070                         } else {
1071                                 log_error("Failed to remove %s: %m", p);
1072                                 return -errno;
1073                         }
1074                 }
1075         } else if (r != -ENOENT) {
1076                 log_error("readlink(%s) failed: %m", p);
1077                 return r;
1078         }
1079
1080         if (arg_link_journal == LINK_GUEST) {
1081
1082                 if (symlink(q, p) < 0) {
1083                         log_error("Failed to symlink %s to %s: %m", q, p);
1084                         return -errno;
1085                 }
1086
1087                 r = mkdir_p(q, 0755);
1088                 if (r < 0)
1089                         log_warning("failed to create directory %s: %m", q);
1090                 return 0;
1091         }
1092
1093         if (arg_link_journal == LINK_HOST) {
1094                 r = mkdir_p(p, 0755);
1095                 if (r < 0) {
1096                         log_error("Failed to create %s: %m", p);
1097                         return r;
1098                 }
1099
1100         } else if (access(p, F_OK) < 0)
1101                 return 0;
1102
1103         if (dir_is_empty(q) == 0) {
1104                 log_error("%s not empty.", q);
1105                 return -ENOTEMPTY;
1106         }
1107
1108         r = mkdir_p(q, 0755);
1109         if (r < 0) {
1110                 log_error("Failed to create %s: %m", q);
1111                 return r;
1112         }
1113
1114         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1115                 log_error("Failed to bind mount journal from host into guest: %m");
1116                 return -errno;
1117         }
1118
1119         return 0;
1120 }
1121
/* Creates the directory <dest>/dev/kdbus and bind mounts 'path' onto it. A
 * NULL 'path' means there is nothing to mount and yields 0. Returns 0 on
 * success or a negative errno-style value. */
static int setup_kdbus(const char *dest, const char *path) {
        const char *mount_point;

        if (path == NULL)
                return 0;

        mount_point = strappenda(dest, "/dev/kdbus");

        if (mkdir(mount_point, 0755) < 0) {
                log_error("Failed to create kdbus path: %m");
                return -errno;
        }

        if (mount(path, mount_point, "bind", MS_BIND, NULL) < 0) {
                log_error("Failed to mount kdbus domain path: %m");
                return -errno;
        }

        return 0;
}
1141
1142 static int drop_capabilities(void) {
1143         return capability_bounding_set_drop(~arg_retain, false);
1144 }
1145
1146 static int register_machine(pid_t pid) {
1147         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1148         _cleanup_bus_unref_ sd_bus *bus = NULL;
1149         int r;
1150
1151         if (!arg_register)
1152                 return 0;
1153
1154         r = sd_bus_default_system(&bus);
1155         if (r < 0) {
1156                 log_error("Failed to open system bus: %s", strerror(-r));
1157                 return r;
1158         }
1159
1160         if (arg_keep_unit) {
1161                 r = sd_bus_call_method(
1162                                 bus,
1163                                 "org.freedesktop.machine1",
1164                                 "/org/freedesktop/machine1",
1165                                 "org.freedesktop.machine1.Manager",
1166                                 "RegisterMachine",
1167                                 &error,
1168                                 NULL,
1169                                 "sayssus",
1170                                 arg_machine,
1171                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1172                                 "nspawn",
1173                                 "container",
1174                                 (uint32_t) pid,
1175                                 strempty(arg_directory));
1176         } else {
1177                 r = sd_bus_call_method(
1178                                 bus,
1179                                 "org.freedesktop.machine1",
1180                                 "/org/freedesktop/machine1",
1181                                 "org.freedesktop.machine1.Manager",
1182                                 "CreateMachine",
1183                                 &error,
1184                                 NULL,
1185                                 "sayssusa(sv)",
1186                                 arg_machine,
1187                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1188                                 "nspawn",
1189                                 "container",
1190                                 (uint32_t) pid,
1191                                 strempty(arg_directory),
1192                                 !isempty(arg_slice), "Slice", "s", arg_slice);
1193         }
1194
1195         if (r < 0) {
1196                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1197                 return r;
1198         }
1199
1200         return 0;
1201 }
1202
1203 static int terminate_machine(pid_t pid) {
1204         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1205         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1206         _cleanup_bus_unref_ sd_bus *bus = NULL;
1207         const char *path;
1208         int r;
1209
1210         if (!arg_register)
1211                 return 0;
1212
1213         r = sd_bus_default_system(&bus);
1214         if (r < 0) {
1215                 log_error("Failed to open system bus: %s", strerror(-r));
1216                 return r;
1217         }
1218
1219         r = sd_bus_call_method(
1220                         bus,
1221                         "org.freedesktop.machine1",
1222                         "/org/freedesktop/machine1",
1223                         "org.freedesktop.machine1.Manager",
1224                         "GetMachineByPID",
1225                         &error,
1226                         &reply,
1227                         "u",
1228                         (uint32_t) pid);
1229         if (r < 0) {
1230                 /* Note that the machine might already have been
1231                  * cleaned up automatically, hence don't consider it a
1232                  * failure if we cannot get the machine object. */
1233                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1234                 return 0;
1235         }
1236
1237         r = sd_bus_message_read(reply, "o", &path);
1238         if (r < 0)
1239                 return bus_log_parse_error(r);
1240
1241         r = sd_bus_call_method(
1242                         bus,
1243                         "org.freedesktop.machine1",
1244                         path,
1245                         "org.freedesktop.machine1.Machine",
1246                         "Terminate",
1247                         &error,
1248                         NULL,
1249                         NULL);
1250         if (r < 0) {
1251                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1252                 return 0;
1253         }
1254
1255         return 0;
1256 }
1257
1258 static int reset_audit_loginuid(void) {
1259         _cleanup_free_ char *p = NULL;
1260         int r;
1261
1262         if (arg_share_system)
1263                 return 0;
1264
1265         r = read_one_line_file("/proc/self/loginuid", &p);
1266         if (r == -EEXIST)
1267                 return 0;
1268         if (r < 0) {
1269                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1270                 return r;
1271         }
1272
1273         /* Already reset? */
1274         if (streq(p, "4294967295"))
1275                 return 0;
1276
1277         r = write_string_file("/proc/self/loginuid", "4294967295");
1278         if (r < 0) {
1279                 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1280                           "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1281                           "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1282                           "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1283                           "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1284
1285                 sleep(5);
1286         }
1287
1288         return 0;
1289 }
1290
1291 static int setup_veth(pid_t pid, char iface_name[]) {
1292         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1293         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1294         int r;
1295
1296         if (!arg_private_network)
1297                 return 0;
1298
1299         if (!arg_network_veth)
1300                 return 0;
1301
1302         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1303
1304         r = sd_rtnl_open(0, &rtnl);
1305         if (r < 0) {
1306                 log_error("Failed to connect to netlink: %s", strerror(-r));
1307                 return r;
1308         }
1309
1310         r = sd_rtnl_message_new_link(rtnl, RTM_NEWLINK, 0, &m);
1311         if (r < 0) {
1312                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1313                 return r;
1314         }
1315
1316         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1317         if (r < 0) {
1318                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1319                 return r;
1320         }
1321
1322         r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1323         if (r < 0) {
1324                 log_error("Failed to open netlink container: %s", strerror(-r));
1325                 return r;
1326         }
1327
1328         r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1329         if (r < 0) {
1330                 log_error("Failed to append netlink kind: %s", strerror(-r));
1331                 return r;
1332         }
1333
1334         r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1335         if (r < 0) {
1336                 log_error("Failed to open netlink container: %s", strerror(-r));
1337                 return r;
1338         }
1339
1340         r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1341         if (r < 0) {
1342                 log_error("Failed to open netlink container: %s", strerror(-r));
1343                 return r;
1344         }
1345
1346         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1347         if (r < 0) {
1348                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1349                 return r;
1350         }
1351
1352         r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1353         if (r < 0) {
1354                 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1355                 return r;
1356         }
1357
1358         r = sd_rtnl_message_close_container(m);
1359         if (r < 0) {
1360                 log_error("Failed to close netlink container: %s", strerror(-r));
1361                 return r;
1362         }
1363
1364         r = sd_rtnl_message_close_container(m);
1365         if (r < 0) {
1366                 log_error("Failed to close netlink container: %s", strerror(-r));
1367                 return r;
1368         }
1369
1370         r = sd_rtnl_message_close_container(m);
1371         if (r < 0) {
1372                 log_error("Failed to close netlink container: %s", strerror(-r));
1373                 return r;
1374         }
1375
1376         r = sd_rtnl_call(rtnl, m, 0, NULL);
1377         if (r < 0) {
1378                 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1379                 return r;
1380         }
1381
1382         return 0;
1383 }
1384
1385 static int setup_bridge(const char veth_name[]) {
1386         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1387         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1388         int r, bridge;
1389
1390         if (!arg_private_network)
1391                 return 0;
1392
1393         if (!arg_network_veth)
1394                 return 0;
1395
1396         if (!arg_network_bridge)
1397                 return 0;
1398
1399         bridge = (int) if_nametoindex(arg_network_bridge);
1400         if (bridge <= 0) {
1401                 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1402                 return -errno;
1403         }
1404
1405         r = sd_rtnl_open(0, &rtnl);
1406         if (r < 0) {
1407                 log_error("Failed to connect to netlink: %s", strerror(-r));
1408                 return r;
1409         }
1410
1411         r = sd_rtnl_message_new_link(rtnl, RTM_SETLINK, 0, &m);
1412         if (r < 0) {
1413                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1414                 return r;
1415         }
1416
1417         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1418         if (r < 0) {
1419                 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1420                 return r;
1421         }
1422
1423         r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1424         if (r < 0) {
1425                 log_error("Failed to add netlink master field: %s", strerror(-r));
1426                 return r;
1427         }
1428
1429         r = sd_rtnl_call(rtnl, m, 0, NULL);
1430         if (r < 0) {
1431                 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
1432                 return r;
1433         }
1434
1435         return 0;
1436 }
1437
1438 static int move_network_interfaces(pid_t pid) {
1439         _cleanup_udev_unref_ struct udev *udev = NULL;
1440         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1441         char **i;
1442         int r;
1443
1444         if (!arg_private_network)
1445                 return 0;
1446
1447         if (strv_isempty(arg_network_interfaces))
1448                 return 0;
1449
1450         r = sd_rtnl_open(0, &rtnl);
1451         if (r < 0) {
1452                 log_error("Failed to connect to netlink: %s", strerror(-r));
1453                 return r;
1454         }
1455
1456         udev = udev_new();
1457         if (!udev) {
1458                 log_error("Failed to connect to udev.");
1459                 return -ENOMEM;
1460         }
1461
1462         STRV_FOREACH(i, arg_network_interfaces) {
1463                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1464                 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1465                 char ifi_str[2 + DECIMAL_STR_MAX(int)];
1466                 int ifi;
1467
1468                 ifi = (int) if_nametoindex(*i);
1469                 if (ifi <= 0) {
1470                         log_error("Failed to resolve interface %s: %m", *i);
1471                         return -errno;
1472                 }
1473
1474                 sprintf(ifi_str, "n%i", ifi);
1475                 d = udev_device_new_from_device_id(udev, ifi_str);
1476                 if (!d) {
1477                         log_error("Failed to get udev device for interface %s: %m", *i);
1478                         return -errno;
1479                 }
1480
1481                 if (udev_device_get_is_initialized(d) <= 0) {
1482                         log_error("Network interface %s is not initialized yet.", *i);
1483                         return -EBUSY;
1484                 }
1485
1486                 r = sd_rtnl_message_new_link(rtnl, RTM_NEWLINK, ifi, &m);
1487                 if (r < 0) {
1488                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1489                         return r;
1490                 }
1491
1492                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1493                 if (r < 0) {
1494                         log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1495                         return r;
1496                 }
1497
1498                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1499                 if (r < 0) {
1500                         log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
1501                         return r;
1502                 }
1503         }
1504
1505         return 0;
1506 }
1507
/* Installs a seccomp filter that allows everything except
 * socket(AF_NETLINK, *, NETLINK_AUDIT), which is made to fail with
 * EAFNOSUPPORT. Compiles to a no-op returning 0 when HAVE_SECCOMP is not
 * defined. Returns 0 on success or a negative errno-style value. */
static int audit_still_doesnt_work_in_containers(void) {

#ifndef HAVE_SECCOMP
        return 0;
#else
        scmp_filter_ctx filter;
        int r;

        /*
           Audit does not work properly in containers: much of the
           userspace audit hookup fails when run inside of one. We do
           not bother and simply prevent audit sockets from being
           created.

           As a result socket(AF_NETLINK, *, NETLINK_AUDIT) fails with
           EAFNOSUPPORT, which audit userspace takes as indication that
           audit is disabled in the kernel.
         */

        filter = seccomp_init(SCMP_ACT_ALLOW);
        if (!filter)
                return log_oom();

        r = seccomp_add_secondary_archs(filter);
        if (r != -EEXIST && r < 0) {
                log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
                goto finish;
        }

        /* Match on the first (domain) and third (protocol) argument of
         * socket() */
        r = seccomp_rule_add(
                        filter,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0) {
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
                goto finish;
        }

        r = seccomp_attr_set(filter, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0) {
                log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                goto finish;
        }

        r = seccomp_load(filter);
        if (r < 0)
                log_error("Failed to install seccomp audit filter: %s", strerror(-r));

finish:
        /* The filter context is released on every path past seccomp_init() */
        seccomp_release(filter);
        return r;
#endif

}
1564
1565 int main(int argc, char *argv[]) {
1566
1567         _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
1568         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1569         _cleanup_free_ char *kdbus_domain = NULL;
1570         _cleanup_fdset_free_ FDSet *fds = NULL;
1571         const char *console = NULL;
1572         int r = EXIT_FAILURE, k;
1573         int n_fd_passed;
1574         pid_t pid = 0;
1575         sigset_t mask;
1576         char veth_name[IFNAMSIZ] = "ve-";
1577
1578         log_parse_environment();
1579         log_open();
1580
1581         k = parse_argv(argc, argv);
1582         if (k < 0)
1583                 goto finish;
1584         else if (k == 0) {
1585                 r = EXIT_SUCCESS;
1586                 goto finish;
1587         }
1588
1589         if (arg_directory) {
1590                 char *p;
1591
1592                 p = path_make_absolute_cwd(arg_directory);
1593                 free(arg_directory);
1594                 arg_directory = p;
1595         } else
1596                 arg_directory = get_current_dir_name();
1597
1598         if (!arg_directory) {
1599                 log_error("Failed to determine path, please use -D.");
1600                 goto finish;
1601         }
1602
1603         path_kill_slashes(arg_directory);
1604
1605         if (!arg_machine) {
1606                 arg_machine = strdup(basename(arg_directory));
1607                 if (!arg_machine) {
1608                         log_oom();
1609                         goto finish;
1610                 }
1611
1612                 hostname_cleanup(arg_machine, false);
1613                 if (isempty(arg_machine)) {
1614                         log_error("Failed to determine machine name automatically, please use -M.");
1615                         goto finish;
1616                 }
1617         }
1618
1619         if (geteuid() != 0) {
1620                 log_error("Need to be root.");
1621                 goto finish;
1622         }
1623
1624         if (sd_booted() <= 0) {
1625                 log_error("Not running on a systemd system.");
1626                 goto finish;
1627         }
1628
1629         if (path_equal(arg_directory, "/")) {
1630                 log_error("Spawning container on root directory not supported.");
1631                 goto finish;
1632         }
1633
1634         if (arg_boot) {
1635                 if (path_is_os_tree(arg_directory) <= 0) {
1636                         log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1637                         goto finish;
1638                 }
1639         } else {
1640                 const char *p;
1641
1642                 p = strappenda(arg_directory,
1643                                argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
1644                 if (access(p, F_OK) < 0) {
1645                         log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
1646                         goto finish;
1647
1648                 }
1649         }
1650
1651         log_close();
1652         n_fd_passed = sd_listen_fds(false);
1653         if (n_fd_passed > 0) {
1654                 k = fdset_new_listen_fds(&fds, false);
1655                 if (k < 0) {
1656                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1657                         goto finish;
1658                 }
1659         }
1660         fdset_close_others(fds);
1661         log_open();
1662
1663         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1664         if (master < 0) {
1665                 log_error("Failed to acquire pseudo tty: %m");
1666                 goto finish;
1667         }
1668
1669         console = ptsname(master);
1670         if (!console) {
1671                 log_error("Failed to determine tty name: %m");
1672                 goto finish;
1673         }
1674
1675         if (!arg_quiet)
1676                 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1677
1678         if (unlockpt(master) < 0) {
1679                 log_error("Failed to unlock tty: %m");
1680                 goto finish;
1681         }
1682
1683         if (access("/dev/kdbus/control", F_OK) >= 0) {
1684
1685                 if (arg_share_system) {
1686                         kdbus_domain = strdup("/dev/kdbus");
1687                         if (!kdbus_domain) {
1688                                 log_oom();
1689                                 goto finish;
1690                         }
1691                 } else {
1692                         const char *ns;
1693
1694                         ns = strappenda("machine-", arg_machine);
1695                         kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
1696                         if (r < 0)
1697                                 log_debug("Failed to create kdbus domain: %s", strerror(-r));
1698                         else
1699                                 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
1700                 }
1701         }
1702
1703         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1704                 log_error("Failed to create kmsg socket pair: %m");
1705                 goto finish;
1706         }
1707
1708         sd_notify(0, "READY=1");
1709
1710         assert_se(sigemptyset(&mask) == 0);
1711         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1712         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1713
1714         for (;;) {
1715                 siginfo_t status;
1716
1717                 sync_fd = eventfd(0, EFD_CLOEXEC);
1718                 if (sync_fd < 0) {
1719                         log_error("Failed to create event fd: %m");
1720                         goto finish;
1721                 }
1722
1723                 pid = syscall(__NR_clone,
1724                               SIGCHLD|CLONE_NEWNS|
1725                               (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
1726                               (arg_private_network ? CLONE_NEWNET : 0), NULL);
1727                 if (pid < 0) {
1728                         if (errno == EINVAL)
1729                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1730                         else
1731                                 log_error("clone() failed: %m");
1732
1733                         goto finish;
1734                 }
1735
1736                 if (pid == 0) {
1737                         /* child */
1738                         const char *home = NULL;
1739                         uid_t uid = (uid_t) -1;
1740                         gid_t gid = (gid_t) -1;
1741                         unsigned n_env = 2;
1742                         const char *envp[] = {
1743                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
1744                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1745                                 NULL, /* TERM */
1746                                 NULL, /* HOME */
1747                                 NULL, /* USER */
1748                                 NULL, /* LOGNAME */
1749                                 NULL, /* container_uuid */
1750                                 NULL, /* LISTEN_FDS */
1751                                 NULL, /* LISTEN_PID */
1752                                 NULL
1753                         };
1754                         char **env_use;
1755                         eventfd_t x;
1756
1757                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1758                         if (envp[n_env])
1759                                 n_env ++;
1760
1761                         close_nointr_nofail(master);
1762                         master = -1;
1763
1764                         close_nointr(STDIN_FILENO);
1765                         close_nointr(STDOUT_FILENO);
1766                         close_nointr(STDERR_FILENO);
1767
1768                         close_nointr_nofail(kmsg_socket_pair[0]);
1769                         kmsg_socket_pair[0] = -1;
1770
1771                         reset_all_signal_handlers();
1772
1773                         assert_se(sigemptyset(&mask) == 0);
1774                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1775
1776                         k = open_terminal(console, O_RDWR);
1777                         if (k != STDIN_FILENO) {
1778                                 if (k >= 0) {
1779                                         close_nointr_nofail(k);
1780                                         k = -EINVAL;
1781                                 }
1782
1783                                 log_error("Failed to open console: %s", strerror(-k));
1784                                 goto child_fail;
1785                         }
1786
1787                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1788                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1789                                 log_error("Failed to duplicate console: %m");
1790                                 goto child_fail;
1791                         }
1792
1793                         if (setsid() < 0) {
1794                                 log_error("setsid() failed: %m");
1795                                 goto child_fail;
1796                         }
1797
1798                         if (reset_audit_loginuid() < 0)
1799                                 goto child_fail;
1800
1801                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1802                                 log_error("PR_SET_PDEATHSIG failed: %m");
1803                                 goto child_fail;
1804                         }
1805
1806                         /* Mark everything as slave, so that we still
1807                          * receive mounts from the real root, but don't
1808                          * propagate mounts to the real root. */
1809                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1810                                 log_error("MS_SLAVE|MS_REC failed: %m");
1811                                 goto child_fail;
1812                         }
1813
1814                         /* Turn directory into bind mount */
1815                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1816                                 log_error("Failed to make bind mount.");
1817                                 goto child_fail;
1818                         }
1819
1820                         if (arg_read_only)
1821                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1822                                         log_error("Failed to make read-only.");
1823                                         goto child_fail;
1824                                 }
1825
1826                         if (mount_all(arg_directory) < 0)
1827                                 goto child_fail;
1828
1829                         if (copy_devnodes(arg_directory) < 0)
1830                                 goto child_fail;
1831
1832                         if (setup_ptmx(arg_directory) < 0)
1833                                 goto child_fail;
1834
1835                         dev_setup(arg_directory);
1836
1837                         if (audit_still_doesnt_work_in_containers() < 0)
1838                                 goto child_fail;
1839
1840                         if (setup_dev_console(arg_directory, console) < 0)
1841                                 goto child_fail;
1842
1843                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1844                                 goto child_fail;
1845
1846                         close_nointr_nofail(kmsg_socket_pair[1]);
1847                         kmsg_socket_pair[1] = -1;
1848
1849                         if (setup_boot_id(arg_directory) < 0)
1850                                 goto child_fail;
1851
1852                         if (setup_timezone(arg_directory) < 0)
1853                                 goto child_fail;
1854
1855                         if (setup_resolv_conf(arg_directory) < 0)
1856                                 goto child_fail;
1857
1858                         if (setup_journal(arg_directory) < 0)
1859                                 goto child_fail;
1860
1861                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1862                                 goto child_fail;
1863
1864                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1865                                 goto child_fail;
1866
1867                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
1868                                 goto child_fail;
1869
1870                         if (chdir(arg_directory) < 0) {
1871                                 log_error("chdir(%s) failed: %m", arg_directory);
1872                                 goto child_fail;
1873                         }
1874
1875                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1876                                 log_error("mount(MS_MOVE) failed: %m");
1877                                 goto child_fail;
1878                         }
1879
1880                         if (chroot(".") < 0) {
1881                                 log_error("chroot() failed: %m");
1882                                 goto child_fail;
1883                         }
1884
1885                         if (chdir("/") < 0) {
1886                                 log_error("chdir() failed: %m");
1887                                 goto child_fail;
1888                         }
1889
1890                         umask(0022);
1891
1892                         if (arg_private_network)
1893                                 loopback_setup();
1894
1895                         if (drop_capabilities() < 0) {
1896                                 log_error("drop_capabilities() failed: %m");
1897                                 goto child_fail;
1898                         }
1899
1900                         if (arg_user) {
1901
1902                                 /* Note that this resolves user names
1903                                  * inside the container, and hence
1904                                  * accesses the NSS modules from the
1905                                  * container and not the host. This is
1906                                  * a bit weird... */
1907
1908                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1909                                         log_error("get_user_creds() failed: %m");
1910                                         goto child_fail;
1911                                 }
1912
1913                                 if (mkdir_parents_label(home, 0775) < 0) {
1914                                         log_error("mkdir_parents_label() failed: %m");
1915                                         goto child_fail;
1916                                 }
1917
1918                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1919                                         log_error("mkdir_safe_label() failed: %m");
1920                                         goto child_fail;
1921                                 }
1922
1923                                 if (initgroups((const char*)arg_user, gid) < 0) {
1924                                         log_error("initgroups() failed: %m");
1925                                         goto child_fail;
1926                                 }
1927
1928                                 if (setresgid(gid, gid, gid) < 0) {
1929                                         log_error("setregid() failed: %m");
1930                                         goto child_fail;
1931                                 }
1932
1933                                 if (setresuid(uid, uid, uid) < 0) {
1934                                         log_error("setreuid() failed: %m");
1935                                         goto child_fail;
1936                                 }
1937                         } else {
1938                                 /* Reset everything fully to 0, just in case */
1939
1940                                 if (setgroups(0, NULL) < 0) {
1941                                         log_error("setgroups() failed: %m");
1942                                         goto child_fail;
1943                                 }
1944
1945                                 if (setresgid(0, 0, 0) < 0) {
1946                                         log_error("setregid() failed: %m");
1947                                         goto child_fail;
1948                                 }
1949
1950                                 if (setresuid(0, 0, 0) < 0) {
1951                                         log_error("setreuid() failed: %m");
1952                                         goto child_fail;
1953                                 }
1954                         }
1955
1956                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1957                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1958                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1959                                 log_oom();
1960                                 goto child_fail;
1961                         }
1962
1963                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1964                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1965                                         log_oom();
1966                                         goto child_fail;
1967                                 }
1968                         }
1969
1970                         if (fdset_size(fds) > 0) {
1971                                 k = fdset_cloexec(fds, false);
1972                                 if (k < 0) {
1973                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1974                                         goto child_fail;
1975                                 }
1976
1977                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1978                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1979                                         log_oom();
1980                                         goto child_fail;
1981                                 }
1982                         }
1983
1984                         setup_hostname();
1985
1986                         eventfd_read(sync_fd, &x);
1987                         close_nointr_nofail(sync_fd);
1988                         sync_fd = -1;
1989
1990                         if (!strv_isempty(arg_setenv)) {
1991                                 char **n;
1992
1993                                 n = strv_env_merge(2, envp, arg_setenv);
1994                                 if (!n) {
1995                                         log_oom();
1996                                         goto child_fail;
1997                                 }
1998
1999                                 env_use = n;
2000                         } else
2001                                 env_use = (char**) envp;
2002
2003 #ifdef HAVE_SELINUX
2004                         if (arg_selinux_context)
2005                                 if (setexeccon(arg_selinux_context) < 0)
2006                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2007 #endif
2008                         if (arg_boot) {
2009                                 char **a;
2010                                 size_t l;
2011
2012                                 /* Automatically search for the init system */
2013
2014                                 l = 1 + argc - optind;
2015                                 a = newa(char*, l + 1);
2016                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
2017
2018                                 a[0] = (char*) "/usr/lib/systemd/systemd";
2019                                 execve(a[0], a, env_use);
2020
2021                                 a[0] = (char*) "/lib/systemd/systemd";
2022                                 execve(a[0], a, env_use);
2023
2024                                 a[0] = (char*) "/sbin/init";
2025                                 execve(a[0], a, env_use);
2026                         } else if (argc > optind)
2027                                 execvpe(argv[optind], argv + optind, env_use);
2028                         else {
2029                                 chdir(home ? home : "/root");
2030                                 execle("/bin/bash", "-bash", NULL, env_use);
2031                                 execle("/bin/sh", "-sh", NULL, env_use);
2032                         }
2033
2034                         log_error("execv() failed: %m");
2035
2036                 child_fail:
2037                         _exit(EXIT_FAILURE);
2038                 }
2039
2040                 fdset_free(fds);
2041                 fds = NULL;
2042
2043                 r = register_machine(pid);
2044                 if (r < 0)
2045                         goto finish;
2046
2047                 r = move_network_interfaces(pid);
2048                 if (r < 0)
2049                         goto finish;
2050
2051                 r = setup_veth(pid, veth_name);
2052                 if (r < 0)
2053                         goto finish;
2054
2055                 r = setup_bridge(veth_name);
2056                 if (r < 0)
2057                         goto finish;
2058
2059                 eventfd_write(sync_fd, 1);
2060                 close_nointr_nofail(sync_fd);
2061                 sync_fd = -1;
2062
2063                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2064                 if (k < 0) {
2065                         r = EXIT_FAILURE;
2066                         break;
2067                 }
2068
2069                 if (!arg_quiet)
2070                         putc('\n', stdout);
2071
2072                 /* Kill if it is not dead yet anyway */
2073                 terminate_machine(pid);
2074
2075                 /* Redundant, but better safe than sorry */
2076                 kill(pid, SIGKILL);
2077
2078                 k = wait_for_terminate(pid, &status);
2079                 pid = 0;
2080
2081                 if (k < 0) {
2082                         r = EXIT_FAILURE;
2083                         break;
2084                 }
2085
2086                 if (status.si_code == CLD_EXITED) {
2087                         r = status.si_status;
2088                         if (status.si_status != 0) {
2089                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2090                                 break;
2091                         }
2092
2093                         if (!arg_quiet)
2094                                 log_debug("Container %s exited successfully.", arg_machine);
2095                         break;
2096                 } else if (status.si_code == CLD_KILLED &&
2097                            status.si_status == SIGINT) {
2098
2099                         if (!arg_quiet)
2100                                 log_info("Container %s has been shut down.", arg_machine);
2101                         r = 0;
2102                         break;
2103                 } else if (status.si_code == CLD_KILLED &&
2104                            status.si_status == SIGHUP) {
2105
2106                         if (!arg_quiet)
2107                                 log_info("Container %s is being rebooted.", arg_machine);
2108                         continue;
2109                 } else if (status.si_code == CLD_KILLED ||
2110                            status.si_code == CLD_DUMPED) {
2111
2112                         log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2113                         r = EXIT_FAILURE;
2114                         break;
2115                 } else {
2116                         log_error("Container %s failed due to unknown reason.", arg_machine);
2117                         r = EXIT_FAILURE;
2118                         break;
2119                 }
2120         }
2121
2122 finish:
2123         if (pid > 0)
2124                 kill(pid, SIGKILL);
2125
2126         free(arg_directory);
2127         free(arg_machine);
2128         free(arg_setenv);
2129         free(arg_network_interfaces);
2130
2131         return r;
2132 }