chiark / gitweb /
systemctl: fix compiler warning in list_timers()
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
44 #include <net/if.h>
45 #include <linux/veth.h>
46 #include <sys/personality.h>
47
48 #ifdef HAVE_SELINUX
49 #include <selinux/selinux.h>
50 #endif
51
52 #ifdef HAVE_SECCOMP
53 #include <seccomp.h>
54 #endif
55
56 #include "sd-daemon.h"
57 #include "sd-bus.h"
58 #include "sd-id128.h"
59 #include "sd-rtnl.h"
60 #include "log.h"
61 #include "util.h"
62 #include "mkdir.h"
63 #include "macro.h"
64 #include "audit.h"
65 #include "missing.h"
66 #include "cgroup-util.h"
67 #include "strv.h"
68 #include "path-util.h"
69 #include "loopback-setup.h"
70 #include "dev-setup.h"
71 #include "fdset.h"
72 #include "build.h"
73 #include "fileio.h"
74 #include "bus-util.h"
75 #include "bus-error.h"
76 #include "ptyfwd.h"
77 #include "bus-kernel.h"
78 #include "env-util.h"
79 #include "def.h"
80 #include "rtnl-util.h"
81 #include "udev-util.h"
82
83 #ifdef HAVE_SECCOMP
84 #include "seccomp-util.h"
85 #endif
86
/* Journal linking mode. Each value corresponds to one keyword accepted
 * by --link-journal= (see parse_argv()). */
enum LinkJournal {
        LINK_NO    = 0,   /* --link-journal=no */
        LINK_AUTO  = 1,   /* --link-journal=auto, also the compiled-in default */
        LINK_HOST  = 2,   /* --link-journal=host */
        LINK_GUEST = 3    /* --link-journal=guest, also selected by -j */
};

typedef enum LinkJournal LinkJournal;
93
94 static char *arg_directory = NULL;
95 static char *arg_user = NULL;
96 static sd_id128_t arg_uuid = {};
97 static char *arg_machine = NULL;
98 static char *arg_selinux_context = NULL;
99 static char *arg_selinux_apifs_context = NULL;
100 static const char *arg_slice = NULL;
101 static bool arg_private_network = false;
102 static bool arg_read_only = false;
103 static bool arg_boot = false;
104 static LinkJournal arg_link_journal = LINK_AUTO;
105 static uint64_t arg_retain =
106         (1ULL << CAP_CHOWN) |
107         (1ULL << CAP_DAC_OVERRIDE) |
108         (1ULL << CAP_DAC_READ_SEARCH) |
109         (1ULL << CAP_FOWNER) |
110         (1ULL << CAP_FSETID) |
111         (1ULL << CAP_IPC_OWNER) |
112         (1ULL << CAP_KILL) |
113         (1ULL << CAP_LEASE) |
114         (1ULL << CAP_LINUX_IMMUTABLE) |
115         (1ULL << CAP_NET_BIND_SERVICE) |
116         (1ULL << CAP_NET_BROADCAST) |
117         (1ULL << CAP_NET_RAW) |
118         (1ULL << CAP_SETGID) |
119         (1ULL << CAP_SETFCAP) |
120         (1ULL << CAP_SETPCAP) |
121         (1ULL << CAP_SETUID) |
122         (1ULL << CAP_SYS_ADMIN) |
123         (1ULL << CAP_SYS_CHROOT) |
124         (1ULL << CAP_SYS_NICE) |
125         (1ULL << CAP_SYS_PTRACE) |
126         (1ULL << CAP_SYS_TTY_CONFIG) |
127         (1ULL << CAP_SYS_RESOURCE) |
128         (1ULL << CAP_SYS_BOOT) |
129         (1ULL << CAP_AUDIT_WRITE) |
130         (1ULL << CAP_AUDIT_CONTROL) |
131         (1ULL << CAP_MKNOD);
132 static char **arg_bind = NULL;
133 static char **arg_bind_ro = NULL;
134 static char **arg_setenv = NULL;
135 static bool arg_quiet = false;
136 static bool arg_share_system = false;
137 static bool arg_register = true;
138 static bool arg_keep_unit = false;
139 static char **arg_network_interfaces = NULL;
140 static bool arg_network_veth = false;
141 static char *arg_network_bridge = NULL;
142 static unsigned long arg_personality = 0xffffffffLU;
143
144 static int help(void) {
145
146         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
147                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
148                "  -h --help                 Show this help\n"
149                "     --version              Print version string\n"
150                "  -q --quiet                Do not show status information\n"
151                "  -D --directory=NAME       Root directory for the container\n"
152                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
153                "  -u --user=USER            Run the command under specified user or uid\n"
154                "  -M --machine=NAME         Set the machine name for the container\n"
155                "     --uuid=UUID            Set a specific machine UUID for the container\n"
156                "  -S --slice=SLICE          Place the container in the specified slice\n"
157                "     --private-network      Disable network in container\n"
158                "     --network-interface=INTERFACE\n"
159                "                            Assign an existing network interface to the\n"
160                "                            container\n"
161                "     --network-veth         Add a virtual ethernet connection between host\n"
162                "                            and container\n"
163                "     --network-bridge=INTERFACE\n"
164                "                            Add a virtual ethernet connection between host\n"
165                "                            and container and add it to an existing bridge on\n"
166                "                            the host\n"
167                "  -Z --selinux-context=SECLABEL\n"
168                "                            Set the SELinux security context to be used by\n"
169                "                            processes in the container\n"
170                "  -L --selinux-apifs-context=SECLABEL\n"
171                "                            Set the SELinux security context to be used by\n"
172                "                            API/tmpfs file systems in the container\n"
173                "     --capability=CAP       In addition to the default, retain specified\n"
174                "                            capability\n"
175                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
176                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
177                "  -j                        Equivalent to --link-journal=host\n"
178                "     --read-only            Mount the root directory read-only\n"
179                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
180                "                            the container\n"
181                "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
182                "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
183                "     --share-system         Share system namespaces with host\n"
184                "     --register=BOOLEAN     Register container as machine\n"
185                "     --keep-unit            Do not register a scope for the machine, reuse\n"
186                "                            the service unit nspawn is running in\n",
187                program_invocation_short_name);
188
189         return 0;
190 }
191
192 static int parse_argv(int argc, char *argv[]) {
193
194         enum {
195                 ARG_VERSION = 0x100,
196                 ARG_PRIVATE_NETWORK,
197                 ARG_UUID,
198                 ARG_READ_ONLY,
199                 ARG_CAPABILITY,
200                 ARG_DROP_CAPABILITY,
201                 ARG_LINK_JOURNAL,
202                 ARG_BIND,
203                 ARG_BIND_RO,
204                 ARG_SETENV,
205                 ARG_SHARE_SYSTEM,
206                 ARG_REGISTER,
207                 ARG_KEEP_UNIT,
208                 ARG_NETWORK_INTERFACE,
209                 ARG_NETWORK_VETH,
210                 ARG_NETWORK_BRIDGE,
211                 ARG_PERSONALITY,
212         };
213
214         static const struct option options[] = {
215                 { "help",                  no_argument,       NULL, 'h'                   },
216                 { "version",               no_argument,       NULL, ARG_VERSION           },
217                 { "directory",             required_argument, NULL, 'D'                   },
218                 { "user",                  required_argument, NULL, 'u'                   },
219                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
220                 { "boot",                  no_argument,       NULL, 'b'                   },
221                 { "uuid",                  required_argument, NULL, ARG_UUID              },
222                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
223                 { "capability",            required_argument, NULL, ARG_CAPABILITY        },
224                 { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
225                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
226                 { "bind",                  required_argument, NULL, ARG_BIND              },
227                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
228                 { "machine",               required_argument, NULL, 'M'                   },
229                 { "slice",                 required_argument, NULL, 'S'                   },
230                 { "setenv",                required_argument, NULL, ARG_SETENV            },
231                 { "selinux-context",       required_argument, NULL, 'Z'                   },
232                 { "selinux-apifs-context", required_argument, NULL, 'L'                   },
233                 { "quiet",                 no_argument,       NULL, 'q'                   },
234                 { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
235                 { "register",              required_argument, NULL, ARG_REGISTER          },
236                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
237                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
238                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
239                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
240                 { "personality",           required_argument, NULL, ARG_PERSONALITY       },
241                 {}
242         };
243
244         int c, r;
245         uint64_t plus = 0, minus = 0;
246
247         assert(argc >= 0);
248         assert(argv);
249
250         while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
251
252                 switch (c) {
253
254                 case 'h':
255                         return help();
256
257                 case ARG_VERSION:
258                         puts(PACKAGE_STRING);
259                         puts(SYSTEMD_FEATURES);
260                         return 0;
261
262                 case 'D':
263                         free(arg_directory);
264                         arg_directory = canonicalize_file_name(optarg);
265                         if (!arg_directory) {
266                                 log_error("Invalid root directory: %m");
267                                 return -ENOMEM;
268                         }
269
270                         break;
271
272                 case 'u':
273                         free(arg_user);
274                         arg_user = strdup(optarg);
275                         if (!arg_user)
276                                 return log_oom();
277
278                         break;
279
280                 case ARG_NETWORK_BRIDGE:
281                         arg_network_bridge = strdup(optarg);
282                         if (!arg_network_bridge)
283                                 return log_oom();
284
285                         /* fall through */
286
287                 case ARG_NETWORK_VETH:
288                         arg_network_veth = true;
289                         arg_private_network = true;
290                         break;
291
292                 case ARG_NETWORK_INTERFACE:
293                         if (strv_push(&arg_network_interfaces, optarg) < 0)
294                                 return log_oom();
295
296                         /* fall through */
297
298                 case ARG_PRIVATE_NETWORK:
299                         arg_private_network = true;
300                         break;
301
302                 case 'b':
303                         arg_boot = true;
304                         break;
305
306                 case ARG_UUID:
307                         r = sd_id128_from_string(optarg, &arg_uuid);
308                         if (r < 0) {
309                                 log_error("Invalid UUID: %s", optarg);
310                                 return r;
311                         }
312                         break;
313
314                 case 'S':
315                         arg_slice = strdup(optarg);
316                         if (!arg_slice)
317                                 return log_oom();
318
319                         break;
320
321                 case 'M':
322                         if (isempty(optarg)) {
323                                 free(arg_machine);
324                                 arg_machine = NULL;
325                         } else {
326
327                                 if (!hostname_is_valid(optarg)) {
328                                         log_error("Invalid machine name: %s", optarg);
329                                         return -EINVAL;
330                                 }
331
332                                 free(arg_machine);
333                                 arg_machine = strdup(optarg);
334                                 if (!arg_machine)
335                                         return log_oom();
336
337                                 break;
338                         }
339
340                 case 'Z':
341                         arg_selinux_context = optarg;
342                         break;
343
344                 case 'L':
345                         arg_selinux_apifs_context = optarg;
346                         break;
347
348                 case ARG_READ_ONLY:
349                         arg_read_only = true;
350                         break;
351
352                 case ARG_CAPABILITY:
353                 case ARG_DROP_CAPABILITY: {
354                         char *state, *word;
355                         size_t length;
356
357                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
358                                 _cleanup_free_ char *t;
359                                 cap_value_t cap;
360
361                                 t = strndup(word, length);
362                                 if (!t)
363                                         return log_oom();
364
365                                 if (streq(t, "all")) {
366                                         if (c == ARG_CAPABILITY)
367                                                 plus = (uint64_t) -1;
368                                         else
369                                                 minus = (uint64_t) -1;
370                                 } else {
371                                         if (cap_from_name(t, &cap) < 0) {
372                                                 log_error("Failed to parse capability %s.", t);
373                                                 return -EINVAL;
374                                         }
375
376                                         if (c == ARG_CAPABILITY)
377                                                 plus |= 1ULL << (uint64_t) cap;
378                                         else
379                                                 minus |= 1ULL << (uint64_t) cap;
380                                 }
381                         }
382
383                         break;
384                 }
385
386                 case 'j':
387                         arg_link_journal = LINK_GUEST;
388                         break;
389
390                 case ARG_LINK_JOURNAL:
391                         if (streq(optarg, "auto"))
392                                 arg_link_journal = LINK_AUTO;
393                         else if (streq(optarg, "no"))
394                                 arg_link_journal = LINK_NO;
395                         else if (streq(optarg, "guest"))
396                                 arg_link_journal = LINK_GUEST;
397                         else if (streq(optarg, "host"))
398                                 arg_link_journal = LINK_HOST;
399                         else {
400                                 log_error("Failed to parse link journal mode %s", optarg);
401                                 return -EINVAL;
402                         }
403
404                         break;
405
406                 case ARG_BIND:
407                 case ARG_BIND_RO: {
408                         _cleanup_free_ char *a = NULL, *b = NULL;
409                         char *e;
410                         char ***x;
411
412                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
413
414                         e = strchr(optarg, ':');
415                         if (e) {
416                                 a = strndup(optarg, e - optarg);
417                                 b = strdup(e + 1);
418                         } else {
419                                 a = strdup(optarg);
420                                 b = strdup(optarg);
421                         }
422
423                         if (!a || !b)
424                                 return log_oom();
425
426                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
427                                 log_error("Invalid bind mount specification: %s", optarg);
428                                 return -EINVAL;
429                         }
430
431                         r = strv_extend(x, a);
432                         if (r < 0)
433                                 return log_oom();
434
435                         r = strv_extend(x, b);
436                         if (r < 0)
437                                 return log_oom();
438
439                         break;
440                 }
441
442                 case ARG_SETENV: {
443                         char **n;
444
445                         if (!env_assignment_is_valid(optarg)) {
446                                 log_error("Environment variable assignment '%s' is not valid.", optarg);
447                                 return -EINVAL;
448                         }
449
450                         n = strv_env_set(arg_setenv, optarg);
451                         if (!n)
452                                 return log_oom();
453
454                         strv_free(arg_setenv);
455                         arg_setenv = n;
456                         break;
457                 }
458
459                 case 'q':
460                         arg_quiet = true;
461                         break;
462
463                 case ARG_SHARE_SYSTEM:
464                         arg_share_system = true;
465                         break;
466
467                 case ARG_REGISTER:
468                         r = parse_boolean(optarg);
469                         if (r < 0) {
470                                 log_error("Failed to parse --register= argument: %s", optarg);
471                                 return r;
472                         }
473
474                         arg_register = r;
475                         break;
476
477                 case ARG_KEEP_UNIT:
478                         arg_keep_unit = true;
479                         break;
480
481                 case ARG_PERSONALITY:
482
483                         arg_personality = personality_from_string(optarg);
484                         if (arg_personality == 0xffffffffLU) {
485                                 log_error("Unknown or unsupported personality '%s'.", optarg);
486                                 return -EINVAL;
487                         }
488
489                         break;
490
491                 case '?':
492                         return -EINVAL;
493
494                 default:
495                         assert_not_reached("Unhandled option");
496                 }
497         }
498
499         if (arg_share_system)
500                 arg_register = false;
501
502         if (arg_boot && arg_share_system) {
503                 log_error("--boot and --share-system may not be combined.");
504                 return -EINVAL;
505         }
506
507         if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
508                 log_error("--keep-unit may not be used when invoked from a user session.");
509                 return -EINVAL;
510         }
511
512         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
513
514         return 1;
515 }
516
517 static int mount_all(const char *dest) {
518
519         typedef struct MountPoint {
520                 const char *what;
521                 const char *where;
522                 const char *type;
523                 const char *options;
524                 unsigned long flags;
525                 bool fatal;
526         } MountPoint;
527
528         static const MountPoint mount_table[] = {
529                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
530                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
531                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
532                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
533                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
534                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
535                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
536                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
537 #ifdef HAVE_SELINUX
538                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
539                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
540 #endif
541         };
542
543         unsigned k;
544         int r = 0;
545
546         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
547                 _cleanup_free_ char *where = NULL;
548 #ifdef HAVE_SELINUX
549                 _cleanup_free_ char *options = NULL;
550 #endif
551                 const char *o;
552                 int t;
553
554                 where = strjoin(dest, "/", mount_table[k].where, NULL);
555                 if (!where)
556                         return log_oom();
557
558                 t = path_is_mount_point(where, true);
559                 if (t < 0) {
560                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
561
562                         if (r == 0)
563                                 r = t;
564
565                         continue;
566                 }
567
568                 /* Skip this entry if it is not a remount. */
569                 if (mount_table[k].what && t > 0)
570                         continue;
571
572                 mkdir_p(where, 0755);
573
574 #ifdef HAVE_SELINUX
575                 if (arg_selinux_apifs_context &&
576                     (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
577                         options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
578                         if (!options)
579                                 return log_oom();
580
581                         o = options;
582                 } else
583 #endif
584                         o = mount_table[k].options;
585
586
587                 if (mount(mount_table[k].what,
588                           where,
589                           mount_table[k].type,
590                           mount_table[k].flags,
591                           o) < 0 &&
592                     mount_table[k].fatal) {
593
594                         log_error("mount(%s) failed: %m", where);
595
596                         if (r == 0)
597                                 r = -errno;
598                 }
599         }
600
601         return r;
602 }
603
604 static int mount_binds(const char *dest, char **l, unsigned long flags) {
605         char **x, **y;
606
607         STRV_FOREACH_PAIR(x, y, l) {
608                 char *where;
609                 struct stat source_st, dest_st;
610                 int r;
611
612                 if (stat(*x, &source_st) < 0) {
613                         log_error("failed to stat %s: %m", *x);
614                         return -errno;
615                 }
616
617                 where = strappenda(dest, *y);
618                 r = stat(where, &dest_st);
619                 if (r == 0) {
620                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
621                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
622                                                 *x, where);
623                                 return -EINVAL;
624                         }
625                 } else if (errno == ENOENT) {
626                         r = mkdir_parents_label(where, 0755);
627                         if (r < 0) {
628                                 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
629                                 return r;
630                         }
631                 } else {
632                         log_error("Failed to bind mount %s: %s", *x, strerror(errno));
633                         return -errno;
634                 }
635                 /* Create the mount point, but be conservative -- refuse to create block
636                 * and char devices. */
637                 if (S_ISDIR(source_st.st_mode))
638                         mkdir_label(where, 0755);
639                 else if (S_ISFIFO(source_st.st_mode))
640                         mkfifo(where, 0644);
641                 else if (S_ISSOCK(source_st.st_mode))
642                         mknod(where, 0644 | S_IFSOCK, 0);
643                 else if (S_ISREG(source_st.st_mode))
644                         touch(where);
645                 else {
646                         log_error("Refusing to create mountpoint for file: %s", *x);
647                         return -ENOTSUP;
648                 }
649
650                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
651                         log_error("mount(%s) failed: %m", where);
652                         return -errno;
653                 }
654
655                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
656                         log_error("mount(%s) failed: %m", where);
657                         return -errno;
658                 }
659         }
660
661         return 0;
662 }
663
664 static int setup_timezone(const char *dest) {
665         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
666         char *z, *y;
667         int r;
668
669         assert(dest);
670
671         /* Fix the timezone, if possible */
672         r = readlink_malloc("/etc/localtime", &p);
673         if (r < 0) {
674                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
675                 return 0;
676         }
677
678         z = path_startswith(p, "../usr/share/zoneinfo/");
679         if (!z)
680                 z = path_startswith(p, "/usr/share/zoneinfo/");
681         if (!z) {
682                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
683                 return 0;
684         }
685
686         where = strappend(dest, "/etc/localtime");
687         if (!where)
688                 return log_oom();
689
690         r = readlink_malloc(where, &q);
691         if (r >= 0) {
692                 y = path_startswith(q, "../usr/share/zoneinfo/");
693                 if (!y)
694                         y = path_startswith(q, "/usr/share/zoneinfo/");
695
696
697                 /* Already pointing to the right place? Then do nothing .. */
698                 if (y && streq(y, z))
699                         return 0;
700         }
701
702         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
703         if (!check)
704                 return log_oom();
705
706         if (access(check, F_OK) < 0) {
707                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
708                 return 0;
709         }
710
711         what = strappend("../usr/share/zoneinfo/", z);
712         if (!what)
713                 return log_oom();
714
715         unlink(where);
716         if (symlink(what, where) < 0) {
717                 log_error("Failed to correct timezone of container: %m");
718                 return 0;
719         }
720
721         return 0;
722 }
723
724 static int setup_resolv_conf(const char *dest) {
725         char _cleanup_free_ *where = NULL;
726
727         assert(dest);
728
729         if (arg_private_network)
730                 return 0;
731
732         /* Fix resolv.conf, if possible */
733         where = strappend(dest, "/etc/resolv.conf");
734         if (!where)
735                 return log_oom();
736
737         /* We don't really care for the results of this really. If it
738          * fails, it fails, but meh... */
739         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
740
741         return 0;
742 }
743
744 static int setup_boot_id(const char *dest) {
745         _cleanup_free_ char *from = NULL, *to = NULL;
746         sd_id128_t rnd = {};
747         char as_uuid[37];
748         int r;
749
750         assert(dest);
751
752         if (arg_share_system)
753                 return 0;
754
755         /* Generate a new randomized boot ID, so that each boot-up of
756          * the container gets a new one */
757
758         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
759         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
760         if (!from || !to)
761                 return log_oom();
762
763         r = sd_id128_randomize(&rnd);
764         if (r < 0) {
765                 log_error("Failed to generate random boot id: %s", strerror(-r));
766                 return r;
767         }
768
769         snprintf(as_uuid, sizeof(as_uuid),
770                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
771                  SD_ID128_FORMAT_VAL(rnd));
772         char_array_0(as_uuid);
773
774         r = write_string_file(from, as_uuid);
775         if (r < 0) {
776                 log_error("Failed to write boot id: %s", strerror(-r));
777                 return r;
778         }
779
780         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
781                 log_error("Failed to bind mount boot id: %m");
782                 r = -errno;
783         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
784                 log_warning("Failed to make boot id read-only: %m");
785
786         unlink(from);
787         return r;
788 }
789
790 static int copy_devnodes(const char *dest) {
791
792         static const char devnodes[] =
793                 "null\0"
794                 "zero\0"
795                 "full\0"
796                 "random\0"
797                 "urandom\0"
798                 "tty\0";
799
800         const char *d;
801         int r = 0;
802         _cleanup_umask_ mode_t u;
803
804         assert(dest);
805
806         u = umask(0000);
807
808         NULSTR_FOREACH(d, devnodes) {
809                 _cleanup_free_ char *from = NULL, *to = NULL;
810                 struct stat st;
811
812                 from = strappend("/dev/", d);
813                 to = strjoin(dest, "/dev/", d, NULL);
814                 if (!from || !to)
815                         return log_oom();
816
817                 if (stat(from, &st) < 0) {
818
819                         if (errno != ENOENT) {
820                                 log_error("Failed to stat %s: %m", from);
821                                 return -errno;
822                         }
823
824                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
825
826                         log_error("%s is not a char or block device, cannot copy", from);
827                         return -EIO;
828
829                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
830
831                         log_error("mknod(%s) failed: %m", dest);
832                         return  -errno;
833                 }
834         }
835
836         return r;
837 }
838
839 static int setup_ptmx(const char *dest) {
840         _cleanup_free_ char *p = NULL;
841
842         p = strappend(dest, "/dev/ptmx");
843         if (!p)
844                 return log_oom();
845
846         if (symlink("pts/ptmx", p) < 0) {
847                 log_error("Failed to create /dev/ptmx symlink: %m");
848                 return -errno;
849         }
850
851         return 0;
852 }
853
854 static int setup_dev_console(const char *dest, const char *console) {
855         struct stat st;
856         _cleanup_free_ char *to = NULL;
857         int r;
858         _cleanup_umask_ mode_t u;
859
860         assert(dest);
861         assert(console);
862
863         u = umask(0000);
864
865         if (stat(console, &st) < 0) {
866                 log_error("Failed to stat %s: %m", console);
867                 return -errno;
868
869         } else if (!S_ISCHR(st.st_mode)) {
870                 log_error("/dev/console is not a char device");
871                 return -EIO;
872         }
873
874         r = chmod_and_chown(console, 0600, 0, 0);
875         if (r < 0) {
876                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
877                 return r;
878         }
879
880         if (asprintf(&to, "%s/dev/console", dest) < 0)
881                 return log_oom();
882
883         /* We need to bind mount the right tty to /dev/console since
884          * ptys can only exist on pts file systems. To have something
885          * to bind mount things on we create a device node first, that
886          * has the right major/minor (note that the major minor
887          * doesn't actually matter here, since we mount it over
888          * anyway). */
889
890         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
891                 log_error("mknod() for /dev/console failed: %m");
892                 return -errno;
893         }
894
895         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
896                 log_error("Bind mount for /dev/console failed: %m");
897                 return -errno;
898         }
899
900         return 0;
901 }
902
/* Provides a FIFO as <dest>/proc/kmsg and parks an fd to it in
 * kmsg_socket.
 *
 * A FIFO is created at <dest>/dev/kmsg, bind mounted over
 * <dest>/proc/kmsg, opened, and the resulting fd is sent as
 * SCM_RIGHTS message over kmsg_socket. Finally the FIFO path below
 * /dev is unlinked again.
 *
 * Returns 0 on success, a negative errno-style value otherwise. */
static int setup_kmsg(const char *dest, int kmsg_socket) {
        _cleanup_free_ char *from = NULL, *to = NULL;
        int r, fd, k;
        _cleanup_umask_ mode_t u;
        /* Control buffer sized for exactly one file descriptor */
        union {
                struct cmsghdr cmsghdr;
                uint8_t buf[CMSG_SPACE(sizeof(int))];
        } control = {};
        struct msghdr mh = {
                .msg_control = &control,
                .msg_controllen = sizeof(control),
        };
        struct cmsghdr *cmsg;

        assert(dest);
        assert(kmsg_socket >= 0);

        u = umask(0000);

        /* We create the kmsg FIFO as /dev/kmsg, but immediately
         * delete it after bind mounting it to /proc/kmsg. While FIFOs
         * on the reading side behave very similar to /proc/kmsg,
         * their writing side behaves differently from /dev/kmsg in
         * that writing blocks when nothing is reading. In order to
         * avoid any problems with containers deadlocking due to this
         * we simply make /dev/kmsg unavailable to the container. */
        if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
            asprintf(&to, "%s/proc/kmsg", dest) < 0)
                return log_oom();

        if (mkfifo(from, 0600) < 0) {
                log_error("mkfifo() for /dev/kmsg failed: %m");
                return -errno;
        }

        r = chmod_and_chown(from, 0600, 0, 0);
        if (r < 0) {
                log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
                return r;
        }

        if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
                log_error("Bind mount for /proc/kmsg failed: %m");
                return -errno;
        }

        /* Opened read-write and non-blocking, so that open() itself
         * does not wait for a peer on the FIFO */
        fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
        if (fd < 0) {
                log_error("Failed to open fifo: %m");
                return -errno;
        }

        /* Build a single SCM_RIGHTS control message carrying fd */
        cmsg = CMSG_FIRSTHDR(&mh);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

        mh.msg_controllen = cmsg->cmsg_len;

        /* Store away the fd in the socket, so that it stays open as
         * long as we run the child */
        k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
        /* Our local copy is closed whether or not sending worked */
        close_nointr_nofail(fd);

        if (k < 0) {
                log_error("Failed to send FIFO fd: %m");
                return -errno;
        }

        /* And now make the FIFO unavailable as /dev/kmsg... */
        unlink(from);
        return 0;
}
977
978 static int setup_hostname(void) {
979
980         if (arg_share_system)
981                 return 0;
982
983         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
984                 return -errno;
985
986         return 0;
987 }
988
989 static int setup_journal(const char *directory) {
990         sd_id128_t machine_id, this_id;
991         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
992         char *id;
993         int r;
994
995         p = strappend(directory, "/etc/machine-id");
996         if (!p)
997                 return log_oom();
998
999         r = read_one_line_file(p, &b);
1000         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
1001                 return 0;
1002         else if (r < 0) {
1003                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
1004                 return r;
1005         }
1006
1007         id = strstrip(b);
1008         if (isempty(id) && arg_link_journal == LINK_AUTO)
1009                 return 0;
1010
1011         /* Verify validity */
1012         r = sd_id128_from_string(id, &machine_id);
1013         if (r < 0) {
1014                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1015                 return r;
1016         }
1017
1018         r = sd_id128_get_machine(&this_id);
1019         if (r < 0) {
1020                 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1021                 return r;
1022         }
1023
1024         if (sd_id128_equal(machine_id, this_id)) {
1025                 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1026                          "Host and machine ids are equal (%s): refusing to link journals", id);
1027                 if (arg_link_journal == LINK_AUTO)
1028                         return 0;
1029                 return
1030                         -EEXIST;
1031         }
1032
1033         if (arg_link_journal == LINK_NO)
1034                 return 0;
1035
1036         free(p);
1037         p = strappend("/var/log/journal/", id);
1038         q = strjoin(directory, "/var/log/journal/", id, NULL);
1039         if (!p || !q)
1040                 return log_oom();
1041
1042         if (path_is_mount_point(p, false) > 0) {
1043                 if (arg_link_journal != LINK_AUTO) {
1044                         log_error("%s: already a mount point, refusing to use for journal", p);
1045                         return -EEXIST;
1046                 }
1047
1048                 return 0;
1049         }
1050
1051         if (path_is_mount_point(q, false) > 0) {
1052                 if (arg_link_journal != LINK_AUTO) {
1053                         log_error("%s: already a mount point, refusing to use for journal", q);
1054                         return -EEXIST;
1055                 }
1056
1057                 return 0;
1058         }
1059
1060         r = readlink_and_make_absolute(p, &d);
1061         if (r >= 0) {
1062                 if ((arg_link_journal == LINK_GUEST ||
1063                      arg_link_journal == LINK_AUTO) &&
1064                     path_equal(d, q)) {
1065
1066                         r = mkdir_p(q, 0755);
1067                         if (r < 0)
1068                                 log_warning("failed to create directory %s: %m", q);
1069                         return 0;
1070                 }
1071
1072                 if (unlink(p) < 0) {
1073                         log_error("Failed to remove symlink %s: %m", p);
1074                         return -errno;
1075                 }
1076         } else if (r == -EINVAL) {
1077
1078                 if (arg_link_journal == LINK_GUEST &&
1079                     rmdir(p) < 0) {
1080
1081                         if (errno == ENOTDIR) {
1082                                 log_error("%s already exists and is neither a symlink nor a directory", p);
1083                                 return r;
1084                         } else {
1085                                 log_error("Failed to remove %s: %m", p);
1086                                 return -errno;
1087                         }
1088                 }
1089         } else if (r != -ENOENT) {
1090                 log_error("readlink(%s) failed: %m", p);
1091                 return r;
1092         }
1093
1094         if (arg_link_journal == LINK_GUEST) {
1095
1096                 if (symlink(q, p) < 0) {
1097                         log_error("Failed to symlink %s to %s: %m", q, p);
1098                         return -errno;
1099                 }
1100
1101                 r = mkdir_p(q, 0755);
1102                 if (r < 0)
1103                         log_warning("failed to create directory %s: %m", q);
1104                 return 0;
1105         }
1106
1107         if (arg_link_journal == LINK_HOST) {
1108                 r = mkdir_p(p, 0755);
1109                 if (r < 0) {
1110                         log_error("Failed to create %s: %m", p);
1111                         return r;
1112                 }
1113
1114         } else if (access(p, F_OK) < 0)
1115                 return 0;
1116
1117         if (dir_is_empty(q) == 0) {
1118                 log_error("%s not empty.", q);
1119                 return -ENOTEMPTY;
1120         }
1121
1122         r = mkdir_p(q, 0755);
1123         if (r < 0) {
1124                 log_error("Failed to create %s: %m", q);
1125                 return r;
1126         }
1127
1128         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1129                 log_error("Failed to bind mount journal from host into guest: %m");
1130                 return -errno;
1131         }
1132
1133         return 0;
1134 }
1135
1136 static int setup_kdbus(const char *dest, const char *path) {
1137         const char *p;
1138
1139         if (!path)
1140                 return 0;
1141
1142         p = strappenda(dest, "/dev/kdbus");
1143         if (mkdir(p, 0755) < 0) {
1144                 log_error("Failed to create kdbus path: %m");
1145                 return  -errno;
1146         }
1147
1148         if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1149                 log_error("Failed to mount kdbus domain path: %m");
1150                 return -errno;
1151         }
1152
1153         return 0;
1154 }
1155
1156 static int drop_capabilities(void) {
1157         return capability_bounding_set_drop(~arg_retain, false);
1158 }
1159
1160 static int register_machine(pid_t pid) {
1161         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1162         _cleanup_bus_unref_ sd_bus *bus = NULL;
1163         int r;
1164
1165         if (!arg_register)
1166                 return 0;
1167
1168         r = sd_bus_default_system(&bus);
1169         if (r < 0) {
1170                 log_error("Failed to open system bus: %s", strerror(-r));
1171                 return r;
1172         }
1173
1174         if (arg_keep_unit) {
1175                 r = sd_bus_call_method(
1176                                 bus,
1177                                 "org.freedesktop.machine1",
1178                                 "/org/freedesktop/machine1",
1179                                 "org.freedesktop.machine1.Manager",
1180                                 "RegisterMachine",
1181                                 &error,
1182                                 NULL,
1183                                 "sayssus",
1184                                 arg_machine,
1185                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1186                                 "nspawn",
1187                                 "container",
1188                                 (uint32_t) pid,
1189                                 strempty(arg_directory));
1190         } else {
1191                 r = sd_bus_call_method(
1192                                 bus,
1193                                 "org.freedesktop.machine1",
1194                                 "/org/freedesktop/machine1",
1195                                 "org.freedesktop.machine1.Manager",
1196                                 "CreateMachine",
1197                                 &error,
1198                                 NULL,
1199                                 "sayssusa(sv)",
1200                                 arg_machine,
1201                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1202                                 "nspawn",
1203                                 "container",
1204                                 (uint32_t) pid,
1205                                 strempty(arg_directory),
1206                                 !isempty(arg_slice), "Slice", "s", arg_slice);
1207         }
1208
1209         if (r < 0) {
1210                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1211                 return r;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static int terminate_machine(pid_t pid) {
1218         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1219         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1220         _cleanup_bus_unref_ sd_bus *bus = NULL;
1221         const char *path;
1222         int r;
1223
1224         if (!arg_register)
1225                 return 0;
1226
1227         r = sd_bus_default_system(&bus);
1228         if (r < 0) {
1229                 log_error("Failed to open system bus: %s", strerror(-r));
1230                 return r;
1231         }
1232
1233         r = sd_bus_call_method(
1234                         bus,
1235                         "org.freedesktop.machine1",
1236                         "/org/freedesktop/machine1",
1237                         "org.freedesktop.machine1.Manager",
1238                         "GetMachineByPID",
1239                         &error,
1240                         &reply,
1241                         "u",
1242                         (uint32_t) pid);
1243         if (r < 0) {
1244                 /* Note that the machine might already have been
1245                  * cleaned up automatically, hence don't consider it a
1246                  * failure if we cannot get the machine object. */
1247                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1248                 return 0;
1249         }
1250
1251         r = sd_bus_message_read(reply, "o", &path);
1252         if (r < 0)
1253                 return bus_log_parse_error(r);
1254
1255         r = sd_bus_call_method(
1256                         bus,
1257                         "org.freedesktop.machine1",
1258                         path,
1259                         "org.freedesktop.machine1.Machine",
1260                         "Terminate",
1261                         &error,
1262                         NULL,
1263                         NULL);
1264         if (r < 0) {
1265                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1266                 return 0;
1267         }
1268
1269         return 0;
1270 }
1271
1272 static int reset_audit_loginuid(void) {
1273         _cleanup_free_ char *p = NULL;
1274         int r;
1275
1276         if (arg_share_system)
1277                 return 0;
1278
1279         r = read_one_line_file("/proc/self/loginuid", &p);
1280         if (r == -EEXIST)
1281                 return 0;
1282         if (r < 0) {
1283                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1284                 return r;
1285         }
1286
1287         /* Already reset? */
1288         if (streq(p, "4294967295"))
1289                 return 0;
1290
1291         r = write_string_file("/proc/self/loginuid", "4294967295");
1292         if (r < 0) {
1293                 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1294                           "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1295                           "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1296                           "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1297                           "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1298
1299                 sleep(5);
1300         }
1301
1302         return 0;
1303 }
1304
1305 static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ]) {
1306         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1307         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1308         int r;
1309
1310         if (!arg_private_network)
1311                 return 0;
1312
1313         if (!arg_network_veth)
1314                 return 0;
1315
1316         /* Use two different interface name prefixes depending whether
1317          * we are in bridge mode or not. */
1318         if (arg_network_bridge)
1319                 memcpy(iface_name, "vb-", 3);
1320         else
1321                 memcpy(iface_name, "ve-", 3);
1322
1323         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1324
1325         r = sd_rtnl_open(&rtnl, 0);
1326         if (r < 0) {
1327                 log_error("Failed to connect to netlink: %s", strerror(-r));
1328                 return r;
1329         }
1330
1331         r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1332         if (r < 0) {
1333                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1334                 return r;
1335         }
1336
1337         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1338         if (r < 0) {
1339                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1340                 return r;
1341         }
1342
1343         r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1344         if (r < 0) {
1345                 log_error("Failed to open netlink container: %s", strerror(-r));
1346                 return r;
1347         }
1348
1349         r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1350         if (r < 0) {
1351                 log_error("Failed to append netlink kind: %s", strerror(-r));
1352                 return r;
1353         }
1354
1355         r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1356         if (r < 0) {
1357                 log_error("Failed to open netlink container: %s", strerror(-r));
1358                 return r;
1359         }
1360
1361         r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1362         if (r < 0) {
1363                 log_error("Failed to open netlink container: %s", strerror(-r));
1364                 return r;
1365         }
1366
1367         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1368         if (r < 0) {
1369                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1370                 return r;
1371         }
1372
1373         r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1374         if (r < 0) {
1375                 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1376                 return r;
1377         }
1378
1379         r = sd_rtnl_message_close_container(m);
1380         if (r < 0) {
1381                 log_error("Failed to close netlink container: %s", strerror(-r));
1382                 return r;
1383         }
1384
1385         r = sd_rtnl_message_close_container(m);
1386         if (r < 0) {
1387                 log_error("Failed to close netlink container: %s", strerror(-r));
1388                 return r;
1389         }
1390
1391         r = sd_rtnl_message_close_container(m);
1392         if (r < 0) {
1393                 log_error("Failed to close netlink container: %s", strerror(-r));
1394                 return r;
1395         }
1396
1397         r = sd_rtnl_call(rtnl, m, 0, NULL);
1398         if (r < 0) {
1399                 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1400                 return r;
1401         }
1402
1403         return 0;
1404 }
1405
1406 static int setup_bridge(const char veth_name[]) {
1407         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1408         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1409         int r, bridge;
1410
1411         if (!arg_private_network)
1412                 return 0;
1413
1414         if (!arg_network_veth)
1415                 return 0;
1416
1417         if (!arg_network_bridge)
1418                 return 0;
1419
1420         bridge = (int) if_nametoindex(arg_network_bridge);
1421         if (bridge <= 0) {
1422                 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1423                 return -errno;
1424         }
1425
1426         r = sd_rtnl_open(&rtnl, 0);
1427         if (r < 0) {
1428                 log_error("Failed to connect to netlink: %s", strerror(-r));
1429                 return r;
1430         }
1431
1432         r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
1433         if (r < 0) {
1434                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1435                 return r;
1436         }
1437
1438         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1439         if (r < 0) {
1440                 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1441                 return r;
1442         }
1443
1444         r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1445         if (r < 0) {
1446                 log_error("Failed to add netlink master field: %s", strerror(-r));
1447                 return r;
1448         }
1449
1450         r = sd_rtnl_call(rtnl, m, 0, NULL);
1451         if (r < 0) {
1452                 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
1453                 return r;
1454         }
1455
1456         return 0;
1457 }
1458
1459 static int move_network_interfaces(pid_t pid) {
1460         _cleanup_udev_unref_ struct udev *udev = NULL;
1461         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1462         char **i;
1463         int r;
1464
1465         if (!arg_private_network)
1466                 return 0;
1467
1468         if (strv_isempty(arg_network_interfaces))
1469                 return 0;
1470
1471         r = sd_rtnl_open(&rtnl, 0);
1472         if (r < 0) {
1473                 log_error("Failed to connect to netlink: %s", strerror(-r));
1474                 return r;
1475         }
1476
1477         udev = udev_new();
1478         if (!udev) {
1479                 log_error("Failed to connect to udev.");
1480                 return -ENOMEM;
1481         }
1482
1483         STRV_FOREACH(i, arg_network_interfaces) {
1484                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1485                 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1486                 char ifi_str[2 + DECIMAL_STR_MAX(int)];
1487                 int ifi;
1488
1489                 ifi = (int) if_nametoindex(*i);
1490                 if (ifi <= 0) {
1491                         log_error("Failed to resolve interface %s: %m", *i);
1492                         return -errno;
1493                 }
1494
1495                 sprintf(ifi_str, "n%i", ifi);
1496                 d = udev_device_new_from_device_id(udev, ifi_str);
1497                 if (!d) {
1498                         log_error("Failed to get udev device for interface %s: %m", *i);
1499                         return -errno;
1500                 }
1501
1502                 if (udev_device_get_is_initialized(d) <= 0) {
1503                         log_error("Network interface %s is not initialized yet.", *i);
1504                         return -EBUSY;
1505                 }
1506
1507                 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
1508                 if (r < 0) {
1509                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1510                         return r;
1511                 }
1512
1513                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1514                 if (r < 0) {
1515                         log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1516                         return r;
1517                 }
1518
1519                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1520                 if (r < 0) {
1521                         log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
1522                         return r;
1523                 }
1524         }
1525
1526         return 0;
1527 }
1528
/* Installs a seccomp filter that turns off creation of audit sockets.
 *
 * Audit is broken in containers, and much of the userspace audit
 * hookup fails when run inside one. The filter allows everything
 * except socket(AF_NETLINK, *, NETLINK_AUDIT), which is made to fail
 * with EAFNOSUPPORT; audit userspace takes that as indication that
 * audit is disabled in the kernel.
 *
 * Returns the result of the last seccomp call (negative on failure).
 * Without HAVE_SECCOMP this is a no-op returning 0. */
static int audit_still_doesnt_work_in_containers(void) {

#ifdef HAVE_SECCOMP
        scmp_filter_ctx filter;
        int r;

        filter = seccomp_init(SCMP_ACT_ALLOW);
        if (!filter)
                return log_oom();

        /* -EEXIST from adding the secondary archs is tolerated */
        r = seccomp_add_secondary_archs(filter);
        if (!(r >= 0 || r == -EEXIST)) {
                log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
                goto out;
        }

        r = seccomp_rule_add(
                        filter,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0) {
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
                goto out;
        }

        r = seccomp_attr_set(filter, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0) {
                log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                goto out;
        }

        r = seccomp_load(filter);
        if (r < 0)
                log_error("Failed to install seccomp audit filter: %s", strerror(-r));

out:
        /* The filter context is released on every path past
         * seccomp_init() */
        seccomp_release(filter);
        return r;
#else
        return 0;
#endif

}
1585
1586 int main(int argc, char *argv[]) {
1587
1588         _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
1589         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1590         _cleanup_free_ char *kdbus_domain = NULL;
1591         _cleanup_fdset_free_ FDSet *fds = NULL;
1592         const char *console = NULL;
1593         int r = EXIT_FAILURE, k;
1594         int n_fd_passed;
1595         pid_t pid = 0;
1596         sigset_t mask;
1597         char veth_name[IFNAMSIZ];
1598
1599         log_parse_environment();
1600         log_open();
1601
1602         k = parse_argv(argc, argv);
1603         if (k < 0)
1604                 goto finish;
1605         else if (k == 0) {
1606                 r = EXIT_SUCCESS;
1607                 goto finish;
1608         }
1609
1610         if (arg_directory) {
1611                 char *p;
1612
1613                 p = path_make_absolute_cwd(arg_directory);
1614                 free(arg_directory);
1615                 arg_directory = p;
1616         } else
1617                 arg_directory = get_current_dir_name();
1618
1619         if (!arg_directory) {
1620                 log_error("Failed to determine path, please use -D.");
1621                 goto finish;
1622         }
1623
1624         path_kill_slashes(arg_directory);
1625
1626         if (!arg_machine) {
1627                 arg_machine = strdup(basename(arg_directory));
1628                 if (!arg_machine) {
1629                         log_oom();
1630                         goto finish;
1631                 }
1632
1633                 hostname_cleanup(arg_machine, false);
1634                 if (isempty(arg_machine)) {
1635                         log_error("Failed to determine machine name automatically, please use -M.");
1636                         goto finish;
1637                 }
1638         }
1639
1640         if (geteuid() != 0) {
1641                 log_error("Need to be root.");
1642                 goto finish;
1643         }
1644
1645         if (sd_booted() <= 0) {
1646                 log_error("Not running on a systemd system.");
1647                 goto finish;
1648         }
1649
1650         if (path_equal(arg_directory, "/")) {
1651                 log_error("Spawning container on root directory not supported.");
1652                 goto finish;
1653         }
1654
1655         if (arg_boot) {
1656                 if (path_is_os_tree(arg_directory) <= 0) {
1657                         log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1658                         goto finish;
1659                 }
1660         } else {
1661                 const char *p;
1662
1663                 p = strappenda(arg_directory,
1664                                argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
1665                 if (access(p, F_OK) < 0) {
1666                         log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
1667                         goto finish;
1668
1669                 }
1670         }
1671
1672         log_close();
1673         n_fd_passed = sd_listen_fds(false);
1674         if (n_fd_passed > 0) {
1675                 k = fdset_new_listen_fds(&fds, false);
1676                 if (k < 0) {
1677                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1678                         goto finish;
1679                 }
1680         }
1681         fdset_close_others(fds);
1682         log_open();
1683
1684         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1685         if (master < 0) {
1686                 log_error("Failed to acquire pseudo tty: %m");
1687                 goto finish;
1688         }
1689
1690         console = ptsname(master);
1691         if (!console) {
1692                 log_error("Failed to determine tty name: %m");
1693                 goto finish;
1694         }
1695
1696         if (!arg_quiet)
1697                 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1698
1699         if (unlockpt(master) < 0) {
1700                 log_error("Failed to unlock tty: %m");
1701                 goto finish;
1702         }
1703
1704         if (access("/dev/kdbus/control", F_OK) >= 0) {
1705
1706                 if (arg_share_system) {
1707                         kdbus_domain = strdup("/dev/kdbus");
1708                         if (!kdbus_domain) {
1709                                 log_oom();
1710                                 goto finish;
1711                         }
1712                 } else {
1713                         const char *ns;
1714
1715                         ns = strappenda("machine-", arg_machine);
1716                         kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
1717                         if (r < 0)
1718                                 log_debug("Failed to create kdbus domain: %s", strerror(-r));
1719                         else
1720                                 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
1721                 }
1722         }
1723
1724         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1725                 log_error("Failed to create kmsg socket pair: %m");
1726                 goto finish;
1727         }
1728
1729         sd_notify(0, "READY=1");
1730
1731         assert_se(sigemptyset(&mask) == 0);
1732         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1733         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1734
1735         for (;;) {
1736                 siginfo_t status;
1737
1738                 sync_fd = eventfd(0, EFD_CLOEXEC);
1739                 if (sync_fd < 0) {
1740                         log_error("Failed to create event fd: %m");
1741                         goto finish;
1742                 }
1743
1744                 pid = syscall(__NR_clone,
1745                               SIGCHLD|CLONE_NEWNS|
1746                               (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
1747                               (arg_private_network ? CLONE_NEWNET : 0), NULL);
1748                 if (pid < 0) {
1749                         if (errno == EINVAL)
1750                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1751                         else
1752                                 log_error("clone() failed: %m");
1753
1754                         goto finish;
1755                 }
1756
1757                 if (pid == 0) {
1758                         /* child */
1759                         const char *home = NULL;
1760                         uid_t uid = (uid_t) -1;
1761                         gid_t gid = (gid_t) -1;
1762                         unsigned n_env = 2;
1763                         const char *envp[] = {
1764                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
1765                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1766                                 NULL, /* TERM */
1767                                 NULL, /* HOME */
1768                                 NULL, /* USER */
1769                                 NULL, /* LOGNAME */
1770                                 NULL, /* container_uuid */
1771                                 NULL, /* LISTEN_FDS */
1772                                 NULL, /* LISTEN_PID */
1773                                 NULL
1774                         };
1775                         char **env_use;
1776                         eventfd_t x;
1777
1778                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1779                         if (envp[n_env])
1780                                 n_env ++;
1781
1782                         close_nointr_nofail(master);
1783                         master = -1;
1784
1785                         close_nointr(STDIN_FILENO);
1786                         close_nointr(STDOUT_FILENO);
1787                         close_nointr(STDERR_FILENO);
1788
1789                         close_nointr_nofail(kmsg_socket_pair[0]);
1790                         kmsg_socket_pair[0] = -1;
1791
1792                         reset_all_signal_handlers();
1793
1794                         assert_se(sigemptyset(&mask) == 0);
1795                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1796
1797                         k = open_terminal(console, O_RDWR);
1798                         if (k != STDIN_FILENO) {
1799                                 if (k >= 0) {
1800                                         close_nointr_nofail(k);
1801                                         k = -EINVAL;
1802                                 }
1803
1804                                 log_error("Failed to open console: %s", strerror(-k));
1805                                 goto child_fail;
1806                         }
1807
1808                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1809                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1810                                 log_error("Failed to duplicate console: %m");
1811                                 goto child_fail;
1812                         }
1813
1814                         if (setsid() < 0) {
1815                                 log_error("setsid() failed: %m");
1816                                 goto child_fail;
1817                         }
1818
1819                         if (reset_audit_loginuid() < 0)
1820                                 goto child_fail;
1821
1822                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1823                                 log_error("PR_SET_PDEATHSIG failed: %m");
1824                                 goto child_fail;
1825                         }
1826
1827                         /* Mark everything as slave, so that we still
1828                          * receive mounts from the real root, but don't
1829                          * propagate mounts to the real root. */
1830                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1831                                 log_error("MS_SLAVE|MS_REC failed: %m");
1832                                 goto child_fail;
1833                         }
1834
1835                         /* Turn directory into bind mount */
1836                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1837                                 log_error("Failed to make bind mount.");
1838                                 goto child_fail;
1839                         }
1840
1841                         if (arg_read_only)
1842                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1843                                         log_error("Failed to make read-only.");
1844                                         goto child_fail;
1845                                 }
1846
1847                         if (mount_all(arg_directory) < 0)
1848                                 goto child_fail;
1849
1850                         if (copy_devnodes(arg_directory) < 0)
1851                                 goto child_fail;
1852
1853                         if (setup_ptmx(arg_directory) < 0)
1854                                 goto child_fail;
1855
1856                         dev_setup(arg_directory);
1857
1858                         if (audit_still_doesnt_work_in_containers() < 0)
1859                                 goto child_fail;
1860
1861                         if (setup_dev_console(arg_directory, console) < 0)
1862                                 goto child_fail;
1863
1864                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1865                                 goto child_fail;
1866
1867                         close_nointr_nofail(kmsg_socket_pair[1]);
1868                         kmsg_socket_pair[1] = -1;
1869
1870                         if (setup_boot_id(arg_directory) < 0)
1871                                 goto child_fail;
1872
1873                         if (setup_timezone(arg_directory) < 0)
1874                                 goto child_fail;
1875
1876                         if (setup_resolv_conf(arg_directory) < 0)
1877                                 goto child_fail;
1878
1879                         if (setup_journal(arg_directory) < 0)
1880                                 goto child_fail;
1881
1882                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1883                                 goto child_fail;
1884
1885                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1886                                 goto child_fail;
1887
1888                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
1889                                 goto child_fail;
1890
1891                         if (chdir(arg_directory) < 0) {
1892                                 log_error("chdir(%s) failed: %m", arg_directory);
1893                                 goto child_fail;
1894                         }
1895
1896                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1897                                 log_error("mount(MS_MOVE) failed: %m");
1898                                 goto child_fail;
1899                         }
1900
1901                         if (chroot(".") < 0) {
1902                                 log_error("chroot() failed: %m");
1903                                 goto child_fail;
1904                         }
1905
1906                         if (chdir("/") < 0) {
1907                                 log_error("chdir() failed: %m");
1908                                 goto child_fail;
1909                         }
1910
1911                         umask(0022);
1912
1913                         if (arg_private_network)
1914                                 loopback_setup();
1915
1916                         if (drop_capabilities() < 0) {
1917                                 log_error("drop_capabilities() failed: %m");
1918                                 goto child_fail;
1919                         }
1920
1921                         if (arg_user) {
1922
1923                                 /* Note that this resolves user names
1924                                  * inside the container, and hence
1925                                  * accesses the NSS modules from the
1926                                  * container and not the host. This is
1927                                  * a bit weird... */
1928
1929                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1930                                         log_error("get_user_creds() failed: %m");
1931                                         goto child_fail;
1932                                 }
1933
1934                                 if (mkdir_parents_label(home, 0775) < 0) {
1935                                         log_error("mkdir_parents_label() failed: %m");
1936                                         goto child_fail;
1937                                 }
1938
1939                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1940                                         log_error("mkdir_safe_label() failed: %m");
1941                                         goto child_fail;
1942                                 }
1943
1944                                 if (initgroups((const char*)arg_user, gid) < 0) {
1945                                         log_error("initgroups() failed: %m");
1946                                         goto child_fail;
1947                                 }
1948
1949                                 if (setresgid(gid, gid, gid) < 0) {
1950                                         log_error("setregid() failed: %m");
1951                                         goto child_fail;
1952                                 }
1953
1954                                 if (setresuid(uid, uid, uid) < 0) {
1955                                         log_error("setreuid() failed: %m");
1956                                         goto child_fail;
1957                                 }
1958                         } else {
1959                                 /* Reset everything fully to 0, just in case */
1960
1961                                 if (setgroups(0, NULL) < 0) {
1962                                         log_error("setgroups() failed: %m");
1963                                         goto child_fail;
1964                                 }
1965
1966                                 if (setresgid(0, 0, 0) < 0) {
1967                                         log_error("setregid() failed: %m");
1968                                         goto child_fail;
1969                                 }
1970
1971                                 if (setresuid(0, 0, 0) < 0) {
1972                                         log_error("setreuid() failed: %m");
1973                                         goto child_fail;
1974                                 }
1975                         }
1976
1977                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1978                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1979                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1980                                 log_oom();
1981                                 goto child_fail;
1982                         }
1983
1984                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1985                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1986                                         log_oom();
1987                                         goto child_fail;
1988                                 }
1989                         }
1990
1991                         if (fdset_size(fds) > 0) {
1992                                 k = fdset_cloexec(fds, false);
1993                                 if (k < 0) {
1994                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1995                                         goto child_fail;
1996                                 }
1997
1998                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1999                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
2000                                         log_oom();
2001                                         goto child_fail;
2002                                 }
2003                         }
2004
2005                         setup_hostname();
2006
2007                         if (arg_personality != 0xffffffffLU) {
2008                                 if (personality(arg_personality) < 0) {
2009                                         log_error("personality() failed: %m");
2010                                         goto child_fail;
2011                                 }
2012                         }
2013
2014                         eventfd_read(sync_fd, &x);
2015                         close_nointr_nofail(sync_fd);
2016                         sync_fd = -1;
2017
2018                         if (!strv_isempty(arg_setenv)) {
2019                                 char **n;
2020
2021                                 n = strv_env_merge(2, envp, arg_setenv);
2022                                 if (!n) {
2023                                         log_oom();
2024                                         goto child_fail;
2025                                 }
2026
2027                                 env_use = n;
2028                         } else
2029                                 env_use = (char**) envp;
2030
2031 #ifdef HAVE_SELINUX
2032                         if (arg_selinux_context)
2033                                 if (setexeccon(arg_selinux_context) < 0)
2034                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2035 #endif
2036                         if (arg_boot) {
2037                                 char **a;
2038                                 size_t l;
2039
2040                                 /* Automatically search for the init system */
2041
2042                                 l = 1 + argc - optind;
2043                                 a = newa(char*, l + 1);
2044                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
2045
2046                                 a[0] = (char*) "/usr/lib/systemd/systemd";
2047                                 execve(a[0], a, env_use);
2048
2049                                 a[0] = (char*) "/lib/systemd/systemd";
2050                                 execve(a[0], a, env_use);
2051
2052                                 a[0] = (char*) "/sbin/init";
2053                                 execve(a[0], a, env_use);
2054                         } else if (argc > optind)
2055                                 execvpe(argv[optind], argv + optind, env_use);
2056                         else {
2057                                 chdir(home ? home : "/root");
2058                                 execle("/bin/bash", "-bash", NULL, env_use);
2059                                 execle("/bin/sh", "-sh", NULL, env_use);
2060                         }
2061
2062                         log_error("execv() failed: %m");
2063
2064                 child_fail:
2065                         _exit(EXIT_FAILURE);
2066                 }
2067
2068                 fdset_free(fds);
2069                 fds = NULL;
2070
2071                 r = register_machine(pid);
2072                 if (r < 0)
2073                         goto finish;
2074
2075                 r = move_network_interfaces(pid);
2076                 if (r < 0)
2077                         goto finish;
2078
2079                 r = setup_veth(pid, veth_name);
2080                 if (r < 0)
2081                         goto finish;
2082
2083                 r = setup_bridge(veth_name);
2084                 if (r < 0)
2085                         goto finish;
2086
2087                 eventfd_write(sync_fd, 1);
2088                 close_nointr_nofail(sync_fd);
2089                 sync_fd = -1;
2090
2091                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2092                 if (k < 0) {
2093                         r = EXIT_FAILURE;
2094                         break;
2095                 }
2096
2097                 if (!arg_quiet)
2098                         putc('\n', stdout);
2099
2100                 /* Kill if it is not dead yet anyway */
2101                 terminate_machine(pid);
2102
2103                 /* Redundant, but better safe than sorry */
2104                 kill(pid, SIGKILL);
2105
2106                 k = wait_for_terminate(pid, &status);
2107                 pid = 0;
2108
2109                 if (k < 0) {
2110                         r = EXIT_FAILURE;
2111                         break;
2112                 }
2113
2114                 if (status.si_code == CLD_EXITED) {
2115                         r = status.si_status;
2116                         if (status.si_status != 0) {
2117                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2118                                 break;
2119                         }
2120
2121                         if (!arg_quiet)
2122                                 log_debug("Container %s exited successfully.", arg_machine);
2123                         break;
2124                 } else if (status.si_code == CLD_KILLED &&
2125                            status.si_status == SIGINT) {
2126
2127                         if (!arg_quiet)
2128                                 log_info("Container %s has been shut down.", arg_machine);
2129                         r = 0;
2130                         break;
2131                 } else if (status.si_code == CLD_KILLED &&
2132                            status.si_status == SIGHUP) {
2133
2134                         if (!arg_quiet)
2135                                 log_info("Container %s is being rebooted.", arg_machine);
2136                         continue;
2137                 } else if (status.si_code == CLD_KILLED ||
2138                            status.si_code == CLD_DUMPED) {
2139
2140                         log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2141                         r = EXIT_FAILURE;
2142                         break;
2143                 } else {
2144                         log_error("Container %s failed due to unknown reason.", arg_machine);
2145                         r = EXIT_FAILURE;
2146                         break;
2147                 }
2148         }
2149
2150 finish:
2151         if (pid > 0)
2152                 kill(pid, SIGKILL);
2153
2154         free(arg_directory);
2155         free(arg_machine);
2156         free(arg_setenv);
2157         free(arg_network_interfaces);
2158
2159         return r;
2160 }