chiark / gitweb /
nspawn: add new --personality= switch to make it easier to run 32bit containers on...
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
44 #include <net/if.h>
45 #include <linux/veth.h>
46 #include <sys/personality.h>
47
48 #ifdef HAVE_SELINUX
49 #include <selinux/selinux.h>
50 #endif
51
52 #ifdef HAVE_SECCOMP
53 #include <seccomp.h>
54 #endif
55
56 #include "sd-daemon.h"
57 #include "sd-bus.h"
58 #include "sd-id128.h"
59 #include "sd-rtnl.h"
60 #include "log.h"
61 #include "util.h"
62 #include "mkdir.h"
63 #include "macro.h"
64 #include "audit.h"
65 #include "missing.h"
66 #include "cgroup-util.h"
67 #include "strv.h"
68 #include "path-util.h"
69 #include "loopback-setup.h"
70 #include "dev-setup.h"
71 #include "fdset.h"
72 #include "build.h"
73 #include "fileio.h"
74 #include "bus-util.h"
75 #include "bus-error.h"
76 #include "ptyfwd.h"
77 #include "bus-kernel.h"
78 #include "env-util.h"
79 #include "def.h"
80 #include "rtnl-util.h"
81 #include "udev-util.h"
82
83 #ifdef HAVE_SECCOMP
84 #include "seccomp-util.h"
85 #endif
86
/* Journal linking modes selectable with --link-journal= (see parse_argv());
 * the numeric values follow declaration order. */
typedef enum LinkJournal {
        LINK_NO    = 0,
        LINK_AUTO  = 1,
        LINK_HOST  = 2,
        LINK_GUEST = 3
} LinkJournal;
93
94 static char *arg_directory = NULL;
95 static char *arg_user = NULL;
96 static sd_id128_t arg_uuid = {};
97 static char *arg_machine = NULL;
98 static char *arg_selinux_context = NULL;
99 static char *arg_selinux_apifs_context = NULL;
100 static const char *arg_slice = NULL;
101 static bool arg_private_network = false;
102 static bool arg_read_only = false;
103 static bool arg_boot = false;
104 static LinkJournal arg_link_journal = LINK_AUTO;
105 static uint64_t arg_retain =
106         (1ULL << CAP_CHOWN) |
107         (1ULL << CAP_DAC_OVERRIDE) |
108         (1ULL << CAP_DAC_READ_SEARCH) |
109         (1ULL << CAP_FOWNER) |
110         (1ULL << CAP_FSETID) |
111         (1ULL << CAP_IPC_OWNER) |
112         (1ULL << CAP_KILL) |
113         (1ULL << CAP_LEASE) |
114         (1ULL << CAP_LINUX_IMMUTABLE) |
115         (1ULL << CAP_NET_BIND_SERVICE) |
116         (1ULL << CAP_NET_BROADCAST) |
117         (1ULL << CAP_NET_RAW) |
118         (1ULL << CAP_SETGID) |
119         (1ULL << CAP_SETFCAP) |
120         (1ULL << CAP_SETPCAP) |
121         (1ULL << CAP_SETUID) |
122         (1ULL << CAP_SYS_ADMIN) |
123         (1ULL << CAP_SYS_CHROOT) |
124         (1ULL << CAP_SYS_NICE) |
125         (1ULL << CAP_SYS_PTRACE) |
126         (1ULL << CAP_SYS_TTY_CONFIG) |
127         (1ULL << CAP_SYS_RESOURCE) |
128         (1ULL << CAP_SYS_BOOT) |
129         (1ULL << CAP_AUDIT_WRITE) |
130         (1ULL << CAP_AUDIT_CONTROL) |
131         (1ULL << CAP_MKNOD);
132 static char **arg_bind = NULL;
133 static char **arg_bind_ro = NULL;
134 static char **arg_setenv = NULL;
135 static bool arg_quiet = false;
136 static bool arg_share_system = false;
137 static bool arg_register = true;
138 static bool arg_keep_unit = false;
139 static char **arg_network_interfaces = NULL;
140 static bool arg_network_veth = false;
141 static char *arg_network_bridge = NULL;
142 static unsigned long arg_personality = 0xffffffffLU;
143
/* Prints the command line usage summary to stdout.
 *
 * Always returns 0, which parse_argv() passes straight back to its caller
 * when -h/--help is given. */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               "  -h --help                 Show this help\n"
               "     --version              Print version string\n"
               "  -q --quiet                Do not show status information\n"
               "  -D --directory=NAME       Root directory for the container\n"
               "  -b --boot                 Boot up full system (i.e. invoke init)\n"
               "  -u --user=USER            Run the command under specified user or uid\n"
               "  -M --machine=NAME         Set the machine name for the container\n"
               "     --uuid=UUID            Set a specific machine UUID for the container\n"
               /* --personality= is accepted by parse_argv() and therefore listed here. */
               "     --personality=ARCH     Pick personality for this container\n"
               "  -S --slice=SLICE          Place the container in the specified slice\n"
               "     --private-network      Disable network in container\n"
               "     --network-interface=INTERFACE\n"
               "                            Assign an existing network interface to the\n"
               "                            container\n"
               "     --network-veth         Add a virtual ethernet connection between host\n"
               "                            and container\n"
               "     --network-bridge=INTERFACE\n"
               "                            Add a virtual ethernet connection between host\n"
               "                            and container and add it to an existing bridge on\n"
               "                            the host\n"
               "  -Z --selinux-context=SECLABEL\n"
               "                            Set the SELinux security context to be used by\n"
               "                            processes in the container\n"
               "  -L --selinux-apifs-context=SECLABEL\n"
               "                            Set the SELinux security context to be used by\n"
               "                            API/tmpfs file systems in the container\n"
               "     --capability=CAP       In addition to the default, retain specified\n"
               "                            capability\n"
               "     --drop-capability=CAP  Drop the specified capability from the default set\n"
               "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
               /* parse_argv() maps -j to LINK_GUEST, so the text must say "guest". */
               "  -j                        Equivalent to --link-journal=guest\n"
               "     --read-only            Mount the root directory read-only\n"
               "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
               "                            the container\n"
               "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
               "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
               "     --share-system         Share system namespaces with host\n"
               "     --register=BOOLEAN     Register container as machine\n"
               "     --keep-unit            Do not register a scope for the machine, reuse\n"
               "                            the service unit nspawn is running in\n",
               program_invocation_short_name);

        return 0;
}
191
192 static int parse_argv(int argc, char *argv[]) {
193
194         enum {
195                 ARG_VERSION = 0x100,
196                 ARG_PRIVATE_NETWORK,
197                 ARG_UUID,
198                 ARG_READ_ONLY,
199                 ARG_CAPABILITY,
200                 ARG_DROP_CAPABILITY,
201                 ARG_LINK_JOURNAL,
202                 ARG_BIND,
203                 ARG_BIND_RO,
204                 ARG_SETENV,
205                 ARG_SHARE_SYSTEM,
206                 ARG_REGISTER,
207                 ARG_KEEP_UNIT,
208                 ARG_NETWORK_INTERFACE,
209                 ARG_NETWORK_VETH,
210                 ARG_NETWORK_BRIDGE,
211                 ARG_PERSONALITY,
212         };
213
214         static const struct option options[] = {
215                 { "help",                  no_argument,       NULL, 'h'                   },
216                 { "version",               no_argument,       NULL, ARG_VERSION           },
217                 { "directory",             required_argument, NULL, 'D'                   },
218                 { "user",                  required_argument, NULL, 'u'                   },
219                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
220                 { "boot",                  no_argument,       NULL, 'b'                   },
221                 { "uuid",                  required_argument, NULL, ARG_UUID              },
222                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
223                 { "capability",            required_argument, NULL, ARG_CAPABILITY        },
224                 { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
225                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
226                 { "bind",                  required_argument, NULL, ARG_BIND              },
227                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
228                 { "machine",               required_argument, NULL, 'M'                   },
229                 { "slice",                 required_argument, NULL, 'S'                   },
230                 { "setenv",                required_argument, NULL, ARG_SETENV            },
231                 { "selinux-context",       required_argument, NULL, 'Z'                   },
232                 { "selinux-apifs-context", required_argument, NULL, 'L'                   },
233                 { "quiet",                 no_argument,       NULL, 'q'                   },
234                 { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
235                 { "register",              required_argument, NULL, ARG_REGISTER          },
236                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
237                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
238                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
239                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
240                 { "personality",           required_argument, NULL, ARG_PERSONALITY       },
241                 {}
242         };
243
244         int c, r;
245         uint64_t plus = 0, minus = 0;
246
247         assert(argc >= 0);
248         assert(argv);
249
250         while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:q", options, NULL)) >= 0) {
251
252                 switch (c) {
253
254                 case 'h':
255                         return help();
256
257                 case ARG_VERSION:
258                         puts(PACKAGE_STRING);
259                         puts(SYSTEMD_FEATURES);
260                         return 0;
261
262                 case 'D':
263                         free(arg_directory);
264                         arg_directory = canonicalize_file_name(optarg);
265                         if (!arg_directory) {
266                                 log_error("Invalid root directory: %m");
267                                 return -ENOMEM;
268                         }
269
270                         break;
271
272                 case 'u':
273                         free(arg_user);
274                         arg_user = strdup(optarg);
275                         if (!arg_user)
276                                 return log_oom();
277
278                         break;
279
280                 case ARG_NETWORK_BRIDGE:
281                         arg_network_bridge = strdup(optarg);
282                         if (!arg_network_bridge)
283                                 return log_oom();
284
285                         /* fall through */
286
287                 case ARG_NETWORK_VETH:
288                         arg_network_veth = true;
289                         arg_private_network = true;
290                         break;
291
292                 case ARG_NETWORK_INTERFACE:
293                         if (strv_push(&arg_network_interfaces, optarg) < 0)
294                                 return log_oom();
295
296                         /* fall through */
297
298                 case ARG_PRIVATE_NETWORK:
299                         arg_private_network = true;
300                         break;
301
302                 case 'b':
303                         arg_boot = true;
304                         break;
305
306                 case ARG_UUID:
307                         r = sd_id128_from_string(optarg, &arg_uuid);
308                         if (r < 0) {
309                                 log_error("Invalid UUID: %s", optarg);
310                                 return r;
311                         }
312                         break;
313
314                 case 'S':
315                         arg_slice = strdup(optarg);
316                         if (!arg_slice)
317                                 return log_oom();
318
319                         break;
320
321                 case 'M':
322                         if (isempty(optarg)) {
323                                 free(arg_machine);
324                                 arg_machine = NULL;
325                         } else {
326
327                                 if (!hostname_is_valid(optarg)) {
328                                         log_error("Invalid machine name: %s", optarg);
329                                         return -EINVAL;
330                                 }
331
332                                 free(arg_machine);
333                                 arg_machine = strdup(optarg);
334                                 if (!arg_machine)
335                                         return log_oom();
336
337                                 break;
338                         }
339
340                 case 'Z':
341                         arg_selinux_context = optarg;
342                         break;
343
344                 case 'L':
345                         arg_selinux_apifs_context = optarg;
346                         break;
347
348                 case ARG_READ_ONLY:
349                         arg_read_only = true;
350                         break;
351
352                 case ARG_CAPABILITY:
353                 case ARG_DROP_CAPABILITY: {
354                         char *state, *word;
355                         size_t length;
356
357                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
358                                 _cleanup_free_ char *t;
359                                 cap_value_t cap;
360
361                                 t = strndup(word, length);
362                                 if (!t)
363                                         return log_oom();
364
365                                 if (streq(t, "all")) {
366                                         if (c == ARG_CAPABILITY)
367                                                 plus = (uint64_t) -1;
368                                         else
369                                                 minus = (uint64_t) -1;
370                                 } else {
371                                         if (cap_from_name(t, &cap) < 0) {
372                                                 log_error("Failed to parse capability %s.", t);
373                                                 return -EINVAL;
374                                         }
375
376                                         if (c == ARG_CAPABILITY)
377                                                 plus |= 1ULL << (uint64_t) cap;
378                                         else
379                                                 minus |= 1ULL << (uint64_t) cap;
380                                 }
381                         }
382
383                         break;
384                 }
385
386                 case 'j':
387                         arg_link_journal = LINK_GUEST;
388                         break;
389
390                 case ARG_LINK_JOURNAL:
391                         if (streq(optarg, "auto"))
392                                 arg_link_journal = LINK_AUTO;
393                         else if (streq(optarg, "no"))
394                                 arg_link_journal = LINK_NO;
395                         else if (streq(optarg, "guest"))
396                                 arg_link_journal = LINK_GUEST;
397                         else if (streq(optarg, "host"))
398                                 arg_link_journal = LINK_HOST;
399                         else {
400                                 log_error("Failed to parse link journal mode %s", optarg);
401                                 return -EINVAL;
402                         }
403
404                         break;
405
406                 case ARG_BIND:
407                 case ARG_BIND_RO: {
408                         _cleanup_free_ char *a = NULL, *b = NULL;
409                         char *e;
410                         char ***x;
411
412                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
413
414                         e = strchr(optarg, ':');
415                         if (e) {
416                                 a = strndup(optarg, e - optarg);
417                                 b = strdup(e + 1);
418                         } else {
419                                 a = strdup(optarg);
420                                 b = strdup(optarg);
421                         }
422
423                         if (!a || !b)
424                                 return log_oom();
425
426                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
427                                 log_error("Invalid bind mount specification: %s", optarg);
428                                 return -EINVAL;
429                         }
430
431                         r = strv_extend(x, a);
432                         if (r < 0)
433                                 return log_oom();
434
435                         r = strv_extend(x, b);
436                         if (r < 0)
437                                 return log_oom();
438
439                         break;
440                 }
441
442                 case ARG_SETENV: {
443                         char **n;
444
445                         if (!env_assignment_is_valid(optarg)) {
446                                 log_error("Environment variable assignment '%s' is not valid.", optarg);
447                                 return -EINVAL;
448                         }
449
450                         n = strv_env_set(arg_setenv, optarg);
451                         if (!n)
452                                 return log_oom();
453
454                         strv_free(arg_setenv);
455                         arg_setenv = n;
456                         break;
457                 }
458
459                 case 'q':
460                         arg_quiet = true;
461                         break;
462
463                 case ARG_SHARE_SYSTEM:
464                         arg_share_system = true;
465                         break;
466
467                 case ARG_REGISTER:
468                         r = parse_boolean(optarg);
469                         if (r < 0) {
470                                 log_error("Failed to parse --register= argument: %s", optarg);
471                                 return r;
472                         }
473
474                         arg_register = r;
475                         break;
476
477                 case ARG_KEEP_UNIT:
478                         arg_keep_unit = true;
479                         break;
480
481                 case ARG_PERSONALITY:
482
483                         arg_personality = parse_personality(optarg);
484                         if (arg_personality == 0xffffffffLU) {
485                                 log_error("Unknown or unsupported personality '%s'.", optarg);
486                                 return -EINVAL;
487                         }
488
489                         break;
490
491                 case '?':
492                         return -EINVAL;
493
494                 default:
495                         assert_not_reached("Unhandled option");
496                 }
497         }
498
499         if (arg_share_system)
500                 arg_register = false;
501
502         if (arg_boot && arg_share_system) {
503                 log_error("--boot and --share-system may not be combined.");
504                 return -EINVAL;
505         }
506
507         if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
508                 log_error("--keep-unit may not be used when invoked from a user session.");
509                 return -EINVAL;
510         }
511
512         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
513
514         return 1;
515 }
516
517 static int mount_all(const char *dest) {
518
519         typedef struct MountPoint {
520                 const char *what;
521                 const char *where;
522                 const char *type;
523                 const char *options;
524                 unsigned long flags;
525                 bool fatal;
526         } MountPoint;
527
528         static const MountPoint mount_table[] = {
529                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
530                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
531                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
532                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
533                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
534                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
535                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
536                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
537 #ifdef HAVE_SELINUX
538                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
539                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
540 #endif
541         };
542
543         unsigned k;
544         int r = 0;
545
546         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
547                 _cleanup_free_ char *where = NULL;
548 #ifdef HAVE_SELINUX
549                 _cleanup_free_ char *options = NULL;
550 #endif
551                 const char *o;
552                 int t;
553
554                 where = strjoin(dest, "/", mount_table[k].where, NULL);
555                 if (!where)
556                         return log_oom();
557
558                 t = path_is_mount_point(where, true);
559                 if (t < 0) {
560                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
561
562                         if (r == 0)
563                                 r = t;
564
565                         continue;
566                 }
567
568                 /* Skip this entry if it is not a remount. */
569                 if (mount_table[k].what && t > 0)
570                         continue;
571
572                 mkdir_p(where, 0755);
573
574 #ifdef HAVE_SELINUX
575                 if (arg_selinux_apifs_context &&
576                     (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
577                         options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
578                         if (!options)
579                                 return log_oom();
580
581                         o = options;
582                 } else
583 #endif
584                         o = mount_table[k].options;
585
586
587                 if (mount(mount_table[k].what,
588                           where,
589                           mount_table[k].type,
590                           mount_table[k].flags,
591                           o) < 0 &&
592                     mount_table[k].fatal) {
593
594                         log_error("mount(%s) failed: %m", where);
595
596                         if (r == 0)
597                                 r = -errno;
598                 }
599         }
600
601         return r;
602 }
603
604 static int mount_binds(const char *dest, char **l, unsigned long flags) {
605         char **x, **y;
606
607         STRV_FOREACH_PAIR(x, y, l) {
608                 char *where;
609                 struct stat source_st, dest_st;
610                 int r;
611
612                 if (stat(*x, &source_st) < 0) {
613                         log_error("failed to stat %s: %m", *x);
614                         return -errno;
615                 }
616
617                 where = strappenda(dest, *y);
618                 r = stat(where, &dest_st);
619                 if (r == 0) {
620                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
621                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
622                                                 *x, where);
623                                 return -EINVAL;
624                         }
625                 } else if (errno == ENOENT) {
626                         r = mkdir_parents_label(where, 0755);
627                         if (r < 0) {
628                                 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
629                                 return r;
630                         }
631                 } else {
632                         log_error("Failed to bind mount %s: %s", *x, strerror(errno));
633                         return -errno;
634                 }
635                 /* Create the mount point, but be conservative -- refuse to create block
636                 * and char devices. */
637                 if (S_ISDIR(source_st.st_mode))
638                         mkdir_label(where, 0755);
639                 else if (S_ISFIFO(source_st.st_mode))
640                         mkfifo(where, 0644);
641                 else if (S_ISSOCK(source_st.st_mode))
642                         mknod(where, 0644 | S_IFSOCK, 0);
643                 else if (S_ISREG(source_st.st_mode))
644                         touch(where);
645                 else {
646                         log_error("Refusing to create mountpoint for file: %s", *x);
647                         return -ENOTSUP;
648                 }
649
650                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
651                         log_error("mount(%s) failed: %m", where);
652                         return -errno;
653                 }
654
655                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
656                         log_error("mount(%s) failed: %m", where);
657                         return -errno;
658                 }
659         }
660
661         return 0;
662 }
663
664 static int setup_timezone(const char *dest) {
665         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
666         char *z, *y;
667         int r;
668
669         assert(dest);
670
671         /* Fix the timezone, if possible */
672         r = readlink_malloc("/etc/localtime", &p);
673         if (r < 0) {
674                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
675                 return 0;
676         }
677
678         z = path_startswith(p, "../usr/share/zoneinfo/");
679         if (!z)
680                 z = path_startswith(p, "/usr/share/zoneinfo/");
681         if (!z) {
682                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
683                 return 0;
684         }
685
686         where = strappend(dest, "/etc/localtime");
687         if (!where)
688                 return log_oom();
689
690         r = readlink_malloc(where, &q);
691         if (r >= 0) {
692                 y = path_startswith(q, "../usr/share/zoneinfo/");
693                 if (!y)
694                         y = path_startswith(q, "/usr/share/zoneinfo/");
695
696
697                 /* Already pointing to the right place? Then do nothing .. */
698                 if (y && streq(y, z))
699                         return 0;
700         }
701
702         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
703         if (!check)
704                 return log_oom();
705
706         if (access(check, F_OK) < 0) {
707                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
708                 return 0;
709         }
710
711         what = strappend("../usr/share/zoneinfo/", z);
712         if (!what)
713                 return log_oom();
714
715         unlink(where);
716         if (symlink(what, where) < 0) {
717                 log_error("Failed to correct timezone of container: %m");
718                 return 0;
719         }
720
721         return 0;
722 }
723
724 static int setup_resolv_conf(const char *dest) {
725         char _cleanup_free_ *where = NULL;
726
727         assert(dest);
728
729         if (arg_private_network)
730                 return 0;
731
732         /* Fix resolv.conf, if possible */
733         where = strappend(dest, "/etc/resolv.conf");
734         if (!where)
735                 return log_oom();
736
737         /* We don't really care for the results of this really. If it
738          * fails, it fails, but meh... */
739         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
740
741         return 0;
742 }
743
744 static int setup_boot_id(const char *dest) {
745         _cleanup_free_ char *from = NULL, *to = NULL;
746         sd_id128_t rnd;
747         char as_uuid[37];
748         int r;
749
750         assert(dest);
751
752         if (arg_share_system)
753                 return 0;
754
755         /* Generate a new randomized boot ID, so that each boot-up of
756          * the container gets a new one */
757
758         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
759         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
760         if (!from || !to)
761                 return log_oom();
762
763         r = sd_id128_randomize(&rnd);
764         if (r < 0) {
765                 log_error("Failed to generate random boot id: %s", strerror(-r));
766                 return r;
767         }
768
769         snprintf(as_uuid, sizeof(as_uuid),
770                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
771                  SD_ID128_FORMAT_VAL(rnd));
772         char_array_0(as_uuid);
773
774         r = write_string_file(from, as_uuid);
775         if (r < 0) {
776                 log_error("Failed to write boot id: %s", strerror(-r));
777                 return r;
778         }
779
780         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
781                 log_error("Failed to bind mount boot id: %m");
782                 r = -errno;
783         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
784                 log_warning("Failed to make boot id read-only: %m");
785
786         unlink(from);
787         return r;
788 }
789
790 static int copy_devnodes(const char *dest) {
791
792         static const char devnodes[] =
793                 "null\0"
794                 "zero\0"
795                 "full\0"
796                 "random\0"
797                 "urandom\0"
798                 "tty\0";
799
800         const char *d;
801         int r = 0;
802         _cleanup_umask_ mode_t u;
803
804         assert(dest);
805
806         u = umask(0000);
807
808         NULSTR_FOREACH(d, devnodes) {
809                 _cleanup_free_ char *from = NULL, *to = NULL;
810                 struct stat st;
811
812                 from = strappend("/dev/", d);
813                 to = strjoin(dest, "/dev/", d, NULL);
814                 if (!from || !to)
815                         return log_oom();
816
817                 if (stat(from, &st) < 0) {
818
819                         if (errno != ENOENT) {
820                                 log_error("Failed to stat %s: %m", from);
821                                 return -errno;
822                         }
823
824                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
825
826                         log_error("%s is not a char or block device, cannot copy", from);
827                         return -EIO;
828
829                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
830
831                         log_error("mknod(%s) failed: %m", dest);
832                         return  -errno;
833                 }
834         }
835
836         return r;
837 }
838
839 static int setup_ptmx(const char *dest) {
840         _cleanup_free_ char *p = NULL;
841
842         p = strappend(dest, "/dev/ptmx");
843         if (!p)
844                 return log_oom();
845
846         if (symlink("pts/ptmx", p) < 0) {
847                 log_error("Failed to create /dev/ptmx symlink: %m");
848                 return -errno;
849         }
850
851         return 0;
852 }
853
854 static int setup_dev_console(const char *dest, const char *console) {
855         struct stat st;
856         _cleanup_free_ char *to = NULL;
857         int r;
858         _cleanup_umask_ mode_t u;
859
860         assert(dest);
861         assert(console);
862
863         u = umask(0000);
864
865         if (stat(console, &st) < 0) {
866                 log_error("Failed to stat %s: %m", console);
867                 return -errno;
868
869         } else if (!S_ISCHR(st.st_mode)) {
870                 log_error("/dev/console is not a char device");
871                 return -EIO;
872         }
873
874         r = chmod_and_chown(console, 0600, 0, 0);
875         if (r < 0) {
876                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
877                 return r;
878         }
879
880         if (asprintf(&to, "%s/dev/console", dest) < 0)
881                 return log_oom();
882
883         /* We need to bind mount the right tty to /dev/console since
884          * ptys can only exist on pts file systems. To have something
885          * to bind mount things on we create a device node first, that
886          * has the right major/minor (note that the major minor
887          * doesn't actually matter here, since we mount it over
888          * anyway). */
889
890         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
891                 log_error("mknod() for /dev/console failed: %m");
892                 return -errno;
893         }
894
895         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
896                 log_error("Bind mount for /dev/console failed: %m");
897                 return -errno;
898         }
899
900         return 0;
901 }
902
903 static int setup_kmsg(const char *dest, int kmsg_socket) {
904         _cleanup_free_ char *from = NULL, *to = NULL;
905         int r, fd, k;
906         _cleanup_umask_ mode_t u;
907         union {
908                 struct cmsghdr cmsghdr;
909                 uint8_t buf[CMSG_SPACE(sizeof(int))];
910         } control = {};
911         struct msghdr mh = {
912                 .msg_control = &control,
913                 .msg_controllen = sizeof(control),
914         };
915         struct cmsghdr *cmsg;
916
917         assert(dest);
918         assert(kmsg_socket >= 0);
919
920         u = umask(0000);
921
922         /* We create the kmsg FIFO as /dev/kmsg, but immediately
923          * delete it after bind mounting it to /proc/kmsg. While FIFOs
924          * on the reading side behave very similar to /proc/kmsg,
925          * their writing side behaves differently from /dev/kmsg in
926          * that writing blocks when nothing is reading. In order to
927          * avoid any problems with containers deadlocking due to this
928          * we simply make /dev/kmsg unavailable to the container. */
929         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
930             asprintf(&to, "%s/proc/kmsg", dest) < 0)
931                 return log_oom();
932
933         if (mkfifo(from, 0600) < 0) {
934                 log_error("mkfifo() for /dev/kmsg failed: %m");
935                 return -errno;
936         }
937
938         r = chmod_and_chown(from, 0600, 0, 0);
939         if (r < 0) {
940                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
941                 return r;
942         }
943
944         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
945                 log_error("Bind mount for /proc/kmsg failed: %m");
946                 return -errno;
947         }
948
949         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
950         if (fd < 0) {
951                 log_error("Failed to open fifo: %m");
952                 return -errno;
953         }
954
955         cmsg = CMSG_FIRSTHDR(&mh);
956         cmsg->cmsg_level = SOL_SOCKET;
957         cmsg->cmsg_type = SCM_RIGHTS;
958         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
959         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
960
961         mh.msg_controllen = cmsg->cmsg_len;
962
963         /* Store away the fd in the socket, so that it stays open as
964          * long as we run the child */
965         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
966         close_nointr_nofail(fd);
967
968         if (k < 0) {
969                 log_error("Failed to send FIFO fd: %m");
970                 return -errno;
971         }
972
973         /* And now make the FIFO unavailable as /dev/kmsg... */
974         unlink(from);
975         return 0;
976 }
977
978 static int setup_hostname(void) {
979
980         if (arg_share_system)
981                 return 0;
982
983         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
984                 return -errno;
985
986         return 0;
987 }
988
989 static int setup_journal(const char *directory) {
990         sd_id128_t machine_id, this_id;
991         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
992         char *id;
993         int r;
994
995         p = strappend(directory, "/etc/machine-id");
996         if (!p)
997                 return log_oom();
998
999         r = read_one_line_file(p, &b);
1000         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
1001                 return 0;
1002         else if (r < 0) {
1003                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
1004                 return r;
1005         }
1006
1007         id = strstrip(b);
1008         if (isempty(id) && arg_link_journal == LINK_AUTO)
1009                 return 0;
1010
1011         /* Verify validity */
1012         r = sd_id128_from_string(id, &machine_id);
1013         if (r < 0) {
1014                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1015                 return r;
1016         }
1017
1018         r = sd_id128_get_machine(&this_id);
1019         if (r < 0) {
1020                 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1021                 return r;
1022         }
1023
1024         if (sd_id128_equal(machine_id, this_id)) {
1025                 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1026                          "Host and machine ids are equal (%s): refusing to link journals", id);
1027                 if (arg_link_journal == LINK_AUTO)
1028                         return 0;
1029                 return
1030                         -EEXIST;
1031         }
1032
1033         if (arg_link_journal == LINK_NO)
1034                 return 0;
1035
1036         free(p);
1037         p = strappend("/var/log/journal/", id);
1038         q = strjoin(directory, "/var/log/journal/", id, NULL);
1039         if (!p || !q)
1040                 return log_oom();
1041
1042         if (path_is_mount_point(p, false) > 0) {
1043                 if (arg_link_journal != LINK_AUTO) {
1044                         log_error("%s: already a mount point, refusing to use for journal", p);
1045                         return -EEXIST;
1046                 }
1047
1048                 return 0;
1049         }
1050
1051         if (path_is_mount_point(q, false) > 0) {
1052                 if (arg_link_journal != LINK_AUTO) {
1053                         log_error("%s: already a mount point, refusing to use for journal", q);
1054                         return -EEXIST;
1055                 }
1056
1057                 return 0;
1058         }
1059
1060         r = readlink_and_make_absolute(p, &d);
1061         if (r >= 0) {
1062                 if ((arg_link_journal == LINK_GUEST ||
1063                      arg_link_journal == LINK_AUTO) &&
1064                     path_equal(d, q)) {
1065
1066                         r = mkdir_p(q, 0755);
1067                         if (r < 0)
1068                                 log_warning("failed to create directory %s: %m", q);
1069                         return 0;
1070                 }
1071
1072                 if (unlink(p) < 0) {
1073                         log_error("Failed to remove symlink %s: %m", p);
1074                         return -errno;
1075                 }
1076         } else if (r == -EINVAL) {
1077
1078                 if (arg_link_journal == LINK_GUEST &&
1079                     rmdir(p) < 0) {
1080
1081                         if (errno == ENOTDIR) {
1082                                 log_error("%s already exists and is neither a symlink nor a directory", p);
1083                                 return r;
1084                         } else {
1085                                 log_error("Failed to remove %s: %m", p);
1086                                 return -errno;
1087                         }
1088                 }
1089         } else if (r != -ENOENT) {
1090                 log_error("readlink(%s) failed: %m", p);
1091                 return r;
1092         }
1093
1094         if (arg_link_journal == LINK_GUEST) {
1095
1096                 if (symlink(q, p) < 0) {
1097                         log_error("Failed to symlink %s to %s: %m", q, p);
1098                         return -errno;
1099                 }
1100
1101                 r = mkdir_p(q, 0755);
1102                 if (r < 0)
1103                         log_warning("failed to create directory %s: %m", q);
1104                 return 0;
1105         }
1106
1107         if (arg_link_journal == LINK_HOST) {
1108                 r = mkdir_p(p, 0755);
1109                 if (r < 0) {
1110                         log_error("Failed to create %s: %m", p);
1111                         return r;
1112                 }
1113
1114         } else if (access(p, F_OK) < 0)
1115                 return 0;
1116
1117         if (dir_is_empty(q) == 0) {
1118                 log_error("%s not empty.", q);
1119                 return -ENOTEMPTY;
1120         }
1121
1122         r = mkdir_p(q, 0755);
1123         if (r < 0) {
1124                 log_error("Failed to create %s: %m", q);
1125                 return r;
1126         }
1127
1128         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1129                 log_error("Failed to bind mount journal from host into guest: %m");
1130                 return -errno;
1131         }
1132
1133         return 0;
1134 }
1135
1136 static int setup_kdbus(const char *dest, const char *path) {
1137         const char *p;
1138
1139         if (!path)
1140                 return 0;
1141
1142         p = strappenda(dest, "/dev/kdbus");
1143         if (mkdir(p, 0755) < 0) {
1144                 log_error("Failed to create kdbus path: %m");
1145                 return  -errno;
1146         }
1147
1148         if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1149                 log_error("Failed to mount kdbus domain path: %m");
1150                 return -errno;
1151         }
1152
1153         return 0;
1154 }
1155
1156 static int drop_capabilities(void) {
1157         return capability_bounding_set_drop(~arg_retain, false);
1158 }
1159
1160 static int register_machine(pid_t pid) {
1161         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1162         _cleanup_bus_unref_ sd_bus *bus = NULL;
1163         int r;
1164
1165         if (!arg_register)
1166                 return 0;
1167
1168         r = sd_bus_default_system(&bus);
1169         if (r < 0) {
1170                 log_error("Failed to open system bus: %s", strerror(-r));
1171                 return r;
1172         }
1173
1174         if (arg_keep_unit) {
1175                 r = sd_bus_call_method(
1176                                 bus,
1177                                 "org.freedesktop.machine1",
1178                                 "/org/freedesktop/machine1",
1179                                 "org.freedesktop.machine1.Manager",
1180                                 "RegisterMachine",
1181                                 &error,
1182                                 NULL,
1183                                 "sayssus",
1184                                 arg_machine,
1185                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1186                                 "nspawn",
1187                                 "container",
1188                                 (uint32_t) pid,
1189                                 strempty(arg_directory));
1190         } else {
1191                 r = sd_bus_call_method(
1192                                 bus,
1193                                 "org.freedesktop.machine1",
1194                                 "/org/freedesktop/machine1",
1195                                 "org.freedesktop.machine1.Manager",
1196                                 "CreateMachine",
1197                                 &error,
1198                                 NULL,
1199                                 "sayssusa(sv)",
1200                                 arg_machine,
1201                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1202                                 "nspawn",
1203                                 "container",
1204                                 (uint32_t) pid,
1205                                 strempty(arg_directory),
1206                                 !isempty(arg_slice), "Slice", "s", arg_slice);
1207         }
1208
1209         if (r < 0) {
1210                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1211                 return r;
1212         }
1213
1214         return 0;
1215 }
1216
1217 static int terminate_machine(pid_t pid) {
1218         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1219         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1220         _cleanup_bus_unref_ sd_bus *bus = NULL;
1221         const char *path;
1222         int r;
1223
1224         if (!arg_register)
1225                 return 0;
1226
1227         r = sd_bus_default_system(&bus);
1228         if (r < 0) {
1229                 log_error("Failed to open system bus: %s", strerror(-r));
1230                 return r;
1231         }
1232
1233         r = sd_bus_call_method(
1234                         bus,
1235                         "org.freedesktop.machine1",
1236                         "/org/freedesktop/machine1",
1237                         "org.freedesktop.machine1.Manager",
1238                         "GetMachineByPID",
1239                         &error,
1240                         &reply,
1241                         "u",
1242                         (uint32_t) pid);
1243         if (r < 0) {
1244                 /* Note that the machine might already have been
1245                  * cleaned up automatically, hence don't consider it a
1246                  * failure if we cannot get the machine object. */
1247                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1248                 return 0;
1249         }
1250
1251         r = sd_bus_message_read(reply, "o", &path);
1252         if (r < 0)
1253                 return bus_log_parse_error(r);
1254
1255         r = sd_bus_call_method(
1256                         bus,
1257                         "org.freedesktop.machine1",
1258                         path,
1259                         "org.freedesktop.machine1.Machine",
1260                         "Terminate",
1261                         &error,
1262                         NULL,
1263                         NULL);
1264         if (r < 0) {
1265                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1266                 return 0;
1267         }
1268
1269         return 0;
1270 }
1271
1272 static int reset_audit_loginuid(void) {
1273         _cleanup_free_ char *p = NULL;
1274         int r;
1275
1276         if (arg_share_system)
1277                 return 0;
1278
1279         r = read_one_line_file("/proc/self/loginuid", &p);
1280         if (r == -EEXIST)
1281                 return 0;
1282         if (r < 0) {
1283                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1284                 return r;
1285         }
1286
1287         /* Already reset? */
1288         if (streq(p, "4294967295"))
1289                 return 0;
1290
1291         r = write_string_file("/proc/self/loginuid", "4294967295");
1292         if (r < 0) {
1293                 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1294                           "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1295                           "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1296                           "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1297                           "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1298
1299                 sleep(5);
1300         }
1301
1302         return 0;
1303 }
1304
1305 static int setup_veth(pid_t pid, char iface_name[]) {
1306         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1307         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1308         int r;
1309
1310         if (!arg_private_network)
1311                 return 0;
1312
1313         if (!arg_network_veth)
1314                 return 0;
1315
1316         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1317
1318         r = sd_rtnl_open(0, &rtnl);
1319         if (r < 0) {
1320                 log_error("Failed to connect to netlink: %s", strerror(-r));
1321                 return r;
1322         }
1323
1324         r = sd_rtnl_message_new_link(rtnl, RTM_NEWLINK, 0, &m);
1325         if (r < 0) {
1326                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1327                 return r;
1328         }
1329
1330         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1331         if (r < 0) {
1332                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1333                 return r;
1334         }
1335
1336         r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1337         if (r < 0) {
1338                 log_error("Failed to open netlink container: %s", strerror(-r));
1339                 return r;
1340         }
1341
1342         r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1343         if (r < 0) {
1344                 log_error("Failed to append netlink kind: %s", strerror(-r));
1345                 return r;
1346         }
1347
1348         r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1349         if (r < 0) {
1350                 log_error("Failed to open netlink container: %s", strerror(-r));
1351                 return r;
1352         }
1353
1354         r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1355         if (r < 0) {
1356                 log_error("Failed to open netlink container: %s", strerror(-r));
1357                 return r;
1358         }
1359
1360         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1361         if (r < 0) {
1362                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1363                 return r;
1364         }
1365
1366         r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1367         if (r < 0) {
1368                 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1369                 return r;
1370         }
1371
1372         r = sd_rtnl_message_close_container(m);
1373         if (r < 0) {
1374                 log_error("Failed to close netlink container: %s", strerror(-r));
1375                 return r;
1376         }
1377
1378         r = sd_rtnl_message_close_container(m);
1379         if (r < 0) {
1380                 log_error("Failed to close netlink container: %s", strerror(-r));
1381                 return r;
1382         }
1383
1384         r = sd_rtnl_message_close_container(m);
1385         if (r < 0) {
1386                 log_error("Failed to close netlink container: %s", strerror(-r));
1387                 return r;
1388         }
1389
1390         r = sd_rtnl_call(rtnl, m, 0, NULL);
1391         if (r < 0) {
1392                 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1393                 return r;
1394         }
1395
1396         return 0;
1397 }
1398
1399 static int setup_bridge(const char veth_name[]) {
1400         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1401         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1402         int r, bridge;
1403
1404         if (!arg_private_network)
1405                 return 0;
1406
1407         if (!arg_network_veth)
1408                 return 0;
1409
1410         if (!arg_network_bridge)
1411                 return 0;
1412
1413         bridge = (int) if_nametoindex(arg_network_bridge);
1414         if (bridge <= 0) {
1415                 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1416                 return -errno;
1417         }
1418
1419         r = sd_rtnl_open(0, &rtnl);
1420         if (r < 0) {
1421                 log_error("Failed to connect to netlink: %s", strerror(-r));
1422                 return r;
1423         }
1424
1425         r = sd_rtnl_message_new_link(rtnl, RTM_SETLINK, 0, &m);
1426         if (r < 0) {
1427                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1428                 return r;
1429         }
1430
1431         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1432         if (r < 0) {
1433                 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1434                 return r;
1435         }
1436
1437         r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1438         if (r < 0) {
1439                 log_error("Failed to add netlink master field: %s", strerror(-r));
1440                 return r;
1441         }
1442
1443         r = sd_rtnl_call(rtnl, m, 0, NULL);
1444         if (r < 0) {
1445                 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
1446                 return r;
1447         }
1448
1449         return 0;
1450 }
1451
1452 static int move_network_interfaces(pid_t pid) {
1453         _cleanup_udev_unref_ struct udev *udev = NULL;
1454         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1455         char **i;
1456         int r;
1457
1458         if (!arg_private_network)
1459                 return 0;
1460
1461         if (strv_isempty(arg_network_interfaces))
1462                 return 0;
1463
1464         r = sd_rtnl_open(0, &rtnl);
1465         if (r < 0) {
1466                 log_error("Failed to connect to netlink: %s", strerror(-r));
1467                 return r;
1468         }
1469
1470         udev = udev_new();
1471         if (!udev) {
1472                 log_error("Failed to connect to udev.");
1473                 return -ENOMEM;
1474         }
1475
1476         STRV_FOREACH(i, arg_network_interfaces) {
1477                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1478                 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1479                 char ifi_str[2 + DECIMAL_STR_MAX(int)];
1480                 int ifi;
1481
1482                 ifi = (int) if_nametoindex(*i);
1483                 if (ifi <= 0) {
1484                         log_error("Failed to resolve interface %s: %m", *i);
1485                         return -errno;
1486                 }
1487
1488                 sprintf(ifi_str, "n%i", ifi);
1489                 d = udev_device_new_from_device_id(udev, ifi_str);
1490                 if (!d) {
1491                         log_error("Failed to get udev device for interface %s: %m", *i);
1492                         return -errno;
1493                 }
1494
1495                 if (udev_device_get_is_initialized(d) <= 0) {
1496                         log_error("Network interface %s is not initialized yet.", *i);
1497                         return -EBUSY;
1498                 }
1499
1500                 r = sd_rtnl_message_new_link(rtnl, RTM_NEWLINK, ifi, &m);
1501                 if (r < 0) {
1502                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1503                         return r;
1504                 }
1505
1506                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1507                 if (r < 0) {
1508                         log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1509                         return r;
1510                 }
1511
1512                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1513                 if (r < 0) {
1514                         log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
1515                         return r;
1516                 }
1517         }
1518
1519         return 0;
1520 }
1521
/* Installs a seccomp filter that makes socket(AF_NETLINK, *,
 * NETLINK_AUDIT) fail with EAFNOSUPPORT; every other system call stays
 * allowed. Without HAVE_SECCOMP this is a no-op.
 *
 * Returns 0 on success (or when compiled without seccomp support), a
 * negative errno-style value otherwise. */
static int audit_still_doesnt_work_in_containers(void) {

#ifdef HAVE_SECCOMP
        scmp_filter_ctx ctx;
        int r;

        /*
           Audit does not work properly in containers: much of the
           userspace audit hookup fails when run inside of one. We do
           not care about that and simply disallow the creation of
           audit sockets.

           As a result socket(AF_NETLINK, *, NETLINK_AUDIT) fails with
           EAFNOSUPPORT, which audit userspace takes as a sign that
           audit is disabled in the kernel.
         */

        ctx = seccomp_init(SCMP_ACT_ALLOW);
        if (!ctx)
                return log_oom();

        r = seccomp_add_secondary_archs(ctx);
        if (r < 0 && r != -EEXIST) {
                log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
                goto release;
        }

        r = seccomp_rule_add(
                        ctx,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0) {
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
                goto release;
        }

        /* Loading the filter shall not switch on NO_NEW_PRIVS. */
        r = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0) {
                log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                goto release;
        }

        r = seccomp_load(ctx);
        if (r < 0)
                log_error("Failed to install seccomp audit filter: %s", strerror(-r));

release:
        seccomp_release(ctx);
        return r;
#else
        return 0;
#endif

}
1578
1579 int main(int argc, char *argv[]) {
1580
1581         _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1;
1582         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1583         _cleanup_free_ char *kdbus_domain = NULL;
1584         _cleanup_fdset_free_ FDSet *fds = NULL;
1585         const char *console = NULL;
1586         int r = EXIT_FAILURE, k;
1587         int n_fd_passed;
1588         pid_t pid = 0;
1589         sigset_t mask;
1590         char veth_name[IFNAMSIZ] = "ve-";
1591
1592         log_parse_environment();
1593         log_open();
1594
1595         k = parse_argv(argc, argv);
1596         if (k < 0)
1597                 goto finish;
1598         else if (k == 0) {
1599                 r = EXIT_SUCCESS;
1600                 goto finish;
1601         }
1602
1603         if (arg_directory) {
1604                 char *p;
1605
1606                 p = path_make_absolute_cwd(arg_directory);
1607                 free(arg_directory);
1608                 arg_directory = p;
1609         } else
1610                 arg_directory = get_current_dir_name();
1611
1612         if (!arg_directory) {
1613                 log_error("Failed to determine path, please use -D.");
1614                 goto finish;
1615         }
1616
1617         path_kill_slashes(arg_directory);
1618
1619         if (!arg_machine) {
1620                 arg_machine = strdup(basename(arg_directory));
1621                 if (!arg_machine) {
1622                         log_oom();
1623                         goto finish;
1624                 }
1625
1626                 hostname_cleanup(arg_machine, false);
1627                 if (isempty(arg_machine)) {
1628                         log_error("Failed to determine machine name automatically, please use -M.");
1629                         goto finish;
1630                 }
1631         }
1632
1633         if (geteuid() != 0) {
1634                 log_error("Need to be root.");
1635                 goto finish;
1636         }
1637
1638         if (sd_booted() <= 0) {
1639                 log_error("Not running on a systemd system.");
1640                 goto finish;
1641         }
1642
1643         if (path_equal(arg_directory, "/")) {
1644                 log_error("Spawning container on root directory not supported.");
1645                 goto finish;
1646         }
1647
1648         if (arg_boot) {
1649                 if (path_is_os_tree(arg_directory) <= 0) {
1650                         log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1651                         goto finish;
1652                 }
1653         } else {
1654                 const char *p;
1655
1656                 p = strappenda(arg_directory,
1657                                argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
1658                 if (access(p, F_OK) < 0) {
1659                         log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
1660                         goto finish;
1661
1662                 }
1663         }
1664
1665         log_close();
1666         n_fd_passed = sd_listen_fds(false);
1667         if (n_fd_passed > 0) {
1668                 k = fdset_new_listen_fds(&fds, false);
1669                 if (k < 0) {
1670                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1671                         goto finish;
1672                 }
1673         }
1674         fdset_close_others(fds);
1675         log_open();
1676
1677         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1678         if (master < 0) {
1679                 log_error("Failed to acquire pseudo tty: %m");
1680                 goto finish;
1681         }
1682
1683         console = ptsname(master);
1684         if (!console) {
1685                 log_error("Failed to determine tty name: %m");
1686                 goto finish;
1687         }
1688
1689         if (!arg_quiet)
1690                 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1691
1692         if (unlockpt(master) < 0) {
1693                 log_error("Failed to unlock tty: %m");
1694                 goto finish;
1695         }
1696
1697         if (access("/dev/kdbus/control", F_OK) >= 0) {
1698
1699                 if (arg_share_system) {
1700                         kdbus_domain = strdup("/dev/kdbus");
1701                         if (!kdbus_domain) {
1702                                 log_oom();
1703                                 goto finish;
1704                         }
1705                 } else {
1706                         const char *ns;
1707
1708                         ns = strappenda("machine-", arg_machine);
1709                         kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
1710                         if (r < 0)
1711                                 log_debug("Failed to create kdbus domain: %s", strerror(-r));
1712                         else
1713                                 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
1714                 }
1715         }
1716
1717         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1718                 log_error("Failed to create kmsg socket pair: %m");
1719                 goto finish;
1720         }
1721
1722         sd_notify(0, "READY=1");
1723
1724         assert_se(sigemptyset(&mask) == 0);
1725         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1726         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1727
1728         for (;;) {
1729                 siginfo_t status;
1730
1731                 sync_fd = eventfd(0, EFD_CLOEXEC);
1732                 if (sync_fd < 0) {
1733                         log_error("Failed to create event fd: %m");
1734                         goto finish;
1735                 }
1736
1737                 pid = syscall(__NR_clone,
1738                               SIGCHLD|CLONE_NEWNS|
1739                               (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
1740                               (arg_private_network ? CLONE_NEWNET : 0), NULL);
1741                 if (pid < 0) {
1742                         if (errno == EINVAL)
1743                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1744                         else
1745                                 log_error("clone() failed: %m");
1746
1747                         goto finish;
1748                 }
1749
1750                 if (pid == 0) {
1751                         /* child */
1752                         const char *home = NULL;
1753                         uid_t uid = (uid_t) -1;
1754                         gid_t gid = (gid_t) -1;
1755                         unsigned n_env = 2;
1756                         const char *envp[] = {
1757                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
1758                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1759                                 NULL, /* TERM */
1760                                 NULL, /* HOME */
1761                                 NULL, /* USER */
1762                                 NULL, /* LOGNAME */
1763                                 NULL, /* container_uuid */
1764                                 NULL, /* LISTEN_FDS */
1765                                 NULL, /* LISTEN_PID */
1766                                 NULL
1767                         };
1768                         char **env_use;
1769                         eventfd_t x;
1770
1771                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1772                         if (envp[n_env])
1773                                 n_env ++;
1774
1775                         close_nointr_nofail(master);
1776                         master = -1;
1777
1778                         close_nointr(STDIN_FILENO);
1779                         close_nointr(STDOUT_FILENO);
1780                         close_nointr(STDERR_FILENO);
1781
1782                         close_nointr_nofail(kmsg_socket_pair[0]);
1783                         kmsg_socket_pair[0] = -1;
1784
1785                         reset_all_signal_handlers();
1786
1787                         assert_se(sigemptyset(&mask) == 0);
1788                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1789
1790                         k = open_terminal(console, O_RDWR);
1791                         if (k != STDIN_FILENO) {
1792                                 if (k >= 0) {
1793                                         close_nointr_nofail(k);
1794                                         k = -EINVAL;
1795                                 }
1796
1797                                 log_error("Failed to open console: %s", strerror(-k));
1798                                 goto child_fail;
1799                         }
1800
1801                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1802                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1803                                 log_error("Failed to duplicate console: %m");
1804                                 goto child_fail;
1805                         }
1806
1807                         if (setsid() < 0) {
1808                                 log_error("setsid() failed: %m");
1809                                 goto child_fail;
1810                         }
1811
1812                         if (reset_audit_loginuid() < 0)
1813                                 goto child_fail;
1814
1815                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1816                                 log_error("PR_SET_PDEATHSIG failed: %m");
1817                                 goto child_fail;
1818                         }
1819
1820                         /* Mark everything as slave, so that we still
1821                          * receive mounts from the real root, but don't
1822                          * propagate mounts to the real root. */
1823                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1824                                 log_error("MS_SLAVE|MS_REC failed: %m");
1825                                 goto child_fail;
1826                         }
1827
1828                         /* Turn directory into bind mount */
1829                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1830                                 log_error("Failed to make bind mount.");
1831                                 goto child_fail;
1832                         }
1833
1834                         if (arg_read_only)
1835                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1836                                         log_error("Failed to make read-only.");
1837                                         goto child_fail;
1838                                 }
1839
1840                         if (mount_all(arg_directory) < 0)
1841                                 goto child_fail;
1842
1843                         if (copy_devnodes(arg_directory) < 0)
1844                                 goto child_fail;
1845
1846                         if (setup_ptmx(arg_directory) < 0)
1847                                 goto child_fail;
1848
1849                         dev_setup(arg_directory);
1850
1851                         if (audit_still_doesnt_work_in_containers() < 0)
1852                                 goto child_fail;
1853
1854                         if (setup_dev_console(arg_directory, console) < 0)
1855                                 goto child_fail;
1856
1857                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1858                                 goto child_fail;
1859
1860                         close_nointr_nofail(kmsg_socket_pair[1]);
1861                         kmsg_socket_pair[1] = -1;
1862
1863                         if (setup_boot_id(arg_directory) < 0)
1864                                 goto child_fail;
1865
1866                         if (setup_timezone(arg_directory) < 0)
1867                                 goto child_fail;
1868
1869                         if (setup_resolv_conf(arg_directory) < 0)
1870                                 goto child_fail;
1871
1872                         if (setup_journal(arg_directory) < 0)
1873                                 goto child_fail;
1874
1875                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1876                                 goto child_fail;
1877
1878                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1879                                 goto child_fail;
1880
1881                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
1882                                 goto child_fail;
1883
1884                         if (chdir(arg_directory) < 0) {
1885                                 log_error("chdir(%s) failed: %m", arg_directory);
1886                                 goto child_fail;
1887                         }
1888
1889                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1890                                 log_error("mount(MS_MOVE) failed: %m");
1891                                 goto child_fail;
1892                         }
1893
1894                         if (chroot(".") < 0) {
1895                                 log_error("chroot() failed: %m");
1896                                 goto child_fail;
1897                         }
1898
1899                         if (chdir("/") < 0) {
1900                                 log_error("chdir() failed: %m");
1901                                 goto child_fail;
1902                         }
1903
1904                         umask(0022);
1905
1906                         if (arg_private_network)
1907                                 loopback_setup();
1908
1909                         if (drop_capabilities() < 0) {
1910                                 log_error("drop_capabilities() failed: %m");
1911                                 goto child_fail;
1912                         }
1913
1914                         if (arg_user) {
1915
1916                                 /* Note that this resolves user names
1917                                  * inside the container, and hence
1918                                  * accesses the NSS modules from the
1919                                  * container and not the host. This is
1920                                  * a bit weird... */
1921
1922                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1923                                         log_error("get_user_creds() failed: %m");
1924                                         goto child_fail;
1925                                 }
1926
1927                                 if (mkdir_parents_label(home, 0775) < 0) {
1928                                         log_error("mkdir_parents_label() failed: %m");
1929                                         goto child_fail;
1930                                 }
1931
1932                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1933                                         log_error("mkdir_safe_label() failed: %m");
1934                                         goto child_fail;
1935                                 }
1936
1937                                 if (initgroups((const char*)arg_user, gid) < 0) {
1938                                         log_error("initgroups() failed: %m");
1939                                         goto child_fail;
1940                                 }
1941
1942                                 if (setresgid(gid, gid, gid) < 0) {
1943                                         log_error("setregid() failed: %m");
1944                                         goto child_fail;
1945                                 }
1946
1947                                 if (setresuid(uid, uid, uid) < 0) {
1948                                         log_error("setreuid() failed: %m");
1949                                         goto child_fail;
1950                                 }
1951                         } else {
1952                                 /* Reset everything fully to 0, just in case */
1953
1954                                 if (setgroups(0, NULL) < 0) {
1955                                         log_error("setgroups() failed: %m");
1956                                         goto child_fail;
1957                                 }
1958
1959                                 if (setresgid(0, 0, 0) < 0) {
1960                                         log_error("setregid() failed: %m");
1961                                         goto child_fail;
1962                                 }
1963
1964                                 if (setresuid(0, 0, 0) < 0) {
1965                                         log_error("setreuid() failed: %m");
1966                                         goto child_fail;
1967                                 }
1968                         }
1969
1970                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1971                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1972                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1973                                 log_oom();
1974                                 goto child_fail;
1975                         }
1976
1977                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1978                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1979                                         log_oom();
1980                                         goto child_fail;
1981                                 }
1982                         }
1983
1984                         if (fdset_size(fds) > 0) {
1985                                 k = fdset_cloexec(fds, false);
1986                                 if (k < 0) {
1987                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1988                                         goto child_fail;
1989                                 }
1990
1991                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1992                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1993                                         log_oom();
1994                                         goto child_fail;
1995                                 }
1996                         }
1997
1998                         setup_hostname();
1999
2000                         if (arg_personality != 0xffffffffLU) {
2001                                 if (personality(arg_personality) < 0) {
2002                                         log_error("personality() failed: %m");
2003                                         goto child_fail;
2004                                 }
2005                         }
2006
2007                         eventfd_read(sync_fd, &x);
2008                         close_nointr_nofail(sync_fd);
2009                         sync_fd = -1;
2010
2011                         if (!strv_isempty(arg_setenv)) {
2012                                 char **n;
2013
2014                                 n = strv_env_merge(2, envp, arg_setenv);
2015                                 if (!n) {
2016                                         log_oom();
2017                                         goto child_fail;
2018                                 }
2019
2020                                 env_use = n;
2021                         } else
2022                                 env_use = (char**) envp;
2023
2024 #ifdef HAVE_SELINUX
2025                         if (arg_selinux_context)
2026                                 if (setexeccon(arg_selinux_context) < 0)
2027                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2028 #endif
2029                         if (arg_boot) {
2030                                 char **a;
2031                                 size_t l;
2032
2033                                 /* Automatically search for the init system */
2034
2035                                 l = 1 + argc - optind;
2036                                 a = newa(char*, l + 1);
2037                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
2038
2039                                 a[0] = (char*) "/usr/lib/systemd/systemd";
2040                                 execve(a[0], a, env_use);
2041
2042                                 a[0] = (char*) "/lib/systemd/systemd";
2043                                 execve(a[0], a, env_use);
2044
2045                                 a[0] = (char*) "/sbin/init";
2046                                 execve(a[0], a, env_use);
2047                         } else if (argc > optind)
2048                                 execvpe(argv[optind], argv + optind, env_use);
2049                         else {
2050                                 chdir(home ? home : "/root");
2051                                 execle("/bin/bash", "-bash", NULL, env_use);
2052                                 execle("/bin/sh", "-sh", NULL, env_use);
2053                         }
2054
2055                         log_error("execv() failed: %m");
2056
2057                 child_fail:
2058                         _exit(EXIT_FAILURE);
2059                 }
2060
2061                 fdset_free(fds);
2062                 fds = NULL;
2063
2064                 r = register_machine(pid);
2065                 if (r < 0)
2066                         goto finish;
2067
2068                 r = move_network_interfaces(pid);
2069                 if (r < 0)
2070                         goto finish;
2071
2072                 r = setup_veth(pid, veth_name);
2073                 if (r < 0)
2074                         goto finish;
2075
2076                 r = setup_bridge(veth_name);
2077                 if (r < 0)
2078                         goto finish;
2079
2080                 eventfd_write(sync_fd, 1);
2081                 close_nointr_nofail(sync_fd);
2082                 sync_fd = -1;
2083
2084                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2085                 if (k < 0) {
2086                         r = EXIT_FAILURE;
2087                         break;
2088                 }
2089
2090                 if (!arg_quiet)
2091                         putc('\n', stdout);
2092
2093                 /* Kill if it is not dead yet anyway */
2094                 terminate_machine(pid);
2095
2096                 /* Redundant, but better safe than sorry */
2097                 kill(pid, SIGKILL);
2098
2099                 k = wait_for_terminate(pid, &status);
2100                 pid = 0;
2101
2102                 if (k < 0) {
2103                         r = EXIT_FAILURE;
2104                         break;
2105                 }
2106
2107                 if (status.si_code == CLD_EXITED) {
2108                         r = status.si_status;
2109                         if (status.si_status != 0) {
2110                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2111                                 break;
2112                         }
2113
2114                         if (!arg_quiet)
2115                                 log_debug("Container %s exited successfully.", arg_machine);
2116                         break;
2117                 } else if (status.si_code == CLD_KILLED &&
2118                            status.si_status == SIGINT) {
2119
2120                         if (!arg_quiet)
2121                                 log_info("Container %s has been shut down.", arg_machine);
2122                         r = 0;
2123                         break;
2124                 } else if (status.si_code == CLD_KILLED &&
2125                            status.si_status == SIGHUP) {
2126
2127                         if (!arg_quiet)
2128                                 log_info("Container %s is being rebooted.", arg_machine);
2129                         continue;
2130                 } else if (status.si_code == CLD_KILLED ||
2131                            status.si_code == CLD_DUMPED) {
2132
2133                         log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2134                         r = EXIT_FAILURE;
2135                         break;
2136                 } else {
2137                         log_error("Container %s failed due to unknown reason.", arg_machine);
2138                         r = EXIT_FAILURE;
2139                         break;
2140                 }
2141         }
2142
2143 finish:
2144         if (pid > 0)
2145                 kill(pid, SIGKILL);
2146
2147         free(arg_directory);
2148         free(arg_machine);
2149         free(arg_setenv);
2150         free(arg_network_interfaces);
2151
2152         return r;
2153 }