chiark / gitweb /
06d627c0a9ce1840c938af070385073a292c7080
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43
44 #include "sd-daemon.h"
45 #include "sd-bus.h"
46 #include "sd-id128.h"
47 #include "log.h"
48 #include "util.h"
49 #include "mkdir.h"
50 #include "macro.h"
51 #include "audit.h"
52 #include "missing.h"
53 #include "cgroup-util.h"
54 #include "strv.h"
55 #include "path-util.h"
56 #include "loopback-setup.h"
57 #include "dev-setup.h"
58 #include "fdset.h"
59 #include "build.h"
60 #include "fileio.h"
61 #include "bus-util.h"
62 #include "bus-error.h"
63 #include "ptyfwd.h"
64
/* Group id handed to the devpts mount as "gid=" (see mount_all()).
 * Defaults to 5 unless the build already defines it. */
#if !defined(TTY_GID)
#  define TTY_GID 5
#endif
68
/* Journal linking mode, selected with --link-journal= (or -j). */
typedef enum LinkJournal {
        LINK_NO    = 0,   /* --link-journal=no */
        LINK_AUTO  = 1,   /* --link-journal=auto; the initial value of arg_link_journal */
        LINK_HOST  = 2,   /* --link-journal=host */
        LINK_GUEST = 3    /* --link-journal=guest, also selected by -j */
} LinkJournal;
75
76 static char *arg_directory = NULL;
77 static char *arg_user = NULL;
78 static sd_id128_t arg_uuid = {};
79 static char *arg_machine = NULL;
80 static const char *arg_slice = NULL;
81 static bool arg_private_network = false;
82 static bool arg_read_only = false;
83 static bool arg_boot = false;
84 static LinkJournal arg_link_journal = LINK_AUTO;
85 static uint64_t arg_retain =
86         (1ULL << CAP_CHOWN) |
87         (1ULL << CAP_DAC_OVERRIDE) |
88         (1ULL << CAP_DAC_READ_SEARCH) |
89         (1ULL << CAP_FOWNER) |
90         (1ULL << CAP_FSETID) |
91         (1ULL << CAP_IPC_OWNER) |
92         (1ULL << CAP_KILL) |
93         (1ULL << CAP_LEASE) |
94         (1ULL << CAP_LINUX_IMMUTABLE) |
95         (1ULL << CAP_NET_BIND_SERVICE) |
96         (1ULL << CAP_NET_BROADCAST) |
97         (1ULL << CAP_NET_RAW) |
98         (1ULL << CAP_SETGID) |
99         (1ULL << CAP_SETFCAP) |
100         (1ULL << CAP_SETPCAP) |
101         (1ULL << CAP_SETUID) |
102         (1ULL << CAP_SYS_ADMIN) |
103         (1ULL << CAP_SYS_CHROOT) |
104         (1ULL << CAP_SYS_NICE) |
105         (1ULL << CAP_SYS_PTRACE) |
106         (1ULL << CAP_SYS_TTY_CONFIG) |
107         (1ULL << CAP_SYS_RESOURCE) |
108         (1ULL << CAP_SYS_BOOT) |
109         (1ULL << CAP_AUDIT_WRITE) |
110         (1ULL << CAP_AUDIT_CONTROL);
111 static char **arg_bind = NULL;
112 static char **arg_bind_ro = NULL;
113
/* Print the usage summary to stdout.
 *
 * Always returns 0. */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               "  -h --help                Show this help\n"
               "     --version             Print version string\n"
               "  -D --directory=NAME      Root directory for the container\n"
               "  -b --boot                Boot up full system (i.e. invoke init)\n"
               "  -u --user=USER           Run the command under specified user or uid\n"
               "     --uuid=UUID           Set a specific machine UUID for the container\n"
               "  -M --machine=NAME        Set the machine name for the container\n"
               "  -S --slice=SLICE         Place the container in the specified slice\n"
               "     --private-network     Disable network in container\n"
               "     --read-only           Mount the root directory read-only\n"
               "     --capability=CAP      In addition to the default, retain specified\n"
               "                           capability\n"
               "     --link-journal=MODE   Link up guest journal, one of no, auto, guest, host\n"
               /* -j selects LINK_GUEST in parse_argv(), so it must be documented as "guest" */
               "  -j                       Equivalent to --link-journal=guest\n"
               "     --bind=PATH[:PATH]    Bind mount a file or directory from the host into\n"
               "                           the container\n"
               "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n",
               program_invocation_short_name);

        return 0;
}
139
140 static int parse_argv(int argc, char *argv[]) {
141
142         enum {
143                 ARG_VERSION = 0x100,
144                 ARG_PRIVATE_NETWORK,
145                 ARG_UUID,
146                 ARG_READ_ONLY,
147                 ARG_CAPABILITY,
148                 ARG_LINK_JOURNAL,
149                 ARG_BIND,
150                 ARG_BIND_RO
151         };
152
153         static const struct option options[] = {
154                 { "help",            no_argument,       NULL, 'h'                 },
155                 { "version",         no_argument,       NULL, ARG_VERSION         },
156                 { "directory",       required_argument, NULL, 'D'                 },
157                 { "user",            required_argument, NULL, 'u'                 },
158                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
159                 { "boot",            no_argument,       NULL, 'b'                 },
160                 { "uuid",            required_argument, NULL, ARG_UUID            },
161                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
162                 { "capability",      required_argument, NULL, ARG_CAPABILITY      },
163                 { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
164                 { "bind",            required_argument, NULL, ARG_BIND            },
165                 { "bind-ro",         required_argument, NULL, ARG_BIND_RO         },
166                 { "machine",         required_argument, NULL, 'M'                 },
167                 { "slice",           required_argument, NULL, 'S'                 },
168                 {}
169         };
170
171         int c, r;
172
173         assert(argc >= 0);
174         assert(argv);
175
176         while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
177
178                 switch (c) {
179
180                 case 'h':
181                         return help();
182
183                 case ARG_VERSION:
184                         puts(PACKAGE_STRING);
185                         puts(SYSTEMD_FEATURES);
186                         return 0;
187
188                 case 'D':
189                         free(arg_directory);
190                         arg_directory = canonicalize_file_name(optarg);
191                         if (!arg_directory) {
192                                 log_error("Failed to canonicalize root directory.");
193                                 return -ENOMEM;
194                         }
195
196                         break;
197
198                 case 'u':
199                         free(arg_user);
200                         arg_user = strdup(optarg);
201                         if (!arg_user)
202                                 return log_oom();
203
204                         break;
205
206                 case ARG_PRIVATE_NETWORK:
207                         arg_private_network = true;
208                         break;
209
210                 case 'b':
211                         arg_boot = true;
212                         break;
213
214                 case ARG_UUID:
215                         r = sd_id128_from_string(optarg, &arg_uuid);
216                         if (r < 0) {
217                                 log_error("Invalid UUID: %s", optarg);
218                                 return r;
219                         }
220                         break;
221
222                 case 'S':
223                         arg_slice = strdup(optarg);
224                         if (!arg_slice)
225                                 return log_oom();
226
227                         break;
228
229                 case 'M':
230                         if (!hostname_is_valid(optarg)) {
231                                 log_error("Invalid machine name: %s", optarg);
232                                 return -EINVAL;
233                         }
234
235                         free(arg_machine);
236                         arg_machine = strdup(optarg);
237                         if (!arg_machine)
238                                 return log_oom();
239
240                         break;
241
242                 case ARG_READ_ONLY:
243                         arg_read_only = true;
244                         break;
245
246                 case ARG_CAPABILITY: {
247                         char *state, *word;
248                         size_t length;
249
250                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
251                                 cap_value_t cap;
252                                 char *t;
253
254                                 t = strndup(word, length);
255                                 if (!t)
256                                         return log_oom();
257
258                                 if (cap_from_name(t, &cap) < 0) {
259                                         log_error("Failed to parse capability %s.", t);
260                                         free(t);
261                                         return -EINVAL;
262                                 }
263
264                                 free(t);
265                                 arg_retain |= 1ULL << (uint64_t) cap;
266                         }
267
268                         break;
269                 }
270
271                 case 'j':
272                         arg_link_journal = LINK_GUEST;
273                         break;
274
275                 case ARG_LINK_JOURNAL:
276                         if (streq(optarg, "auto"))
277                                 arg_link_journal = LINK_AUTO;
278                         else if (streq(optarg, "no"))
279                                 arg_link_journal = LINK_NO;
280                         else if (streq(optarg, "guest"))
281                                 arg_link_journal = LINK_GUEST;
282                         else if (streq(optarg, "host"))
283                                 arg_link_journal = LINK_HOST;
284                         else {
285                                 log_error("Failed to parse link journal mode %s", optarg);
286                                 return -EINVAL;
287                         }
288
289                         break;
290
291                 case ARG_BIND:
292                 case ARG_BIND_RO: {
293                         _cleanup_free_ char *a = NULL, *b = NULL;
294                         char *e;
295                         char ***x;
296
297                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
298
299                         e = strchr(optarg, ':');
300                         if (e) {
301                                 a = strndup(optarg, e - optarg);
302                                 b = strdup(e + 1);
303                         } else {
304                                 a = strdup(optarg);
305                                 b = strdup(optarg);
306                         }
307
308                         if (!a || !b)
309                                 return log_oom();
310
311                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
312                                 log_error("Invalid bind mount specification: %s", optarg);
313                                 return -EINVAL;
314                         }
315
316                         r = strv_extend(x, a);
317                         if (r < 0)
318                                 return log_oom();
319
320                         r = strv_extend(x, b);
321                         if (r < 0)
322                                 return log_oom();
323
324                         break;
325                 }
326
327                 case '?':
328                         return -EINVAL;
329
330                 default:
331                         assert_not_reached("Unhandled option");
332                 }
333         }
334
335         return 1;
336 }
337
338 static int mount_all(const char *dest) {
339
340         typedef struct MountPoint {
341                 const char *what;
342                 const char *where;
343                 const char *type;
344                 const char *options;
345                 unsigned long flags;
346                 bool fatal;
347         } MountPoint;
348
349         static const MountPoint mount_table[] = {
350                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
351                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
352                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
353                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
354                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
355                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
356                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
357                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
358 #ifdef HAVE_SELINUX
359                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
360                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
361 #endif
362         };
363
364         unsigned k;
365         int r = 0;
366
367         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
368                 _cleanup_free_ char *where = NULL;
369                 int t;
370
371                 where = strjoin(dest, "/", mount_table[k].where, NULL);
372                 if (!where)
373                         return log_oom();
374
375                 t = path_is_mount_point(where, true);
376                 if (t < 0) {
377                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
378
379                         if (r == 0)
380                                 r = t;
381
382                         continue;
383                 }
384
385                 /* Skip this entry if it is not a remount. */
386                 if (mount_table[k].what && t > 0)
387                         continue;
388
389                 mkdir_p(where, 0755);
390
391                 if (mount(mount_table[k].what,
392                           where,
393                           mount_table[k].type,
394                           mount_table[k].flags,
395                           mount_table[k].options) < 0 &&
396                     mount_table[k].fatal) {
397
398                         log_error("mount(%s) failed: %m", where);
399
400                         if (r == 0)
401                                 r = -errno;
402                 }
403         }
404
405         return r;
406 }
407
408 static int mount_binds(const char *dest, char **l, unsigned long flags) {
409         char **x, **y;
410
411         STRV_FOREACH_PAIR(x, y, l) {
412                 _cleanup_free_ char *where = NULL;
413                 struct stat source_st, dest_st;
414
415                 if (stat(*x, &source_st) < 0) {
416                         log_error("failed to stat %s: %m", *x);
417                         return -errno;
418                 }
419
420                 where = strjoin(dest, "/", *y, NULL);
421                 if (!where)
422                         return log_oom();
423
424                 if (stat(where, &dest_st) == 0) {
425                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
426                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
427                                                 *x, where);
428                                 return -EINVAL;
429                         }
430                 } else {
431                         /* Create the mount point, but be conservative -- refuse to create block
432                          * and char devices. */
433                         if (S_ISDIR(source_st.st_mode))
434                                 mkdir_p_label(where, 0755);
435                         else if (S_ISFIFO(source_st.st_mode))
436                                 mkfifo(where, 0644);
437                         else if (S_ISSOCK(source_st.st_mode))
438                                 mknod(where, 0644 | S_IFSOCK, 0);
439                         else if (S_ISREG(source_st.st_mode))
440                                 touch(where);
441                         else {
442                                 log_error("Refusing to create mountpoint for file: %s", *x);
443                                 return -ENOTSUP;
444                         }
445                 }
446
447                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
448                         log_error("mount(%s) failed: %m", where);
449                         return -errno;
450                 }
451
452                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
453                         log_error("mount(%s) failed: %m", where);
454                         return -errno;
455                 }
456         }
457
458         return 0;
459 }
460
461 static int setup_timezone(const char *dest) {
462         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
463         char *z, *y;
464         int r;
465
466         assert(dest);
467
468         /* Fix the timezone, if possible */
469         r = readlink_malloc("/etc/localtime", &p);
470         if (r < 0) {
471                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
472                 return 0;
473         }
474
475         z = path_startswith(p, "../usr/share/zoneinfo/");
476         if (!z)
477                 z = path_startswith(p, "/usr/share/zoneinfo/");
478         if (!z) {
479                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
480                 return 0;
481         }
482
483         where = strappend(dest, "/etc/localtime");
484         if (!where)
485                 return log_oom();
486
487         r = readlink_malloc(where, &q);
488         if (r >= 0) {
489                 y = path_startswith(q, "../usr/share/zoneinfo/");
490                 if (!y)
491                         y = path_startswith(q, "/usr/share/zoneinfo/");
492
493
494                 /* Already pointing to the right place? Then do nothing .. */
495                 if (y && streq(y, z))
496                         return 0;
497         }
498
499         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
500         if (!check)
501                 return log_oom();
502
503         if (access(check, F_OK) < 0) {
504                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
505                 return 0;
506         }
507
508         what = strappend("../usr/share/zoneinfo/", z);
509         if (!what)
510                 return log_oom();
511
512         unlink(where);
513         if (symlink(what, where) < 0) {
514                 log_error("Failed to correct timezone of container: %m");
515                 return 0;
516         }
517
518         return 0;
519 }
520
521 static int setup_resolv_conf(const char *dest) {
522         char _cleanup_free_ *where = NULL;
523
524         assert(dest);
525
526         if (arg_private_network)
527                 return 0;
528
529         /* Fix resolv.conf, if possible */
530         where = strappend(dest, "/etc/resolv.conf");
531         if (!where)
532                 return log_oom();
533
534         /* We don't really care for the results of this really. If it
535          * fails, it fails, but meh... */
536         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
537
538         return 0;
539 }
540
541 static int setup_boot_id(const char *dest) {
542         _cleanup_free_ char *from = NULL, *to = NULL;
543         sd_id128_t rnd;
544         char as_uuid[37];
545         int r;
546
547         assert(dest);
548
549         /* Generate a new randomized boot ID, so that each boot-up of
550          * the container gets a new one */
551
552         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
553         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
554         if (!from || !to)
555                 return log_oom();
556
557         r = sd_id128_randomize(&rnd);
558         if (r < 0) {
559                 log_error("Failed to generate random boot id: %s", strerror(-r));
560                 return r;
561         }
562
563         snprintf(as_uuid, sizeof(as_uuid),
564                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
565                  SD_ID128_FORMAT_VAL(rnd));
566         char_array_0(as_uuid);
567
568         r = write_string_file(from, as_uuid);
569         if (r < 0) {
570                 log_error("Failed to write boot id: %s", strerror(-r));
571                 return r;
572         }
573
574         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
575                 log_error("Failed to bind mount boot id: %m");
576                 r = -errno;
577         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
578                 log_warning("Failed to make boot id read-only: %m");
579
580         unlink(from);
581         return r;
582 }
583
584 static int copy_devnodes(const char *dest) {
585
586         static const char devnodes[] =
587                 "null\0"
588                 "zero\0"
589                 "full\0"
590                 "random\0"
591                 "urandom\0"
592                 "tty\0";
593
594         const char *d;
595         int r = 0;
596         _cleanup_umask_ mode_t u;
597
598         assert(dest);
599
600         u = umask(0000);
601
602         NULSTR_FOREACH(d, devnodes) {
603                 struct stat st;
604                 _cleanup_free_ char *from = NULL, *to = NULL;
605
606                 asprintf(&from, "/dev/%s", d);
607                 asprintf(&to, "%s/dev/%s", dest, d);
608
609                 if (!from || !to) {
610                         log_oom();
611
612                         if (r == 0)
613                                 r = -ENOMEM;
614
615                         break;
616                 }
617
618                 if (stat(from, &st) < 0) {
619
620                         if (errno != ENOENT) {
621                                 log_error("Failed to stat %s: %m", from);
622                                 if (r == 0)
623                                         r = -errno;
624                         }
625
626                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
627
628                         log_error("%s is not a char or block device, cannot copy", from);
629                         if (r == 0)
630                                 r = -EIO;
631
632                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
633
634                         log_error("mknod(%s) failed: %m", dest);
635                         if (r == 0)
636                                 r = -errno;
637                 }
638         }
639
640         return r;
641 }
642
643 static int setup_ptmx(const char *dest) {
644         _cleanup_free_ char *p = NULL;
645
646         p = strappend(dest, "/dev/ptmx");
647         if (!p)
648                 return log_oom();
649
650         if (symlink("pts/ptmx", p) < 0) {
651                 log_error("Failed to create /dev/ptmx symlink: %m");
652                 return -errno;
653         }
654
655         return 0;
656 }
657
658 static int setup_dev_console(const char *dest, const char *console) {
659         struct stat st;
660         _cleanup_free_ char *to = NULL;
661         int r;
662         _cleanup_umask_ mode_t u;
663
664         assert(dest);
665         assert(console);
666
667         u = umask(0000);
668
669         if (stat(console, &st) < 0) {
670                 log_error("Failed to stat %s: %m", console);
671                 return -errno;
672
673         } else if (!S_ISCHR(st.st_mode)) {
674                 log_error("/dev/console is not a char device");
675                 return -EIO;
676         }
677
678         r = chmod_and_chown(console, 0600, 0, 0);
679         if (r < 0) {
680                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
681                 return r;
682         }
683
684         if (asprintf(&to, "%s/dev/console", dest) < 0)
685                 return log_oom();
686
687         /* We need to bind mount the right tty to /dev/console since
688          * ptys can only exist on pts file systems. To have something
689          * to bind mount things on we create a device node first, that
690          * has the right major/minor (note that the major minor
691          * doesn't actually matter here, since we mount it over
692          * anyway). */
693
694         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
695                 log_error("mknod() for /dev/console failed: %m");
696                 return -errno;
697         }
698
699         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
700                 log_error("Bind mount for /dev/console failed: %m");
701                 return -errno;
702         }
703
704         return 0;
705 }
706
707 static int setup_kmsg(const char *dest, int kmsg_socket) {
708         _cleanup_free_ char *from = NULL, *to = NULL;
709         int r, fd, k;
710         _cleanup_umask_ mode_t u;
711         union {
712                 struct cmsghdr cmsghdr;
713                 uint8_t buf[CMSG_SPACE(sizeof(int))];
714         } control = {};
715         struct msghdr mh = {
716                 .msg_control = &control,
717                 .msg_controllen = sizeof(control),
718         };
719         struct cmsghdr *cmsg;
720
721         assert(dest);
722         assert(kmsg_socket >= 0);
723
724         u = umask(0000);
725
726         /* We create the kmsg FIFO as /dev/kmsg, but immediately
727          * delete it after bind mounting it to /proc/kmsg. While FIFOs
728          * on the reading side behave very similar to /proc/kmsg,
729          * their writing side behaves differently from /dev/kmsg in
730          * that writing blocks when nothing is reading. In order to
731          * avoid any problems with containers deadlocking due to this
732          * we simply make /dev/kmsg unavailable to the container. */
733         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
734             asprintf(&to, "%s/proc/kmsg", dest) < 0)
735                 return log_oom();
736
737         if (mkfifo(from, 0600) < 0) {
738                 log_error("mkfifo() for /dev/kmsg failed: %m");
739                 return -errno;
740         }
741
742         r = chmod_and_chown(from, 0600, 0, 0);
743         if (r < 0) {
744                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
745                 return r;
746         }
747
748         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
749                 log_error("Bind mount for /proc/kmsg failed: %m");
750                 return -errno;
751         }
752
753         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
754         if (fd < 0) {
755                 log_error("Failed to open fifo: %m");
756                 return -errno;
757         }
758
759         cmsg = CMSG_FIRSTHDR(&mh);
760         cmsg->cmsg_level = SOL_SOCKET;
761         cmsg->cmsg_type = SCM_RIGHTS;
762         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
763         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
764
765         mh.msg_controllen = cmsg->cmsg_len;
766
767         /* Store away the fd in the socket, so that it stays open as
768          * long as we run the child */
769         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
770         close_nointr_nofail(fd);
771
772         if (k < 0) {
773                 log_error("Failed to send FIFO fd: %m");
774                 return -errno;
775         }
776
777         /* And now make the FIFO unavailable as /dev/kmsg... */
778         unlink(from);
779         return 0;
780 }
781
782 static int setup_hostname(void) {
783
784         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
785                 return -errno;
786
787         return 0;
788 }
789
790 static int setup_journal(const char *directory) {
791         sd_id128_t machine_id;
792         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
793         char *id;
794         int r;
795
796         if (arg_link_journal == LINK_NO)
797                 return 0;
798
799         p = strappend(directory, "/etc/machine-id");
800         if (!p)
801                 return log_oom();
802
803         r = read_one_line_file(p, &b);
804         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
805                 return 0;
806         else if (r < 0) {
807                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
808                 return r;
809         }
810
811         id = strstrip(b);
812         if (isempty(id) && arg_link_journal == LINK_AUTO)
813                 return 0;
814
815         /* Verify validity */
816         r = sd_id128_from_string(id, &machine_id);
817         if (r < 0) {
818                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
819                 return r;
820         }
821
822         free(p);
823         p = strappend("/var/log/journal/", id);
824         q = strjoin(directory, "/var/log/journal/", id, NULL);
825         if (!p || !q)
826                 return log_oom();
827
828         if (path_is_mount_point(p, false) > 0) {
829                 if (arg_link_journal != LINK_AUTO) {
830                         log_error("%s: already a mount point, refusing to use for journal", p);
831                         return -EEXIST;
832                 }
833
834                 return 0;
835         }
836
837         if (path_is_mount_point(q, false) > 0) {
838                 if (arg_link_journal != LINK_AUTO) {
839                         log_error("%s: already a mount point, refusing to use for journal", q);
840                         return -EEXIST;
841                 }
842
843                 return 0;
844         }
845
846         r = readlink_and_make_absolute(p, &d);
847         if (r >= 0) {
848                 if ((arg_link_journal == LINK_GUEST ||
849                      arg_link_journal == LINK_AUTO) &&
850                     path_equal(d, q)) {
851
852                         r = mkdir_p(q, 0755);
853                         if (r < 0)
854                                 log_warning("failed to create directory %s: %m", q);
855                         return 0;
856                 }
857
858                 if (unlink(p) < 0) {
859                         log_error("Failed to remove symlink %s: %m", p);
860                         return -errno;
861                 }
862         } else if (r == -EINVAL) {
863
864                 if (arg_link_journal == LINK_GUEST &&
865                     rmdir(p) < 0) {
866
867                         if (errno == ENOTDIR) {
868                                 log_error("%s already exists and is neither a symlink nor a directory", p);
869                                 return r;
870                         } else {
871                                 log_error("Failed to remove %s: %m", p);
872                                 return -errno;
873                         }
874                 }
875         } else if (r != -ENOENT) {
876                 log_error("readlink(%s) failed: %m", p);
877                 return r;
878         }
879
880         if (arg_link_journal == LINK_GUEST) {
881
882                 if (symlink(q, p) < 0) {
883                         log_error("Failed to symlink %s to %s: %m", q, p);
884                         return -errno;
885                 }
886
887                 r = mkdir_p(q, 0755);
888                 if (r < 0)
889                         log_warning("failed to create directory %s: %m", q);
890                 return 0;
891         }
892
893         if (arg_link_journal == LINK_HOST) {
894                 r = mkdir_p(p, 0755);
895                 if (r < 0) {
896                         log_error("Failed to create %s: %m", p);
897                         return r;
898                 }
899
900         } else if (access(p, F_OK) < 0)
901                 return 0;
902
903         if (dir_is_empty(q) == 0) {
904                 log_error("%s not empty.", q);
905                 return -ENOTEMPTY;
906         }
907
908         r = mkdir_p(q, 0755);
909         if (r < 0) {
910                 log_error("Failed to create %s: %m", q);
911                 return r;
912         }
913
914         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
915                 log_error("Failed to bind mount journal from host into guest: %m");
916                 return -errno;
917         }
918
919         return 0;
920 }
921
922 static int drop_capabilities(void) {
923         return capability_bounding_set_drop(~arg_retain, false);
924 }
925
926 static int register_machine(void) {
927         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
928         _cleanup_bus_unref_ sd_bus *bus = NULL;
929         int r;
930
931         r = sd_bus_open_system(&bus);
932         if (r < 0) {
933                 log_error("Failed to open system bus: %s", strerror(-r));
934                 return r;
935         }
936
937         r = sd_bus_call_method(
938                         bus,
939                         "org.freedesktop.machine1",
940                         "/org/freedesktop/machine1",
941                         "org.freedesktop.machine1.Manager",
942                         "CreateMachine",
943                         &error,
944                         NULL,
945                         "sayssusa(sv)",
946                         arg_machine,
947                         SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
948                         "nspawn",
949                         "container",
950                         (uint32_t) 0,
951                         strempty(arg_directory),
952                         !isempty(arg_slice), "Slice", "s", arg_slice);
953         if (r < 0) {
954                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
955                 return r;
956         }
957
958         return 0;
959 }
960
961 static int terminate_machine(pid_t pid) {
962         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
963         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
964         _cleanup_bus_unref_ sd_bus *bus = NULL;
965         const char *path;
966         int r;
967
968         r = sd_bus_open_system(&bus);
969         if (r < 0) {
970                 log_error("Failed to open system bus: %s", strerror(-r));
971                 return r;
972         }
973
974         r = sd_bus_call_method(
975                         bus,
976                         "org.freedesktop.machine1",
977                         "/org/freedesktop/machine1",
978                         "org.freedesktop.machine1.Manager",
979                         "GetMachineByPID",
980                         &error,
981                         &reply,
982                         "u",
983                         (uint32_t) pid);
984         if (r < 0) {
985                 /* Note that the machine might already have been
986                  * cleaned up automatically, hence don't consider it a
987                  * failure if we cannot get the machine object. */
988                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
989                 return 0;
990         }
991
992         r = sd_bus_message_read(reply, "o", &path);
993         if (r < 0)
994                 return bus_log_parse_error(r);
995
996         r = sd_bus_call_method(
997                         bus,
998                         "org.freedesktop.machine1",
999                         path,
1000                         "org.freedesktop.machine1.Machine",
1001                         "Terminate",
1002                         &error,
1003                         NULL,
1004                         NULL);
1005         if (r < 0) {
1006                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1007                 return 0;
1008         }
1009
1010         return 0;
1011 }
1012
/* Report whether a NETLINK_AUDIT socket can be created. The probe socket
 * is closed again right away; only the success of socket() matters. */
static bool audit_enabled(void) {
        int probe;

        probe = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
        if (probe < 0)
                return false;

        close_nointr_nofail(probe);
        return true;
}
1023
1024 int main(int argc, char *argv[]) {
1025         pid_t pid = 0;
1026         int r = EXIT_FAILURE, k;
1027         _cleanup_close_ int master = -1;
1028         int n_fd_passed;
1029         const char *console = NULL;
1030         sigset_t mask;
1031         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1032         _cleanup_fdset_free_ FDSet *fds = NULL;
1033
1034         log_parse_environment();
1035         log_open();
1036
1037         k = parse_argv(argc, argv);
1038         if (k < 0)
1039                 goto finish;
1040         else if (k == 0) {
1041                 r = EXIT_SUCCESS;
1042                 goto finish;
1043         }
1044
1045         if (arg_directory) {
1046                 char *p;
1047
1048                 p = path_make_absolute_cwd(arg_directory);
1049                 free(arg_directory);
1050                 arg_directory = p;
1051         } else
1052                 arg_directory = get_current_dir_name();
1053
1054         if (!arg_directory) {
1055                 log_error("Failed to determine path, please use -D.");
1056                 goto finish;
1057         }
1058
1059         path_kill_slashes(arg_directory);
1060
1061         if (!arg_machine) {
1062                 arg_machine = strdup(path_get_file_name(arg_directory));
1063                 if (!arg_machine) {
1064                         log_oom();
1065                         goto finish;
1066                 }
1067
1068                 hostname_cleanup(arg_machine, false);
1069                 if (isempty(arg_machine)) {
1070                         log_error("Failed to determine machine name automatically, please use -M.");
1071                         goto finish;
1072                 }
1073         }
1074
1075         if (geteuid() != 0) {
1076                 log_error("Need to be root.");
1077                 goto finish;
1078         }
1079
1080         if (sd_booted() <= 0) {
1081                 log_error("Not running on a systemd system.");
1082                 goto finish;
1083         }
1084
1085         if (arg_boot && audit_enabled()) {
1086                 log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
1087                             "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
1088                             "line before using systemd-nspawn. Sleeping for 5s...\n");
1089                 sleep(5);
1090         }
1091
1092         if (path_equal(arg_directory, "/")) {
1093                 log_error("Spawning container on root directory not supported.");
1094                 goto finish;
1095         }
1096
1097         if (path_is_os_tree(arg_directory) <= 0) {
1098                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1099                 goto finish;
1100         }
1101
1102         log_close();
1103         n_fd_passed = sd_listen_fds(false);
1104         if (n_fd_passed > 0) {
1105                 k = fdset_new_listen_fds(&fds, false);
1106                 if (k < 0) {
1107                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1108                         goto finish;
1109                 }
1110         }
1111         fdset_close_others(fds);
1112         log_open();
1113
1114         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1115         if (master < 0) {
1116                 log_error("Failed to acquire pseudo tty: %m");
1117                 goto finish;
1118         }
1119
1120         console = ptsname(master);
1121         if (!console) {
1122                 log_error("Failed to determine tty name: %m");
1123                 goto finish;
1124         }
1125
1126         log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1127
1128         if (unlockpt(master) < 0) {
1129                 log_error("Failed to unlock tty: %m");
1130                 goto finish;
1131         }
1132
1133         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1134                 log_error("Failed to create kmsg socket pair.");
1135                 goto finish;
1136         }
1137
1138         sd_notify(0, "READY=1");
1139
1140         assert_se(sigemptyset(&mask) == 0);
1141         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1142         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1143
1144         for (;;) {
1145                 siginfo_t status;
1146
1147                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1148                 if (pid < 0) {
1149                         if (errno == EINVAL)
1150                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1151                         else
1152                                 log_error("clone() failed: %m");
1153
1154                         goto finish;
1155                 }
1156
1157                 if (pid == 0) {
1158                         /* child */
1159                         const char *home = NULL;
1160                         uid_t uid = (uid_t) -1;
1161                         gid_t gid = (gid_t) -1;
1162                         unsigned n_env = 2;
1163                         const char *envp[] = {
1164                                 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1165                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1166                                 NULL, /* TERM */
1167                                 NULL, /* HOME */
1168                                 NULL, /* USER */
1169                                 NULL, /* LOGNAME */
1170                                 NULL, /* container_uuid */
1171                                 NULL, /* LISTEN_FDS */
1172                                 NULL, /* LISTEN_PID */
1173                                 NULL
1174                         };
1175
1176                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1177                         if (envp[n_env])
1178                                 n_env ++;
1179
1180                         close_nointr_nofail(master);
1181                         master = -1;
1182
1183                         close_nointr(STDIN_FILENO);
1184                         close_nointr(STDOUT_FILENO);
1185                         close_nointr(STDERR_FILENO);
1186
1187                         close_nointr_nofail(kmsg_socket_pair[0]);
1188                         kmsg_socket_pair[0] = -1;
1189
1190                         reset_all_signal_handlers();
1191
1192                         assert_se(sigemptyset(&mask) == 0);
1193                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1194
1195                         k = open_terminal(console, O_RDWR);
1196                         if (k != STDIN_FILENO) {
1197                                 if (k >= 0) {
1198                                         close_nointr_nofail(k);
1199                                         k = -EINVAL;
1200                                 }
1201
1202                                 log_error("Failed to open console: %s", strerror(-k));
1203                                 goto child_fail;
1204                         }
1205
1206                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1207                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1208                                 log_error("Failed to duplicate console: %m");
1209                                 goto child_fail;
1210                         }
1211
1212                         if (setsid() < 0) {
1213                                 log_error("setsid() failed: %m");
1214                                 goto child_fail;
1215                         }
1216
1217                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1218                                 log_error("PR_SET_PDEATHSIG failed: %m");
1219                                 goto child_fail;
1220                         }
1221
1222                         r = register_machine();
1223                         if (r < 0)
1224                                 goto finish;
1225
1226                         /* Mark everything as slave, so that we still
1227                          * receive mounts from the real root, but don't
1228                          * propagate mounts to the real root. */
1229                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1230                                 log_error("MS_SLAVE|MS_REC failed: %m");
1231                                 goto child_fail;
1232                         }
1233
1234                         /* Turn directory into bind mount */
1235                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1236                                 log_error("Failed to make bind mount.");
1237                                 goto child_fail;
1238                         }
1239
1240                         if (arg_read_only)
1241                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1242                                         log_error("Failed to make read-only.");
1243                                         goto child_fail;
1244                                 }
1245
1246                         if (mount_all(arg_directory) < 0)
1247                                 goto child_fail;
1248
1249                         if (copy_devnodes(arg_directory) < 0)
1250                                 goto child_fail;
1251
1252                         if (setup_ptmx(arg_directory) < 0)
1253                                 goto child_fail;
1254
1255                         dev_setup(arg_directory);
1256
1257                         if (setup_dev_console(arg_directory, console) < 0)
1258                                 goto child_fail;
1259
1260                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1261                                 goto child_fail;
1262
1263                         close_nointr_nofail(kmsg_socket_pair[1]);
1264                         kmsg_socket_pair[1] = -1;
1265
1266                         if (setup_boot_id(arg_directory) < 0)
1267                                 goto child_fail;
1268
1269                         if (setup_timezone(arg_directory) < 0)
1270                                 goto child_fail;
1271
1272                         if (setup_resolv_conf(arg_directory) < 0)
1273                                 goto child_fail;
1274
1275                         if (setup_journal(arg_directory) < 0)
1276                                 goto child_fail;
1277
1278                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1279                                 goto child_fail;
1280
1281                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1282                                 goto child_fail;
1283
1284                         if (chdir(arg_directory) < 0) {
1285                                 log_error("chdir(%s) failed: %m", arg_directory);
1286                                 goto child_fail;
1287                         }
1288
1289                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1290                                 log_error("mount(MS_MOVE) failed: %m");
1291                                 goto child_fail;
1292                         }
1293
1294                         if (chroot(".") < 0) {
1295                                 log_error("chroot() failed: %m");
1296                                 goto child_fail;
1297                         }
1298
1299                         if (chdir("/") < 0) {
1300                                 log_error("chdir() failed: %m");
1301                                 goto child_fail;
1302                         }
1303
1304                         umask(0022);
1305
1306                         loopback_setup();
1307
1308                         if (drop_capabilities() < 0) {
1309                                 log_error("drop_capabilities() failed: %m");
1310                                 goto child_fail;
1311                         }
1312
1313                         if (arg_user) {
1314
1315                                 /* Note that this resolves user names
1316                                  * inside the container, and hence
1317                                  * accesses the NSS modules from the
1318                                  * container and not the host. This is
1319                                  * a bit weird... */
1320
1321                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1322                                         log_error("get_user_creds() failed: %m");
1323                                         goto child_fail;
1324                                 }
1325
1326                                 if (mkdir_parents_label(home, 0775) < 0) {
1327                                         log_error("mkdir_parents_label() failed: %m");
1328                                         goto child_fail;
1329                                 }
1330
1331                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1332                                         log_error("mkdir_safe_label() failed: %m");
1333                                         goto child_fail;
1334                                 }
1335
1336                                 if (initgroups((const char*)arg_user, gid) < 0) {
1337                                         log_error("initgroups() failed: %m");
1338                                         goto child_fail;
1339                                 }
1340
1341                                 if (setresgid(gid, gid, gid) < 0) {
1342                                         log_error("setregid() failed: %m");
1343                                         goto child_fail;
1344                                 }
1345
1346                                 if (setresuid(uid, uid, uid) < 0) {
1347                                         log_error("setreuid() failed: %m");
1348                                         goto child_fail;
1349                                 }
1350                         } else {
1351                                 /* Reset everything fully to 0, just in case */
1352
1353                                 if (setgroups(0, NULL) < 0) {
1354                                         log_error("setgroups() failed: %m");
1355                                         goto child_fail;
1356                                 }
1357
1358                                 if (setresgid(0, 0, 0) < 0) {
1359                                         log_error("setregid() failed: %m");
1360                                         goto child_fail;
1361                                 }
1362
1363                                 if (setresuid(0, 0, 0) < 0) {
1364                                         log_error("setreuid() failed: %m");
1365                                         goto child_fail;
1366                                 }
1367                         }
1368
1369                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1370                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1371                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1372                                 log_oom();
1373                                 goto child_fail;
1374                         }
1375
1376                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1377                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1378                                         log_oom();
1379                                         goto child_fail;
1380                                 }
1381                         }
1382
1383                         if (fdset_size(fds) > 0) {
1384                                 k = fdset_cloexec(fds, false);
1385                                 if (k < 0) {
1386                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1387                                         goto child_fail;
1388                                 }
1389
1390                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1391                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1392                                         log_oom();
1393                                         goto child_fail;
1394                                 }
1395                         }
1396
1397                         setup_hostname();
1398
1399                         if (arg_boot) {
1400                                 char **a;
1401                                 size_t l;
1402
1403                                 /* Automatically search for the init system */
1404
1405                                 l = 1 + argc - optind;
1406                                 a = newa(char*, l + 1);
1407                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
1408
1409                                 a[0] = (char*) "/usr/lib/systemd/systemd";
1410                                 execve(a[0], a, (char**) envp);
1411
1412                                 a[0] = (char*) "/lib/systemd/systemd";
1413                                 execve(a[0], a, (char**) envp);
1414
1415                                 a[0] = (char*) "/sbin/init";
1416                                 execve(a[0], a, (char**) envp);
1417                         } else if (argc > optind)
1418                                 execvpe(argv[optind], argv + optind, (char**) envp);
1419                         else {
1420                                 chdir(home ? home : "/root");
1421                                 execle("/bin/bash", "-bash", NULL, (char**) envp);
1422                         }
1423
1424                         log_error("execv() failed: %m");
1425
1426                 child_fail:
1427                         _exit(EXIT_FAILURE);
1428                 }
1429
1430                 fdset_free(fds);
1431                 fds = NULL;
1432
1433                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
1434                 if (k < 0) {
1435                         r = EXIT_FAILURE;
1436                         break;
1437                 }
1438
1439                 putc('\n', stdout);
1440
1441                 /* Kill if it is not dead yet anyway */
1442                 terminate_machine(pid);
1443
1444                 /* Redundant, but better safe than sorry */
1445                 kill(pid, SIGKILL);
1446
1447                 k = wait_for_terminate(pid, &status);
1448                 pid = 0;
1449
1450                 if (k < 0) {
1451                         r = EXIT_FAILURE;
1452                         break;
1453                 }
1454
1455                 if (status.si_code == CLD_EXITED) {
1456                         r = status.si_status;
1457                         if (status.si_status != 0) {
1458                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
1459                                 break;
1460                         }
1461
1462                         log_debug("Container %s exited successfully.", arg_machine);
1463                         break;
1464                 } else if (status.si_code == CLD_KILLED &&
1465                            status.si_status == SIGINT) {
1466                         log_info("Container %s has been shut down.", arg_machine);
1467                         r = 0;
1468                         break;
1469                 } else if (status.si_code == CLD_KILLED &&
1470                            status.si_status == SIGHUP) {
1471                         log_info("Container %s is being rebooted.", arg_machine);
1472                         continue;
1473                 } else if (status.si_code == CLD_KILLED ||
1474                            status.si_code == CLD_DUMPED) {
1475
1476                         log_error("Container %s terminated by signal %s.", arg_machine,  signal_to_string(status.si_status));
1477                         r = EXIT_FAILURE;
1478                         break;
1479                 } else {
1480                         log_error("Container %s failed due to unknown reason.", arg_machine);
1481                         r = EXIT_FAILURE;
1482                         break;
1483                 }
1484         }
1485
1486 finish:
1487         if (pid > 0)
1488                 kill(pid, SIGKILL);
1489
1490         free(arg_directory);
1491         free(arg_machine);
1492
1493         return r;
1494 }