chiark / gitweb /
clients: unify how we invoke getopt_long()
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43
44 #include "sd-daemon.h"
45 #include "sd-bus.h"
46 #include "sd-id128.h"
47 #include "log.h"
48 #include "util.h"
49 #include "mkdir.h"
50 #include "macro.h"
51 #include "audit.h"
52 #include "missing.h"
53 #include "cgroup-util.h"
54 #include "strv.h"
55 #include "path-util.h"
56 #include "loopback-setup.h"
57 #include "dev-setup.h"
58 #include "fdset.h"
59 #include "build.h"
60 #include "fileio.h"
61 #include "bus-util.h"
62 #include "bus-error.h"
63 #include "ptyfwd.h"
64
/* Group ID handed to the container's devpts instance through the "gid="
 * mount option in mount_all(). A definition supplied before this point
 * (e.g. by the build) takes precedence; otherwise 5 is used. */
#ifndef TTY_GID
#define TTY_GID 5
#endif
68
/* Journal linking mode, chosen with --link-journal=MODE (or -j) in
 * parse_argv() and acted upon by setup_journal(). */
typedef enum LinkJournal {
        LINK_NO,    /* --link-journal=no: setup_journal() returns without doing anything */
        LINK_AUTO,  /* --link-journal=auto: initial value of arg_link_journal */
        LINK_HOST,  /* --link-journal=host */
        LINK_GUEST  /* --link-journal=guest; also what -j selects */
} LinkJournal;
75
/* Command line settings. All of these are filled in by parse_argv(). */

/* -D/--directory: canonicalized path of the container root (heap allocated). */
static char *arg_directory = NULL;
/* -u/--user: copy of the user name or uid given on the command line. */
static char *arg_user = NULL;
/* --uuid: parsed machine UUID; stays zero-initialized when not given. */
static sd_id128_t arg_uuid = {};
/* -M/--machine: machine name, accepted only if hostname_is_valid(); also
 * the name setup_hostname() passes to sethostname(). */
static char *arg_machine = NULL;
/* -S/--slice: copy of the slice name given on the command line. */
static const char *arg_slice = NULL;
/* --private-network: when set, setup_resolv_conf() leaves resolv.conf alone. */
static bool arg_private_network = false;
/* --read-only */
static bool arg_read_only = false;
/* -b/--boot */
static bool arg_boot = false;
/* --link-journal/-j: see LinkJournal. */
static LinkJournal arg_link_journal = LINK_AUTO;
/* Bit mask (1ULL << CAP_xxx) of capabilities to retain. The initializer is
 * the default set; each --capability= option ORs further bits into it. */
static uint64_t arg_retain =
        (1ULL << CAP_CHOWN) |
        (1ULL << CAP_DAC_OVERRIDE) |
        (1ULL << CAP_DAC_READ_SEARCH) |
        (1ULL << CAP_FOWNER) |
        (1ULL << CAP_FSETID) |
        (1ULL << CAP_IPC_OWNER) |
        (1ULL << CAP_KILL) |
        (1ULL << CAP_LEASE) |
        (1ULL << CAP_LINUX_IMMUTABLE) |
        (1ULL << CAP_NET_BIND_SERVICE) |
        (1ULL << CAP_NET_BROADCAST) |
        (1ULL << CAP_NET_RAW) |
        (1ULL << CAP_SETGID) |
        (1ULL << CAP_SETFCAP) |
        (1ULL << CAP_SETPCAP) |
        (1ULL << CAP_SETUID) |
        (1ULL << CAP_SYS_ADMIN) |
        (1ULL << CAP_SYS_CHROOT) |
        (1ULL << CAP_SYS_NICE) |
        (1ULL << CAP_SYS_PTRACE) |
        (1ULL << CAP_SYS_TTY_CONFIG) |
        (1ULL << CAP_SYS_RESOURCE) |
        (1ULL << CAP_SYS_BOOT) |
        (1ULL << CAP_AUDIT_WRITE) |
        (1ULL << CAP_AUDIT_CONTROL);
/* --bind= / --bind-ro=: string vectors holding consecutive
 * (source path, destination path) pairs, in the layout mount_binds()
 * iterates with STRV_FOREACH_PAIR(). */
static char **arg_bind = NULL;
static char **arg_bind_ro = NULL;
113
114 static int help(void) {
115
116         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
117                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
118                "  -h --help                Show this help\n"
119                "     --version             Print version string\n"
120                "  -D --directory=NAME      Root directory for the container\n"
121                "  -b --boot                Boot up full system (i.e. invoke init)\n"
122                "  -u --user=USER           Run the command under specified user or uid\n"
123                "     --uuid=UUID           Set a specific machine UUID for the container\n"
124                "  -M --machine=NAME        Set the machine name for the container\n"
125                "  -S --slice=SLICE         Place the container in the specified slice\n"
126                "     --private-network     Disable network in container\n"
127                "     --read-only           Mount the root directory read-only\n"
128                "     --capability=CAP      In addition to the default, retain specified\n"
129                "                           capability\n"
130                "     --link-journal=MODE   Link up guest journal, one of no, auto, guest, host\n"
131                "  -j                       Equivalent to --link-journal=host\n"
132                "     --bind=PATH[:PATH]    Bind mount a file or directory from the host into\n"
133                "                           the container\n"
134                "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n",
135                program_invocation_short_name);
136
137         return 0;
138 }
139
140 static int parse_argv(int argc, char *argv[]) {
141
142         enum {
143                 ARG_VERSION = 0x100,
144                 ARG_PRIVATE_NETWORK,
145                 ARG_UUID,
146                 ARG_READ_ONLY,
147                 ARG_CAPABILITY,
148                 ARG_LINK_JOURNAL,
149                 ARG_BIND,
150                 ARG_BIND_RO
151         };
152
153         static const struct option options[] = {
154                 { "help",            no_argument,       NULL, 'h'                 },
155                 { "version",         no_argument,       NULL, ARG_VERSION         },
156                 { "directory",       required_argument, NULL, 'D'                 },
157                 { "user",            required_argument, NULL, 'u'                 },
158                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
159                 { "boot",            no_argument,       NULL, 'b'                 },
160                 { "uuid",            required_argument, NULL, ARG_UUID            },
161                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
162                 { "capability",      required_argument, NULL, ARG_CAPABILITY      },
163                 { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
164                 { "bind",            required_argument, NULL, ARG_BIND            },
165                 { "bind-ro",         required_argument, NULL, ARG_BIND_RO         },
166                 { "machine",         required_argument, NULL, 'M'                 },
167                 { "slice",           required_argument, NULL, 'S'                 },
168                 {}
169         };
170
171         int c, r;
172
173         assert(argc >= 0);
174         assert(argv);
175
176         while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
177
178                 switch (c) {
179
180                 case 'h':
181                         return help();
182
183                 case ARG_VERSION:
184                         puts(PACKAGE_STRING);
185                         puts(SYSTEMD_FEATURES);
186                         return 0;
187
188                 case 'D':
189                         free(arg_directory);
190                         arg_directory = canonicalize_file_name(optarg);
191                         if (!arg_directory) {
192                                 log_error("Failed to canonicalize root directory.");
193                                 return -ENOMEM;
194                         }
195
196                         break;
197
198                 case 'u':
199                         free(arg_user);
200                         arg_user = strdup(optarg);
201                         if (!arg_user)
202                                 return log_oom();
203
204                         break;
205
206                 case ARG_PRIVATE_NETWORK:
207                         arg_private_network = true;
208                         break;
209
210                 case 'b':
211                         arg_boot = true;
212                         break;
213
214                 case ARG_UUID:
215                         r = sd_id128_from_string(optarg, &arg_uuid);
216                         if (r < 0) {
217                                 log_error("Invalid UUID: %s", optarg);
218                                 return r;
219                         }
220                         break;
221
222                 case 'S':
223                         arg_slice = strdup(optarg);
224                         if (!arg_slice)
225                                 return log_oom();
226
227                         break;
228
229                 case 'M':
230                         if (!hostname_is_valid(optarg)) {
231                                 log_error("Invalid machine name: %s", optarg);
232                                 return -EINVAL;
233                         }
234
235                         free(arg_machine);
236                         arg_machine = strdup(optarg);
237                         if (!arg_machine)
238                                 return log_oom();
239
240                         break;
241
242                 case ARG_READ_ONLY:
243                         arg_read_only = true;
244                         break;
245
246                 case ARG_CAPABILITY: {
247                         char *state, *word;
248                         size_t length;
249
250                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
251                                 cap_value_t cap;
252                                 char *t;
253
254                                 t = strndup(word, length);
255                                 if (!t)
256                                         return log_oom();
257
258                                 if (cap_from_name(t, &cap) < 0) {
259                                         log_error("Failed to parse capability %s.", t);
260                                         free(t);
261                                         return -EINVAL;
262                                 }
263
264                                 free(t);
265                                 arg_retain |= 1ULL << (uint64_t) cap;
266                         }
267
268                         break;
269                 }
270
271                 case 'j':
272                         arg_link_journal = LINK_GUEST;
273                         break;
274
275                 case ARG_LINK_JOURNAL:
276                         if (streq(optarg, "auto"))
277                                 arg_link_journal = LINK_AUTO;
278                         else if (streq(optarg, "no"))
279                                 arg_link_journal = LINK_NO;
280                         else if (streq(optarg, "guest"))
281                                 arg_link_journal = LINK_GUEST;
282                         else if (streq(optarg, "host"))
283                                 arg_link_journal = LINK_HOST;
284                         else {
285                                 log_error("Failed to parse link journal mode %s", optarg);
286                                 return -EINVAL;
287                         }
288
289                         break;
290
291                 case ARG_BIND:
292                 case ARG_BIND_RO: {
293                         _cleanup_free_ char *a = NULL, *b = NULL;
294                         char *e;
295                         char ***x;
296
297                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
298
299                         e = strchr(optarg, ':');
300                         if (e) {
301                                 a = strndup(optarg, e - optarg);
302                                 b = strdup(e + 1);
303                         } else {
304                                 a = strdup(optarg);
305                                 b = strdup(optarg);
306                         }
307
308                         if (!a || !b)
309                                 return log_oom();
310
311                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
312                                 log_error("Invalid bind mount specification: %s", optarg);
313                                 return -EINVAL;
314                         }
315
316                         r = strv_extend(x, a);
317                         if (r < 0)
318                                 return log_oom();
319
320                         r = strv_extend(x, b);
321                         if (r < 0)
322                                 return log_oom();
323
324                         break;
325                 }
326
327                 case '?':
328                         return -EINVAL;
329
330                 default:
331                         assert_not_reached("Unhandled option");
332                 }
333         }
334
335         return 1;
336 }
337
338 static int mount_all(const char *dest) {
339
340         typedef struct MountPoint {
341                 const char *what;
342                 const char *where;
343                 const char *type;
344                 const char *options;
345                 unsigned long flags;
346                 bool fatal;
347         } MountPoint;
348
349         static const MountPoint mount_table[] = {
350                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
351                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
352                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
353                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
354                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
355                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
356                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
357                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
358 #ifdef HAVE_SELINUX
359                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
360                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
361 #endif
362         };
363
364         unsigned k;
365         int r = 0;
366
367         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
368                 _cleanup_free_ char *where = NULL;
369                 int t;
370
371                 where = strjoin(dest, "/", mount_table[k].where, NULL);
372                 if (!where)
373                         return log_oom();
374
375                 t = path_is_mount_point(where, true);
376                 if (t < 0) {
377                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
378
379                         if (r == 0)
380                                 r = t;
381
382                         continue;
383                 }
384
385                 /* Skip this entry if it is not a remount. */
386                 if (mount_table[k].what && t > 0)
387                         continue;
388
389                 mkdir_p(where, 0755);
390
391                 if (mount(mount_table[k].what,
392                           where,
393                           mount_table[k].type,
394                           mount_table[k].flags,
395                           mount_table[k].options) < 0 &&
396                     mount_table[k].fatal) {
397
398                         log_error("mount(%s) failed: %m", where);
399
400                         if (r == 0)
401                                 r = -errno;
402                 }
403         }
404
405         return r;
406 }
407
408 static int mount_binds(const char *dest, char **l, unsigned long flags) {
409         char **x, **y;
410
411         STRV_FOREACH_PAIR(x, y, l) {
412                 _cleanup_free_ char *where = NULL;
413                 struct stat source_st, dest_st;
414
415                 if (stat(*x, &source_st) < 0) {
416                         log_error("failed to stat %s: %m", *x);
417                         return -errno;
418                 }
419
420                 where = strjoin(dest, "/", *y, NULL);
421                 if (!where)
422                         return log_oom();
423
424                 if (stat(where, &dest_st) == 0) {
425                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
426                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
427                                                 *x, where);
428                                 return -EINVAL;
429                         }
430                 } else {
431                         /* Create the mount point, but be conservative -- refuse to create block
432                          * and char devices. */
433                         if (S_ISDIR(source_st.st_mode))
434                                 mkdir_p_label(where, 0755);
435                         else if (S_ISFIFO(source_st.st_mode))
436                                 mkfifo(where, 0644);
437                         else if (S_ISSOCK(source_st.st_mode))
438                                 mknod(where, 0644 | S_IFSOCK, 0);
439                         else if (S_ISREG(source_st.st_mode))
440                                 touch(where);
441                         else {
442                                 log_error("Refusing to create mountpoint for file: %s", *x);
443                                 return -ENOTSUP;
444                         }
445                 }
446
447                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
448                         log_error("mount(%s) failed: %m", where);
449                         return -errno;
450                 }
451
452                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
453                         log_error("mount(%s) failed: %m", where);
454                         return -errno;
455                 }
456         }
457
458         return 0;
459 }
460
461 static int setup_timezone(const char *dest) {
462         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
463         char *z, *y;
464         int r;
465
466         assert(dest);
467
468         /* Fix the timezone, if possible */
469         r = readlink_malloc("/etc/localtime", &p);
470         if (r < 0) {
471                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
472                 return 0;
473         }
474
475         z = path_startswith(p, "../usr/share/zoneinfo/");
476         if (!z)
477                 z = path_startswith(p, "/usr/share/zoneinfo/");
478         if (!z) {
479                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
480                 return 0;
481         }
482
483         where = strappend(dest, "/etc/localtime");
484         if (!where)
485                 return log_oom();
486
487         r = readlink_malloc(where, &q);
488         if (r >= 0) {
489                 y = path_startswith(q, "../usr/share/zoneinfo/");
490                 if (!y)
491                         y = path_startswith(q, "/usr/share/zoneinfo/");
492
493
494                 /* Already pointing to the right place? Then do nothing .. */
495                 if (y && streq(y, z))
496                         return 0;
497         }
498
499         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
500         if (!check)
501                 return log_oom();
502
503         if (access(check, F_OK) < 0) {
504                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
505                 return 0;
506         }
507
508         what = strappend("../usr/share/zoneinfo/", z);
509         if (!what)
510                 return log_oom();
511
512         unlink(where);
513         if (symlink(what, where) < 0) {
514                 log_error("Failed to correct timezone of container: %m");
515                 return 0;
516         }
517
518         return 0;
519 }
520
521 static int setup_resolv_conf(const char *dest) {
522         char _cleanup_free_ *where = NULL;
523
524         assert(dest);
525
526         if (arg_private_network)
527                 return 0;
528
529         /* Fix resolv.conf, if possible */
530         where = strappend(dest, "/etc/resolv.conf");
531         if (!where)
532                 return log_oom();
533
534         /* We don't really care for the results of this really. If it
535          * fails, it fails, but meh... */
536         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
537
538         return 0;
539 }
540
541 static int setup_boot_id(const char *dest) {
542         _cleanup_free_ char *from = NULL, *to = NULL;
543         sd_id128_t rnd;
544         char as_uuid[37];
545         int r;
546
547         assert(dest);
548
549         /* Generate a new randomized boot ID, so that each boot-up of
550          * the container gets a new one */
551
552         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
553         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
554         if (!from || !to)
555                 return log_oom();
556
557         r = sd_id128_randomize(&rnd);
558         if (r < 0) {
559                 log_error("Failed to generate random boot id: %s", strerror(-r));
560                 return r;
561         }
562
563         snprintf(as_uuid, sizeof(as_uuid),
564                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
565                  SD_ID128_FORMAT_VAL(rnd));
566         char_array_0(as_uuid);
567
568         r = write_string_file(from, as_uuid);
569         if (r < 0) {
570                 log_error("Failed to write boot id: %s", strerror(-r));
571                 return r;
572         }
573
574         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
575                 log_error("Failed to bind mount boot id: %m");
576                 r = -errno;
577         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
578                 log_warning("Failed to make boot id read-only: %m");
579
580         unlink(from);
581         return r;
582 }
583
584 static int copy_devnodes(const char *dest) {
585
586         static const char devnodes[] =
587                 "null\0"
588                 "zero\0"
589                 "full\0"
590                 "random\0"
591                 "urandom\0"
592                 "tty\0";
593
594         const char *d;
595         int r = 0;
596         _cleanup_umask_ mode_t u;
597
598         assert(dest);
599
600         u = umask(0000);
601
602         NULSTR_FOREACH(d, devnodes) {
603                 struct stat st;
604                 _cleanup_free_ char *from = NULL, *to = NULL;
605
606                 asprintf(&from, "/dev/%s", d);
607                 asprintf(&to, "%s/dev/%s", dest, d);
608
609                 if (!from || !to) {
610                         log_oom();
611
612                         if (r == 0)
613                                 r = -ENOMEM;
614
615                         break;
616                 }
617
618                 if (stat(from, &st) < 0) {
619
620                         if (errno != ENOENT) {
621                                 log_error("Failed to stat %s: %m", from);
622                                 if (r == 0)
623                                         r = -errno;
624                         }
625
626                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
627
628                         log_error("%s is not a char or block device, cannot copy", from);
629                         if (r == 0)
630                                 r = -EIO;
631
632                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
633
634                         log_error("mknod(%s) failed: %m", dest);
635                         if (r == 0)
636                                 r = -errno;
637                 }
638         }
639
640         return r;
641 }
642
643 static int setup_ptmx(const char *dest) {
644         _cleanup_free_ char *p = NULL;
645
646         p = strappend(dest, "/dev/ptmx");
647         if (!p)
648                 return log_oom();
649
650         if (symlink("pts/ptmx", p) < 0) {
651                 log_error("Failed to create /dev/ptmx symlink: %m");
652                 return -errno;
653         }
654
655         return 0;
656 }
657
658 static int setup_dev_console(const char *dest, const char *console) {
659         struct stat st;
660         _cleanup_free_ char *to = NULL;
661         int r;
662         _cleanup_umask_ mode_t u;
663
664         assert(dest);
665         assert(console);
666
667         u = umask(0000);
668
669         if (stat(console, &st) < 0) {
670                 log_error("Failed to stat %s: %m", console);
671                 return -errno;
672
673         } else if (!S_ISCHR(st.st_mode)) {
674                 log_error("/dev/console is not a char device");
675                 return -EIO;
676         }
677
678         r = chmod_and_chown(console, 0600, 0, 0);
679         if (r < 0) {
680                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
681                 return r;
682         }
683
684         if (asprintf(&to, "%s/dev/console", dest) < 0)
685                 return log_oom();
686
687         /* We need to bind mount the right tty to /dev/console since
688          * ptys can only exist on pts file systems. To have something
689          * to bind mount things on we create a device node first, that
690          * has the right major/minor (note that the major minor
691          * doesn't actually matter here, since we mount it over
692          * anyway). */
693
694         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
695                 log_error("mknod() for /dev/console failed: %m");
696                 return -errno;
697         }
698
699         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
700                 log_error("Bind mount for /dev/console failed: %m");
701                 return -errno;
702         }
703
704         return 0;
705 }
706
707 static int setup_kmsg(const char *dest, int kmsg_socket) {
708         _cleanup_free_ char *from = NULL, *to = NULL;
709         int r, fd, k;
710         _cleanup_umask_ mode_t u;
711         union {
712                 struct cmsghdr cmsghdr;
713                 uint8_t buf[CMSG_SPACE(sizeof(int))];
714         } control = {};
715         struct msghdr mh = {
716                 .msg_control = &control,
717                 .msg_controllen = sizeof(control),
718         };
719         struct cmsghdr *cmsg;
720
721         assert(dest);
722         assert(kmsg_socket >= 0);
723
724         u = umask(0000);
725
726         /* We create the kmsg FIFO as /dev/kmsg, but immediately
727          * delete it after bind mounting it to /proc/kmsg. While FIFOs
728          * on the reading side behave very similar to /proc/kmsg,
729          * their writing side behaves differently from /dev/kmsg in
730          * that writing blocks when nothing is reading. In order to
731          * avoid any problems with containers deadlocking due to this
732          * we simply make /dev/kmsg unavailable to the container. */
733         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
734             asprintf(&to, "%s/proc/kmsg", dest) < 0)
735                 return log_oom();
736
737         if (mkfifo(from, 0600) < 0) {
738                 log_error("mkfifo() for /dev/kmsg failed: %m");
739                 return -errno;
740         }
741
742         r = chmod_and_chown(from, 0600, 0, 0);
743         if (r < 0) {
744                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
745                 return r;
746         }
747
748         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
749                 log_error("Bind mount for /proc/kmsg failed: %m");
750                 return -errno;
751         }
752
753         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
754         if (fd < 0) {
755                 log_error("Failed to open fifo: %m");
756                 return -errno;
757         }
758
759         cmsg = CMSG_FIRSTHDR(&mh);
760         cmsg->cmsg_level = SOL_SOCKET;
761         cmsg->cmsg_type = SCM_RIGHTS;
762         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
763         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
764
765         mh.msg_controllen = cmsg->cmsg_len;
766
767         /* Store away the fd in the socket, so that it stays open as
768          * long as we run the child */
769         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
770         close_nointr_nofail(fd);
771
772         if (k < 0) {
773                 log_error("Failed to send FIFO fd: %m");
774                 return -errno;
775         }
776
777         /* And now make the FIFO unavailable as /dev/kmsg... */
778         unlink(from);
779         return 0;
780 }
781
782 static int setup_hostname(void) {
783
784         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
785                 return -errno;
786
787         return 0;
788 }
789
790 static int setup_journal(const char *directory) {
791         sd_id128_t machine_id;
792         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
793         char *id;
794         int r;
795
796         if (arg_link_journal == LINK_NO)
797                 return 0;
798
799         p = strappend(directory, "/etc/machine-id");
800         if (!p)
801                 return log_oom();
802
803         r = read_one_line_file(p, &b);
804         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
805                 return 0;
806         else if (r < 0) {
807                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
808                 return r;
809         }
810
811         id = strstrip(b);
812         if (isempty(id) && arg_link_journal == LINK_AUTO)
813                 return 0;
814
815         /* Verify validity */
816         r = sd_id128_from_string(id, &machine_id);
817         if (r < 0) {
818                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
819                 return r;
820         }
821
822         free(p);
823         p = strappend("/var/log/journal/", id);
824         q = strjoin(directory, "/var/log/journal/", id, NULL);
825         if (!p || !q)
826                 return log_oom();
827
828         if (path_is_mount_point(p, false) > 0) {
829                 if (arg_link_journal != LINK_AUTO) {
830                         log_error("%s: already a mount point, refusing to use for journal", p);
831                         return -EEXIST;
832                 }
833
834                 return 0;
835         }
836
837         if (path_is_mount_point(q, false) > 0) {
838                 if (arg_link_journal != LINK_AUTO) {
839                         log_error("%s: already a mount point, refusing to use for journal", q);
840                         return -EEXIST;
841                 }
842
843                 return 0;
844         }
845
846         r = readlink_and_make_absolute(p, &d);
847         if (r >= 0) {
848                 if ((arg_link_journal == LINK_GUEST ||
849                      arg_link_journal == LINK_AUTO) &&
850                     path_equal(d, q)) {
851
852                         r = mkdir_p(q, 0755);
853                         if (r < 0)
854                                 log_warning("failed to create directory %s: %m", q);
855                         return 0;
856                 }
857
858                 if (unlink(p) < 0) {
859                         log_error("Failed to remove symlink %s: %m", p);
860                         return -errno;
861                 }
862         } else if (r == -EINVAL) {
863
864                 if (arg_link_journal == LINK_GUEST &&
865                     rmdir(p) < 0) {
866
867                         if (errno == ENOTDIR) {
868                                 log_error("%s already exists and is neither a symlink nor a directory", p);
869                                 return r;
870                         } else {
871                                 log_error("Failed to remove %s: %m", p);
872                                 return -errno;
873                         }
874                 }
875         } else if (r != -ENOENT) {
876                 log_error("readlink(%s) failed: %m", p);
877                 return r;
878         }
879
880         if (arg_link_journal == LINK_GUEST) {
881
882                 if (symlink(q, p) < 0) {
883                         log_error("Failed to symlink %s to %s: %m", q, p);
884                         return -errno;
885                 }
886
887                 r = mkdir_p(q, 0755);
888                 if (r < 0)
889                         log_warning("failed to create directory %s: %m", q);
890                 return 0;
891         }
892
893         if (arg_link_journal == LINK_HOST) {
894                 r = mkdir_p(p, 0755);
895                 if (r < 0) {
896                         log_error("Failed to create %s: %m", p);
897                         return r;
898                 }
899
900         } else if (access(p, F_OK) < 0)
901                 return 0;
902
903         if (dir_is_empty(q) == 0) {
904                 log_error("%s not empty.", q);
905                 return -ENOTEMPTY;
906         }
907
908         r = mkdir_p(q, 0755);
909         if (r < 0) {
910                 log_error("Failed to create %s: %m", q);
911                 return r;
912         }
913
914         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
915                 log_error("Failed to bind mount journal from host into guest: %m");
916                 return -errno;
917         }
918
919         return 0;
920 }
921
922 static int drop_capabilities(void) {
923         return capability_bounding_set_drop(~arg_retain, false);
924 }
925
926 static int register_machine(void) {
927         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
928         _cleanup_bus_unref_ sd_bus *bus = NULL;
929         int r;
930
931         r = sd_bus_open_system(&bus);
932         if (r < 0) {
933                 log_error("Failed to open system bus: %s", strerror(-r));
934                 return r;
935         }
936
937         r = sd_bus_call_method(
938                         bus,
939                         "org.freedesktop.machine1",
940                         "/org/freedesktop/machine1",
941                         "org.freedesktop.machine1.Manager",
942                         "CreateMachine",
943                         &error,
944                         NULL,
945                         "sayssusa(sv)",
946                         arg_machine,
947                         SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
948                         "nspawn",
949                         "container",
950                         (uint32_t) 0,
951                         strempty(arg_directory),
952                         !isempty(arg_slice), "Slice", "s", arg_slice);
953         if (r < 0) {
954                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
955                 return r;
956         }
957
958         return 0;
959 }
960
961 static int terminate_machine(pid_t pid) {
962         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
963         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
964         _cleanup_bus_unref_ sd_bus *bus = NULL;
965         const char *path;
966         int r;
967
968         r = sd_bus_open_system(&bus);
969         if (r < 0) {
970                 log_error("Failed to open system bus: %s", strerror(-r));
971                 return r;
972         }
973
974         r = sd_bus_call_method(
975                         bus,
976                         "org.freedesktop.machine1",
977                         "/org/freedesktop/machine1",
978                         "org.freedesktop.machine1.Manager",
979                         "GetMachineByPID",
980                         &error,
981                         &reply,
982                         "u",
983                         (uint32_t) pid);
984         if (r < 0) {
985                 /* Note that the machine might already have been
986                  * cleaned up automatically, hence don't consider it a
987                  * failure if we cannot get the machine object. */
988                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
989                 return 0;
990         }
991
992         r = sd_bus_message_read(reply, "o", &path);
993         if (r < 0) {
994                 log_error("Failed to parse GetMachineByPID() reply: %s", bus_error_message(&error, r));
995                 return r;
996         }
997
998         r = sd_bus_call_method(
999                         bus,
1000                         "org.freedesktop.machine1",
1001                         path,
1002                         "org.freedesktop.machine1.Machine",
1003                         "Terminate",
1004                         &error,
1005                         NULL,
1006                         NULL);
1007         if (r < 0) {
1008                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1009                 return 0;
1010         }
1011
1012         return 0;
1013 }
1014
/* Probes for the audit subsystem by trying to open a NETLINK_AUDIT
 * socket. Returns true if the socket could be created (it is closed
 * again right away), false otherwise. */
static bool audit_enabled(void) {
        int probe = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);

        if (probe < 0)
                return false;

        close_nointr_nofail(probe);
        return true;
}
1025
1026 int main(int argc, char *argv[]) {
1027         pid_t pid = 0;
1028         int r = EXIT_FAILURE, k;
1029         _cleanup_close_ int master = -1;
1030         int n_fd_passed;
1031         const char *console = NULL;
1032         sigset_t mask;
1033         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1034         _cleanup_fdset_free_ FDSet *fds = NULL;
1035
1036         log_parse_environment();
1037         log_open();
1038
1039         k = parse_argv(argc, argv);
1040         if (k < 0)
1041                 goto finish;
1042         else if (k == 0) {
1043                 r = EXIT_SUCCESS;
1044                 goto finish;
1045         }
1046
1047         if (arg_directory) {
1048                 char *p;
1049
1050                 p = path_make_absolute_cwd(arg_directory);
1051                 free(arg_directory);
1052                 arg_directory = p;
1053         } else
1054                 arg_directory = get_current_dir_name();
1055
1056         if (!arg_directory) {
1057                 log_error("Failed to determine path, please use -D.");
1058                 goto finish;
1059         }
1060
1061         path_kill_slashes(arg_directory);
1062
1063         if (!arg_machine) {
1064                 arg_machine = strdup(path_get_file_name(arg_directory));
1065                 if (!arg_machine) {
1066                         log_oom();
1067                         goto finish;
1068                 }
1069
1070                 hostname_cleanup(arg_machine, false);
1071                 if (isempty(arg_machine)) {
1072                         log_error("Failed to determine machine name automatically, please use -M.");
1073                         goto finish;
1074                 }
1075         }
1076
1077         if (geteuid() != 0) {
1078                 log_error("Need to be root.");
1079                 goto finish;
1080         }
1081
1082         if (sd_booted() <= 0) {
1083                 log_error("Not running on a systemd system.");
1084                 goto finish;
1085         }
1086
1087         if (arg_boot && audit_enabled()) {
1088                 log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
1089                             "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
1090                             "line before using systemd-nspawn. Sleeping for 5s...\n");
1091                 sleep(5);
1092         }
1093
1094         if (path_equal(arg_directory, "/")) {
1095                 log_error("Spawning container on root directory not supported.");
1096                 goto finish;
1097         }
1098
1099         if (path_is_os_tree(arg_directory) <= 0) {
1100                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1101                 goto finish;
1102         }
1103
1104         log_close();
1105         n_fd_passed = sd_listen_fds(false);
1106         if (n_fd_passed > 0) {
1107                 k = fdset_new_listen_fds(&fds, false);
1108                 if (k < 0) {
1109                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1110                         goto finish;
1111                 }
1112         }
1113         fdset_close_others(fds);
1114         log_open();
1115
1116         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1117         if (master < 0) {
1118                 log_error("Failed to acquire pseudo tty: %m");
1119                 goto finish;
1120         }
1121
1122         console = ptsname(master);
1123         if (!console) {
1124                 log_error("Failed to determine tty name: %m");
1125                 goto finish;
1126         }
1127
1128         log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1129
1130         if (unlockpt(master) < 0) {
1131                 log_error("Failed to unlock tty: %m");
1132                 goto finish;
1133         }
1134
1135         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1136                 log_error("Failed to create kmsg socket pair.");
1137                 goto finish;
1138         }
1139
1140         sd_notify(0, "READY=1");
1141
1142         assert_se(sigemptyset(&mask) == 0);
1143         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1144         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1145
1146         for (;;) {
1147                 siginfo_t status;
1148
1149                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1150                 if (pid < 0) {
1151                         if (errno == EINVAL)
1152                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1153                         else
1154                                 log_error("clone() failed: %m");
1155
1156                         goto finish;
1157                 }
1158
1159                 if (pid == 0) {
1160                         /* child */
1161                         const char *home = NULL;
1162                         uid_t uid = (uid_t) -1;
1163                         gid_t gid = (gid_t) -1;
1164                         unsigned n_env = 2;
1165                         const char *envp[] = {
1166                                 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1167                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1168                                 NULL, /* TERM */
1169                                 NULL, /* HOME */
1170                                 NULL, /* USER */
1171                                 NULL, /* LOGNAME */
1172                                 NULL, /* container_uuid */
1173                                 NULL, /* LISTEN_FDS */
1174                                 NULL, /* LISTEN_PID */
1175                                 NULL
1176                         };
1177
1178                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1179                         if (envp[n_env])
1180                                 n_env ++;
1181
1182                         close_nointr_nofail(master);
1183                         master = -1;
1184
1185                         close_nointr(STDIN_FILENO);
1186                         close_nointr(STDOUT_FILENO);
1187                         close_nointr(STDERR_FILENO);
1188
1189                         close_nointr_nofail(kmsg_socket_pair[0]);
1190                         kmsg_socket_pair[0] = -1;
1191
1192                         reset_all_signal_handlers();
1193
1194                         assert_se(sigemptyset(&mask) == 0);
1195                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1196
1197                         k = open_terminal(console, O_RDWR);
1198                         if (k != STDIN_FILENO) {
1199                                 if (k >= 0) {
1200                                         close_nointr_nofail(k);
1201                                         k = -EINVAL;
1202                                 }
1203
1204                                 log_error("Failed to open console: %s", strerror(-k));
1205                                 goto child_fail;
1206                         }
1207
1208                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1209                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1210                                 log_error("Failed to duplicate console: %m");
1211                                 goto child_fail;
1212                         }
1213
1214                         if (setsid() < 0) {
1215                                 log_error("setsid() failed: %m");
1216                                 goto child_fail;
1217                         }
1218
1219                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1220                                 log_error("PR_SET_PDEATHSIG failed: %m");
1221                                 goto child_fail;
1222                         }
1223
1224                         r = register_machine();
1225                         if (r < 0)
1226                                 goto finish;
1227
1228                         /* Mark everything as slave, so that we still
1229                          * receive mounts from the real root, but don't
1230                          * propagate mounts to the real root. */
1231                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1232                                 log_error("MS_SLAVE|MS_REC failed: %m");
1233                                 goto child_fail;
1234                         }
1235
1236                         /* Turn directory into bind mount */
1237                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1238                                 log_error("Failed to make bind mount.");
1239                                 goto child_fail;
1240                         }
1241
1242                         if (arg_read_only)
1243                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1244                                         log_error("Failed to make read-only.");
1245                                         goto child_fail;
1246                                 }
1247
1248                         if (mount_all(arg_directory) < 0)
1249                                 goto child_fail;
1250
1251                         if (copy_devnodes(arg_directory) < 0)
1252                                 goto child_fail;
1253
1254                         if (setup_ptmx(arg_directory) < 0)
1255                                 goto child_fail;
1256
1257                         dev_setup(arg_directory);
1258
1259                         if (setup_dev_console(arg_directory, console) < 0)
1260                                 goto child_fail;
1261
1262                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1263                                 goto child_fail;
1264
1265                         close_nointr_nofail(kmsg_socket_pair[1]);
1266                         kmsg_socket_pair[1] = -1;
1267
1268                         if (setup_boot_id(arg_directory) < 0)
1269                                 goto child_fail;
1270
1271                         if (setup_timezone(arg_directory) < 0)
1272                                 goto child_fail;
1273
1274                         if (setup_resolv_conf(arg_directory) < 0)
1275                                 goto child_fail;
1276
1277                         if (setup_journal(arg_directory) < 0)
1278                                 goto child_fail;
1279
1280                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1281                                 goto child_fail;
1282
1283                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1284                                 goto child_fail;
1285
1286                         if (chdir(arg_directory) < 0) {
1287                                 log_error("chdir(%s) failed: %m", arg_directory);
1288                                 goto child_fail;
1289                         }
1290
1291                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1292                                 log_error("mount(MS_MOVE) failed: %m");
1293                                 goto child_fail;
1294                         }
1295
1296                         if (chroot(".") < 0) {
1297                                 log_error("chroot() failed: %m");
1298                                 goto child_fail;
1299                         }
1300
1301                         if (chdir("/") < 0) {
1302                                 log_error("chdir() failed: %m");
1303                                 goto child_fail;
1304                         }
1305
1306                         umask(0022);
1307
1308                         loopback_setup();
1309
1310                         if (drop_capabilities() < 0) {
1311                                 log_error("drop_capabilities() failed: %m");
1312                                 goto child_fail;
1313                         }
1314
1315                         if (arg_user) {
1316
1317                                 /* Note that this resolves user names
1318                                  * inside the container, and hence
1319                                  * accesses the NSS modules from the
1320                                  * container and not the host. This is
1321                                  * a bit weird... */
1322
1323                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1324                                         log_error("get_user_creds() failed: %m");
1325                                         goto child_fail;
1326                                 }
1327
1328                                 if (mkdir_parents_label(home, 0775) < 0) {
1329                                         log_error("mkdir_parents_label() failed: %m");
1330                                         goto child_fail;
1331                                 }
1332
1333                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1334                                         log_error("mkdir_safe_label() failed: %m");
1335                                         goto child_fail;
1336                                 }
1337
1338                                 if (initgroups((const char*)arg_user, gid) < 0) {
1339                                         log_error("initgroups() failed: %m");
1340                                         goto child_fail;
1341                                 }
1342
1343                                 if (setresgid(gid, gid, gid) < 0) {
1344                                         log_error("setregid() failed: %m");
1345                                         goto child_fail;
1346                                 }
1347
1348                                 if (setresuid(uid, uid, uid) < 0) {
1349                                         log_error("setreuid() failed: %m");
1350                                         goto child_fail;
1351                                 }
1352                         } else {
1353                                 /* Reset everything fully to 0, just in case */
1354
1355                                 if (setgroups(0, NULL) < 0) {
1356                                         log_error("setgroups() failed: %m");
1357                                         goto child_fail;
1358                                 }
1359
1360                                 if (setresgid(0, 0, 0) < 0) {
1361                                         log_error("setregid() failed: %m");
1362                                         goto child_fail;
1363                                 }
1364
1365                                 if (setresuid(0, 0, 0) < 0) {
1366                                         log_error("setreuid() failed: %m");
1367                                         goto child_fail;
1368                                 }
1369                         }
1370
1371                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1372                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1373                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1374                                 log_oom();
1375                                 goto child_fail;
1376                         }
1377
1378                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1379                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1380                                         log_oom();
1381                                         goto child_fail;
1382                                 }
1383                         }
1384
1385                         if (fdset_size(fds) > 0) {
1386                                 k = fdset_cloexec(fds, false);
1387                                 if (k < 0) {
1388                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1389                                         goto child_fail;
1390                                 }
1391
1392                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1393                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1394                                         log_oom();
1395                                         goto child_fail;
1396                                 }
1397                         }
1398
1399                         setup_hostname();
1400
1401                         if (arg_boot) {
1402                                 char **a;
1403                                 size_t l;
1404
1405                                 /* Automatically search for the init system */
1406
1407                                 l = 1 + argc - optind;
1408                                 a = newa(char*, l + 1);
1409                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
1410
1411                                 a[0] = (char*) "/usr/lib/systemd/systemd";
1412                                 execve(a[0], a, (char**) envp);
1413
1414                                 a[0] = (char*) "/lib/systemd/systemd";
1415                                 execve(a[0], a, (char**) envp);
1416
1417                                 a[0] = (char*) "/sbin/init";
1418                                 execve(a[0], a, (char**) envp);
1419                         } else if (argc > optind)
1420                                 execvpe(argv[optind], argv + optind, (char**) envp);
1421                         else {
1422                                 chdir(home ? home : "/root");
1423                                 execle("/bin/bash", "-bash", NULL, (char**) envp);
1424                         }
1425
1426                         log_error("execv() failed: %m");
1427
1428                 child_fail:
1429                         _exit(EXIT_FAILURE);
1430                 }
1431
1432                 fdset_free(fds);
1433                 fds = NULL;
1434
1435                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
1436                 if (k < 0) {
1437                         r = EXIT_FAILURE;
1438                         break;
1439                 }
1440
1441                 putc('\n', stdout);
1442
1443                 /* Kill if it is not dead yet anyway */
1444                 terminate_machine(pid);
1445
1446                 /* Redundant, but better safe than sorry */
1447                 kill(pid, SIGKILL);
1448
1449                 k = wait_for_terminate(pid, &status);
1450                 pid = 0;
1451
1452                 if (k < 0) {
1453                         r = EXIT_FAILURE;
1454                         break;
1455                 }
1456
1457                 if (status.si_code == CLD_EXITED) {
1458                         r = status.si_status;
1459                         if (status.si_status != 0) {
1460                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
1461                                 break;
1462                         }
1463
1464                         log_debug("Container %s exited successfully.", arg_machine);
1465                         break;
1466                 } else if (status.si_code == CLD_KILLED &&
1467                            status.si_status == SIGINT) {
1468                         log_info("Container %s has been shut down.", arg_machine);
1469                         r = 0;
1470                         break;
1471                 } else if (status.si_code == CLD_KILLED &&
1472                            status.si_status == SIGHUP) {
1473                         log_info("Container %s is being rebooted.", arg_machine);
1474                         continue;
1475                 } else if (status.si_code == CLD_KILLED ||
1476                            status.si_code == CLD_DUMPED) {
1477
1478                         log_error("Container %s terminated by signal %s.", arg_machine,  signal_to_string(status.si_status));
1479                         r = EXIT_FAILURE;
1480                         break;
1481                 } else {
1482                         log_error("Container %s failed due to unknown reason.", arg_machine);
1483                         r = EXIT_FAILURE;
1484                         break;
1485                 }
1486         }
1487
1488 finish:
1489         if (pid > 0)
1490                 kill(pid, SIGKILL);
1491
1492         free(arg_directory);
1493         free(arg_machine);
1494
1495         return r;
1496 }