chiark / gitweb /
nspawn: explicitly terminate machines when we exit nspawn
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43
44 #include "sd-daemon.h"
45 #include "sd-bus.h"
46 #include "sd-id128.h"
47 #include "log.h"
48 #include "util.h"
49 #include "mkdir.h"
50 #include "macro.h"
51 #include "audit.h"
52 #include "missing.h"
53 #include "cgroup-util.h"
54 #include "strv.h"
55 #include "path-util.h"
56 #include "loopback-setup.h"
57 #include "dev-setup.h"
58 #include "fdset.h"
59 #include "build.h"
60 #include "fileio.h"
61 #include "bus-util.h"
62 #include "bus-error.h"
63 #include "ptyfwd.h"
64
/* Fallback value for the "gid=" option of the container's devpts mount
 * (see the mount table in mount_all()); only used when the build has
 * not already defined TTY_GID. */
#ifndef TTY_GID
#define TTY_GID 5
#endif
68
/* Journal linking mode, selected with --link-journal= (or -j) in
 * parse_argv() and acted upon by setup_journal(). */
typedef enum LinkJournal {
        LINK_NO,    /* setup_journal() returns right away and changes nothing */
        LINK_AUTO,  /* default; setup_journal() quietly gives up when the machine ID
                     * is missing/empty or a journal path is already a mount point */
        LINK_HOST,  /* see setup_journal() */
        LINK_GUEST  /* /var/log/journal/<machine-id> on the host becomes a symlink
                     * to the same path below the container root */
} LinkJournal;
75
/* Command line settings; all of them are filled in by parse_argv(). */
static char *arg_directory = NULL;              /* -D: canonicalized root directory of the container */
static char *arg_user = NULL;                   /* -u: user name or uid, kept as the string given */
static sd_id128_t arg_uuid = {};                /* --uuid=: parsed with sd_id128_from_string() */
static char *arg_machine = NULL;                /* -M: accepted only if hostname_is_valid() */
static const char *arg_slice = NULL;            /* -S: strdup()ed copy of the argument */
static bool arg_private_network = false;        /* --private-network */
static bool arg_read_only = false;              /* --read-only */
static bool arg_boot = false;                   /* -b */
static LinkJournal arg_link_journal = LINK_AUTO; /* --link-journal= / -j */

/* Bit mask (1ULL << CAP_xxx) of the capabilities retained by default;
 * --capability= ORs additional bits into it. */
static uint64_t arg_retain =
        (1ULL << CAP_CHOWN) |
        (1ULL << CAP_DAC_OVERRIDE) |
        (1ULL << CAP_DAC_READ_SEARCH) |
        (1ULL << CAP_FOWNER) |
        (1ULL << CAP_FSETID) |
        (1ULL << CAP_IPC_OWNER) |
        (1ULL << CAP_KILL) |
        (1ULL << CAP_LEASE) |
        (1ULL << CAP_LINUX_IMMUTABLE) |
        (1ULL << CAP_NET_BIND_SERVICE) |
        (1ULL << CAP_NET_BROADCAST) |
        (1ULL << CAP_NET_RAW) |
        (1ULL << CAP_SETGID) |
        (1ULL << CAP_SETFCAP) |
        (1ULL << CAP_SETPCAP) |
        (1ULL << CAP_SETUID) |
        (1ULL << CAP_SYS_ADMIN) |
        (1ULL << CAP_SYS_CHROOT) |
        (1ULL << CAP_SYS_NICE) |
        (1ULL << CAP_SYS_PTRACE) |
        (1ULL << CAP_SYS_TTY_CONFIG) |
        (1ULL << CAP_SYS_RESOURCE) |
        (1ULL << CAP_SYS_BOOT) |
        (1ULL << CAP_AUDIT_WRITE) |
        (1ULL << CAP_AUDIT_CONTROL);

/* --bind= and --bind-ro=: flat string lists holding consecutive
 * (host source, container destination) path pairs, as consumed by
 * mount_binds(). */
static char **arg_bind = NULL;
static char **arg_bind_ro = NULL;

113
/* Print the synopsis and the option summary to stdout.
 *
 * The text for -j says "guest" because parse_argv() maps -j to
 * LINK_GUEST; it used to claim "host", which did not match the code.
 *
 * Always returns 0. */
static int help(void) {

        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               "  -h --help                Show this help\n"
               "     --version             Print version string\n"
               "  -D --directory=NAME      Root directory for the container\n"
               "  -b --boot                Boot up full system (i.e. invoke init)\n"
               "  -u --user=USER           Run the command under specified user or uid\n"
               "     --uuid=UUID           Set a specific machine UUID for the container\n"
               "  -M --machine=NAME        Set the machine name for the container\n"
               "  -S --slice=SLICE         Place the container in the specified slice\n"
               "     --private-network     Disable network in container\n"
               "     --read-only           Mount the root directory read-only\n"
               "     --capability=CAP      In addition to the default, retain specified\n"
               "                           capability\n"
               "     --link-journal=MODE   Link up guest journal, one of no, auto, guest, host\n"
               "  -j                       Equivalent to --link-journal=guest\n"
               "     --bind=PATH[:PATH]    Bind mount a file or directory from the host into\n"
               "                           the container\n"
               "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n",
               program_invocation_short_name);

        return 0;
}
139
140 static int parse_argv(int argc, char *argv[]) {
141
142         enum {
143                 ARG_VERSION = 0x100,
144                 ARG_PRIVATE_NETWORK,
145                 ARG_UUID,
146                 ARG_READ_ONLY,
147                 ARG_CAPABILITY,
148                 ARG_LINK_JOURNAL,
149                 ARG_BIND,
150                 ARG_BIND_RO
151         };
152
153         static const struct option options[] = {
154                 { "help",            no_argument,       NULL, 'h'                 },
155                 { "version",         no_argument,       NULL, ARG_VERSION         },
156                 { "directory",       required_argument, NULL, 'D'                 },
157                 { "user",            required_argument, NULL, 'u'                 },
158                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
159                 { "boot",            no_argument,       NULL, 'b'                 },
160                 { "uuid",            required_argument, NULL, ARG_UUID            },
161                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
162                 { "capability",      required_argument, NULL, ARG_CAPABILITY      },
163                 { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
164                 { "bind",            required_argument, NULL, ARG_BIND            },
165                 { "bind-ro",         required_argument, NULL, ARG_BIND_RO         },
166                 { "machine",         required_argument, NULL, 'M'                 },
167                 { "slice",           required_argument, NULL, 'S'                 },
168                 { NULL,              0,                 NULL, 0                   }
169         };
170
171         int c, r;
172
173         assert(argc >= 0);
174         assert(argv);
175
176         while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
177
178                 switch (c) {
179
180                 case 'h':
181                         help();
182                         return 0;
183
184                 case ARG_VERSION:
185                         puts(PACKAGE_STRING);
186                         puts(SYSTEMD_FEATURES);
187                         return 0;
188
189                 case 'D':
190                         free(arg_directory);
191                         arg_directory = canonicalize_file_name(optarg);
192                         if (!arg_directory) {
193                                 log_error("Failed to canonicalize root directory.");
194                                 return -ENOMEM;
195                         }
196
197                         break;
198
199                 case 'u':
200                         free(arg_user);
201                         arg_user = strdup(optarg);
202                         if (!arg_user)
203                                 return log_oom();
204
205                         break;
206
207                 case ARG_PRIVATE_NETWORK:
208                         arg_private_network = true;
209                         break;
210
211                 case 'b':
212                         arg_boot = true;
213                         break;
214
215                 case ARG_UUID:
216                         r = sd_id128_from_string(optarg, &arg_uuid);
217                         if (r < 0) {
218                                 log_error("Invalid UUID: %s", optarg);
219                                 return r;
220                         }
221                         break;
222
223                 case 'S':
224                         arg_slice = strdup(optarg);
225                         if (!arg_slice)
226                                 return log_oom();
227
228                         break;
229
230                 case 'M':
231                         if (!hostname_is_valid(optarg)) {
232                                 log_error("Invalid machine name: %s", optarg);
233                                 return -EINVAL;
234                         }
235
236                         free(arg_machine);
237                         arg_machine = strdup(optarg);
238                         if (!arg_machine)
239                                 return log_oom();
240
241                         break;
242
243                 case ARG_READ_ONLY:
244                         arg_read_only = true;
245                         break;
246
247                 case ARG_CAPABILITY: {
248                         char *state, *word;
249                         size_t length;
250
251                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
252                                 cap_value_t cap;
253                                 char *t;
254
255                                 t = strndup(word, length);
256                                 if (!t)
257                                         return log_oom();
258
259                                 if (cap_from_name(t, &cap) < 0) {
260                                         log_error("Failed to parse capability %s.", t);
261                                         free(t);
262                                         return -EINVAL;
263                                 }
264
265                                 free(t);
266                                 arg_retain |= 1ULL << (uint64_t) cap;
267                         }
268
269                         break;
270                 }
271
272                 case 'j':
273                         arg_link_journal = LINK_GUEST;
274                         break;
275
276                 case ARG_LINK_JOURNAL:
277                         if (streq(optarg, "auto"))
278                                 arg_link_journal = LINK_AUTO;
279                         else if (streq(optarg, "no"))
280                                 arg_link_journal = LINK_NO;
281                         else if (streq(optarg, "guest"))
282                                 arg_link_journal = LINK_GUEST;
283                         else if (streq(optarg, "host"))
284                                 arg_link_journal = LINK_HOST;
285                         else {
286                                 log_error("Failed to parse link journal mode %s", optarg);
287                                 return -EINVAL;
288                         }
289
290                         break;
291
292                 case ARG_BIND:
293                 case ARG_BIND_RO: {
294                         _cleanup_free_ char *a = NULL, *b = NULL;
295                         char *e;
296                         char ***x;
297
298                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
299
300                         e = strchr(optarg, ':');
301                         if (e) {
302                                 a = strndup(optarg, e - optarg);
303                                 b = strdup(e + 1);
304                         } else {
305                                 a = strdup(optarg);
306                                 b = strdup(optarg);
307                         }
308
309                         if (!a || !b)
310                                 return log_oom();
311
312                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
313                                 log_error("Invalid bind mount specification: %s", optarg);
314                                 return -EINVAL;
315                         }
316
317                         r = strv_extend(x, a);
318                         if (r < 0)
319                                 return log_oom();
320
321                         r = strv_extend(x, b);
322                         if (r < 0)
323                                 return log_oom();
324
325                         break;
326                 }
327
328                 case '?':
329                         return -EINVAL;
330
331                 default:
332                         log_error("Unknown option code %c", c);
333                         return -EINVAL;
334                 }
335         }
336
337         return 1;
338 }
339
340 static int mount_all(const char *dest) {
341
342         typedef struct MountPoint {
343                 const char *what;
344                 const char *where;
345                 const char *type;
346                 const char *options;
347                 unsigned long flags;
348                 bool fatal;
349         } MountPoint;
350
351         static const MountPoint mount_table[] = {
352                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
353                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
354                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
355                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
356                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
357                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
358                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
359                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
360 #ifdef HAVE_SELINUX
361                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
362                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
363 #endif
364         };
365
366         unsigned k;
367         int r = 0;
368
369         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
370                 _cleanup_free_ char *where = NULL;
371                 int t;
372
373                 where = strjoin(dest, "/", mount_table[k].where, NULL);
374                 if (!where)
375                         return log_oom();
376
377                 t = path_is_mount_point(where, true);
378                 if (t < 0) {
379                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
380
381                         if (r == 0)
382                                 r = t;
383
384                         continue;
385                 }
386
387                 /* Skip this entry if it is not a remount. */
388                 if (mount_table[k].what && t > 0)
389                         continue;
390
391                 mkdir_p(where, 0755);
392
393                 if (mount(mount_table[k].what,
394                           where,
395                           mount_table[k].type,
396                           mount_table[k].flags,
397                           mount_table[k].options) < 0 &&
398                     mount_table[k].fatal) {
399
400                         log_error("mount(%s) failed: %m", where);
401
402                         if (r == 0)
403                                 r = -errno;
404                 }
405         }
406
407         return r;
408 }
409
410 static int mount_binds(const char *dest, char **l, unsigned long flags) {
411         char **x, **y;
412
413         STRV_FOREACH_PAIR(x, y, l) {
414                 _cleanup_free_ char *where = NULL;
415                 struct stat source_st, dest_st;
416
417                 if (stat(*x, &source_st) < 0) {
418                         log_error("failed to stat %s: %m", *x);
419                         return -errno;
420                 }
421
422                 where = strjoin(dest, "/", *y, NULL);
423                 if (!where)
424                         return log_oom();
425
426                 if (stat(where, &dest_st) == 0) {
427                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
428                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
429                                                 *x, where);
430                                 return -EINVAL;
431                         }
432                 } else {
433                         /* Create the mount point, but be conservative -- refuse to create block
434                          * and char devices. */
435                         if (S_ISDIR(source_st.st_mode))
436                                 mkdir_p_label(where, 0755);
437                         else if (S_ISFIFO(source_st.st_mode))
438                                 mkfifo(where, 0644);
439                         else if (S_ISSOCK(source_st.st_mode))
440                                 mknod(where, 0644 | S_IFSOCK, 0);
441                         else if (S_ISREG(source_st.st_mode))
442                                 touch(where);
443                         else {
444                                 log_error("Refusing to create mountpoint for file: %s", *x);
445                                 return -ENOTSUP;
446                         }
447                 }
448
449                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
450                         log_error("mount(%s) failed: %m", where);
451                         return -errno;
452                 }
453
454                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
455                         log_error("mount(%s) failed: %m", where);
456                         return -errno;
457                 }
458         }
459
460         return 0;
461 }
462
463 static int setup_timezone(const char *dest) {
464         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
465         char *z, *y;
466         int r;
467
468         assert(dest);
469
470         /* Fix the timezone, if possible */
471         r = readlink_malloc("/etc/localtime", &p);
472         if (r < 0) {
473                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
474                 return 0;
475         }
476
477         z = path_startswith(p, "../usr/share/zoneinfo/");
478         if (!z)
479                 z = path_startswith(p, "/usr/share/zoneinfo/");
480         if (!z) {
481                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
482                 return 0;
483         }
484
485         where = strappend(dest, "/etc/localtime");
486         if (!where)
487                 return log_oom();
488
489         r = readlink_malloc(where, &q);
490         if (r >= 0) {
491                 y = path_startswith(q, "../usr/share/zoneinfo/");
492                 if (!y)
493                         y = path_startswith(q, "/usr/share/zoneinfo/");
494
495
496                 /* Already pointing to the right place? Then do nothing .. */
497                 if (y && streq(y, z))
498                         return 0;
499         }
500
501         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
502         if (!check)
503                 return log_oom();
504
505         if (access(check, F_OK) < 0) {
506                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
507                 return 0;
508         }
509
510         what = strappend("../usr/share/zoneinfo/", z);
511         if (!what)
512                 return log_oom();
513
514         unlink(where);
515         if (symlink(what, where) < 0) {
516                 log_error("Failed to correct timezone of container: %m");
517                 return 0;
518         }
519
520         return 0;
521 }
522
523 static int setup_resolv_conf(const char *dest) {
524         char _cleanup_free_ *where = NULL;
525
526         assert(dest);
527
528         if (arg_private_network)
529                 return 0;
530
531         /* Fix resolv.conf, if possible */
532         where = strappend(dest, "/etc/resolv.conf");
533         if (!where)
534                 return log_oom();
535
536         /* We don't really care for the results of this really. If it
537          * fails, it fails, but meh... */
538         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
539
540         return 0;
541 }
542
543 static int setup_boot_id(const char *dest) {
544         _cleanup_free_ char *from = NULL, *to = NULL;
545         sd_id128_t rnd;
546         char as_uuid[37];
547         int r;
548
549         assert(dest);
550
551         /* Generate a new randomized boot ID, so that each boot-up of
552          * the container gets a new one */
553
554         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
555         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
556         if (!from || !to)
557                 return log_oom();
558
559         r = sd_id128_randomize(&rnd);
560         if (r < 0) {
561                 log_error("Failed to generate random boot id: %s", strerror(-r));
562                 return r;
563         }
564
565         snprintf(as_uuid, sizeof(as_uuid),
566                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
567                  SD_ID128_FORMAT_VAL(rnd));
568         char_array_0(as_uuid);
569
570         r = write_string_file(from, as_uuid);
571         if (r < 0) {
572                 log_error("Failed to write boot id: %s", strerror(-r));
573                 return r;
574         }
575
576         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
577                 log_error("Failed to bind mount boot id: %m");
578                 r = -errno;
579         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
580                 log_warning("Failed to make boot id read-only: %m");
581
582         unlink(from);
583         return r;
584 }
585
586 static int copy_devnodes(const char *dest) {
587
588         static const char devnodes[] =
589                 "null\0"
590                 "zero\0"
591                 "full\0"
592                 "random\0"
593                 "urandom\0"
594                 "tty\0";
595
596         const char *d;
597         int r = 0;
598         _cleanup_umask_ mode_t u;
599
600         assert(dest);
601
602         u = umask(0000);
603
604         NULSTR_FOREACH(d, devnodes) {
605                 struct stat st;
606                 _cleanup_free_ char *from = NULL, *to = NULL;
607
608                 asprintf(&from, "/dev/%s", d);
609                 asprintf(&to, "%s/dev/%s", dest, d);
610
611                 if (!from || !to) {
612                         log_oom();
613
614                         if (r == 0)
615                                 r = -ENOMEM;
616
617                         break;
618                 }
619
620                 if (stat(from, &st) < 0) {
621
622                         if (errno != ENOENT) {
623                                 log_error("Failed to stat %s: %m", from);
624                                 if (r == 0)
625                                         r = -errno;
626                         }
627
628                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
629
630                         log_error("%s is not a char or block device, cannot copy", from);
631                         if (r == 0)
632                                 r = -EIO;
633
634                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
635
636                         log_error("mknod(%s) failed: %m", dest);
637                         if (r == 0)
638                                 r = -errno;
639                 }
640         }
641
642         return r;
643 }
644
645 static int setup_ptmx(const char *dest) {
646         _cleanup_free_ char *p = NULL;
647
648         p = strappend(dest, "/dev/ptmx");
649         if (!p)
650                 return log_oom();
651
652         if (symlink("pts/ptmx", p) < 0) {
653                 log_error("Failed to create /dev/ptmx symlink: %m");
654                 return -errno;
655         }
656
657         return 0;
658 }
659
660 static int setup_dev_console(const char *dest, const char *console) {
661         struct stat st;
662         _cleanup_free_ char *to = NULL;
663         int r;
664         _cleanup_umask_ mode_t u;
665
666         assert(dest);
667         assert(console);
668
669         u = umask(0000);
670
671         if (stat(console, &st) < 0) {
672                 log_error("Failed to stat %s: %m", console);
673                 return -errno;
674
675         } else if (!S_ISCHR(st.st_mode)) {
676                 log_error("/dev/console is not a char device");
677                 return -EIO;
678         }
679
680         r = chmod_and_chown(console, 0600, 0, 0);
681         if (r < 0) {
682                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
683                 return r;
684         }
685
686         if (asprintf(&to, "%s/dev/console", dest) < 0)
687                 return log_oom();
688
689         /* We need to bind mount the right tty to /dev/console since
690          * ptys can only exist on pts file systems. To have something
691          * to bind mount things on we create a device node first, that
692          * has the right major/minor (note that the major minor
693          * doesn't actually matter here, since we mount it over
694          * anyway). */
695
696         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
697                 log_error("mknod() for /dev/console failed: %m");
698                 return -errno;
699         }
700
701         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
702                 log_error("Bind mount for /dev/console failed: %m");
703                 return -errno;
704         }
705
706         return 0;
707 }
708
709 static int setup_kmsg(const char *dest, int kmsg_socket) {
710         _cleanup_free_ char *from = NULL, *to = NULL;
711         int r, fd, k;
712         _cleanup_umask_ mode_t u;
713         union {
714                 struct cmsghdr cmsghdr;
715                 uint8_t buf[CMSG_SPACE(sizeof(int))];
716         } control = {};
717         struct msghdr mh = {
718                 .msg_control = &control,
719                 .msg_controllen = sizeof(control),
720         };
721         struct cmsghdr *cmsg;
722
723         assert(dest);
724         assert(kmsg_socket >= 0);
725
726         u = umask(0000);
727
728         /* We create the kmsg FIFO as /dev/kmsg, but immediately
729          * delete it after bind mounting it to /proc/kmsg. While FIFOs
730          * on the reading side behave very similar to /proc/kmsg,
731          * their writing side behaves differently from /dev/kmsg in
732          * that writing blocks when nothing is reading. In order to
733          * avoid any problems with containers deadlocking due to this
734          * we simply make /dev/kmsg unavailable to the container. */
735         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
736             asprintf(&to, "%s/proc/kmsg", dest) < 0)
737                 return log_oom();
738
739         if (mkfifo(from, 0600) < 0) {
740                 log_error("mkfifo() for /dev/kmsg failed: %m");
741                 return -errno;
742         }
743
744         r = chmod_and_chown(from, 0600, 0, 0);
745         if (r < 0) {
746                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
747                 return r;
748         }
749
750         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
751                 log_error("Bind mount for /proc/kmsg failed: %m");
752                 return -errno;
753         }
754
755         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
756         if (fd < 0) {
757                 log_error("Failed to open fifo: %m");
758                 return -errno;
759         }
760
761         cmsg = CMSG_FIRSTHDR(&mh);
762         cmsg->cmsg_level = SOL_SOCKET;
763         cmsg->cmsg_type = SCM_RIGHTS;
764         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
765         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
766
767         mh.msg_controllen = cmsg->cmsg_len;
768
769         /* Store away the fd in the socket, so that it stays open as
770          * long as we run the child */
771         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
772         close_nointr_nofail(fd);
773
774         if (k < 0) {
775                 log_error("Failed to send FIFO fd: %m");
776                 return -errno;
777         }
778
779         /* And now make the FIFO unavailable as /dev/kmsg... */
780         unlink(from);
781         return 0;
782 }
783
784 static int setup_hostname(void) {
785
786         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
787                 return -errno;
788
789         return 0;
790 }
791
792 static int setup_journal(const char *directory) {
793         sd_id128_t machine_id;
794         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
795         char *id;
796         int r;
797
798         if (arg_link_journal == LINK_NO)
799                 return 0;
800
801         p = strappend(directory, "/etc/machine-id");
802         if (!p)
803                 return log_oom();
804
805         r = read_one_line_file(p, &b);
806         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
807                 return 0;
808         else if (r < 0) {
809                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
810                 return r;
811         }
812
813         id = strstrip(b);
814         if (isempty(id) && arg_link_journal == LINK_AUTO)
815                 return 0;
816
817         /* Verify validity */
818         r = sd_id128_from_string(id, &machine_id);
819         if (r < 0) {
820                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
821                 return r;
822         }
823
824         free(p);
825         p = strappend("/var/log/journal/", id);
826         q = strjoin(directory, "/var/log/journal/", id, NULL);
827         if (!p || !q)
828                 return log_oom();
829
830         if (path_is_mount_point(p, false) > 0) {
831                 if (arg_link_journal != LINK_AUTO) {
832                         log_error("%s: already a mount point, refusing to use for journal", p);
833                         return -EEXIST;
834                 }
835
836                 return 0;
837         }
838
839         if (path_is_mount_point(q, false) > 0) {
840                 if (arg_link_journal != LINK_AUTO) {
841                         log_error("%s: already a mount point, refusing to use for journal", q);
842                         return -EEXIST;
843                 }
844
845                 return 0;
846         }
847
848         r = readlink_and_make_absolute(p, &d);
849         if (r >= 0) {
850                 if ((arg_link_journal == LINK_GUEST ||
851                      arg_link_journal == LINK_AUTO) &&
852                     path_equal(d, q)) {
853
854                         r = mkdir_p(q, 0755);
855                         if (r < 0)
856                                 log_warning("failed to create directory %s: %m", q);
857                         return 0;
858                 }
859
860                 if (unlink(p) < 0) {
861                         log_error("Failed to remove symlink %s: %m", p);
862                         return -errno;
863                 }
864         } else if (r == -EINVAL) {
865
866                 if (arg_link_journal == LINK_GUEST &&
867                     rmdir(p) < 0) {
868
869                         if (errno == ENOTDIR) {
870                                 log_error("%s already exists and is neither a symlink nor a directory", p);
871                                 return r;
872                         } else {
873                                 log_error("Failed to remove %s: %m", p);
874                                 return -errno;
875                         }
876                 }
877         } else if (r != -ENOENT) {
878                 log_error("readlink(%s) failed: %m", p);
879                 return r;
880         }
881
882         if (arg_link_journal == LINK_GUEST) {
883
884                 if (symlink(q, p) < 0) {
885                         log_error("Failed to symlink %s to %s: %m", q, p);
886                         return -errno;
887                 }
888
889                 r = mkdir_p(q, 0755);
890                 if (r < 0)
891                         log_warning("failed to create directory %s: %m", q);
892                 return 0;
893         }
894
895         if (arg_link_journal == LINK_HOST) {
896                 r = mkdir_p(p, 0755);
897                 if (r < 0) {
898                         log_error("Failed to create %s: %m", p);
899                         return r;
900                 }
901
902         } else if (access(p, F_OK) < 0)
903                 return 0;
904
905         if (dir_is_empty(q) == 0) {
906                 log_error("%s not empty.", q);
907                 return -ENOTEMPTY;
908         }
909
910         r = mkdir_p(q, 0755);
911         if (r < 0) {
912                 log_error("Failed to create %s: %m", q);
913                 return r;
914         }
915
916         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
917                 log_error("Failed to bind mount journal from host into guest: %m");
918                 return -errno;
919         }
920
921         return 0;
922 }
923
924 static int drop_capabilities(void) {
925         return capability_bounding_set_drop(~arg_retain, false);
926 }
927
928 static int register_machine(void) {
929         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
930         _cleanup_bus_unref_ sd_bus *bus = NULL;
931         int r;
932
933         r = sd_bus_open_system(&bus);
934         if (r < 0) {
935                 log_error("Failed to open system bus: %s", strerror(-r));
936                 return r;
937         }
938
939         r = sd_bus_call_method(
940                         bus,
941                         "org.freedesktop.machine1",
942                         "/org/freedesktop/machine1",
943                         "org.freedesktop.machine1.Manager",
944                         "CreateMachine",
945                         &error,
946                         NULL,
947                         "sayssusa(sv)",
948                         arg_machine,
949                         SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
950                         "nspawn",
951                         "container",
952                         (uint32_t) 0,
953                         strempty(arg_directory),
954                         !isempty(arg_slice), "Slice", "s", arg_slice);
955         if (r < 0) {
956                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
957                 return r;
958         }
959
960         return 0;
961 }
962
963 static int terminate_machine(pid_t pid) {
964         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
965         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
966         _cleanup_bus_unref_ sd_bus *bus = NULL;
967         const char *path;
968         int r;
969
970         r = sd_bus_open_system(&bus);
971         if (r < 0) {
972                 log_error("Failed to open system bus: %s", strerror(-r));
973                 return r;
974         }
975
976         r = sd_bus_call_method(
977                         bus,
978                         "org.freedesktop.machine1",
979                         "/org/freedesktop/machine1",
980                         "org.freedesktop.machine1.Manager",
981                         "GetMachineByPID",
982                         &error,
983                         &reply,
984                         "u",
985                         (uint32_t) pid);
986         if (r < 0) {
987                 /* Note that the machine might already have been
988                  * cleaned up automatically, hence don't consider it a
989                  * failure if we cannot get the machine object. */
990                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
991                 return 0;
992         }
993
994         r = sd_bus_message_read(reply, "o", &path);
995         if (r < 0) {
996                 log_error("Failed to parse GetMachineByPID() reply: %s", bus_error_message(&error, r));
997                 return r;
998         }
999
1000         r = sd_bus_call_method(
1001                         bus,
1002                         "org.freedesktop.machine1",
1003                         path,
1004                         "org.freedesktop.machine1.Machine",
1005                         "Terminate",
1006                         &error,
1007                         NULL,
1008                         NULL);
1009         if (r < 0) {
1010                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1011                 return 0;
1012         }
1013
1014         return 0;
1015 }
1016
/* Probes for the audit subsystem by trying to create a NETLINK_AUDIT
 * socket. Returns true if the socket could be created (it is closed
 * again right away), false otherwise. */
static bool audit_enabled(void) {
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);

        if (fd < 0)
                return false;

        close_nointr_nofail(fd);
        return true;
}
1027
1028 int main(int argc, char *argv[]) {
1029         pid_t pid = 0;
1030         int r = EXIT_FAILURE, k;
1031         _cleanup_close_ int master = -1;
1032         int n_fd_passed;
1033         const char *console = NULL;
1034         sigset_t mask;
1035         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1036         _cleanup_fdset_free_ FDSet *fds = NULL;
1037
1038         log_parse_environment();
1039         log_open();
1040
1041         k = parse_argv(argc, argv);
1042         if (k < 0)
1043                 goto finish;
1044         else if (k == 0) {
1045                 r = EXIT_SUCCESS;
1046                 goto finish;
1047         }
1048
1049         if (arg_directory) {
1050                 char *p;
1051
1052                 p = path_make_absolute_cwd(arg_directory);
1053                 free(arg_directory);
1054                 arg_directory = p;
1055         } else
1056                 arg_directory = get_current_dir_name();
1057
1058         if (!arg_directory) {
1059                 log_error("Failed to determine path, please use -D.");
1060                 goto finish;
1061         }
1062
1063         path_kill_slashes(arg_directory);
1064
1065         if (!arg_machine) {
1066                 arg_machine = strdup(path_get_file_name(arg_directory));
1067                 if (!arg_machine) {
1068                         log_oom();
1069                         goto finish;
1070                 }
1071
1072                 hostname_cleanup(arg_machine, false);
1073                 if (isempty(arg_machine)) {
1074                         log_error("Failed to determine machine name automatically, please use -M.");
1075                         goto finish;
1076                 }
1077         }
1078
1079         if (geteuid() != 0) {
1080                 log_error("Need to be root.");
1081                 goto finish;
1082         }
1083
1084         if (sd_booted() <= 0) {
1085                 log_error("Not running on a systemd system.");
1086                 goto finish;
1087         }
1088
1089         if (arg_boot && audit_enabled()) {
1090                 log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
1091                             "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
1092                             "line before using systemd-nspawn. Sleeping for 5s...\n");
1093                 sleep(5);
1094         }
1095
1096         if (path_equal(arg_directory, "/")) {
1097                 log_error("Spawning container on root directory not supported.");
1098                 goto finish;
1099         }
1100
1101         if (path_is_os_tree(arg_directory) <= 0) {
1102                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1103                 goto finish;
1104         }
1105
1106         log_close();
1107         n_fd_passed = sd_listen_fds(false);
1108         if (n_fd_passed > 0) {
1109                 k = fdset_new_listen_fds(&fds, false);
1110                 if (k < 0) {
1111                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1112                         goto finish;
1113                 }
1114         }
1115         fdset_close_others(fds);
1116         log_open();
1117
1118         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1119         if (master < 0) {
1120                 log_error("Failed to acquire pseudo tty: %m");
1121                 goto finish;
1122         }
1123
1124         console = ptsname(master);
1125         if (!console) {
1126                 log_error("Failed to determine tty name: %m");
1127                 goto finish;
1128         }
1129
1130         log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1131
1132         if (unlockpt(master) < 0) {
1133                 log_error("Failed to unlock tty: %m");
1134                 goto finish;
1135         }
1136
1137         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1138                 log_error("Failed to create kmsg socket pair.");
1139                 goto finish;
1140         }
1141
1142         sd_notify(0, "READY=1");
1143
1144         assert_se(sigemptyset(&mask) == 0);
1145         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1146         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1147
1148         for (;;) {
1149                 siginfo_t status;
1150
1151                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1152                 if (pid < 0) {
1153                         if (errno == EINVAL)
1154                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1155                         else
1156                                 log_error("clone() failed: %m");
1157
1158                         goto finish;
1159                 }
1160
1161                 if (pid == 0) {
1162                         /* child */
1163                         const char *home = NULL;
1164                         uid_t uid = (uid_t) -1;
1165                         gid_t gid = (gid_t) -1;
1166                         unsigned n_env = 2;
1167                         const char *envp[] = {
1168                                 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1169                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1170                                 NULL, /* TERM */
1171                                 NULL, /* HOME */
1172                                 NULL, /* USER */
1173                                 NULL, /* LOGNAME */
1174                                 NULL, /* container_uuid */
1175                                 NULL, /* LISTEN_FDS */
1176                                 NULL, /* LISTEN_PID */
1177                                 NULL
1178                         };
1179
1180                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1181                         if (envp[n_env])
1182                                 n_env ++;
1183
1184                         close_nointr_nofail(master);
1185                         master = -1;
1186
1187                         close_nointr(STDIN_FILENO);
1188                         close_nointr(STDOUT_FILENO);
1189                         close_nointr(STDERR_FILENO);
1190
1191                         close_nointr_nofail(kmsg_socket_pair[0]);
1192                         kmsg_socket_pair[0] = -1;
1193
1194                         reset_all_signal_handlers();
1195
1196                         assert_se(sigemptyset(&mask) == 0);
1197                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1198
1199                         k = open_terminal(console, O_RDWR);
1200                         if (k != STDIN_FILENO) {
1201                                 if (k >= 0) {
1202                                         close_nointr_nofail(k);
1203                                         k = -EINVAL;
1204                                 }
1205
1206                                 log_error("Failed to open console: %s", strerror(-k));
1207                                 goto child_fail;
1208                         }
1209
1210                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1211                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1212                                 log_error("Failed to duplicate console: %m");
1213                                 goto child_fail;
1214                         }
1215
1216                         if (setsid() < 0) {
1217                                 log_error("setsid() failed: %m");
1218                                 goto child_fail;
1219                         }
1220
1221                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1222                                 log_error("PR_SET_PDEATHSIG failed: %m");
1223                                 goto child_fail;
1224                         }
1225
1226                         r = register_machine();
1227                         if (r < 0)
1228                                 goto finish;
1229
1230                         /* Mark everything as slave, so that we still
1231                          * receive mounts from the real root, but don't
1232                          * propagate mounts to the real root. */
1233                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1234                                 log_error("MS_SLAVE|MS_REC failed: %m");
1235                                 goto child_fail;
1236                         }
1237
1238                         /* Turn directory into bind mount */
1239                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1240                                 log_error("Failed to make bind mount.");
1241                                 goto child_fail;
1242                         }
1243
1244                         if (arg_read_only)
1245                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1246                                         log_error("Failed to make read-only.");
1247                                         goto child_fail;
1248                                 }
1249
1250                         if (mount_all(arg_directory) < 0)
1251                                 goto child_fail;
1252
1253                         if (copy_devnodes(arg_directory) < 0)
1254                                 goto child_fail;
1255
1256                         if (setup_ptmx(arg_directory) < 0)
1257                                 goto child_fail;
1258
1259                         dev_setup(arg_directory);
1260
1261                         if (setup_dev_console(arg_directory, console) < 0)
1262                                 goto child_fail;
1263
1264                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1265                                 goto child_fail;
1266
1267                         close_nointr_nofail(kmsg_socket_pair[1]);
1268                         kmsg_socket_pair[1] = -1;
1269
1270                         if (setup_boot_id(arg_directory) < 0)
1271                                 goto child_fail;
1272
1273                         if (setup_timezone(arg_directory) < 0)
1274                                 goto child_fail;
1275
1276                         if (setup_resolv_conf(arg_directory) < 0)
1277                                 goto child_fail;
1278
1279                         if (setup_journal(arg_directory) < 0)
1280                                 goto child_fail;
1281
1282                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1283                                 goto child_fail;
1284
1285                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1286                                 goto child_fail;
1287
1288                         if (chdir(arg_directory) < 0) {
1289                                 log_error("chdir(%s) failed: %m", arg_directory);
1290                                 goto child_fail;
1291                         }
1292
1293                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1294                                 log_error("mount(MS_MOVE) failed: %m");
1295                                 goto child_fail;
1296                         }
1297
1298                         if (chroot(".") < 0) {
1299                                 log_error("chroot() failed: %m");
1300                                 goto child_fail;
1301                         }
1302
1303                         if (chdir("/") < 0) {
1304                                 log_error("chdir() failed: %m");
1305                                 goto child_fail;
1306                         }
1307
1308                         umask(0022);
1309
1310                         loopback_setup();
1311
1312                         if (drop_capabilities() < 0) {
1313                                 log_error("drop_capabilities() failed: %m");
1314                                 goto child_fail;
1315                         }
1316
1317                         if (arg_user) {
1318
1319                                 /* Note that this resolves user names
1320                                  * inside the container, and hence
1321                                  * accesses the NSS modules from the
1322                                  * container and not the host. This is
1323                                  * a bit weird... */
1324
1325                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1326                                         log_error("get_user_creds() failed: %m");
1327                                         goto child_fail;
1328                                 }
1329
1330                                 if (mkdir_parents_label(home, 0775) < 0) {
1331                                         log_error("mkdir_parents_label() failed: %m");
1332                                         goto child_fail;
1333                                 }
1334
1335                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1336                                         log_error("mkdir_safe_label() failed: %m");
1337                                         goto child_fail;
1338                                 }
1339
1340                                 if (initgroups((const char*)arg_user, gid) < 0) {
1341                                         log_error("initgroups() failed: %m");
1342                                         goto child_fail;
1343                                 }
1344
1345                                 if (setresgid(gid, gid, gid) < 0) {
1346                                         log_error("setregid() failed: %m");
1347                                         goto child_fail;
1348                                 }
1349
1350                                 if (setresuid(uid, uid, uid) < 0) {
1351                                         log_error("setreuid() failed: %m");
1352                                         goto child_fail;
1353                                 }
1354                         } else {
1355                                 /* Reset everything fully to 0, just in case */
1356
1357                                 if (setgroups(0, NULL) < 0) {
1358                                         log_error("setgroups() failed: %m");
1359                                         goto child_fail;
1360                                 }
1361
1362                                 if (setresgid(0, 0, 0) < 0) {
1363                                         log_error("setregid() failed: %m");
1364                                         goto child_fail;
1365                                 }
1366
1367                                 if (setresuid(0, 0, 0) < 0) {
1368                                         log_error("setreuid() failed: %m");
1369                                         goto child_fail;
1370                                 }
1371                         }
1372
1373                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1374                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1375                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1376                                 log_oom();
1377                                 goto child_fail;
1378                         }
1379
1380                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1381                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1382                                         log_oom();
1383                                         goto child_fail;
1384                                 }
1385                         }
1386
1387                         if (fdset_size(fds) > 0) {
1388                                 k = fdset_cloexec(fds, false);
1389                                 if (k < 0) {
1390                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1391                                         goto child_fail;
1392                                 }
1393
1394                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1395                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1396                                         log_oom();
1397                                         goto child_fail;
1398                                 }
1399                         }
1400
1401                         setup_hostname();
1402
1403                         if (arg_boot) {
1404                                 char **a;
1405                                 size_t l;
1406
1407                                 /* Automatically search for the init system */
1408
1409                                 l = 1 + argc - optind;
1410                                 a = newa(char*, l + 1);
1411                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
1412
1413                                 a[0] = (char*) "/usr/lib/systemd/systemd";
1414                                 execve(a[0], a, (char**) envp);
1415
1416                                 a[0] = (char*) "/lib/systemd/systemd";
1417                                 execve(a[0], a, (char**) envp);
1418
1419                                 a[0] = (char*) "/sbin/init";
1420                                 execve(a[0], a, (char**) envp);
1421                         } else if (argc > optind)
1422                                 execvpe(argv[optind], argv + optind, (char**) envp);
1423                         else {
1424                                 chdir(home ? home : "/root");
1425                                 execle("/bin/bash", "-bash", NULL, (char**) envp);
1426                         }
1427
1428                         log_error("execv() failed: %m");
1429
1430                 child_fail:
1431                         _exit(EXIT_FAILURE);
1432                 }
1433
1434                 fdset_free(fds);
1435                 fds = NULL;
1436
1437                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
1438                 if (k < 0) {
1439                         r = EXIT_FAILURE;
1440                         break;
1441                 }
1442
1443                 putc('\n', stdout);
1444
1445                 /* Kill if it is not dead yet anyway */
1446                 terminate_machine(pid);
1447
1448                 /* Redundant, but better safe than sorry */
1449                 kill(pid, SIGKILL);
1450
1451                 k = wait_for_terminate(pid, &status);
1452                 pid = 0;
1453
1454                 if (k < 0) {
1455                         r = EXIT_FAILURE;
1456                         break;
1457                 }
1458
1459                 if (status.si_code == CLD_EXITED) {
1460                         r = status.si_status;
1461                         if (status.si_status != 0) {
1462                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
1463                                 break;
1464                         }
1465
1466                         log_debug("Container %s exited successfully.", arg_machine);
1467                         break;
1468                 } else if (status.si_code == CLD_KILLED &&
1469                            status.si_status == SIGINT) {
1470                         log_info("Container %s has been shut down.", arg_machine);
1471                         r = 0;
1472                         break;
1473                 } else if (status.si_code == CLD_KILLED &&
1474                            status.si_status == SIGHUP) {
1475                         log_info("Container %s is being rebooted.", arg_machine);
1476                         continue;
1477                 } else if (status.si_code == CLD_KILLED ||
1478                            status.si_code == CLD_DUMPED) {
1479
1480                         log_error("Container %s terminated by signal %s.", arg_machine,  signal_to_string(status.si_status));
1481                         r = EXIT_FAILURE;
1482                         break;
1483                 } else {
1484                         log_error("Container %s failed due to unknown reason.", arg_machine);
1485                         r = EXIT_FAILURE;
1486                         break;
1487                 }
1488         }
1489
1490 finish:
1491         if (pid > 0)
1492                 kill(pid, SIGKILL);
1493
1494         free(arg_directory);
1495         free(arg_machine);
1496
1497         return r;
1498 }