chiark / gitweb /
bus: append unique and well known names to all messages unconditionally
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43
44 #include "sd-daemon.h"
45 #include "sd-bus.h"
46 #include "sd-id128.h"
47 #include "log.h"
48 #include "util.h"
49 #include "mkdir.h"
50 #include "macro.h"
51 #include "audit.h"
52 #include "missing.h"
53 #include "cgroup-util.h"
54 #include "strv.h"
55 #include "path-util.h"
56 #include "loopback-setup.h"
57 #include "dev-setup.h"
58 #include "fdset.h"
59 #include "build.h"
60 #include "fileio.h"
61 #include "bus-util.h"
62 #include "bus-error.h"
63 #include "ptyfwd.h"
64
/* Fallback group ID for the "gid=" option of the devpts mount set up in
 * mount_all(); only used when the build did not define TTY_GID itself. */
#ifndef TTY_GID
#define TTY_GID 5
#endif
68
/* Journal linking modes, selected with --link-journal=MODE (or -j) in
 * parse_argv() and acted upon by setup_journal(). */
typedef enum LinkJournal {
        /* "no": setup_journal() returns immediately without touching anything */
        LINK_NO,
        /* "auto": the default; setup_journal() silently gives up when the
         * machine ID is missing/empty or a journal path is a mount point */
        LINK_AUTO,
        /* "host" */
        LINK_HOST,
        /* "guest": also what -j selects */
        LINK_GUEST
} LinkJournal;
75
/* Command line settings; all of these are filled in by parse_argv(). */

/* -D/--directory=: container root, stored as returned by canonicalize_file_name() */
static char *arg_directory = NULL;
/* -u/--user= */
static char *arg_user = NULL;
/* --uuid= */
static sd_id128_t arg_uuid = {};
/* -M/--machine=: validated with hostname_is_valid() before being stored */
static char *arg_machine = NULL;
/* -S/--slice= */
static const char *arg_slice = NULL;
/* --private-network */
static bool arg_private_network = false;
/* --read-only */
static bool arg_read_only = false;
/* -b/--boot */
static bool arg_boot = false;
/* --link-journal= / -j */
static LinkJournal arg_link_journal = LINK_AUTO;
/* Capability bitmask with one bit per CAP_* number. The initializer is the
 * default set; --capability= sets additional bits and --drop-capability=
 * clears bits. */
static uint64_t arg_retain =
        (1ULL << CAP_CHOWN) |
        (1ULL << CAP_DAC_OVERRIDE) |
        (1ULL << CAP_DAC_READ_SEARCH) |
        (1ULL << CAP_FOWNER) |
        (1ULL << CAP_FSETID) |
        (1ULL << CAP_IPC_OWNER) |
        (1ULL << CAP_KILL) |
        (1ULL << CAP_LEASE) |
        (1ULL << CAP_LINUX_IMMUTABLE) |
        (1ULL << CAP_NET_BIND_SERVICE) |
        (1ULL << CAP_NET_BROADCAST) |
        (1ULL << CAP_NET_RAW) |
        (1ULL << CAP_SETGID) |
        (1ULL << CAP_SETFCAP) |
        (1ULL << CAP_SETPCAP) |
        (1ULL << CAP_SETUID) |
        (1ULL << CAP_SYS_ADMIN) |
        (1ULL << CAP_SYS_CHROOT) |
        (1ULL << CAP_SYS_NICE) |
        (1ULL << CAP_SYS_PTRACE) |
        (1ULL << CAP_SYS_TTY_CONFIG) |
        (1ULL << CAP_SYS_RESOURCE) |
        (1ULL << CAP_SYS_BOOT) |
        (1ULL << CAP_AUDIT_WRITE) |
        (1ULL << CAP_AUDIT_CONTROL);
/* --bind= entries, stored as consecutive (source, destination) string pairs */
static char **arg_bind = NULL;
/* --bind-ro= entries, same pair layout as arg_bind */
static char **arg_bind_ro = NULL;
113
114 static int help(void) {
115
116         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
117                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
118                "  -h --help                Show this help\n"
119                "     --version             Print version string\n"
120                "  -D --directory=NAME      Root directory for the container\n"
121                "  -b --boot                Boot up full system (i.e. invoke init)\n"
122                "  -u --user=USER           Run the command under specified user or uid\n"
123                "     --uuid=UUID           Set a specific machine UUID for the container\n"
124                "  -M --machine=NAME        Set the machine name for the container\n"
125                "  -S --slice=SLICE         Place the container in the specified slice\n"
126                "     --private-network     Disable network in container\n"
127                "     --read-only           Mount the root directory read-only\n"
128                "     --capability=CAP      In addition to the default, retain specified\n"
129                "                           capability\n"
130                "     --drop-capability=CAP Drop the specified capability from the default set\n"
131                "     --link-journal=MODE   Link up guest journal, one of no, auto, guest, host\n"
132                "  -j                       Equivalent to --link-journal=host\n"
133                "     --bind=PATH[:PATH]    Bind mount a file or directory from the host into\n"
134                "                           the container\n"
135                "     --bind-ro=PATH[:PATH] Similar, but creates a read-only bind mount\n",
136                program_invocation_short_name);
137
138         return 0;
139 }
140
141 static int parse_argv(int argc, char *argv[]) {
142
143         enum {
144                 ARG_VERSION = 0x100,
145                 ARG_PRIVATE_NETWORK,
146                 ARG_UUID,
147                 ARG_READ_ONLY,
148                 ARG_CAPABILITY,
149                 ARG_DROP_CAPABILITY,
150                 ARG_LINK_JOURNAL,
151                 ARG_BIND,
152                 ARG_BIND_RO
153         };
154
155         static const struct option options[] = {
156                 { "help",            no_argument,       NULL, 'h'                 },
157                 { "version",         no_argument,       NULL, ARG_VERSION         },
158                 { "directory",       required_argument, NULL, 'D'                 },
159                 { "user",            required_argument, NULL, 'u'                 },
160                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
161                 { "boot",            no_argument,       NULL, 'b'                 },
162                 { "uuid",            required_argument, NULL, ARG_UUID            },
163                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
164                 { "capability",      required_argument, NULL, ARG_CAPABILITY      },
165                 { "drop-capability", required_argument, NULL, ARG_DROP_CAPABILITY },
166                 { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
167                 { "bind",            required_argument, NULL, ARG_BIND            },
168                 { "bind-ro",         required_argument, NULL, ARG_BIND_RO         },
169                 { "machine",         required_argument, NULL, 'M'                 },
170                 { "slice",           required_argument, NULL, 'S'                 },
171                 {}
172         };
173
174         int c, r;
175
176         assert(argc >= 0);
177         assert(argv);
178
179         while ((c = getopt_long(argc, argv, "+hD:u:bM:jS:", options, NULL)) >= 0) {
180
181                 switch (c) {
182
183                 case 'h':
184                         return help();
185
186                 case ARG_VERSION:
187                         puts(PACKAGE_STRING);
188                         puts(SYSTEMD_FEATURES);
189                         return 0;
190
191                 case 'D':
192                         free(arg_directory);
193                         arg_directory = canonicalize_file_name(optarg);
194                         if (!arg_directory) {
195                                 log_error("Invalid root directory: %m");
196                                 return -ENOMEM;
197                         }
198
199                         break;
200
201                 case 'u':
202                         free(arg_user);
203                         arg_user = strdup(optarg);
204                         if (!arg_user)
205                                 return log_oom();
206
207                         break;
208
209                 case ARG_PRIVATE_NETWORK:
210                         arg_private_network = true;
211                         break;
212
213                 case 'b':
214                         arg_boot = true;
215                         break;
216
217                 case ARG_UUID:
218                         r = sd_id128_from_string(optarg, &arg_uuid);
219                         if (r < 0) {
220                                 log_error("Invalid UUID: %s", optarg);
221                                 return r;
222                         }
223                         break;
224
225                 case 'S':
226                         arg_slice = strdup(optarg);
227                         if (!arg_slice)
228                                 return log_oom();
229
230                         break;
231
232                 case 'M':
233                         if (!hostname_is_valid(optarg)) {
234                                 log_error("Invalid machine name: %s", optarg);
235                                 return -EINVAL;
236                         }
237
238                         free(arg_machine);
239                         arg_machine = strdup(optarg);
240                         if (!arg_machine)
241                                 return log_oom();
242
243                         break;
244
245                 case ARG_READ_ONLY:
246                         arg_read_only = true;
247                         break;
248
249                 case ARG_CAPABILITY:
250                 case ARG_DROP_CAPABILITY: {
251                         char *state, *word;
252                         size_t length;
253
254                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
255                                 cap_value_t cap;
256                                 char *t;
257
258                                 t = strndup(word, length);
259                                 if (!t)
260                                         return log_oom();
261
262                                 if (cap_from_name(t, &cap) < 0) {
263                                         log_error("Failed to parse capability %s.", t);
264                                         free(t);
265                                         return -EINVAL;
266                                 }
267
268                                 free(t);
269
270                                 if (c == ARG_CAPABILITY)
271                                         arg_retain |= 1ULL << (uint64_t) cap;
272                                 else
273                                         arg_retain &= ~(1ULL << (uint64_t) cap);
274                         }
275
276                         break;
277                 }
278
279                 case 'j':
280                         arg_link_journal = LINK_GUEST;
281                         break;
282
283                 case ARG_LINK_JOURNAL:
284                         if (streq(optarg, "auto"))
285                                 arg_link_journal = LINK_AUTO;
286                         else if (streq(optarg, "no"))
287                                 arg_link_journal = LINK_NO;
288                         else if (streq(optarg, "guest"))
289                                 arg_link_journal = LINK_GUEST;
290                         else if (streq(optarg, "host"))
291                                 arg_link_journal = LINK_HOST;
292                         else {
293                                 log_error("Failed to parse link journal mode %s", optarg);
294                                 return -EINVAL;
295                         }
296
297                         break;
298
299                 case ARG_BIND:
300                 case ARG_BIND_RO: {
301                         _cleanup_free_ char *a = NULL, *b = NULL;
302                         char *e;
303                         char ***x;
304
305                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
306
307                         e = strchr(optarg, ':');
308                         if (e) {
309                                 a = strndup(optarg, e - optarg);
310                                 b = strdup(e + 1);
311                         } else {
312                                 a = strdup(optarg);
313                                 b = strdup(optarg);
314                         }
315
316                         if (!a || !b)
317                                 return log_oom();
318
319                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
320                                 log_error("Invalid bind mount specification: %s", optarg);
321                                 return -EINVAL;
322                         }
323
324                         r = strv_extend(x, a);
325                         if (r < 0)
326                                 return log_oom();
327
328                         r = strv_extend(x, b);
329                         if (r < 0)
330                                 return log_oom();
331
332                         break;
333                 }
334
335                 case '?':
336                         return -EINVAL;
337
338                 default:
339                         assert_not_reached("Unhandled option");
340                 }
341         }
342
343         return 1;
344 }
345
346 static int mount_all(const char *dest) {
347
348         typedef struct MountPoint {
349                 const char *what;
350                 const char *where;
351                 const char *type;
352                 const char *options;
353                 unsigned long flags;
354                 bool fatal;
355         } MountPoint;
356
357         static const MountPoint mount_table[] = {
358                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
359                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
360                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
361                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
362                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
363                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
364                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
365                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
366 #ifdef HAVE_SELINUX
367                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
368                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
369 #endif
370         };
371
372         unsigned k;
373         int r = 0;
374
375         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
376                 _cleanup_free_ char *where = NULL;
377                 int t;
378
379                 where = strjoin(dest, "/", mount_table[k].where, NULL);
380                 if (!where)
381                         return log_oom();
382
383                 t = path_is_mount_point(where, true);
384                 if (t < 0) {
385                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
386
387                         if (r == 0)
388                                 r = t;
389
390                         continue;
391                 }
392
393                 /* Skip this entry if it is not a remount. */
394                 if (mount_table[k].what && t > 0)
395                         continue;
396
397                 mkdir_p(where, 0755);
398
399                 if (mount(mount_table[k].what,
400                           where,
401                           mount_table[k].type,
402                           mount_table[k].flags,
403                           mount_table[k].options) < 0 &&
404                     mount_table[k].fatal) {
405
406                         log_error("mount(%s) failed: %m", where);
407
408                         if (r == 0)
409                                 r = -errno;
410                 }
411         }
412
413         return r;
414 }
415
416 static int mount_binds(const char *dest, char **l, unsigned long flags) {
417         char **x, **y;
418
419         STRV_FOREACH_PAIR(x, y, l) {
420                 _cleanup_free_ char *where = NULL;
421                 struct stat source_st, dest_st;
422
423                 if (stat(*x, &source_st) < 0) {
424                         log_error("failed to stat %s: %m", *x);
425                         return -errno;
426                 }
427
428                 where = strjoin(dest, "/", *y, NULL);
429                 if (!where)
430                         return log_oom();
431
432                 if (stat(where, &dest_st) == 0) {
433                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
434                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
435                                                 *x, where);
436                                 return -EINVAL;
437                         }
438                 } else {
439                         /* Create the mount point, but be conservative -- refuse to create block
440                          * and char devices. */
441                         if (S_ISDIR(source_st.st_mode))
442                                 mkdir_p_label(where, 0755);
443                         else if (S_ISFIFO(source_st.st_mode))
444                                 mkfifo(where, 0644);
445                         else if (S_ISSOCK(source_st.st_mode))
446                                 mknod(where, 0644 | S_IFSOCK, 0);
447                         else if (S_ISREG(source_st.st_mode))
448                                 touch(where);
449                         else {
450                                 log_error("Refusing to create mountpoint for file: %s", *x);
451                                 return -ENOTSUP;
452                         }
453                 }
454
455                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
456                         log_error("mount(%s) failed: %m", where);
457                         return -errno;
458                 }
459
460                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
461                         log_error("mount(%s) failed: %m", where);
462                         return -errno;
463                 }
464         }
465
466         return 0;
467 }
468
469 static int setup_timezone(const char *dest) {
470         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
471         char *z, *y;
472         int r;
473
474         assert(dest);
475
476         /* Fix the timezone, if possible */
477         r = readlink_malloc("/etc/localtime", &p);
478         if (r < 0) {
479                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
480                 return 0;
481         }
482
483         z = path_startswith(p, "../usr/share/zoneinfo/");
484         if (!z)
485                 z = path_startswith(p, "/usr/share/zoneinfo/");
486         if (!z) {
487                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
488                 return 0;
489         }
490
491         where = strappend(dest, "/etc/localtime");
492         if (!where)
493                 return log_oom();
494
495         r = readlink_malloc(where, &q);
496         if (r >= 0) {
497                 y = path_startswith(q, "../usr/share/zoneinfo/");
498                 if (!y)
499                         y = path_startswith(q, "/usr/share/zoneinfo/");
500
501
502                 /* Already pointing to the right place? Then do nothing .. */
503                 if (y && streq(y, z))
504                         return 0;
505         }
506
507         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
508         if (!check)
509                 return log_oom();
510
511         if (access(check, F_OK) < 0) {
512                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
513                 return 0;
514         }
515
516         what = strappend("../usr/share/zoneinfo/", z);
517         if (!what)
518                 return log_oom();
519
520         unlink(where);
521         if (symlink(what, where) < 0) {
522                 log_error("Failed to correct timezone of container: %m");
523                 return 0;
524         }
525
526         return 0;
527 }
528
529 static int setup_resolv_conf(const char *dest) {
530         char _cleanup_free_ *where = NULL;
531
532         assert(dest);
533
534         if (arg_private_network)
535                 return 0;
536
537         /* Fix resolv.conf, if possible */
538         where = strappend(dest, "/etc/resolv.conf");
539         if (!where)
540                 return log_oom();
541
542         /* We don't really care for the results of this really. If it
543          * fails, it fails, but meh... */
544         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
545
546         return 0;
547 }
548
549 static int setup_boot_id(const char *dest) {
550         _cleanup_free_ char *from = NULL, *to = NULL;
551         sd_id128_t rnd;
552         char as_uuid[37];
553         int r;
554
555         assert(dest);
556
557         /* Generate a new randomized boot ID, so that each boot-up of
558          * the container gets a new one */
559
560         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
561         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
562         if (!from || !to)
563                 return log_oom();
564
565         r = sd_id128_randomize(&rnd);
566         if (r < 0) {
567                 log_error("Failed to generate random boot id: %s", strerror(-r));
568                 return r;
569         }
570
571         snprintf(as_uuid, sizeof(as_uuid),
572                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
573                  SD_ID128_FORMAT_VAL(rnd));
574         char_array_0(as_uuid);
575
576         r = write_string_file(from, as_uuid);
577         if (r < 0) {
578                 log_error("Failed to write boot id: %s", strerror(-r));
579                 return r;
580         }
581
582         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
583                 log_error("Failed to bind mount boot id: %m");
584                 r = -errno;
585         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
586                 log_warning("Failed to make boot id read-only: %m");
587
588         unlink(from);
589         return r;
590 }
591
592 static int copy_devnodes(const char *dest) {
593
594         static const char devnodes[] =
595                 "null\0"
596                 "zero\0"
597                 "full\0"
598                 "random\0"
599                 "urandom\0"
600                 "tty\0";
601
602         const char *d;
603         int r = 0;
604         _cleanup_umask_ mode_t u;
605
606         assert(dest);
607
608         u = umask(0000);
609
610         NULSTR_FOREACH(d, devnodes) {
611                 struct stat st;
612                 _cleanup_free_ char *from = NULL, *to = NULL;
613
614                 asprintf(&from, "/dev/%s", d);
615                 asprintf(&to, "%s/dev/%s", dest, d);
616
617                 if (!from || !to) {
618                         log_oom();
619
620                         if (r == 0)
621                                 r = -ENOMEM;
622
623                         break;
624                 }
625
626                 if (stat(from, &st) < 0) {
627
628                         if (errno != ENOENT) {
629                                 log_error("Failed to stat %s: %m", from);
630                                 if (r == 0)
631                                         r = -errno;
632                         }
633
634                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
635
636                         log_error("%s is not a char or block device, cannot copy", from);
637                         if (r == 0)
638                                 r = -EIO;
639
640                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
641
642                         log_error("mknod(%s) failed: %m", dest);
643                         if (r == 0)
644                                 r = -errno;
645                 }
646         }
647
648         return r;
649 }
650
651 static int setup_ptmx(const char *dest) {
652         _cleanup_free_ char *p = NULL;
653
654         p = strappend(dest, "/dev/ptmx");
655         if (!p)
656                 return log_oom();
657
658         if (symlink("pts/ptmx", p) < 0) {
659                 log_error("Failed to create /dev/ptmx symlink: %m");
660                 return -errno;
661         }
662
663         return 0;
664 }
665
666 static int setup_dev_console(const char *dest, const char *console) {
667         struct stat st;
668         _cleanup_free_ char *to = NULL;
669         int r;
670         _cleanup_umask_ mode_t u;
671
672         assert(dest);
673         assert(console);
674
675         u = umask(0000);
676
677         if (stat(console, &st) < 0) {
678                 log_error("Failed to stat %s: %m", console);
679                 return -errno;
680
681         } else if (!S_ISCHR(st.st_mode)) {
682                 log_error("/dev/console is not a char device");
683                 return -EIO;
684         }
685
686         r = chmod_and_chown(console, 0600, 0, 0);
687         if (r < 0) {
688                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
689                 return r;
690         }
691
692         if (asprintf(&to, "%s/dev/console", dest) < 0)
693                 return log_oom();
694
695         /* We need to bind mount the right tty to /dev/console since
696          * ptys can only exist on pts file systems. To have something
697          * to bind mount things on we create a device node first, that
698          * has the right major/minor (note that the major minor
699          * doesn't actually matter here, since we mount it over
700          * anyway). */
701
702         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
703                 log_error("mknod() for /dev/console failed: %m");
704                 return -errno;
705         }
706
707         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
708                 log_error("Bind mount for /dev/console failed: %m");
709                 return -errno;
710         }
711
712         return 0;
713 }
714
/* Provide a FIFO as the container's /proc/kmsg.
 *
 * A FIFO is created at <dest>/dev/kmsg, bind mounted onto <dest>/proc/kmsg
 * and opened; the resulting file descriptor is sent over "kmsg_socket" as
 * an SCM_RIGHTS control message. Finally the FIFO's /dev/kmsg name is
 * removed again.
 *
 * Returns 0 on success or a negative errno-style code. The steps depend on
 * each other, so their order must not be changed. */
static int setup_kmsg(const char *dest, int kmsg_socket) {
        _cleanup_free_ char *from = NULL, *to = NULL;
        int r, fd, k;
        _cleanup_umask_ mode_t u;
        /* Control buffer large enough for exactly one file descriptor; the
         * union with struct cmsghdr is what the buffer is accessed through
         * via CMSG_FIRSTHDR() below. */
        union {
                struct cmsghdr cmsghdr;
                uint8_t buf[CMSG_SPACE(sizeof(int))];
        } control = {};
        /* No iovec is set: the message carries only the control data. */
        struct msghdr mh = {
                .msg_control = &control,
                .msg_controllen = sizeof(control),
        };
        struct cmsghdr *cmsg;

        assert(dest);
        assert(kmsg_socket >= 0);

        u = umask(0000);

        /* We create the kmsg FIFO as /dev/kmsg, but immediately
         * delete it after bind mounting it to /proc/kmsg. While FIFOs
         * on the reading side behave very similar to /proc/kmsg,
         * their writing side behaves differently from /dev/kmsg in
         * that writing blocks when nothing is reading. In order to
         * avoid any problems with containers deadlocking due to this
         * we simply make /dev/kmsg unavailable to the container. */
        if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
            asprintf(&to, "%s/proc/kmsg", dest) < 0)
                return log_oom();

        if (mkfifo(from, 0600) < 0) {
                log_error("mkfifo() for /dev/kmsg failed: %m");
                return -errno;
        }

        r = chmod_and_chown(from, 0600, 0, 0);
        if (r < 0) {
                log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
                return r;
        }

        if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
                log_error("Bind mount for /proc/kmsg failed: %m");
                return -errno;
        }

        /* Opened read-write and non-blocking. */
        fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
        if (fd < 0) {
                log_error("Failed to open fifo: %m");
                return -errno;
        }

        /* Wrap the fd into a single SCM_RIGHTS control message. */
        cmsg = CMSG_FIRSTHDR(&mh);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = SCM_RIGHTS;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

        /* Shrink the control length to what was actually filled in. */
        mh.msg_controllen = cmsg->cmsg_len;

        /* Store away the fd in the socket, so that it stays open as
         * long as we run the child */
        k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
        /* Our own copy is closed whether or not sending worked. */
        close_nointr_nofail(fd);

        if (k < 0) {
                log_error("Failed to send FIFO fd: %m");
                return -errno;
        }

        /* And now make the FIFO unavailable as /dev/kmsg... */
        unlink(from);
        return 0;
}
789
790 static int setup_hostname(void) {
791
792         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
793                 return -errno;
794
795         return 0;
796 }
797
798 static int setup_journal(const char *directory) {
799         sd_id128_t machine_id;
800         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
801         char *id;
802         int r;
803
804         if (arg_link_journal == LINK_NO)
805                 return 0;
806
807         p = strappend(directory, "/etc/machine-id");
808         if (!p)
809                 return log_oom();
810
811         r = read_one_line_file(p, &b);
812         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
813                 return 0;
814         else if (r < 0) {
815                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
816                 return r;
817         }
818
819         id = strstrip(b);
820         if (isempty(id) && arg_link_journal == LINK_AUTO)
821                 return 0;
822
823         /* Verify validity */
824         r = sd_id128_from_string(id, &machine_id);
825         if (r < 0) {
826                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
827                 return r;
828         }
829
830         free(p);
831         p = strappend("/var/log/journal/", id);
832         q = strjoin(directory, "/var/log/journal/", id, NULL);
833         if (!p || !q)
834                 return log_oom();
835
836         if (path_is_mount_point(p, false) > 0) {
837                 if (arg_link_journal != LINK_AUTO) {
838                         log_error("%s: already a mount point, refusing to use for journal", p);
839                         return -EEXIST;
840                 }
841
842                 return 0;
843         }
844
845         if (path_is_mount_point(q, false) > 0) {
846                 if (arg_link_journal != LINK_AUTO) {
847                         log_error("%s: already a mount point, refusing to use for journal", q);
848                         return -EEXIST;
849                 }
850
851                 return 0;
852         }
853
854         r = readlink_and_make_absolute(p, &d);
855         if (r >= 0) {
856                 if ((arg_link_journal == LINK_GUEST ||
857                      arg_link_journal == LINK_AUTO) &&
858                     path_equal(d, q)) {
859
860                         r = mkdir_p(q, 0755);
861                         if (r < 0)
862                                 log_warning("failed to create directory %s: %m", q);
863                         return 0;
864                 }
865
866                 if (unlink(p) < 0) {
867                         log_error("Failed to remove symlink %s: %m", p);
868                         return -errno;
869                 }
870         } else if (r == -EINVAL) {
871
872                 if (arg_link_journal == LINK_GUEST &&
873                     rmdir(p) < 0) {
874
875                         if (errno == ENOTDIR) {
876                                 log_error("%s already exists and is neither a symlink nor a directory", p);
877                                 return r;
878                         } else {
879                                 log_error("Failed to remove %s: %m", p);
880                                 return -errno;
881                         }
882                 }
883         } else if (r != -ENOENT) {
884                 log_error("readlink(%s) failed: %m", p);
885                 return r;
886         }
887
888         if (arg_link_journal == LINK_GUEST) {
889
890                 if (symlink(q, p) < 0) {
891                         log_error("Failed to symlink %s to %s: %m", q, p);
892                         return -errno;
893                 }
894
895                 r = mkdir_p(q, 0755);
896                 if (r < 0)
897                         log_warning("failed to create directory %s: %m", q);
898                 return 0;
899         }
900
901         if (arg_link_journal == LINK_HOST) {
902                 r = mkdir_p(p, 0755);
903                 if (r < 0) {
904                         log_error("Failed to create %s: %m", p);
905                         return r;
906                 }
907
908         } else if (access(p, F_OK) < 0)
909                 return 0;
910
911         if (dir_is_empty(q) == 0) {
912                 log_error("%s not empty.", q);
913                 return -ENOTEMPTY;
914         }
915
916         r = mkdir_p(q, 0755);
917         if (r < 0) {
918                 log_error("Failed to create %s: %m", q);
919                 return r;
920         }
921
922         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
923                 log_error("Failed to bind mount journal from host into guest: %m");
924                 return -errno;
925         }
926
927         return 0;
928 }
929
930 static int drop_capabilities(void) {
931         return capability_bounding_set_drop(~arg_retain, false);
932 }
933
934 static int register_machine(void) {
935         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
936         _cleanup_bus_unref_ sd_bus *bus = NULL;
937         int r;
938
939         r = sd_bus_open_system(&bus);
940         if (r < 0) {
941                 log_error("Failed to open system bus: %s", strerror(-r));
942                 return r;
943         }
944
945         r = sd_bus_call_method(
946                         bus,
947                         "org.freedesktop.machine1",
948                         "/org/freedesktop/machine1",
949                         "org.freedesktop.machine1.Manager",
950                         "CreateMachine",
951                         &error,
952                         NULL,
953                         "sayssusa(sv)",
954                         arg_machine,
955                         SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
956                         "nspawn",
957                         "container",
958                         (uint32_t) 0,
959                         strempty(arg_directory),
960                         !isempty(arg_slice), "Slice", "s", arg_slice);
961         if (r < 0) {
962                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
963                 return r;
964         }
965
966         return 0;
967 }
968
969 static int terminate_machine(pid_t pid) {
970         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
971         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
972         _cleanup_bus_unref_ sd_bus *bus = NULL;
973         const char *path;
974         int r;
975
976         r = sd_bus_default_system(&bus);
977         if (r < 0) {
978                 log_error("Failed to open system bus: %s", strerror(-r));
979                 return r;
980         }
981
982         r = sd_bus_call_method(
983                         bus,
984                         "org.freedesktop.machine1",
985                         "/org/freedesktop/machine1",
986                         "org.freedesktop.machine1.Manager",
987                         "GetMachineByPID",
988                         &error,
989                         &reply,
990                         "u",
991                         (uint32_t) pid);
992         if (r < 0) {
993                 /* Note that the machine might already have been
994                  * cleaned up automatically, hence don't consider it a
995                  * failure if we cannot get the machine object. */
996                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
997                 return 0;
998         }
999
1000         r = sd_bus_message_read(reply, "o", &path);
1001         if (r < 0)
1002                 return bus_log_parse_error(r);
1003
1004         r = sd_bus_call_method(
1005                         bus,
1006                         "org.freedesktop.machine1",
1007                         path,
1008                         "org.freedesktop.machine1.Machine",
1009                         "Terminate",
1010                         &error,
1011                         NULL,
1012                         NULL);
1013         if (r < 0) {
1014                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1015                 return 0;
1016         }
1017
1018         return 0;
1019 }
1020
/* Probes for kernel audit support by trying to open a NETLINK_AUDIT socket.
 * Returns true if the socket could be created, false otherwise. */
static bool audit_enabled(void) {
        int probe_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);

        if (probe_fd < 0)
                return false;

        close_nointr_nofail(probe_fd);
        return true;
}
1031
1032 int main(int argc, char *argv[]) {
1033         pid_t pid = 0;
1034         int r = EXIT_FAILURE, k;
1035         _cleanup_close_ int master = -1;
1036         int n_fd_passed;
1037         const char *console = NULL;
1038         sigset_t mask;
1039         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
1040         _cleanup_fdset_free_ FDSet *fds = NULL;
1041
1042         log_parse_environment();
1043         log_open();
1044
1045         k = parse_argv(argc, argv);
1046         if (k < 0)
1047                 goto finish;
1048         else if (k == 0) {
1049                 r = EXIT_SUCCESS;
1050                 goto finish;
1051         }
1052
1053         if (arg_directory) {
1054                 char *p;
1055
1056                 p = path_make_absolute_cwd(arg_directory);
1057                 free(arg_directory);
1058                 arg_directory = p;
1059         } else
1060                 arg_directory = get_current_dir_name();
1061
1062         if (!arg_directory) {
1063                 log_error("Failed to determine path, please use -D.");
1064                 goto finish;
1065         }
1066
1067         path_kill_slashes(arg_directory);
1068
1069         if (!arg_machine) {
1070                 arg_machine = strdup(path_get_file_name(arg_directory));
1071                 if (!arg_machine) {
1072                         log_oom();
1073                         goto finish;
1074                 }
1075
1076                 hostname_cleanup(arg_machine, false);
1077                 if (isempty(arg_machine)) {
1078                         log_error("Failed to determine machine name automatically, please use -M.");
1079                         goto finish;
1080                 }
1081         }
1082
1083         if (geteuid() != 0) {
1084                 log_error("Need to be root.");
1085                 goto finish;
1086         }
1087
1088         if (sd_booted() <= 0) {
1089                 log_error("Not running on a systemd system.");
1090                 goto finish;
1091         }
1092
1093         if (arg_boot && audit_enabled()) {
1094                 log_warning("The kernel auditing subsystem is known to be incompatible with containers.\n"
1095                             "Please make sure to turn off auditing with 'audit=0' on the kernel command\n"
1096                             "line before using systemd-nspawn. Sleeping for 5s...\n");
1097                 sleep(5);
1098         }
1099
1100         if (path_equal(arg_directory, "/")) {
1101                 log_error("Spawning container on root directory not supported.");
1102                 goto finish;
1103         }
1104
1105         if (path_is_os_tree(arg_directory) <= 0) {
1106                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
1107                 goto finish;
1108         }
1109
1110         log_close();
1111         n_fd_passed = sd_listen_fds(false);
1112         if (n_fd_passed > 0) {
1113                 k = fdset_new_listen_fds(&fds, false);
1114                 if (k < 0) {
1115                         log_error("Failed to collect file descriptors: %s", strerror(-k));
1116                         goto finish;
1117                 }
1118         }
1119         fdset_close_others(fds);
1120         log_open();
1121
1122         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1123         if (master < 0) {
1124                 log_error("Failed to acquire pseudo tty: %m");
1125                 goto finish;
1126         }
1127
1128         console = ptsname(master);
1129         if (!console) {
1130                 log_error("Failed to determine tty name: %m");
1131                 goto finish;
1132         }
1133
1134         log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_directory);
1135
1136         if (unlockpt(master) < 0) {
1137                 log_error("Failed to unlock tty: %m");
1138                 goto finish;
1139         }
1140
1141         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1142                 log_error("Failed to create kmsg socket pair.");
1143                 goto finish;
1144         }
1145
1146         sd_notify(0, "READY=1");
1147
1148         assert_se(sigemptyset(&mask) == 0);
1149         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1150         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1151
1152         for (;;) {
1153                 siginfo_t status;
1154
1155                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1156                 if (pid < 0) {
1157                         if (errno == EINVAL)
1158                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1159                         else
1160                                 log_error("clone() failed: %m");
1161
1162                         goto finish;
1163                 }
1164
1165                 if (pid == 0) {
1166                         /* child */
1167                         const char *home = NULL;
1168                         uid_t uid = (uid_t) -1;
1169                         gid_t gid = (gid_t) -1;
1170                         unsigned n_env = 2;
1171                         const char *envp[] = {
1172                                 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1173                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1174                                 NULL, /* TERM */
1175                                 NULL, /* HOME */
1176                                 NULL, /* USER */
1177                                 NULL, /* LOGNAME */
1178                                 NULL, /* container_uuid */
1179                                 NULL, /* LISTEN_FDS */
1180                                 NULL, /* LISTEN_PID */
1181                                 NULL
1182                         };
1183
1184                         envp[n_env] = strv_find_prefix(environ, "TERM=");
1185                         if (envp[n_env])
1186                                 n_env ++;
1187
1188                         close_nointr_nofail(master);
1189                         master = -1;
1190
1191                         close_nointr(STDIN_FILENO);
1192                         close_nointr(STDOUT_FILENO);
1193                         close_nointr(STDERR_FILENO);
1194
1195                         close_nointr_nofail(kmsg_socket_pair[0]);
1196                         kmsg_socket_pair[0] = -1;
1197
1198                         reset_all_signal_handlers();
1199
1200                         assert_se(sigemptyset(&mask) == 0);
1201                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1202
1203                         k = open_terminal(console, O_RDWR);
1204                         if (k != STDIN_FILENO) {
1205                                 if (k >= 0) {
1206                                         close_nointr_nofail(k);
1207                                         k = -EINVAL;
1208                                 }
1209
1210                                 log_error("Failed to open console: %s", strerror(-k));
1211                                 goto child_fail;
1212                         }
1213
1214                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1215                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
1216                                 log_error("Failed to duplicate console: %m");
1217                                 goto child_fail;
1218                         }
1219
1220                         if (setsid() < 0) {
1221                                 log_error("setsid() failed: %m");
1222                                 goto child_fail;
1223                         }
1224
1225                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1226                                 log_error("PR_SET_PDEATHSIG failed: %m");
1227                                 goto child_fail;
1228                         }
1229
1230                         r = register_machine();
1231                         if (r < 0)
1232                                 goto finish;
1233
1234                         /* Mark everything as slave, so that we still
1235                          * receive mounts from the real root, but don't
1236                          * propagate mounts to the real root. */
1237                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1238                                 log_error("MS_SLAVE|MS_REC failed: %m");
1239                                 goto child_fail;
1240                         }
1241
1242                         /* Turn directory into bind mount */
1243                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1244                                 log_error("Failed to make bind mount.");
1245                                 goto child_fail;
1246                         }
1247
1248                         if (arg_read_only)
1249                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1250                                         log_error("Failed to make read-only.");
1251                                         goto child_fail;
1252                                 }
1253
1254                         if (mount_all(arg_directory) < 0)
1255                                 goto child_fail;
1256
1257                         if (copy_devnodes(arg_directory) < 0)
1258                                 goto child_fail;
1259
1260                         if (setup_ptmx(arg_directory) < 0)
1261                                 goto child_fail;
1262
1263                         dev_setup(arg_directory);
1264
1265                         if (setup_dev_console(arg_directory, console) < 0)
1266                                 goto child_fail;
1267
1268                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1269                                 goto child_fail;
1270
1271                         close_nointr_nofail(kmsg_socket_pair[1]);
1272                         kmsg_socket_pair[1] = -1;
1273
1274                         if (setup_boot_id(arg_directory) < 0)
1275                                 goto child_fail;
1276
1277                         if (setup_timezone(arg_directory) < 0)
1278                                 goto child_fail;
1279
1280                         if (setup_resolv_conf(arg_directory) < 0)
1281                                 goto child_fail;
1282
1283                         if (setup_journal(arg_directory) < 0)
1284                                 goto child_fail;
1285
1286                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
1287                                 goto child_fail;
1288
1289                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
1290                                 goto child_fail;
1291
1292                         if (chdir(arg_directory) < 0) {
1293                                 log_error("chdir(%s) failed: %m", arg_directory);
1294                                 goto child_fail;
1295                         }
1296
1297                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1298                                 log_error("mount(MS_MOVE) failed: %m");
1299                                 goto child_fail;
1300                         }
1301
1302                         if (chroot(".") < 0) {
1303                                 log_error("chroot() failed: %m");
1304                                 goto child_fail;
1305                         }
1306
1307                         if (chdir("/") < 0) {
1308                                 log_error("chdir() failed: %m");
1309                                 goto child_fail;
1310                         }
1311
1312                         umask(0022);
1313
1314                         loopback_setup();
1315
1316                         if (drop_capabilities() < 0) {
1317                                 log_error("drop_capabilities() failed: %m");
1318                                 goto child_fail;
1319                         }
1320
1321                         if (arg_user) {
1322
1323                                 /* Note that this resolves user names
1324                                  * inside the container, and hence
1325                                  * accesses the NSS modules from the
1326                                  * container and not the host. This is
1327                                  * a bit weird... */
1328
1329                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1330                                         log_error("get_user_creds() failed: %m");
1331                                         goto child_fail;
1332                                 }
1333
1334                                 if (mkdir_parents_label(home, 0775) < 0) {
1335                                         log_error("mkdir_parents_label() failed: %m");
1336                                         goto child_fail;
1337                                 }
1338
1339                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1340                                         log_error("mkdir_safe_label() failed: %m");
1341                                         goto child_fail;
1342                                 }
1343
1344                                 if (initgroups((const char*)arg_user, gid) < 0) {
1345                                         log_error("initgroups() failed: %m");
1346                                         goto child_fail;
1347                                 }
1348
1349                                 if (setresgid(gid, gid, gid) < 0) {
1350                                         log_error("setregid() failed: %m");
1351                                         goto child_fail;
1352                                 }
1353
1354                                 if (setresuid(uid, uid, uid) < 0) {
1355                                         log_error("setreuid() failed: %m");
1356                                         goto child_fail;
1357                                 }
1358                         } else {
1359                                 /* Reset everything fully to 0, just in case */
1360
1361                                 if (setgroups(0, NULL) < 0) {
1362                                         log_error("setgroups() failed: %m");
1363                                         goto child_fail;
1364                                 }
1365
1366                                 if (setresgid(0, 0, 0) < 0) {
1367                                         log_error("setregid() failed: %m");
1368                                         goto child_fail;
1369                                 }
1370
1371                                 if (setresuid(0, 0, 0) < 0) {
1372                                         log_error("setreuid() failed: %m");
1373                                         goto child_fail;
1374                                 }
1375                         }
1376
1377                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
1378                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1379                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1380                                 log_oom();
1381                                 goto child_fail;
1382                         }
1383
1384                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
1385                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
1386                                         log_oom();
1387                                         goto child_fail;
1388                                 }
1389                         }
1390
1391                         if (fdset_size(fds) > 0) {
1392                                 k = fdset_cloexec(fds, false);
1393                                 if (k < 0) {
1394                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
1395                                         goto child_fail;
1396                                 }
1397
1398                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
1399                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
1400                                         log_oom();
1401                                         goto child_fail;
1402                                 }
1403                         }
1404
1405                         setup_hostname();
1406
1407                         if (arg_boot) {
1408                                 char **a;
1409                                 size_t l;
1410
1411                                 /* Automatically search for the init system */
1412
1413                                 l = 1 + argc - optind;
1414                                 a = newa(char*, l + 1);
1415                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
1416
1417                                 a[0] = (char*) "/usr/lib/systemd/systemd";
1418                                 execve(a[0], a, (char**) envp);
1419
1420                                 a[0] = (char*) "/lib/systemd/systemd";
1421                                 execve(a[0], a, (char**) envp);
1422
1423                                 a[0] = (char*) "/sbin/init";
1424                                 execve(a[0], a, (char**) envp);
1425                         } else if (argc > optind)
1426                                 execvpe(argv[optind], argv + optind, (char**) envp);
1427                         else {
1428                                 chdir(home ? home : "/root");
1429                                 execle("/bin/bash", "-bash", NULL, (char**) envp);
1430                         }
1431
1432                         log_error("execv() failed: %m");
1433
1434                 child_fail:
1435                         _exit(EXIT_FAILURE);
1436                 }
1437
1438                 fdset_free(fds);
1439                 fds = NULL;
1440
1441                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
1442                 if (k < 0) {
1443                         r = EXIT_FAILURE;
1444                         break;
1445                 }
1446
1447                 putc('\n', stdout);
1448
1449                 /* Kill if it is not dead yet anyway */
1450                 terminate_machine(pid);
1451
1452                 /* Redundant, but better safe than sorry */
1453                 kill(pid, SIGKILL);
1454
1455                 k = wait_for_terminate(pid, &status);
1456                 pid = 0;
1457
1458                 if (k < 0) {
1459                         r = EXIT_FAILURE;
1460                         break;
1461                 }
1462
1463                 if (status.si_code == CLD_EXITED) {
1464                         r = status.si_status;
1465                         if (status.si_status != 0) {
1466                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
1467                                 break;
1468                         }
1469
1470                         log_debug("Container %s exited successfully.", arg_machine);
1471                         break;
1472                 } else if (status.si_code == CLD_KILLED &&
1473                            status.si_status == SIGINT) {
1474                         log_info("Container %s has been shut down.", arg_machine);
1475                         r = 0;
1476                         break;
1477                 } else if (status.si_code == CLD_KILLED &&
1478                            status.si_status == SIGHUP) {
1479                         log_info("Container %s is being rebooted.", arg_machine);
1480                         continue;
1481                 } else if (status.si_code == CLD_KILLED ||
1482                            status.si_code == CLD_DUMPED) {
1483
1484                         log_error("Container %s terminated by signal %s.", arg_machine,  signal_to_string(status.si_status));
1485                         r = EXIT_FAILURE;
1486                         break;
1487                 } else {
1488                         log_error("Container %s failed due to unknown reason.", arg_machine);
1489                         r = EXIT_FAILURE;
1490                         break;
1491                 }
1492         }
1493
1494 finish:
1495         if (pid > 0)
1496                 kill(pid, SIGKILL);
1497
1498         free(arg_directory);
1499         free(arg_machine);
1500
1501         return r;
1502 }