chiark / gitweb /
4e4c5601e74d28f63eae431822d0afd78ecb888a
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <sys/epoll.h>
37 #include <termios.h>
38 #include <sys/signalfd.h>
39 #include <grp.h>
40 #include <linux/fs.h>
41 #include <sys/un.h>
42 #include <sys/socket.h>
43
44 #include <systemd/sd-daemon.h>
45
46 #include "log.h"
47 #include "util.h"
48 #include "mkdir.h"
49 #include "macro.h"
50 #include "audit.h"
51 #include "missing.h"
52 #include "cgroup-util.h"
53 #include "strv.h"
54 #include "path-util.h"
55 #include "loopback-setup.h"
56 #include "sd-id128.h"
57 #include "dev-setup.h"
58
/* Journal linking modes, selected with --link-journal= (or -j).
 * The enumerators correspond to the option values "no", "auto",
 * "host" and "guest" accepted by parse_argv(). */
enum LinkJournal {
        LINK_NO,        /* "no" */
        LINK_AUTO,      /* "auto" */
        LINK_HOST,      /* "host" */
        LINK_GUEST      /* "guest" */
};

typedef enum LinkJournal LinkJournal;
65
66 static char *arg_directory = NULL;
67 static char *arg_user = NULL;
68 static char **arg_controllers = NULL;
69 static char *arg_uuid = NULL;
70 static bool arg_private_network = false;
71 static bool arg_read_only = false;
72 static bool arg_boot = false;
73 static LinkJournal arg_link_journal = LINK_AUTO;
74 static uint64_t arg_retain =
75         (1ULL << CAP_CHOWN) |
76         (1ULL << CAP_DAC_OVERRIDE) |
77         (1ULL << CAP_DAC_READ_SEARCH) |
78         (1ULL << CAP_FOWNER) |
79         (1ULL << CAP_FSETID) |
80         (1ULL << CAP_IPC_OWNER) |
81         (1ULL << CAP_KILL) |
82         (1ULL << CAP_LEASE) |
83         (1ULL << CAP_LINUX_IMMUTABLE) |
84         (1ULL << CAP_NET_BIND_SERVICE) |
85         (1ULL << CAP_NET_BROADCAST) |
86         (1ULL << CAP_NET_RAW) |
87         (1ULL << CAP_SETGID) |
88         (1ULL << CAP_SETFCAP) |
89         (1ULL << CAP_SETPCAP) |
90         (1ULL << CAP_SETUID) |
91         (1ULL << CAP_SYS_ADMIN) |
92         (1ULL << CAP_SYS_CHROOT) |
93         (1ULL << CAP_SYS_NICE) |
94         (1ULL << CAP_SYS_PTRACE) |
95         (1ULL << CAP_SYS_TTY_CONFIG) |
96         (1ULL << CAP_SYS_RESOURCE) |
97         (1ULL << CAP_SYS_BOOT);
98
/* Prints the usage summary to stdout. Always returns 0. */
static int help(void) {

        /* Note: -j selects LINK_GUEST in parse_argv(), hence it is
         * documented as the short form of --link-journal=guest. */
        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
               "Spawn a minimal namespace container for debugging, testing and building.\n\n"
               "  -h --help               Show this help\n"
               "  -D --directory=NAME     Root directory for the container\n"
               "  -b --boot               Boot up full system (i.e. invoke init)\n"
               "  -u --user=USER          Run the command under specified user or uid\n"
               "  -C --controllers=LIST   Put the container in specified comma-separated cgroup hierarchies\n"
               "     --uuid=UUID          Set a specific machine UUID for the container\n"
               "     --private-network    Disable network in container\n"
               "     --read-only          Mount the root directory read-only\n"
               "     --capability=CAP     In addition to the default, retain specified capability\n"
               "     --link-journal=MODE  Link up guest journal, one of no, auto, guest, host\n"
               "  -j                      Equivalent to --link-journal=guest\n",
               program_invocation_short_name);

        return 0;
}
118
119 static int parse_argv(int argc, char *argv[]) {
120
121         enum {
122                 ARG_PRIVATE_NETWORK = 0x100,
123                 ARG_UUID,
124                 ARG_READ_ONLY,
125                 ARG_CAPABILITY,
126                 ARG_LINK_JOURNAL
127         };
128
129         static const struct option options[] = {
130                 { "help",            no_argument,       NULL, 'h'                 },
131                 { "directory",       required_argument, NULL, 'D'                 },
132                 { "user",            required_argument, NULL, 'u'                 },
133                 { "controllers",     required_argument, NULL, 'C'                 },
134                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
135                 { "boot",            no_argument,       NULL, 'b'                 },
136                 { "uuid",            required_argument, NULL, ARG_UUID            },
137                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
138                 { "capability",      required_argument, NULL, ARG_CAPABILITY      },
139                 { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
140                 { NULL,              0,                 NULL, 0                   }
141         };
142
143         int c;
144
145         assert(argc >= 0);
146         assert(argv);
147
148         while ((c = getopt_long(argc, argv, "+hD:u:C:bj", options, NULL)) >= 0) {
149
150                 switch (c) {
151
152                 case 'h':
153                         help();
154                         return 0;
155
156                 case 'D':
157                         free(arg_directory);
158                         arg_directory = canonicalize_file_name(optarg);
159                         if (!arg_directory) {
160                                 log_error("Failed to canonicalize root directory.");
161                                 return -ENOMEM;
162                         }
163
164                         break;
165
166                 case 'u':
167                         free(arg_user);
168                         if (!(arg_user = strdup(optarg))) {
169                                 log_error("Failed to duplicate user name.");
170                                 return -ENOMEM;
171                         }
172
173                         break;
174
175                 case 'C':
176                         strv_free(arg_controllers);
177                         arg_controllers = strv_split(optarg, ",");
178                         if (!arg_controllers) {
179                                 log_error("Failed to split controllers list.");
180                                 return -ENOMEM;
181                         }
182                         strv_uniq(arg_controllers);
183
184                         break;
185
186                 case ARG_PRIVATE_NETWORK:
187                         arg_private_network = true;
188                         break;
189
190                 case 'b':
191                         arg_boot = true;
192                         break;
193
194                 case ARG_UUID:
195                         arg_uuid = optarg;
196                         break;
197
198                 case ARG_READ_ONLY:
199                         arg_read_only = true;
200                         break;
201
202                 case ARG_CAPABILITY: {
203                         char *state, *word;
204                         size_t length;
205
206                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
207                                 cap_value_t cap;
208                                 char *t;
209
210                                 t = strndup(word, length);
211                                 if (!t)
212                                         return log_oom();
213
214                                 if (cap_from_name(t, &cap) < 0) {
215                                         log_error("Failed to parse capability %s.", t);
216                                         free(t);
217                                         return -EINVAL;
218                                 }
219
220                                 free(t);
221                                 arg_retain |= 1ULL << (uint64_t) cap;
222                         }
223
224                         break;
225                 }
226
227                 case 'j':
228                         arg_link_journal = LINK_GUEST;
229                         break;
230
231                 case ARG_LINK_JOURNAL:
232                         if (streq(optarg, "auto"))
233                                 arg_link_journal = LINK_AUTO;
234                         else if (streq(optarg, "no"))
235                                 arg_link_journal = LINK_NO;
236                         else if (streq(optarg, "guest"))
237                                 arg_link_journal = LINK_GUEST;
238                         else if (streq(optarg, "host"))
239                                 arg_link_journal = LINK_HOST;
240                         else {
241                                 log_error("Failed to parse link journal mode %s", optarg);
242                                 return -EINVAL;
243                         }
244
245                         break;
246
247                 case '?':
248                         return -EINVAL;
249
250                 default:
251                         log_error("Unknown option code %c", c);
252                         return -EINVAL;
253                 }
254         }
255
256         return 1;
257 }
258
259 static int mount_all(const char *dest) {
260
261         typedef struct MountPoint {
262                 const char *what;
263                 const char *where;
264                 const char *type;
265                 const char *options;
266                 unsigned long flags;
267                 bool fatal;
268         } MountPoint;
269
270         static const MountPoint mount_table[] = {
271                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
272                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
273                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
274                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
275                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
276                 { "/dev/pts",  "/dev/pts",  NULL,    NULL,       MS_BIND,                      true  },
277                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
278                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
279 #ifdef HAVE_SELINUX
280                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
281                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
282 #endif
283         };
284
285         unsigned k;
286         int r = 0;
287
288         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
289                 char _cleanup_free_ *where = NULL;
290                 int t;
291
292                 if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) {
293                         log_oom();
294
295                         if (r == 0)
296                                 r = -ENOMEM;
297
298                         break;
299                 }
300
301                 t = path_is_mount_point(where, true);
302                 if (t < 0) {
303                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
304
305                         if (r == 0)
306                                 r = t;
307
308                         continue;
309                 }
310
311                 /* Skip this entry if it is not a remount. */
312                 if (mount_table[k].what && t > 0)
313                         continue;
314
315                 mkdir_p_label(where, 0755);
316
317                 if (mount(mount_table[k].what,
318                           where,
319                           mount_table[k].type,
320                           mount_table[k].flags,
321                           mount_table[k].options) < 0 &&
322                     mount_table[k].fatal) {
323
324                         log_error("mount(%s) failed: %m", where);
325
326                         if (r == 0)
327                                 r = -errno;
328                 }
329         }
330
331         return r;
332 }
333
334 static int setup_timezone(const char *dest) {
335         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
336         char *z, *y;
337         int r;
338
339         assert(dest);
340
341         /* Fix the timezone, if possible */
342         r = readlink_malloc("/etc/localtime", &p);
343         if (r < 0) {
344                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
345                 return 0;
346         }
347
348         z = path_startswith(p, "../usr/share/zoneinfo/");
349         if (!z)
350                 z = path_startswith(p, "/usr/share/zoneinfo/");
351         if (!z) {
352                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
353                 return 0;
354         }
355
356         where = strappend(dest, "/etc/localtime");
357         if (!where)
358                 return log_oom();
359
360         r = readlink_malloc(where, &q);
361         if (r >= 0) {
362                 y = path_startswith(q, "../usr/share/zoneinfo/");
363                 if (!y)
364                         y = path_startswith(q, "/usr/share/zoneinfo/");
365
366
367                 /* Already pointing to the right place? Then do nothing .. */
368                 if (y && streq(y, z))
369                         return 0;
370         }
371
372         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
373         if (!check)
374                 return log_oom();
375
376         if (access(check, F_OK) < 0) {
377                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
378                 return 0;
379         }
380
381         what = strappend("../usr/share/zoneinfo/", z);
382         if (!what)
383                 return log_oom();
384
385         unlink(where);
386         if (symlink(what, where) < 0) {
387                 log_error("Failed to correct timezone of container: %m");
388                 return 0;
389         }
390
391         return 0;
392 }
393
394 static int setup_resolv_conf(const char *dest) {
395         char *where;
396
397         assert(dest);
398
399         if (arg_private_network)
400                 return 0;
401
402         /* Fix resolv.conf, if possible */
403         where = strappend(dest, "/etc/resolv.conf");
404         if (!where)
405                 return log_oom();
406
407         /* We don't really care for the results of this really. If it
408          * fails, it fails, but meh... */
409         if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0)
410                 mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
411
412         free(where);
413
414         return 0;
415 }
416
417 static int setup_boot_id(const char *dest) {
418         char _cleanup_free_ *from = NULL, *to = NULL;
419         sd_id128_t rnd;
420         char as_uuid[37];
421         int r;
422
423         assert(dest);
424
425         /* Generate a new randomized boot ID, so that each boot-up of
426          * the container gets a new one */
427
428         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
429         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
430         if (!from || !to)
431                 return log_oom();
432
433         r = sd_id128_randomize(&rnd);
434         if (r < 0) {
435                 log_error("Failed to generate random boot id: %s", strerror(-r));
436                 return r;
437         }
438
439         snprintf(as_uuid, sizeof(as_uuid),
440                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
441                  SD_ID128_FORMAT_VAL(rnd));
442         char_array_0(as_uuid);
443
444         r = write_one_line_file(from, as_uuid);
445         if (r < 0) {
446                 log_error("Failed to write boot id: %s", strerror(-r));
447                 return r;
448         }
449
450         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
451                 log_error("Failed to bind mount boot id: %m");
452                 r = -errno;
453         } else
454                 mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
455
456         unlink(from);
457         return r;
458 }
459
460 static int copy_devnodes(const char *dest) {
461
462         static const char devnodes[] =
463                 "null\0"
464                 "zero\0"
465                 "full\0"
466                 "random\0"
467                 "urandom\0"
468                 "tty\0"
469                 "ptmx\0";
470
471         const char *d;
472         int r = 0;
473         mode_t _cleanup_umask_ u;
474
475         assert(dest);
476
477         u = umask(0000);
478
479         NULSTR_FOREACH(d, devnodes) {
480                 struct stat st;
481                 char _cleanup_free_ *from = NULL, *to = NULL;
482
483                 asprintf(&from, "/dev/%s", d);
484                 asprintf(&to, "%s/dev/%s", dest, d);
485
486                 if (!from || !to) {
487                         log_oom();
488
489                         if (r == 0)
490                                 r = -ENOMEM;
491
492                         break;
493                 }
494
495                 if (stat(from, &st) < 0) {
496
497                         if (errno != ENOENT) {
498                                 log_error("Failed to stat %s: %m", from);
499                                 if (r == 0)
500                                         r = -errno;
501                         }
502
503                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
504
505                         log_error("%s is not a char or block device, cannot copy", from);
506                         if (r == 0)
507                                 r = -EIO;
508
509                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
510
511                         log_error("mknod(%s) failed: %m", dest);
512                         if (r == 0)
513                                 r = -errno;
514                 }
515         }
516
517         return r;
518 }
519
520 static int setup_dev_console(const char *dest, const char *console) {
521         struct stat st;
522         char _cleanup_free_ *to = NULL;
523         int r;
524         mode_t _cleanup_umask_ u;
525
526         assert(dest);
527         assert(console);
528
529         u = umask(0000);
530
531         if (stat(console, &st) < 0) {
532                 log_error("Failed to stat %s: %m", console);
533                 return -errno;
534
535         } else if (!S_ISCHR(st.st_mode)) {
536                 log_error("/dev/console is not a char device");
537                 return -EIO;
538         }
539
540         r = chmod_and_chown(console, 0600, 0, 0);
541         if (r < 0) {
542                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
543                 return r;
544         }
545
546         if (asprintf(&to, "%s/dev/console", dest) < 0)
547                 return log_oom();
548
549         /* We need to bind mount the right tty to /dev/console since
550          * ptys can only exist on pts file systems. To have something
551          * to bind mount things on we create a device node first, that
552          * has the right major/minor (note that the major minor
553          * doesn't actually matter here, since we mount it over
554          * anyway). */
555
556         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
557                 log_error("mknod() for /dev/console failed: %m");
558                 return -errno;
559         }
560
561         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
562                 log_error("Bind mount for /dev/console failed: %m");
563                 return -errno;
564         }
565
566         return 0;
567 }
568
569 static int setup_kmsg(const char *dest, int kmsg_socket) {
570         char _cleanup_free_ *from = NULL, *to = NULL;
571         int r, fd, k;
572         mode_t _cleanup_umask_ u;
573         union {
574                 struct cmsghdr cmsghdr;
575                 uint8_t buf[CMSG_SPACE(sizeof(int))];
576         } control;
577         struct msghdr mh;
578         struct cmsghdr *cmsg;
579
580         assert(dest);
581         assert(kmsg_socket >= 0);
582
583         u = umask(0000);
584
585         /* We create the kmsg FIFO as /dev/kmsg, but immediately
586          * delete it after bind mounting it to /proc/kmsg. While FIFOs
587          * on the reading side behave very similar to /proc/kmsg,
588          * their writing side behaves differently from /dev/kmsg in
589          * that writing blocks when nothing is reading. In order to
590          * avoid any problems with containers deadlocking due to this
591          * we simply make /dev/kmsg unavailable to the container. */
592         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
593             asprintf(&to, "%s/proc/kmsg", dest) < 0)
594                 return log_oom();
595
596         if (mkfifo(from, 0600) < 0) {
597                 log_error("mkfifo() for /dev/kmsg failed: %m");
598                 return -errno;
599         }
600
601         r = chmod_and_chown(from, 0600, 0, 0);
602         if (r < 0) {
603                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
604                 return r;
605         }
606
607         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
608                 log_error("Bind mount for /proc/kmsg failed: %m");
609                 return -errno;
610         }
611
612         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
613         if (fd < 0) {
614                 log_error("Failed to open fifo: %m");
615                 return -errno;
616         }
617
618         zero(mh);
619         zero(control);
620
621         mh.msg_control = &control;
622         mh.msg_controllen = sizeof(control);
623
624         cmsg = CMSG_FIRSTHDR(&mh);
625         cmsg->cmsg_level = SOL_SOCKET;
626         cmsg->cmsg_type = SCM_RIGHTS;
627         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
628         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
629
630         mh.msg_controllen = cmsg->cmsg_len;
631
632         /* Store away the fd in the socket, so that it stays open as
633          * long as we run the child */
634         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
635         close_nointr_nofail(fd);
636
637         if (k < 0) {
638                 log_error("Failed to send FIFO fd: %m");
639                 return -errno;
640         }
641
642         /* And now make the FIFO unavailable as /dev/kmsg... */
643         unlink(from);
644         return 0;
645 }
646
647 static int setup_hostname(void) {
648         char *hn;
649         int r = 0;
650
651         hn = path_get_file_name(arg_directory);
652         if (hn) {
653                 hn = strdup(hn);
654                 if (!hn)
655                         return -ENOMEM;
656
657                 hostname_cleanup(hn);
658
659                 if (!isempty(hn))
660                         if (sethostname(hn, strlen(hn)) < 0)
661                                 r = -errno;
662
663                 free(hn);
664         }
665
666         return r;
667 }
668
669 static int setup_journal(const char *directory) {
670         sd_id128_t machine_id;
671         char _cleanup_free_ *p = NULL, *b = NULL, *q = NULL, *d = NULL;
672         char *id;
673         int r;
674
675         if (arg_link_journal == LINK_NO)
676                 return 0;
677
678         p = strappend(directory, "/etc/machine-id");
679         if (!p)
680                 return log_oom();
681
682         r = read_one_line_file(p, &b);
683         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
684                 return 0;
685         else if (r < 0) {
686                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
687                 return r;
688         }
689
690         id = strstrip(b);
691         if (isempty(id) && arg_link_journal == LINK_AUTO)
692                 return 0;
693
694         /* Verify validity */
695         r = sd_id128_from_string(id, &machine_id);
696         if (r < 0) {
697                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
698                 return r;
699         }
700
701         free(p);
702         p = strappend("/var/log/journal/", id);
703         q = strjoin(directory, "/var/log/journal/", id, NULL);
704         if (!p || !q)
705                 return log_oom();
706
707         if (path_is_mount_point(p, false) > 0) {
708                 if (arg_link_journal != LINK_AUTO) {
709                         log_error("%s: already a mount point, refusing to use for journal", p);
710                         return -EEXIST;
711                 }
712
713                 return 0;
714         }
715
716         if (path_is_mount_point(q, false) > 0) {
717                 if (arg_link_journal != LINK_AUTO) {
718                         log_error("%s: already a mount point, refusing to use for journal", q);
719                         return -EEXIST;
720                 }
721
722                 return 0;
723         }
724
725         r = readlink_and_make_absolute(p, &d);
726         if (r >= 0) {
727                 if ((arg_link_journal == LINK_GUEST ||
728                      arg_link_journal == LINK_AUTO) &&
729                     path_equal(d, q)) {
730
731                         r = mkdir_p(q, 0755);
732                         if (r < 0)
733                                 log_warning("failed to create directory %s: %m", q);
734                         return 0;
735                 }
736
737                 if (unlink(p) < 0) {
738                         log_error("Failed to remove symlink %s: %m", p);
739                         return -errno;
740                 }
741         } else if (r == -EINVAL) {
742
743                 if (arg_link_journal == LINK_GUEST &&
744                     rmdir(p) < 0) {
745
746                         if (errno == ENOTDIR) {
747                                 log_error("%s already exists and is neither a symlink nor a directory", p);
748                                 return r;
749                         } else {
750                                 log_error("Failed to remove %s: %m", p);
751                                 return -errno;
752                         }
753                 }
754         } else if (r != -ENOENT) {
755                 log_error("readlink(%s) failed: %m", p);
756                 return r;
757         }
758
759         if (arg_link_journal == LINK_GUEST) {
760
761                 if (symlink(q, p) < 0) {
762                         log_error("Failed to symlink %s to %s: %m", q, p);
763                         return -errno;
764                 }
765
766                 r = mkdir_p(q, 0755);
767                 if (r < 0)
768                         log_warning("failed to create directory %s: %m", q);
769                 return 0;
770         }
771
772         if (arg_link_journal == LINK_HOST) {
773                 r = mkdir_p(p, 0755);
774                 if (r < 0) {
775                         log_error("Failed to create %s: %m", p);
776                         return r;
777                 }
778
779         } else if (access(p, F_OK) < 0)
780                 return 0;
781
782         if (dir_is_empty(q) == 0) {
783                 log_error("%s not empty.", q);
784                 return -ENOTEMPTY;
785         }
786
787         r = mkdir_p(q, 0755);
788         if (r < 0) {
789                 log_error("Failed to create %s: %m", q);
790                 return r;
791         }
792
793         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
794                 log_error("Failed to bind mount journal from host into guest: %m");
795                 return -errno;
796         }
797
798         return 0;
799 }
800
801 static int drop_capabilities(void) {
802         return capability_bounding_set_drop(~arg_retain, false);
803 }
804
/* Checks whether path looks like the root of an OS tree.
 *
 * Returns 1 if <path>/bin/sh exists, 0 if it does not, and -ENOMEM if
 * the path to check could not be allocated. */
static int is_os_tree(const char *path) {
        char *probe = NULL;
        int found;

        /* We use /bin/sh as flag file if something is an OS */
        if (asprintf(&probe, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        found = access(probe, F_OK) >= 0;
        free(probe);

        return found;
}
818
819 static int process_pty(int master, sigset_t *mask) {
820
821         char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
822         size_t in_buffer_full = 0, out_buffer_full = 0;
823         struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
824         bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
825         int ep = -1, signal_fd = -1, r;
826
827         fd_nonblock(STDIN_FILENO, 1);
828         fd_nonblock(STDOUT_FILENO, 1);
829         fd_nonblock(master, 1);
830
831         signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC);
832         if (signal_fd < 0) {
833                 log_error("signalfd(): %m");
834                 r = -errno;
835                 goto finish;
836         }
837
838         ep = epoll_create1(EPOLL_CLOEXEC);
839         if (ep < 0) {
840                 log_error("Failed to create epoll: %m");
841                 r = -errno;
842                 goto finish;
843         }
844
845         /* We read from STDIN only if this is actually a TTY,
846          * otherwise we assume non-interactivity. */
847         if (isatty(STDIN_FILENO)) {
848                 zero(stdin_ev);
849                 stdin_ev.events = EPOLLIN|EPOLLET;
850                 stdin_ev.data.fd = STDIN_FILENO;
851
852                 if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0) {
853                         log_error("Failed to register STDIN in epoll: %m");
854                         r = -errno;
855                         goto finish;
856                 }
857         }
858
859         zero(stdout_ev);
860         stdout_ev.events = EPOLLOUT|EPOLLET;
861         stdout_ev.data.fd = STDOUT_FILENO;
862
863         zero(master_ev);
864         master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
865         master_ev.data.fd = master;
866
867         zero(signal_ev);
868         signal_ev.events = EPOLLIN;
869         signal_ev.data.fd = signal_fd;
870
871         if (epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
872             epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
873             epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
874                 log_error("Failed to register fds in epoll: %m");
875                 r = -errno;
876                 goto finish;
877         }
878
879         for (;;) {
880                 struct epoll_event ev[16];
881                 ssize_t k;
882                 int i, nfds;
883
884                 nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1);
885                 if (nfds < 0) {
886
887                         if (errno == EINTR || errno == EAGAIN)
888                                 continue;
889
890                         log_error("epoll_wait(): %m");
891                         r = -errno;
892                         goto finish;
893                 }
894
895                 assert(nfds >= 1);
896
897                 for (i = 0; i < nfds; i++) {
898                         if (ev[i].data.fd == STDIN_FILENO) {
899
900                                 if (ev[i].events & (EPOLLIN|EPOLLHUP))
901                                         stdin_readable = true;
902
903                         } else if (ev[i].data.fd == STDOUT_FILENO) {
904
905                                 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
906                                         stdout_writable = true;
907
908                         } else if (ev[i].data.fd == master) {
909
910                                 if (ev[i].events & (EPOLLIN|EPOLLHUP))
911                                         master_readable = true;
912
913                                 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
914                                         master_writable = true;
915
916                         } else if (ev[i].data.fd == signal_fd) {
917                                 struct signalfd_siginfo sfsi;
918                                 ssize_t n;
919
920                                 n = read(signal_fd, &sfsi, sizeof(sfsi));
921                                 if (n != sizeof(sfsi)) {
922
923                                         if (n >= 0) {
924                                                 log_error("Failed to read from signalfd: invalid block size");
925                                                 r = -EIO;
926                                                 goto finish;
927                                         }
928
929                                         if (errno != EINTR && errno != EAGAIN) {
930                                                 log_error("Failed to read from signalfd: %m");
931                                                 r = -errno;
932                                                 goto finish;
933                                         }
934                                 } else {
935
936                                         if (sfsi.ssi_signo == SIGWINCH) {
937                                                 struct winsize ws;
938
939                                                 /* The window size changed, let's forward that. */
940                                                 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
941                                                         ioctl(master, TIOCSWINSZ, &ws);
942                                         } else {
943                                                 r = 0;
944                                                 goto finish;
945                                         }
946                                 }
947                         }
948                 }
949
950                 while ((stdin_readable && in_buffer_full <= 0) ||
951                        (master_writable && in_buffer_full > 0) ||
952                        (master_readable && out_buffer_full <= 0) ||
953                        (stdout_writable && out_buffer_full > 0)) {
954
955                         if (stdin_readable && in_buffer_full < LINE_MAX) {
956
957                                 k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full);
958                                 if (k < 0) {
959
960                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
961                                                 stdin_readable = false;
962                                         else {
963                                                 log_error("read(): %m");
964                                                 r = -errno;
965                                                 goto finish;
966                                         }
967                                 } else
968                                         in_buffer_full += (size_t) k;
969                         }
970
971                         if (master_writable && in_buffer_full > 0) {
972
973                                 k = write(master, in_buffer, in_buffer_full);
974                                 if (k < 0) {
975
976                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
977                                                 master_writable = false;
978                                         else {
979                                                 log_error("write(): %m");
980                                                 r = -errno;
981                                                 goto finish;
982                                         }
983
984                                 } else {
985                                         assert(in_buffer_full >= (size_t) k);
986                                         memmove(in_buffer, in_buffer + k, in_buffer_full - k);
987                                         in_buffer_full -= k;
988                                 }
989                         }
990
991                         if (master_readable && out_buffer_full < LINE_MAX) {
992
993                                 k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full);
994                                 if (k < 0) {
995
996                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
997                                                 master_readable = false;
998                                         else {
999                                                 log_error("read(): %m");
1000                                                 r = -errno;
1001                                                 goto finish;
1002                                         }
1003                                 }  else
1004                                         out_buffer_full += (size_t) k;
1005                         }
1006
1007                         if (stdout_writable && out_buffer_full > 0) {
1008
1009                                 k = write(STDOUT_FILENO, out_buffer, out_buffer_full);
1010                                 if (k < 0) {
1011
1012                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
1013                                                 stdout_writable = false;
1014                                         else {
1015                                                 log_error("write(): %m");
1016                                                 r = -errno;
1017                                                 goto finish;
1018                                         }
1019
1020                                 } else {
1021                                         assert(out_buffer_full >= (size_t) k);
1022                                         memmove(out_buffer, out_buffer + k, out_buffer_full - k);
1023                                         out_buffer_full -= k;
1024                                 }
1025                         }
1026                 }
1027         }
1028
1029 finish:
1030         if (ep >= 0)
1031                 close_nointr_nofail(ep);
1032
1033         if (signal_fd >= 0)
1034                 close_nointr_nofail(signal_fd);
1035
1036         return r;
1037 }
1038
1039 int main(int argc, char *argv[]) {
1040         pid_t pid = 0;
1041         int r = EXIT_FAILURE, k;
1042         char *oldcg = NULL, *newcg = NULL;
1043         char **controller = NULL;
1044         int master = -1;
1045         const char *console = NULL;
1046         struct termios saved_attr, raw_attr;
1047         sigset_t mask;
1048         bool saved_attr_valid = false;
1049         struct winsize ws;
1050         int kmsg_socket_pair[2] = { -1, -1 };
1051
1052         log_parse_environment();
1053         log_open();
1054
1055         r = parse_argv(argc, argv);
1056         if (r <= 0)
1057                 goto finish;
1058
1059         if (arg_directory) {
1060                 char *p;
1061
1062                 p = path_make_absolute_cwd(arg_directory);
1063                 free(arg_directory);
1064                 arg_directory = p;
1065         } else
1066                 arg_directory = get_current_dir_name();
1067
1068         if (!arg_directory) {
1069                 log_error("Failed to determine path");
1070                 goto finish;
1071         }
1072
1073         path_kill_slashes(arg_directory);
1074
1075         if (geteuid() != 0) {
1076                 log_error("Need to be root.");
1077                 goto finish;
1078         }
1079
1080         if (sd_booted() <= 0) {
1081                 log_error("Not running on a systemd system.");
1082                 goto finish;
1083         }
1084
1085         if (path_equal(arg_directory, "/")) {
1086                 log_error("Spawning container on root directory not supported.");
1087                 goto finish;
1088         }
1089
1090         if (is_os_tree(arg_directory) <= 0) {
1091                 log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory);
1092                 goto finish;
1093         }
1094
1095         k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg);
1096         if (k < 0) {
1097                 log_error("Failed to determine current cgroup: %s", strerror(-k));
1098                 goto finish;
1099         }
1100
1101         if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) {
1102                 log_error("Failed to allocate cgroup path.");
1103                 goto finish;
1104         }
1105
1106         k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0);
1107         if (k < 0)  {
1108                 log_error("Failed to create cgroup: %s", strerror(-k));
1109                 goto finish;
1110         }
1111
1112         STRV_FOREACH(controller, arg_controllers) {
1113                 k = cg_create_and_attach(*controller, newcg, 0);
1114                 if (k < 0)
1115                         log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k));
1116         }
1117
1118         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1119         if (master < 0) {
1120                 log_error("Failed to acquire pseudo tty: %m");
1121                 goto finish;
1122         }
1123
1124         console = ptsname(master);
1125         if (!console) {
1126                 log_error("Failed to determine tty name: %m");
1127                 goto finish;
1128         }
1129
1130         log_info("Spawning namespace container on %s (console is %s).", arg_directory, console);
1131
1132         if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
1133                 ioctl(master, TIOCSWINSZ, &ws);
1134
1135         if (unlockpt(master) < 0) {
1136                 log_error("Failed to unlock tty: %m");
1137                 goto finish;
1138         }
1139
1140         if (tcgetattr(STDIN_FILENO, &saved_attr) >= 0) {
1141                 saved_attr_valid = true;
1142
1143                 raw_attr = saved_attr;
1144                 cfmakeraw(&raw_attr);
1145                 raw_attr.c_lflag &= ~ECHO;
1146         }
1147
1148         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1149                 log_error("Failed to create kmsg socket pair");
1150                 goto finish;
1151         }
1152
1153         assert_se(sigemptyset(&mask) == 0);
1154         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1155         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1156
1157         for (;;) {
1158                 siginfo_t status;
1159
1160                 if (saved_attr_valid) {
1161                         if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
1162                                 log_error("Failed to set terminal attributes: %m");
1163                                 goto finish;
1164                         }
1165                 }
1166
1167                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1168                 if (pid < 0) {
1169                         if (errno == EINVAL)
1170                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1171                         else
1172                                 log_error("clone() failed: %m");
1173
1174                         goto finish;
1175                 }
1176
1177                 if (pid == 0) {
1178                         /* child */
1179
1180                         const char *home = NULL;
1181                         uid_t uid = (uid_t) -1;
1182                         gid_t gid = (gid_t) -1;
1183                         const char *envp[] = {
1184                                 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1185                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1186                                 NULL, /* TERM */
1187                                 NULL, /* HOME */
1188                                 NULL, /* USER */
1189                                 NULL, /* LOGNAME */
1190                                 NULL, /* container_uuid */
1191                                 NULL
1192                         };
1193
1194                         envp[2] = strv_find_prefix(environ, "TERM=");
1195
1196                         close_nointr_nofail(master);
1197
1198                         close_nointr(STDIN_FILENO);
1199                         close_nointr(STDOUT_FILENO);
1200                         close_nointr(STDERR_FILENO);
1201
1202                         close_all_fds(&kmsg_socket_pair[1], 1);
1203
1204                         reset_all_signal_handlers();
1205
1206                         assert_se(sigemptyset(&mask) == 0);
1207                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1208
1209                         if (open_terminal(console, O_RDWR) != STDIN_FILENO ||
1210                             dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1211                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
1212                                 goto child_fail;
1213
1214                         if (setsid() < 0) {
1215                                 log_error("setsid() failed: %m");
1216                                 goto child_fail;
1217                         }
1218
1219                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1220                                 log_error("PR_SET_PDEATHSIG failed: %m");
1221                                 goto child_fail;
1222                         }
1223
1224                         /* Mark everything as slave, so that we still
1225                          * receive mounts from the real root, but don't
1226                          * propagate mounts to the real root. */
1227                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1228                                 log_error("MS_SLAVE|MS_REC failed: %m");
1229                                 goto child_fail;
1230                         }
1231
1232                         /* Turn directory into bind mount */
1233                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1234                                 log_error("Failed to make bind mount.");
1235                                 goto child_fail;
1236                         }
1237
1238                         if (arg_read_only)
1239                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1240                                         log_error("Failed to make read-only.");
1241                                         goto child_fail;
1242                                 }
1243
1244                         if (mount_all(arg_directory) < 0)
1245                                 goto child_fail;
1246
1247                         if (copy_devnodes(arg_directory) < 0)
1248                                 goto child_fail;
1249
1250                         dev_setup(arg_directory);
1251
1252                         if (setup_dev_console(arg_directory, console) < 0)
1253                                 goto child_fail;
1254
1255                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1256                                 goto child_fail;
1257
1258                         close_nointr_nofail(kmsg_socket_pair[1]);
1259
1260                         if (setup_boot_id(arg_directory) < 0)
1261                                 goto child_fail;
1262
1263                         if (setup_timezone(arg_directory) < 0)
1264                                 goto child_fail;
1265
1266                         if (setup_resolv_conf(arg_directory) < 0)
1267                                 goto child_fail;
1268
1269                         if (setup_journal(arg_directory) < 0)
1270                                 goto child_fail;
1271
1272                         if (chdir(arg_directory) < 0) {
1273                                 log_error("chdir(%s) failed: %m", arg_directory);
1274                                 goto child_fail;
1275                         }
1276
1277                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1278                                 log_error("mount(MS_MOVE) failed: %m");
1279                                 goto child_fail;
1280                         }
1281
1282                         if (chroot(".") < 0) {
1283                                 log_error("chroot() failed: %m");
1284                                 goto child_fail;
1285                         }
1286
1287                         if (chdir("/") < 0) {
1288                                 log_error("chdir() failed: %m");
1289                                 goto child_fail;
1290                         }
1291
1292                         umask(0022);
1293
1294                         loopback_setup();
1295
1296                         if (drop_capabilities() < 0) {
1297                                 log_error("drop_capabilities() failed: %m");
1298                                 goto child_fail;
1299                         }
1300
1301                         if (arg_user) {
1302
1303                                 /* Note that this resolves user names
1304                                  * inside the container, and hence
1305                                  * accesses the NSS modules from the
1306                                  * container and not the host. This is
1307                                  * a bit weird... */
1308
1309                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1310                                         log_error("get_user_creds() failed: %m");
1311                                         goto child_fail;
1312                                 }
1313
1314                                 if (mkdir_parents_label(home, 0775) < 0) {
1315                                         log_error("mkdir_parents_label() failed: %m");
1316                                         goto child_fail;
1317                                 }
1318
1319                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1320                                         log_error("mkdir_safe_label() failed: %m");
1321                                         goto child_fail;
1322                                 }
1323
1324                                 if (initgroups((const char*)arg_user, gid) < 0) {
1325                                         log_error("initgroups() failed: %m");
1326                                         goto child_fail;
1327                                 }
1328
1329                                 if (setresgid(gid, gid, gid) < 0) {
1330                                         log_error("setregid() failed: %m");
1331                                         goto child_fail;
1332                                 }
1333
1334                                 if (setresuid(uid, uid, uid) < 0) {
1335                                         log_error("setreuid() failed: %m");
1336                                         goto child_fail;
1337                                 }
1338                         } else {
1339                                 /* Reset everything fully to 0, just in case */
1340
1341                                 if (setgroups(0, NULL) < 0) {
1342                                         log_error("setgroups() failed: %m");
1343                                         goto child_fail;
1344                                 }
1345
1346                                 if (setresgid(0, 0, 0) < 0) {
1347                                         log_error("setregid() failed: %m");
1348                                         goto child_fail;
1349                                 }
1350
1351                                 if (setresuid(0, 0, 0) < 0) {
1352                                         log_error("setreuid() failed: %m");
1353                                         goto child_fail;
1354                                 }
1355                         }
1356
1357                         if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) ||
1358                             (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1359                             (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1360                                 log_oom();
1361                                 goto child_fail;
1362                         }
1363
1364                         if (arg_uuid) {
1365                                 if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) {
1366                                         log_oom();
1367                                         goto child_fail;
1368                                 }
1369                         }
1370
1371                         setup_hostname();
1372
1373                         if (arg_boot) {
1374                                 char **a;
1375                                 size_t l;
1376
1377                                 /* Automatically search for the init system */
1378
1379                                 l = 1 + argc - optind;
1380                                 a = newa(char*, l + 1);
1381                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
1382
1383                                 a[0] = (char*) "/usr/lib/systemd/systemd";
1384                                 execve(a[0], a, (char**) envp);
1385
1386                                 a[0] = (char*) "/lib/systemd/systemd";
1387                                 execve(a[0], a, (char**) envp);
1388
1389                                 a[0] = (char*) "/sbin/init";
1390                                 execve(a[0], a, (char**) envp);
1391                         } else if (argc > optind)
1392                                 execvpe(argv[optind], argv + optind, (char**) envp);
1393                         else {
1394                                 chdir(home ? home : "/root");
1395                                 execle("/bin/bash", "-bash", NULL, (char**) envp);
1396                         }
1397
1398                         log_error("execv() failed: %m");
1399
1400                 child_fail:
1401                         _exit(EXIT_FAILURE);
1402                 }
1403
1404                 if (process_pty(master, &mask) < 0)
1405                         goto finish;
1406
1407
1408                 if (saved_attr_valid)
1409                         tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1410
1411                 r = wait_for_terminate(pid, &status);
1412                 if (r < 0) {
1413                         r = EXIT_FAILURE;
1414                         break;
1415                 }
1416
1417                 if (status.si_code == CLD_EXITED) {
1418                         if (status.si_status != 0) {
1419                                 log_error("Container failed with error code %i.", status.si_status);
1420                                 r = status.si_status;
1421                                 break;
1422                         }
1423
1424                         log_debug("Container exited successfully.");
1425                         break;
1426                 } else if (status.si_code == CLD_KILLED &&
1427                            status.si_status == SIGINT) {
1428                         log_info("Container has been shut down.");
1429                         r = 0;
1430                         break;
1431                 } else if (status.si_code == CLD_KILLED &&
1432                            status.si_status == SIGHUP) {
1433                         log_info("Container is being rebooted.");
1434                         continue;
1435                 } else if (status.si_code == CLD_KILLED ||
1436                            status.si_code == CLD_DUMPED) {
1437
1438                         log_error("Container terminated by signal %s.", signal_to_string(status.si_status));
1439                         r = EXIT_FAILURE;
1440                         break;
1441                 } else {
1442                         log_error("Container failed due to unknown reason.");
1443                         r = EXIT_FAILURE;
1444                         break;
1445                 }
1446         }
1447
1448 finish:
1449         if (saved_attr_valid)
1450                 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1451
1452         if (master >= 0)
1453                 close_nointr_nofail(master);
1454
1455         close_pipe(kmsg_socket_pair);
1456
1457         if (oldcg)
1458                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0);
1459
1460         if (newcg)
1461                 cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
1462
1463         free(arg_directory);
1464         strv_free(arg_controllers);
1465         free(oldcg);
1466         free(newcg);
1467
1468         return r;
1469 }