chiark / gitweb /
5cac32cd8c84312ce28b97cfd515ecab1c2a7dba
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <sys/epoll.h>
37 #include <termios.h>
38 #include <sys/signalfd.h>
39 #include <grp.h>
40 #include <linux/fs.h>
41 #include <sys/un.h>
42 #include <sys/socket.h>
43
44 #include <systemd/sd-daemon.h>
45
46 #include "log.h"
47 #include "util.h"
48 #include "mkdir.h"
49 #include "macro.h"
50 #include "audit.h"
51 #include "missing.h"
52 #include "cgroup-util.h"
53 #include "strv.h"
54 #include "path-util.h"
55 #include "loopback-setup.h"
56 #include "sd-id128.h"
57 #include "dev-setup.h"
58
/* Journal linking mode, selected on the command line with
 * --link-journal= (or -j); see parse_argv(). */
typedef enum LinkJournal {
        LINK_NO    = 0, /* --link-journal=no */
        LINK_AUTO  = 1, /* --link-journal=auto; initial value of arg_link_journal */
        LINK_HOST  = 2, /* --link-journal=host */
        LINK_GUEST = 3  /* --link-journal=guest, also what -j selects */
} LinkJournal;
65
66 static char *arg_directory = NULL;
67 static char *arg_user = NULL;
68 static char **arg_controllers = NULL;
69 static char *arg_uuid = NULL;
70 static bool arg_private_network = false;
71 static bool arg_read_only = false;
72 static bool arg_boot = false;
73 static LinkJournal arg_link_journal = LINK_AUTO;
74 static uint64_t arg_retain =
75         (1ULL << CAP_CHOWN) |
76         (1ULL << CAP_DAC_OVERRIDE) |
77         (1ULL << CAP_DAC_READ_SEARCH) |
78         (1ULL << CAP_FOWNER) |
79         (1ULL << CAP_FSETID) |
80         (1ULL << CAP_IPC_OWNER) |
81         (1ULL << CAP_KILL) |
82         (1ULL << CAP_LEASE) |
83         (1ULL << CAP_LINUX_IMMUTABLE) |
84         (1ULL << CAP_NET_BIND_SERVICE) |
85         (1ULL << CAP_NET_BROADCAST) |
86         (1ULL << CAP_NET_RAW) |
87         (1ULL << CAP_SETGID) |
88         (1ULL << CAP_SETFCAP) |
89         (1ULL << CAP_SETPCAP) |
90         (1ULL << CAP_SETUID) |
91         (1ULL << CAP_SYS_ADMIN) |
92         (1ULL << CAP_SYS_CHROOT) |
93         (1ULL << CAP_SYS_NICE) |
94         (1ULL << CAP_SYS_PTRACE) |
95         (1ULL << CAP_SYS_TTY_CONFIG) |
96         (1ULL << CAP_SYS_RESOURCE) |
97         (1ULL << CAP_SYS_BOOT);
98
99 static int help(void) {
100
101         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
102                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
103                "  -h --help               Show this help\n"
104                "  -D --directory=NAME     Root directory for the container\n"
105                "  -b --boot               Boot up full system (i.e. invoke init)\n"
106                "  -u --user=USER          Run the command under specified user or uid\n"
107                "  -C --controllers=LIST   Put the container in specified comma-separated cgroup hierarchies\n"
108                "     --uuid=UUID          Set a specific machine UUID for the container\n"
109                "     --private-network    Disable network in container\n"
110                "     --read-only          Mount the root directory read-only\n"
111                "     --capability=CAP     In addition to the default, retain specified capability\n"
112                "     --link-journal=MODE  Link up guest journal, one of no, auto, guest, host\n"
113                "  -j                      Equivalent to --link-journal=host\n",
114                program_invocation_short_name);
115
116         return 0;
117 }
118
119 static int parse_argv(int argc, char *argv[]) {
120
121         enum {
122                 ARG_PRIVATE_NETWORK = 0x100,
123                 ARG_UUID,
124                 ARG_READ_ONLY,
125                 ARG_CAPABILITY,
126                 ARG_LINK_JOURNAL
127         };
128
129         static const struct option options[] = {
130                 { "help",            no_argument,       NULL, 'h'                 },
131                 { "directory",       required_argument, NULL, 'D'                 },
132                 { "user",            required_argument, NULL, 'u'                 },
133                 { "controllers",     required_argument, NULL, 'C'                 },
134                 { "private-network", no_argument,       NULL, ARG_PRIVATE_NETWORK },
135                 { "boot",            no_argument,       NULL, 'b'                 },
136                 { "uuid",            required_argument, NULL, ARG_UUID            },
137                 { "read-only",       no_argument,       NULL, ARG_READ_ONLY       },
138                 { "capability",      required_argument, NULL, ARG_CAPABILITY      },
139                 { "link-journal",    required_argument, NULL, ARG_LINK_JOURNAL    },
140                 { NULL,              0,                 NULL, 0                   }
141         };
142
143         int c;
144
145         assert(argc >= 0);
146         assert(argv);
147
148         while ((c = getopt_long(argc, argv, "+hD:u:C:bj", options, NULL)) >= 0) {
149
150                 switch (c) {
151
152                 case 'h':
153                         help();
154                         return 0;
155
156                 case 'D':
157                         free(arg_directory);
158                         arg_directory = canonicalize_file_name(optarg);
159                         if (!arg_directory) {
160                                 log_error("Failed to canonicalize root directory.");
161                                 return -ENOMEM;
162                         }
163
164                         break;
165
166                 case 'u':
167                         free(arg_user);
168                         if (!(arg_user = strdup(optarg))) {
169                                 log_error("Failed to duplicate user name.");
170                                 return -ENOMEM;
171                         }
172
173                         break;
174
175                 case 'C':
176                         strv_free(arg_controllers);
177                         arg_controllers = strv_split(optarg, ",");
178                         if (!arg_controllers) {
179                                 log_error("Failed to split controllers list.");
180                                 return -ENOMEM;
181                         }
182                         strv_uniq(arg_controllers);
183
184                         break;
185
186                 case ARG_PRIVATE_NETWORK:
187                         arg_private_network = true;
188                         break;
189
190                 case 'b':
191                         arg_boot = true;
192                         break;
193
194                 case ARG_UUID:
195                         arg_uuid = optarg;
196                         break;
197
198                 case ARG_READ_ONLY:
199                         arg_read_only = true;
200                         break;
201
202                 case ARG_CAPABILITY: {
203                         char *state, *word;
204                         size_t length;
205
206                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
207                                 cap_value_t cap;
208                                 char *t;
209
210                                 t = strndup(word, length);
211                                 if (!t)
212                                         return log_oom();
213
214                                 if (cap_from_name(t, &cap) < 0) {
215                                         log_error("Failed to parse capability %s.", t);
216                                         free(t);
217                                         return -EINVAL;
218                                 }
219
220                                 free(t);
221                                 arg_retain |= 1ULL << (uint64_t) cap;
222                         }
223
224                         break;
225                 }
226
227                 case 'j':
228                         arg_link_journal = LINK_GUEST;
229                         break;
230
231                 case ARG_LINK_JOURNAL:
232                         if (streq(optarg, "auto"))
233                                 arg_link_journal = LINK_AUTO;
234                         else if (streq(optarg, "no"))
235                                 arg_link_journal = LINK_NO;
236                         else if (streq(optarg, "guest"))
237                                 arg_link_journal = LINK_GUEST;
238                         else if (streq(optarg, "host"))
239                                 arg_link_journal = LINK_HOST;
240                         else {
241                                 log_error("Failed to parse link journal mode %s", optarg);
242                                 return -EINVAL;
243                         }
244
245                         break;
246
247                 case '?':
248                         return -EINVAL;
249
250                 default:
251                         log_error("Unknown option code %c", c);
252                         return -EINVAL;
253                 }
254         }
255
256         return 1;
257 }
258
259 static int mount_all(const char *dest) {
260
261         typedef struct MountPoint {
262                 const char *what;
263                 const char *where;
264                 const char *type;
265                 const char *options;
266                 unsigned long flags;
267                 bool fatal;
268         } MountPoint;
269
270         static const MountPoint mount_table[] = {
271                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
272                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
273                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
274                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
275                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
276                 { "/dev/pts",  "/dev/pts",  NULL,    NULL,       MS_BIND,                      true  },
277                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
278                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
279 #ifdef HAVE_SELINUX
280                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
281                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
282 #endif
283         };
284
285         unsigned k;
286         int r = 0;
287
288         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
289                 char _cleanup_free_ *where = NULL;
290                 int t;
291
292                 if (asprintf(&where, "%s/%s", dest, mount_table[k].where) < 0) {
293                         log_oom();
294
295                         if (r == 0)
296                                 r = -ENOMEM;
297
298                         break;
299                 }
300
301                 t = path_is_mount_point(where, true);
302                 if (t < 0) {
303                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
304
305                         if (r == 0)
306                                 r = t;
307
308                         continue;
309                 }
310
311                 /* Skip this entry if it is not a remount. */
312                 if (mount_table[k].what && t > 0)
313                         continue;
314
315                 mkdir_p_label(where, 0755);
316
317                 if (mount(mount_table[k].what,
318                           where,
319                           mount_table[k].type,
320                           mount_table[k].flags,
321                           mount_table[k].options) < 0 &&
322                     mount_table[k].fatal) {
323
324                         log_error("mount(%s) failed: %m", where);
325
326                         if (r == 0)
327                                 r = -errno;
328                 }
329         }
330
331         return r;
332 }
333
334 static int setup_timezone(const char *dest) {
335         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
336         char *z, *y;
337         int r;
338
339         assert(dest);
340
341         /* Fix the timezone, if possible */
342         r = readlink_malloc("/etc/localtime", &p);
343         if (r < 0) {
344                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
345                 return 0;
346         }
347
348         z = path_startswith(p, "../usr/share/zoneinfo/");
349         if (!z)
350                 z = path_startswith(p, "/usr/share/zoneinfo/");
351         if (!z) {
352                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
353                 return 0;
354         }
355
356         where = strappend(dest, "/etc/localtime");
357         if (!where)
358                 return log_oom();
359
360         r = readlink_malloc(where, &q);
361         if (r >= 0) {
362                 y = path_startswith(q, "../usr/share/zoneinfo/");
363                 if (!y)
364                         y = path_startswith(q, "/usr/share/zoneinfo/");
365
366
367                 /* Already pointing to the right place? Then do nothing .. */
368                 if (y && streq(y, z))
369                         return 0;
370         }
371
372         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
373         if (!check)
374                 return log_oom();
375
376         if (access(check, F_OK) < 0) {
377                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
378                 return 0;
379         }
380
381         what = strappend("../usr/share/zoneinfo/", z);
382         if (!what)
383                 return log_oom();
384
385         unlink(where);
386         if (symlink(what, where) < 0) {
387                 log_error("Failed to correct timezone of container: %m");
388                 return 0;
389         }
390
391         return 0;
392 }
393
394 static int setup_resolv_conf(const char *dest) {
395         char *where;
396
397         assert(dest);
398
399         if (arg_private_network)
400                 return 0;
401
402         /* Fix resolv.conf, if possible */
403         where = strappend(dest, "/etc/resolv.conf");
404         if (!where)
405                 return log_oom();
406
407         /* We don't really care for the results of this really. If it
408          * fails, it fails, but meh... */
409         if (mount("/etc/resolv.conf", where, "bind", MS_BIND, NULL) >= 0)
410                 mount("/etc/resolv.conf", where, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
411
412         free(where);
413
414         return 0;
415 }
416
417 static int setup_boot_id(const char *dest) {
418         char _cleanup_free_ *from = NULL, *to = NULL;
419         sd_id128_t rnd;
420         char as_uuid[37];
421         int r;
422
423         assert(dest);
424
425         /* Generate a new randomized boot ID, so that each boot-up of
426          * the container gets a new one */
427
428         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
429         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
430         if (!from || !to)
431                 return log_oom();
432
433         r = sd_id128_randomize(&rnd);
434         if (r < 0) {
435                 log_error("Failed to generate random boot id: %s", strerror(-r));
436                 return r;
437         }
438
439         snprintf(as_uuid, sizeof(as_uuid),
440                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
441                  SD_ID128_FORMAT_VAL(rnd));
442         char_array_0(as_uuid);
443
444         r = write_one_line_file(from, as_uuid);
445         if (r < 0) {
446                 log_error("Failed to write boot id: %s", strerror(-r));
447                 return r;
448         }
449
450         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
451                 log_error("Failed to bind mount boot id: %m");
452                 r = -errno;
453         } else
454                 mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL);
455
456         unlink(from);
457         return r;
458 }
459
460 static int copy_devnodes(const char *dest) {
461
462         static const char devnodes[] =
463                 "null\0"
464                 "zero\0"
465                 "full\0"
466                 "random\0"
467                 "urandom\0"
468                 "tty\0"
469                 "ptmx\0";
470
471         const char *d;
472         int r = 0;
473         mode_t _cleanup_umask_ u;
474
475         assert(dest);
476
477         u = umask(0000);
478
479         NULSTR_FOREACH(d, devnodes) {
480                 struct stat st;
481                 char _cleanup_free_ *from = NULL, *to = NULL;
482
483                 asprintf(&from, "/dev/%s", d);
484                 asprintf(&to, "%s/dev/%s", dest, d);
485
486                 if (!from || !to) {
487                         log_oom();
488
489                         if (r == 0)
490                                 r = -ENOMEM;
491
492                         break;
493                 }
494
495                 if (stat(from, &st) < 0) {
496
497                         if (errno != ENOENT) {
498                                 log_error("Failed to stat %s: %m", from);
499                                 if (r == 0)
500                                         r = -errno;
501                         }
502
503                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
504
505                         log_error("%s is not a char or block device, cannot copy", from);
506                         if (r == 0)
507                                 r = -EIO;
508
509                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
510
511                         log_error("mknod(%s) failed: %m", dest);
512                         if (r == 0)
513                                 r = -errno;
514                 }
515         }
516
517         return r;
518 }
519
520 static int setup_dev_console(const char *dest, const char *console) {
521         struct stat st;
522         char _cleanup_free_ *to = NULL;
523         int r;
524         mode_t _cleanup_umask_ u;
525
526         assert(dest);
527         assert(console);
528
529         u = umask(0000);
530
531         if (stat(console, &st) < 0) {
532                 log_error("Failed to stat %s: %m", console);
533                 return -errno;
534
535         } else if (!S_ISCHR(st.st_mode)) {
536                 log_error("/dev/console is not a char device");
537                 return -EIO;
538         }
539
540         r = chmod_and_chown(console, 0600, 0, 0);
541         if (r < 0) {
542                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
543                 return r;
544         }
545
546         if (asprintf(&to, "%s/dev/console", dest) < 0)
547                 return log_oom();
548
549         /* We need to bind mount the right tty to /dev/console since
550          * ptys can only exist on pts file systems. To have something
551          * to bind mount things on we create a device node first, that
552          * has the right major/minor (note that the major minor
553          * doesn't actually matter here, since we mount it over
554          * anyway). */
555
556         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
557                 log_error("mknod() for /dev/console failed: %m");
558                 return -errno;
559         }
560
561         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
562                 log_error("Bind mount for /dev/console failed: %m");
563                 return -errno;
564         }
565
566         return 0;
567 }
568
569 static int setup_kmsg(const char *dest, int kmsg_socket) {
570         char _cleanup_free_ *from = NULL, *to = NULL;
571         int r, fd, k;
572         mode_t _cleanup_umask_ u;
573         union {
574                 struct cmsghdr cmsghdr;
575                 uint8_t buf[CMSG_SPACE(sizeof(int))];
576         } control;
577         struct msghdr mh;
578         struct cmsghdr *cmsg;
579
580         assert(dest);
581         assert(kmsg_socket >= 0);
582
583         u = umask(0000);
584
585         /* We create the kmsg FIFO as /dev/kmsg, but immediately
586          * delete it after bind mounting it to /proc/kmsg. While FIFOs
587          * on the reading side behave very similar to /proc/kmsg,
588          * their writing side behaves differently from /dev/kmsg in
589          * that writing blocks when nothing is reading. In order to
590          * avoid any problems with containers deadlocking due to this
591          * we simply make /dev/kmsg unavailable to the container. */
592         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
593             asprintf(&to, "%s/proc/kmsg", dest) < 0)
594                 return log_oom();
595
596         if (mkfifo(from, 0600) < 0) {
597                 log_error("mkfifo() for /dev/kmsg failed: %m");
598                 return -errno;
599         }
600
601         r = chmod_and_chown(from, 0600, 0, 0);
602         if (r < 0) {
603                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
604                 return r;
605         }
606
607         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
608                 log_error("Bind mount for /proc/kmsg failed: %m");
609                 return -errno;
610         }
611
612         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
613         if (fd < 0) {
614                 log_error("Failed to open fifo: %m");
615                 return -errno;
616         }
617
618         zero(mh);
619         zero(control);
620
621         mh.msg_control = &control;
622         mh.msg_controllen = sizeof(control);
623
624         cmsg = CMSG_FIRSTHDR(&mh);
625         cmsg->cmsg_level = SOL_SOCKET;
626         cmsg->cmsg_type = SCM_RIGHTS;
627         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
628         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
629
630         mh.msg_controllen = cmsg->cmsg_len;
631
632         /* Store away the fd in the socket, so that it stays open as
633          * long as we run the child */
634         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
635         close_nointr_nofail(fd);
636
637         if (k < 0) {
638                 log_error("Failed to send FIFO fd: %m");
639                 return -errno;
640         }
641
642         /* And now make the FIFO unavailable as /dev/kmsg... */
643         unlink(from);
644         return 0;
645 }
646
647 static int setup_hostname(void) {
648         char *hn;
649         int r = 0;
650
651         hn = path_get_file_name(arg_directory);
652         if (hn) {
653                 hn = strdup(hn);
654                 if (!hn)
655                         return -ENOMEM;
656
657                 hostname_cleanup(hn);
658
659                 if (!isempty(hn))
660                         if (sethostname(hn, strlen(hn)) < 0)
661                                 r = -errno;
662
663                 free(hn);
664         }
665
666         return r;
667 }
668
669 static int setup_journal(const char *directory) {
670         sd_id128_t machine_id;
671         char *p = NULL, *b = NULL, *l, *q = NULL, *d = NULL;
672         int r;
673
674         if (arg_link_journal == LINK_NO)
675                 return 0;
676
677         p = strappend(directory, "/etc/machine-id");
678         if (!p) {
679                 r = log_oom();
680                 goto finish;
681         }
682
683         r = read_one_line_file(p, &b);
684         if (r == -ENOENT && arg_link_journal == LINK_AUTO) {
685                 r = 0;
686                 goto finish;
687         } else if (r < 0) {
688                 log_error("Failed to read machine ID: %s", strerror(-r));
689                 return r;
690         }
691
692         l = strstrip(b);
693         if (isempty(l) && arg_link_journal == LINK_AUTO) {
694                 r = 0;
695                 goto finish;
696         }
697
698         /* Verify validaty */
699         r = sd_id128_from_string(l, &machine_id);
700         if (r < 0) {
701                 log_error("Failed to parse machine ID: %s", strerror(-r));
702                 goto finish;
703         }
704
705         free(p);
706         p = strappend("/var/log/journal/", l);
707         q = strjoin(directory, "/var/log/journal/", l, NULL);
708         if (!p || !q) {
709                 r = log_oom();
710                 goto finish;
711         }
712
713         if (path_is_mount_point(p, false) > 0 ||
714             path_is_mount_point(q, false) > 0) {
715                 if (arg_link_journal != LINK_AUTO) {
716                         log_error("Journal already a mount point, refusing.");
717                         r = -EEXIST;
718                         goto finish;
719                 }
720
721                 r = 0;
722                 goto finish;
723         }
724
725         r = readlink_and_make_absolute(p, &d);
726         if (r >= 0) {
727                 if ((arg_link_journal == LINK_GUEST ||
728                      arg_link_journal == LINK_AUTO) &&
729                     path_equal(d, q)) {
730
731                         mkdir_p(q, 0755);
732
733                         r = 0;
734                         goto finish;
735                 }
736
737                 if (unlink(p) < 0) {
738                         log_error("Failed to remove symlink %s: %m", p);
739                         r = -errno;
740                         goto finish;
741                 }
742         } else if (r == -EINVAL) {
743
744                 if (arg_link_journal == LINK_GUEST &&
745                     rmdir(p) < 0) {
746
747                         if (errno == ENOTDIR)
748                                 log_error("%s already exists and is neither symlink nor directory.", p);
749                         else {
750                                 log_error("Failed to remove %s: %m", p);
751                                 r = -errno;
752                         }
753
754                         goto finish;
755                 }
756         } else if (r != -ENOENT) {
757                 log_error("readlink(%s) failed: %m", p);
758                 goto finish;
759         }
760
761         if (arg_link_journal == LINK_GUEST) {
762
763                 if (symlink(q, p) < 0) {
764                         log_error("Failed to symlink %s to %s: %m", q, p);
765                         r = -errno;
766                         goto finish;
767                 }
768
769                 mkdir_p(q, 0755);
770
771                 r = 0;
772                 goto finish;
773         }
774
775         if (arg_link_journal == LINK_HOST) {
776                 r = mkdir_p(p, 0755);
777                 if (r < 0) {
778                         log_error("Failed to create %s: %m", p);
779                         goto finish;
780                 }
781
782         } else if (access(p, F_OK) < 0) {
783                 r = 0;
784                 goto finish;
785         }
786
787         if (dir_is_empty(q) == 0) {
788                 log_error("%s not empty.", q);
789                 r = -ENOTEMPTY;
790                 goto finish;
791         }
792
793         r = mkdir_p(q, 0755);
794         if (r < 0) {
795                 log_error("Failed to create %s: %m", q);
796                 goto finish;
797         }
798
799         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
800                 log_error("Failed to bind mount journal from host into guest: %m");
801                 r = -errno;
802                 goto finish;
803         }
804
805         r = 0;
806
807 finish:
808         free(p);
809         free(q);
810         free(d);
811         free(b);
812         return r;
813
814 }
815
816 static int drop_capabilities(void) {
817         return capability_bounding_set_drop(~arg_retain, false);
818 }
819
/*
 * Tells whether "path" looks like the root of an OS tree, using the
 * existence of <path>/bin/sh as the indicator.
 *
 * Returns 1 if the file exists, 0 if it does not (or cannot be
 * accessed), and -ENOMEM if the path could not be allocated.
 */
static int is_os_tree(const char *path) {
        char *shell = NULL;
        int found;

        if (asprintf(&shell, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        found = access(shell, F_OK) >= 0;
        free(shell);

        return found;
}
833
834 static int process_pty(int master, sigset_t *mask) {
835
836         char in_buffer[LINE_MAX], out_buffer[LINE_MAX];
837         size_t in_buffer_full = 0, out_buffer_full = 0;
838         struct epoll_event stdin_ev, stdout_ev, master_ev, signal_ev;
839         bool stdin_readable = false, stdout_writable = false, master_readable = false, master_writable = false;
840         int ep = -1, signal_fd = -1, r;
841
842         fd_nonblock(STDIN_FILENO, 1);
843         fd_nonblock(STDOUT_FILENO, 1);
844         fd_nonblock(master, 1);
845
846         signal_fd = signalfd(-1, mask, SFD_NONBLOCK|SFD_CLOEXEC);
847         if (signal_fd < 0) {
848                 log_error("signalfd(): %m");
849                 r = -errno;
850                 goto finish;
851         }
852
853         ep = epoll_create1(EPOLL_CLOEXEC);
854         if (ep < 0) {
855                 log_error("Failed to create epoll: %m");
856                 r = -errno;
857                 goto finish;
858         }
859
860         zero(stdin_ev);
861         stdin_ev.events = EPOLLIN|EPOLLET;
862         stdin_ev.data.fd = STDIN_FILENO;
863
864         zero(stdout_ev);
865         stdout_ev.events = EPOLLOUT|EPOLLET;
866         stdout_ev.data.fd = STDOUT_FILENO;
867
868         zero(master_ev);
869         master_ev.events = EPOLLIN|EPOLLOUT|EPOLLET;
870         master_ev.data.fd = master;
871
872         zero(signal_ev);
873         signal_ev.events = EPOLLIN;
874         signal_ev.data.fd = signal_fd;
875
876         if (epoll_ctl(ep, EPOLL_CTL_ADD, STDIN_FILENO, &stdin_ev) < 0 ||
877             epoll_ctl(ep, EPOLL_CTL_ADD, STDOUT_FILENO, &stdout_ev) < 0 ||
878             epoll_ctl(ep, EPOLL_CTL_ADD, master, &master_ev) < 0 ||
879             epoll_ctl(ep, EPOLL_CTL_ADD, signal_fd, &signal_ev) < 0) {
880                 log_error("Failed to regiser fds in epoll: %m");
881                 r = -errno;
882                 goto finish;
883         }
884
885         for (;;) {
886                 struct epoll_event ev[16];
887                 ssize_t k;
888                 int i, nfds;
889
890                 nfds = epoll_wait(ep, ev, ELEMENTSOF(ev), -1);
891                 if (nfds < 0) {
892
893                         if (errno == EINTR || errno == EAGAIN)
894                                 continue;
895
896                         log_error("epoll_wait(): %m");
897                         r = -errno;
898                         goto finish;
899                 }
900
901                 assert(nfds >= 1);
902
903                 for (i = 0; i < nfds; i++) {
904                         if (ev[i].data.fd == STDIN_FILENO) {
905
906                                 if (ev[i].events & (EPOLLIN|EPOLLHUP))
907                                         stdin_readable = true;
908
909                         } else if (ev[i].data.fd == STDOUT_FILENO) {
910
911                                 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
912                                         stdout_writable = true;
913
914                         } else if (ev[i].data.fd == master) {
915
916                                 if (ev[i].events & (EPOLLIN|EPOLLHUP))
917                                         master_readable = true;
918
919                                 if (ev[i].events & (EPOLLOUT|EPOLLHUP))
920                                         master_writable = true;
921
922                         } else if (ev[i].data.fd == signal_fd) {
923                                 struct signalfd_siginfo sfsi;
924                                 ssize_t n;
925
926                                 n = read(signal_fd, &sfsi, sizeof(sfsi));
927                                 if (n != sizeof(sfsi)) {
928
929                                         if (n >= 0) {
930                                                 log_error("Failed to read from signalfd: invalid block size");
931                                                 r = -EIO;
932                                                 goto finish;
933                                         }
934
935                                         if (errno != EINTR && errno != EAGAIN) {
936                                                 log_error("Failed to read from signalfd: %m");
937                                                 r = -errno;
938                                                 goto finish;
939                                         }
940                                 } else {
941
942                                         if (sfsi.ssi_signo == SIGWINCH) {
943                                                 struct winsize ws;
944
945                                                 /* The window size changed, let's forward that. */
946                                                 if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
947                                                         ioctl(master, TIOCSWINSZ, &ws);
948                                         } else {
949                                                 r = 0;
950                                                 goto finish;
951                                         }
952                                 }
953                         }
954                 }
955
956                 while ((stdin_readable && in_buffer_full <= 0) ||
957                        (master_writable && in_buffer_full > 0) ||
958                        (master_readable && out_buffer_full <= 0) ||
959                        (stdout_writable && out_buffer_full > 0)) {
960
961                         if (stdin_readable && in_buffer_full < LINE_MAX) {
962
963                                 k = read(STDIN_FILENO, in_buffer + in_buffer_full, LINE_MAX - in_buffer_full);
964                                 if (k < 0) {
965
966                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
967                                                 stdin_readable = false;
968                                         else {
969                                                 log_error("read(): %m");
970                                                 r = -errno;
971                                                 goto finish;
972                                         }
973                                 } else
974                                         in_buffer_full += (size_t) k;
975                         }
976
977                         if (master_writable && in_buffer_full > 0) {
978
979                                 k = write(master, in_buffer, in_buffer_full);
980                                 if (k < 0) {
981
982                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
983                                                 master_writable = false;
984                                         else {
985                                                 log_error("write(): %m");
986                                                 r = -errno;
987                                                 goto finish;
988                                         }
989
990                                 } else {
991                                         assert(in_buffer_full >= (size_t) k);
992                                         memmove(in_buffer, in_buffer + k, in_buffer_full - k);
993                                         in_buffer_full -= k;
994                                 }
995                         }
996
997                         if (master_readable && out_buffer_full < LINE_MAX) {
998
999                                 k = read(master, out_buffer + out_buffer_full, LINE_MAX - out_buffer_full);
1000                                 if (k < 0) {
1001
1002                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
1003                                                 master_readable = false;
1004                                         else {
1005                                                 log_error("read(): %m");
1006                                                 r = -errno;
1007                                                 goto finish;
1008                                         }
1009                                 }  else
1010                                         out_buffer_full += (size_t) k;
1011                         }
1012
1013                         if (stdout_writable && out_buffer_full > 0) {
1014
1015                                 k = write(STDOUT_FILENO, out_buffer, out_buffer_full);
1016                                 if (k < 0) {
1017
1018                                         if (errno == EAGAIN || errno == EPIPE || errno == ECONNRESET || errno == EIO)
1019                                                 stdout_writable = false;
1020                                         else {
1021                                                 log_error("write(): %m");
1022                                                 r = -errno;
1023                                                 goto finish;
1024                                         }
1025
1026                                 } else {
1027                                         assert(out_buffer_full >= (size_t) k);
1028                                         memmove(out_buffer, out_buffer + k, out_buffer_full - k);
1029                                         out_buffer_full -= k;
1030                                 }
1031                         }
1032                 }
1033         }
1034
1035 finish:
1036         if (ep >= 0)
1037                 close_nointr_nofail(ep);
1038
1039         if (signal_fd >= 0)
1040                 close_nointr_nofail(signal_fd);
1041
1042         return r;
1043 }
1044
1045 int main(int argc, char *argv[]) {
1046         pid_t pid = 0;
1047         int r = EXIT_FAILURE, k;
1048         char *oldcg = NULL, *newcg = NULL;
1049         char **controller = NULL;
1050         int master = -1;
1051         const char *console = NULL;
1052         struct termios saved_attr, raw_attr;
1053         sigset_t mask;
1054         bool saved_attr_valid = false;
1055         struct winsize ws;
1056         int kmsg_socket_pair[2] = { -1, -1 };
1057
1058         log_parse_environment();
1059         log_open();
1060
1061         r = parse_argv(argc, argv);
1062         if (r <= 0)
1063                 goto finish;
1064
1065         if (arg_directory) {
1066                 char *p;
1067
1068                 p = path_make_absolute_cwd(arg_directory);
1069                 free(arg_directory);
1070                 arg_directory = p;
1071         } else
1072                 arg_directory = get_current_dir_name();
1073
1074         if (!arg_directory) {
1075                 log_error("Failed to determine path");
1076                 goto finish;
1077         }
1078
1079         path_kill_slashes(arg_directory);
1080
1081         if (geteuid() != 0) {
1082                 log_error("Need to be root.");
1083                 goto finish;
1084         }
1085
1086         if (sd_booted() <= 0) {
1087                 log_error("Not running on a systemd system.");
1088                 goto finish;
1089         }
1090
1091         if (path_equal(arg_directory, "/")) {
1092                 log_error("Spawning container on root directory not supported.");
1093                 goto finish;
1094         }
1095
1096         if (is_os_tree(arg_directory) <= 0) {
1097                 log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory);
1098                 goto finish;
1099         }
1100
1101         k = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &oldcg);
1102         if (k < 0) {
1103                 log_error("Failed to determine current cgroup: %s", strerror(-k));
1104                 goto finish;
1105         }
1106
1107         if (asprintf(&newcg, "%s/nspawn-%lu", oldcg, (unsigned long) getpid()) < 0) {
1108                 log_error("Failed to allocate cgroup path.");
1109                 goto finish;
1110         }
1111
1112         k = cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER, newcg, 0);
1113         if (k < 0)  {
1114                 log_error("Failed to create cgroup: %s", strerror(-k));
1115                 goto finish;
1116         }
1117
1118         STRV_FOREACH(controller, arg_controllers) {
1119                 k = cg_create_and_attach(*controller, newcg, 0);
1120                 if (k < 0)
1121                         log_warning("Failed to create cgroup in controller %s: %s", *controller, strerror(-k));
1122         }
1123
1124         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
1125         if (master < 0) {
1126                 log_error("Failed to acquire pseudo tty: %m");
1127                 goto finish;
1128         }
1129
1130         console = ptsname(master);
1131         if (!console) {
1132                 log_error("Failed to determine tty name: %m");
1133                 goto finish;
1134         }
1135
1136         log_info("Spawning namespace container on %s (console is %s).", arg_directory, console);
1137
1138         if (ioctl(STDIN_FILENO, TIOCGWINSZ, &ws) >= 0)
1139                 ioctl(master, TIOCSWINSZ, &ws);
1140
1141         if (unlockpt(master) < 0) {
1142                 log_error("Failed to unlock tty: %m");
1143                 goto finish;
1144         }
1145
1146         if (tcgetattr(STDIN_FILENO, &saved_attr) < 0) {
1147                 log_error("Failed to get terminal attributes: %m");
1148                 goto finish;
1149         }
1150
1151         saved_attr_valid = true;
1152
1153         raw_attr = saved_attr;
1154         cfmakeraw(&raw_attr);
1155         raw_attr.c_lflag &= ~ECHO;
1156
1157         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
1158                 log_error("Failed to create kmsg socket pair");
1159                 goto finish;
1160         }
1161
1162         assert_se(sigemptyset(&mask) == 0);
1163         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
1164         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
1165
1166         for (;;) {
1167                 siginfo_t status;
1168
1169                 if (tcsetattr(STDIN_FILENO, TCSANOW, &raw_attr) < 0) {
1170                         log_error("Failed to set terminal attributes: %m");
1171                         goto finish;
1172                 }
1173
1174                 pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS|(arg_private_network ? CLONE_NEWNET : 0), NULL);
1175                 if (pid < 0) {
1176                         if (errno == EINVAL)
1177                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
1178                         else
1179                                 log_error("clone() failed: %m");
1180
1181                         goto finish;
1182                 }
1183
1184                 if (pid == 0) {
1185                         /* child */
1186
1187                         const char *home = NULL;
1188                         uid_t uid = (uid_t) -1;
1189                         gid_t gid = (gid_t) -1;
1190                         const char *envp[] = {
1191                                 "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
1192                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
1193                                 NULL, /* TERM */
1194                                 NULL, /* HOME */
1195                                 NULL, /* USER */
1196                                 NULL, /* LOGNAME */
1197                                 NULL, /* container_uuid */
1198                                 NULL
1199                         };
1200
1201                         envp[2] = strv_find_prefix(environ, "TERM=");
1202
1203                         close_nointr_nofail(master);
1204
1205                         close_nointr(STDIN_FILENO);
1206                         close_nointr(STDOUT_FILENO);
1207                         close_nointr(STDERR_FILENO);
1208
1209                         close_all_fds(&kmsg_socket_pair[1], 1);
1210
1211                         reset_all_signal_handlers();
1212
1213                         assert_se(sigemptyset(&mask) == 0);
1214                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
1215
1216                         if (open_terminal(console, O_RDWR) != STDIN_FILENO ||
1217                             dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
1218                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO)
1219                                 goto child_fail;
1220
1221                         if (setsid() < 0) {
1222                                 log_error("setsid() failed: %m");
1223                                 goto child_fail;
1224                         }
1225
1226                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
1227                                 log_error("PR_SET_PDEATHSIG failed: %m");
1228                                 goto child_fail;
1229                         }
1230
1231                         /* Mark everything as slave, so that we still
1232                          * receive mounts from the real root, but don't
1233                          * propagate mounts to the real root. */
1234                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
1235                                 log_error("MS_SLAVE|MS_REC failed: %m");
1236                                 goto child_fail;
1237                         }
1238
1239                         /* Turn directory into bind mount */
1240                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
1241                                 log_error("Failed to make bind mount.");
1242                                 goto child_fail;
1243                         }
1244
1245                         if (arg_read_only)
1246                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
1247                                         log_error("Failed to make read-only.");
1248                                         goto child_fail;
1249                                 }
1250
1251                         if (mount_all(arg_directory) < 0)
1252                                 goto child_fail;
1253
1254                         if (copy_devnodes(arg_directory) < 0)
1255                                 goto child_fail;
1256
1257                         dev_setup(arg_directory);
1258
1259                         if (setup_dev_console(arg_directory, console) < 0)
1260                                 goto child_fail;
1261
1262                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
1263                                 goto child_fail;
1264
1265                         close_nointr_nofail(kmsg_socket_pair[1]);
1266
1267                         if (setup_boot_id(arg_directory) < 0)
1268                                 goto child_fail;
1269
1270                         if (setup_timezone(arg_directory) < 0)
1271                                 goto child_fail;
1272
1273                         if (setup_resolv_conf(arg_directory) < 0)
1274                                 goto child_fail;
1275
1276                         if (setup_journal(arg_directory) < 0)
1277                                 goto child_fail;
1278
1279                         if (chdir(arg_directory) < 0) {
1280                                 log_error("chdir(%s) failed: %m", arg_directory);
1281                                 goto child_fail;
1282                         }
1283
1284                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
1285                                 log_error("mount(MS_MOVE) failed: %m");
1286                                 goto child_fail;
1287                         }
1288
1289                         if (chroot(".") < 0) {
1290                                 log_error("chroot() failed: %m");
1291                                 goto child_fail;
1292                         }
1293
1294                         if (chdir("/") < 0) {
1295                                 log_error("chdir() failed: %m");
1296                                 goto child_fail;
1297                         }
1298
1299                         umask(0022);
1300
1301                         loopback_setup();
1302
1303                         if (drop_capabilities() < 0) {
1304                                 log_error("drop_capabilities() failed: %m");
1305                                 goto child_fail;
1306                         }
1307
1308                         if (arg_user) {
1309
1310                                 /* Note that this resolves user names
1311                                  * inside the container, and hence
1312                                  * accesses the NSS modules from the
1313                                  * container and not the host. This is
1314                                  * a bit weird... */
1315
1316                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
1317                                         log_error("get_user_creds() failed: %m");
1318                                         goto child_fail;
1319                                 }
1320
1321                                 if (mkdir_parents_label(home, 0775) < 0) {
1322                                         log_error("mkdir_parents_label() failed: %m");
1323                                         goto child_fail;
1324                                 }
1325
1326                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
1327                                         log_error("mkdir_safe_label() failed: %m");
1328                                         goto child_fail;
1329                                 }
1330
1331                                 if (initgroups((const char*)arg_user, gid) < 0) {
1332                                         log_error("initgroups() failed: %m");
1333                                         goto child_fail;
1334                                 }
1335
1336                                 if (setresgid(gid, gid, gid) < 0) {
1337                                         log_error("setregid() failed: %m");
1338                                         goto child_fail;
1339                                 }
1340
1341                                 if (setresuid(uid, uid, uid) < 0) {
1342                                         log_error("setreuid() failed: %m");
1343                                         goto child_fail;
1344                                 }
1345                         }
1346
1347                         if ((asprintf((char**)(envp + 3), "HOME=%s", home ? home: "/root") < 0) ||
1348                             (asprintf((char**)(envp + 4), "USER=%s", arg_user ? arg_user : "root") < 0) ||
1349                             (asprintf((char**)(envp + 5), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
1350                                 log_oom();
1351                                 goto child_fail;
1352                         }
1353
1354                         if (arg_uuid) {
1355                                 if (asprintf((char**)(envp + 6), "container_uuid=%s", arg_uuid) < 0) {
1356                                         log_oom();
1357                                         goto child_fail;
1358                                 }
1359                         }
1360
1361                         setup_hostname();
1362
1363                         if (arg_boot) {
1364                                 char **a;
1365                                 size_t l;
1366
1367                                 /* Automatically search for the init system */
1368
1369                                 l = 1 + argc - optind;
1370                                 a = newa(char*, l + 1);
1371                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
1372
1373                                 a[0] = (char*) "/usr/lib/systemd/systemd";
1374                                 execve(a[0], a, (char**) envp);
1375
1376                                 a[0] = (char*) "/lib/systemd/systemd";
1377                                 execve(a[0], a, (char**) envp);
1378
1379                                 a[0] = (char*) "/sbin/init";
1380                                 execve(a[0], a, (char**) envp);
1381                         } else if (argc > optind)
1382                                 execvpe(argv[optind], argv + optind, (char**) envp);
1383                         else {
1384                                 chdir(home ? home : "/root");
1385                                 execle("/bin/bash", "-bash", NULL, (char**) envp);
1386                         }
1387
1388                         log_error("execv() failed: %m");
1389
1390                 child_fail:
1391                         _exit(EXIT_FAILURE);
1392                 }
1393
1394                 if (process_pty(master, &mask) < 0)
1395                         goto finish;
1396
1397
1398                 if (saved_attr_valid)
1399                         tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1400
1401                 r = wait_for_terminate(pid, &status);
1402                 if (r < 0) {
1403                         r = EXIT_FAILURE;
1404                         break;
1405                 }
1406
1407                 if (status.si_code == CLD_EXITED) {
1408                         if (status.si_status != 0) {
1409                                 log_error("Container failed with error code %i.", status.si_status);
1410                                 r = status.si_status;
1411                                 break;
1412                         }
1413
1414                         log_debug("Container exited successfully.");
1415                         break;
1416                 } else if (status.si_code == CLD_KILLED &&
1417                            status.si_status == SIGINT) {
1418                         log_info("Container has been shut down.");
1419                         r = 0;
1420                         break;
1421                 } else if (status.si_code == CLD_KILLED &&
1422                            status.si_status == SIGHUP) {
1423                         log_info("Container is being rebooted.");
1424                         continue;
1425                 } else if (status.si_code == CLD_KILLED ||
1426                            status.si_code == CLD_DUMPED) {
1427
1428                         log_error("Container terminated by signal %s.", signal_to_string(status.si_status));
1429                         r = EXIT_FAILURE;
1430                         break;
1431                 } else {
1432                         log_error("Container failed due to unknown reason.");
1433                         r = EXIT_FAILURE;
1434                         break;
1435                 }
1436         }
1437
1438 finish:
1439         if (saved_attr_valid)
1440                 tcsetattr(STDIN_FILENO, TCSANOW, &saved_attr);
1441
1442         if (master >= 0)
1443                 close_nointr_nofail(master);
1444
1445         close_pipe(kmsg_socket_pair);
1446
1447         if (oldcg)
1448                 cg_attach(SYSTEMD_CGROUP_CONTROLLER, oldcg, 0);
1449
1450         if (newcg)
1451                 cg_kill_recursive_and_wait(SYSTEMD_CGROUP_CONTROLLER, newcg, true);
1452
1453         free(arg_directory);
1454         strv_free(arg_controllers);
1455         free(oldcg);
1456         free(newcg);
1457
1458         return r;
1459 }