chiark / gitweb /
d8d0dae16426828372347a022e1d0aafb5fed414
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
44 #include <net/if.h>
45 #include <linux/veth.h>
46 #include <sys/personality.h>
47 #include <linux/loop.h>
48
49 #ifdef HAVE_SELINUX
50 #include <selinux/selinux.h>
51 #endif
52
53 #ifdef HAVE_SECCOMP
54 #include <seccomp.h>
55 #endif
56
57 #ifdef HAVE_BLKID
58 #include <blkid/blkid.h>
59 #endif
60
61 #include "sd-daemon.h"
62 #include "sd-bus.h"
63 #include "sd-id128.h"
64 #include "sd-rtnl.h"
65 #include "log.h"
66 #include "util.h"
67 #include "mkdir.h"
68 #include "macro.h"
69 #include "audit.h"
70 #include "missing.h"
71 #include "cgroup-util.h"
72 #include "strv.h"
73 #include "path-util.h"
74 #include "loopback-setup.h"
75 #include "dev-setup.h"
76 #include "fdset.h"
77 #include "build.h"
78 #include "fileio.h"
79 #include "bus-util.h"
80 #include "bus-error.h"
81 #include "ptyfwd.h"
82 #include "bus-kernel.h"
83 #include "env-util.h"
84 #include "def.h"
85 #include "rtnl-util.h"
86 #include "udev-util.h"
87 #include "blkid-util.h"
88 #include "gpt.h"
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
/* How the container's journal is linked with the host's, as selected with
 * --link-journal= (or the -j shortcut) in parse_argv(). */
typedef enum LinkJournal {
        LINK_NO    = 0,   /* --link-journal=no */
        LINK_AUTO  = 1,   /* --link-journal=auto; this is the built-in default */
        LINK_HOST  = 2,   /* --link-journal=host */
        LINK_GUEST = 3    /* --link-journal=guest, also what -j selects */
} LinkJournal;
100
101 static char *arg_directory = NULL;
102 static char *arg_user = NULL;
103 static sd_id128_t arg_uuid = {};
104 static char *arg_machine = NULL;
105 static const char *arg_selinux_context = NULL;
106 static const char *arg_selinux_apifs_context = NULL;
107 static const char *arg_slice = NULL;
108 static bool arg_private_network = false;
109 static bool arg_read_only = false;
110 static bool arg_boot = false;
111 static LinkJournal arg_link_journal = LINK_AUTO;
112 static uint64_t arg_retain =
113         (1ULL << CAP_CHOWN) |
114         (1ULL << CAP_DAC_OVERRIDE) |
115         (1ULL << CAP_DAC_READ_SEARCH) |
116         (1ULL << CAP_FOWNER) |
117         (1ULL << CAP_FSETID) |
118         (1ULL << CAP_IPC_OWNER) |
119         (1ULL << CAP_KILL) |
120         (1ULL << CAP_LEASE) |
121         (1ULL << CAP_LINUX_IMMUTABLE) |
122         (1ULL << CAP_NET_BIND_SERVICE) |
123         (1ULL << CAP_NET_BROADCAST) |
124         (1ULL << CAP_NET_RAW) |
125         (1ULL << CAP_SETGID) |
126         (1ULL << CAP_SETFCAP) |
127         (1ULL << CAP_SETPCAP) |
128         (1ULL << CAP_SETUID) |
129         (1ULL << CAP_SYS_ADMIN) |
130         (1ULL << CAP_SYS_CHROOT) |
131         (1ULL << CAP_SYS_NICE) |
132         (1ULL << CAP_SYS_PTRACE) |
133         (1ULL << CAP_SYS_TTY_CONFIG) |
134         (1ULL << CAP_SYS_RESOURCE) |
135         (1ULL << CAP_SYS_BOOT) |
136         (1ULL << CAP_AUDIT_WRITE) |
137         (1ULL << CAP_AUDIT_CONTROL) |
138         (1ULL << CAP_MKNOD);
139 static char **arg_bind = NULL;
140 static char **arg_bind_ro = NULL;
141 static char **arg_setenv = NULL;
142 static bool arg_quiet = false;
143 static bool arg_share_system = false;
144 static bool arg_register = true;
145 static bool arg_keep_unit = false;
146 static char **arg_network_interfaces = NULL;
147 static char **arg_network_macvlan = NULL;
148 static bool arg_network_veth = false;
149 static const char *arg_network_bridge = NULL;
150 static unsigned long arg_personality = 0xffffffffLU;
151 static const char *arg_image = NULL;
152
153 static int help(void) {
154
155         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
156                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
157                "  -h --help                 Show this help\n"
158                "     --version              Print version string\n"
159                "  -q --quiet                Do not show status information\n"
160                "  -D --directory=PATH       Root directory for the container\n"
161                "  -i --image=PATH           File system device or image for the container\n"
162                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
163                "  -u --user=USER            Run the command under specified user or uid\n"
164                "  -M --machine=NAME         Set the machine name for the container\n"
165                "     --uuid=UUID            Set a specific machine UUID for the container\n"
166                "  -S --slice=SLICE          Place the container in the specified slice\n"
167                "     --private-network      Disable network in container\n"
168                "     --network-interface=INTERFACE\n"
169                "                            Assign an existing network interface to the\n"
170                "                            container\n"
171                "     --network-macvlan=INTERFACE\n"
172                "                            Create a macvlan network interface based on an\n"
173                "                            existing network interface to the container\n"
174                "     --network-veth         Add a virtual ethernet connection between host\n"
175                "                            and container\n"
176                "     --network-bridge=INTERFACE\n"
177                "                            Add a virtual ethernet connection between host\n"
178                "                            and container and add it to an existing bridge on\n"
179                "                            the host\n"
180                "  -Z --selinux-context=SECLABEL\n"
181                "                            Set the SELinux security context to be used by\n"
182                "                            processes in the container\n"
183                "  -L --selinux-apifs-context=SECLABEL\n"
184                "                            Set the SELinux security context to be used by\n"
185                "                            API/tmpfs file systems in the container\n"
186                "     --capability=CAP       In addition to the default, retain specified\n"
187                "                            capability\n"
188                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
189                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
190                "  -j                        Equivalent to --link-journal=host\n"
191                "     --read-only            Mount the root directory read-only\n"
192                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
193                "                            the container\n"
194                "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
195                "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
196                "     --share-system         Share system namespaces with host\n"
197                "     --register=BOOLEAN     Register container as machine\n"
198                "     --keep-unit            Do not register a scope for the machine, reuse\n"
199                "                            the service unit nspawn is running in\n",
200                program_invocation_short_name);
201
202         return 0;
203 }
204
205 static int parse_argv(int argc, char *argv[]) {
206
207         enum {
208                 ARG_VERSION = 0x100,
209                 ARG_PRIVATE_NETWORK,
210                 ARG_UUID,
211                 ARG_READ_ONLY,
212                 ARG_CAPABILITY,
213                 ARG_DROP_CAPABILITY,
214                 ARG_LINK_JOURNAL,
215                 ARG_BIND,
216                 ARG_BIND_RO,
217                 ARG_SETENV,
218                 ARG_SHARE_SYSTEM,
219                 ARG_REGISTER,
220                 ARG_KEEP_UNIT,
221                 ARG_NETWORK_INTERFACE,
222                 ARG_NETWORK_MACVLAN,
223                 ARG_NETWORK_VETH,
224                 ARG_NETWORK_BRIDGE,
225                 ARG_PERSONALITY,
226         };
227
228         static const struct option options[] = {
229                 { "help",                  no_argument,       NULL, 'h'                   },
230                 { "version",               no_argument,       NULL, ARG_VERSION           },
231                 { "directory",             required_argument, NULL, 'D'                   },
232                 { "user",                  required_argument, NULL, 'u'                   },
233                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
234                 { "boot",                  no_argument,       NULL, 'b'                   },
235                 { "uuid",                  required_argument, NULL, ARG_UUID              },
236                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
237                 { "capability",            required_argument, NULL, ARG_CAPABILITY        },
238                 { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
239                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
240                 { "bind",                  required_argument, NULL, ARG_BIND              },
241                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
242                 { "machine",               required_argument, NULL, 'M'                   },
243                 { "slice",                 required_argument, NULL, 'S'                   },
244                 { "setenv",                required_argument, NULL, ARG_SETENV            },
245                 { "selinux-context",       required_argument, NULL, 'Z'                   },
246                 { "selinux-apifs-context", required_argument, NULL, 'L'                   },
247                 { "quiet",                 no_argument,       NULL, 'q'                   },
248                 { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
249                 { "register",              required_argument, NULL, ARG_REGISTER          },
250                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
251                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
252                 { "network-macvlan",       required_argument, NULL, ARG_NETWORK_MACVLAN   },
253                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
254                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
255                 { "personality",           required_argument, NULL, ARG_PERSONALITY       },
256                 { "image",                 required_argument, NULL, 'i'                   },
257                 {}
258         };
259
260         int c, r;
261         uint64_t plus = 0, minus = 0;
262
263         assert(argc >= 0);
264         assert(argv);
265
266         while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:", options, NULL)) >= 0) {
267
268                 switch (c) {
269
270                 case 'h':
271                         return help();
272
273                 case ARG_VERSION:
274                         puts(PACKAGE_STRING);
275                         puts(SYSTEMD_FEATURES);
276                         return 0;
277
278                 case 'D':
279                         free(arg_directory);
280                         arg_directory = canonicalize_file_name(optarg);
281                         if (!arg_directory) {
282                                 log_error("Invalid root directory: %m");
283                                 return -ENOMEM;
284                         }
285
286                         break;
287
288                 case 'i':
289                         arg_image = optarg;
290                         break;
291
292                 case 'u':
293                         free(arg_user);
294                         arg_user = strdup(optarg);
295                         if (!arg_user)
296                                 return log_oom();
297
298                         break;
299
300                 case ARG_NETWORK_BRIDGE:
301                         arg_network_bridge = optarg;
302
303                         /* fall through */
304
305                 case ARG_NETWORK_VETH:
306                         arg_network_veth = true;
307                         arg_private_network = true;
308                         break;
309
310                 case ARG_NETWORK_INTERFACE:
311                         if (strv_extend(&arg_network_interfaces, optarg) < 0)
312                                 return log_oom();
313
314                         arg_private_network = true;
315                         break;
316
317                 case ARG_NETWORK_MACVLAN:
318                         if (strv_extend(&arg_network_macvlan, optarg) < 0)
319                                 return log_oom();
320
321                         /* fall through */
322
323                 case ARG_PRIVATE_NETWORK:
324                         arg_private_network = true;
325                         break;
326
327                 case 'b':
328                         arg_boot = true;
329                         break;
330
331                 case ARG_UUID:
332                         r = sd_id128_from_string(optarg, &arg_uuid);
333                         if (r < 0) {
334                                 log_error("Invalid UUID: %s", optarg);
335                                 return r;
336                         }
337                         break;
338
339                 case 'S':
340                         arg_slice = optarg;
341                         break;
342
343                 case 'M':
344                         if (isempty(optarg)) {
345                                 free(arg_machine);
346                                 arg_machine = NULL;
347                         } else {
348
349                                 if (!hostname_is_valid(optarg)) {
350                                         log_error("Invalid machine name: %s", optarg);
351                                         return -EINVAL;
352                                 }
353
354                                 free(arg_machine);
355                                 arg_machine = strdup(optarg);
356                                 if (!arg_machine)
357                                         return log_oom();
358
359                                 break;
360                         }
361
362                 case 'Z':
363                         arg_selinux_context = optarg;
364                         break;
365
366                 case 'L':
367                         arg_selinux_apifs_context = optarg;
368                         break;
369
370                 case ARG_READ_ONLY:
371                         arg_read_only = true;
372                         break;
373
374                 case ARG_CAPABILITY:
375                 case ARG_DROP_CAPABILITY: {
376                         char *state, *word;
377                         size_t length;
378
379                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
380                                 _cleanup_free_ char *t;
381                                 cap_value_t cap;
382
383                                 t = strndup(word, length);
384                                 if (!t)
385                                         return log_oom();
386
387                                 if (streq(t, "all")) {
388                                         if (c == ARG_CAPABILITY)
389                                                 plus = (uint64_t) -1;
390                                         else
391                                                 minus = (uint64_t) -1;
392                                 } else {
393                                         if (cap_from_name(t, &cap) < 0) {
394                                                 log_error("Failed to parse capability %s.", t);
395                                                 return -EINVAL;
396                                         }
397
398                                         if (c == ARG_CAPABILITY)
399                                                 plus |= 1ULL << (uint64_t) cap;
400                                         else
401                                                 minus |= 1ULL << (uint64_t) cap;
402                                 }
403                         }
404
405                         break;
406                 }
407
408                 case 'j':
409                         arg_link_journal = LINK_GUEST;
410                         break;
411
412                 case ARG_LINK_JOURNAL:
413                         if (streq(optarg, "auto"))
414                                 arg_link_journal = LINK_AUTO;
415                         else if (streq(optarg, "no"))
416                                 arg_link_journal = LINK_NO;
417                         else if (streq(optarg, "guest"))
418                                 arg_link_journal = LINK_GUEST;
419                         else if (streq(optarg, "host"))
420                                 arg_link_journal = LINK_HOST;
421                         else {
422                                 log_error("Failed to parse link journal mode %s", optarg);
423                                 return -EINVAL;
424                         }
425
426                         break;
427
428                 case ARG_BIND:
429                 case ARG_BIND_RO: {
430                         _cleanup_free_ char *a = NULL, *b = NULL;
431                         char *e;
432                         char ***x;
433
434                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
435
436                         e = strchr(optarg, ':');
437                         if (e) {
438                                 a = strndup(optarg, e - optarg);
439                                 b = strdup(e + 1);
440                         } else {
441                                 a = strdup(optarg);
442                                 b = strdup(optarg);
443                         }
444
445                         if (!a || !b)
446                                 return log_oom();
447
448                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
449                                 log_error("Invalid bind mount specification: %s", optarg);
450                                 return -EINVAL;
451                         }
452
453                         r = strv_extend(x, a);
454                         if (r < 0)
455                                 return log_oom();
456
457                         r = strv_extend(x, b);
458                         if (r < 0)
459                                 return log_oom();
460
461                         break;
462                 }
463
464                 case ARG_SETENV: {
465                         char **n;
466
467                         if (!env_assignment_is_valid(optarg)) {
468                                 log_error("Environment variable assignment '%s' is not valid.", optarg);
469                                 return -EINVAL;
470                         }
471
472                         n = strv_env_set(arg_setenv, optarg);
473                         if (!n)
474                                 return log_oom();
475
476                         strv_free(arg_setenv);
477                         arg_setenv = n;
478                         break;
479                 }
480
481                 case 'q':
482                         arg_quiet = true;
483                         break;
484
485                 case ARG_SHARE_SYSTEM:
486                         arg_share_system = true;
487                         break;
488
489                 case ARG_REGISTER:
490                         r = parse_boolean(optarg);
491                         if (r < 0) {
492                                 log_error("Failed to parse --register= argument: %s", optarg);
493                                 return r;
494                         }
495
496                         arg_register = r;
497                         break;
498
499                 case ARG_KEEP_UNIT:
500                         arg_keep_unit = true;
501                         break;
502
503                 case ARG_PERSONALITY:
504
505                         arg_personality = personality_from_string(optarg);
506                         if (arg_personality == 0xffffffffLU) {
507                                 log_error("Unknown or unsupported personality '%s'.", optarg);
508                                 return -EINVAL;
509                         }
510
511                         break;
512
513                 case '?':
514                         return -EINVAL;
515
516                 default:
517                         assert_not_reached("Unhandled option");
518                 }
519         }
520
521         if (arg_share_system)
522                 arg_register = false;
523
524         if (arg_boot && arg_share_system) {
525                 log_error("--boot and --share-system may not be combined.");
526                 return -EINVAL;
527         }
528
529         if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
530                 log_error("--keep-unit may not be used when invoked from a user session.");
531                 return -EINVAL;
532         }
533
534         if (arg_directory && arg_image) {
535                 log_error("--directory= and --image= may not be combined.");
536                 return -EINVAL;
537         }
538
539         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
540
541         return 1;
542 }
543
544 static int mount_all(const char *dest) {
545
546         typedef struct MountPoint {
547                 const char *what;
548                 const char *where;
549                 const char *type;
550                 const char *options;
551                 unsigned long flags;
552                 bool fatal;
553         } MountPoint;
554
555         static const MountPoint mount_table[] = {
556                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
557                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
558                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
559                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
560                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
561                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
562                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
563                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
564 #ifdef HAVE_SELINUX
565                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
566                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
567 #endif
568         };
569
570         unsigned k;
571         int r = 0;
572
573         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
574                 _cleanup_free_ char *where = NULL;
575 #ifdef HAVE_SELINUX
576                 _cleanup_free_ char *options = NULL;
577 #endif
578                 const char *o;
579                 int t;
580
581                 where = strjoin(dest, "/", mount_table[k].where, NULL);
582                 if (!where)
583                         return log_oom();
584
585                 t = path_is_mount_point(where, true);
586                 if (t < 0) {
587                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
588
589                         if (r == 0)
590                                 r = t;
591
592                         continue;
593                 }
594
595                 /* Skip this entry if it is not a remount. */
596                 if (mount_table[k].what && t > 0)
597                         continue;
598
599                 mkdir_p(where, 0755);
600
601 #ifdef HAVE_SELINUX
602                 if (arg_selinux_apifs_context &&
603                     (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
604                         options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
605                         if (!options)
606                                 return log_oom();
607
608                         o = options;
609                 } else
610 #endif
611                         o = mount_table[k].options;
612
613
614                 if (mount(mount_table[k].what,
615                           where,
616                           mount_table[k].type,
617                           mount_table[k].flags,
618                           o) < 0 &&
619                     mount_table[k].fatal) {
620
621                         log_error("mount(%s) failed: %m", where);
622
623                         if (r == 0)
624                                 r = -errno;
625                 }
626         }
627
628         return r;
629 }
630
631 static int mount_binds(const char *dest, char **l, unsigned long flags) {
632         char **x, **y;
633
634         STRV_FOREACH_PAIR(x, y, l) {
635                 char *where;
636                 struct stat source_st, dest_st;
637                 int r;
638
639                 if (stat(*x, &source_st) < 0) {
640                         log_error("Failed to stat %s: %m", *x);
641                         return -errno;
642                 }
643
644                 where = strappenda(dest, *y);
645                 r = stat(where, &dest_st);
646                 if (r == 0) {
647                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
648                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
649                                                 *x, where);
650                                 return -EINVAL;
651                         }
652                 } else if (errno == ENOENT) {
653                         r = mkdir_parents_label(where, 0755);
654                         if (r < 0) {
655                                 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
656                                 return r;
657                         }
658                 } else {
659                         log_error("Failed to bind mount %s: %s", *x, strerror(errno));
660                         return -errno;
661                 }
662                 /* Create the mount point, but be conservative -- refuse to create block
663                 * and char devices. */
664                 if (S_ISDIR(source_st.st_mode))
665                         mkdir_label(where, 0755);
666                 else if (S_ISFIFO(source_st.st_mode))
667                         mkfifo(where, 0644);
668                 else if (S_ISSOCK(source_st.st_mode))
669                         mknod(where, 0644 | S_IFSOCK, 0);
670                 else if (S_ISREG(source_st.st_mode))
671                         touch(where);
672                 else {
673                         log_error("Refusing to create mountpoint for file: %s", *x);
674                         return -ENOTSUP;
675                 }
676
677                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
678                         log_error("mount(%s) failed: %m", where);
679                         return -errno;
680                 }
681
682                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
683                         log_error("mount(%s) failed: %m", where);
684                         return -errno;
685                 }
686         }
687
688         return 0;
689 }
690
691 static int setup_timezone(const char *dest) {
692         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
693         char *z, *y;
694         int r;
695
696         assert(dest);
697
698         /* Fix the timezone, if possible */
699         r = readlink_malloc("/etc/localtime", &p);
700         if (r < 0) {
701                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
702                 return 0;
703         }
704
705         z = path_startswith(p, "../usr/share/zoneinfo/");
706         if (!z)
707                 z = path_startswith(p, "/usr/share/zoneinfo/");
708         if (!z) {
709                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
710                 return 0;
711         }
712
713         where = strappend(dest, "/etc/localtime");
714         if (!where)
715                 return log_oom();
716
717         r = readlink_malloc(where, &q);
718         if (r >= 0) {
719                 y = path_startswith(q, "../usr/share/zoneinfo/");
720                 if (!y)
721                         y = path_startswith(q, "/usr/share/zoneinfo/");
722
723
724                 /* Already pointing to the right place? Then do nothing .. */
725                 if (y && streq(y, z))
726                         return 0;
727         }
728
729         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
730         if (!check)
731                 return log_oom();
732
733         if (access(check, F_OK) < 0) {
734                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
735                 return 0;
736         }
737
738         what = strappend("../usr/share/zoneinfo/", z);
739         if (!what)
740                 return log_oom();
741
742         unlink(where);
743         if (symlink(what, where) < 0) {
744                 log_error("Failed to correct timezone of container: %m");
745                 return 0;
746         }
747
748         return 0;
749 }
750
751 static int setup_resolv_conf(const char *dest) {
752         char _cleanup_free_ *where = NULL;
753
754         assert(dest);
755
756         if (arg_private_network)
757                 return 0;
758
759         /* Fix resolv.conf, if possible */
760         where = strappend(dest, "/etc/resolv.conf");
761         if (!where)
762                 return log_oom();
763
764         /* We don't really care for the results of this really. If it
765          * fails, it fails, but meh... */
766         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
767
768         return 0;
769 }
770
771 static int setup_boot_id(const char *dest) {
772         _cleanup_free_ char *from = NULL, *to = NULL;
773         sd_id128_t rnd = {};
774         char as_uuid[37];
775         int r;
776
777         assert(dest);
778
779         if (arg_share_system)
780                 return 0;
781
782         /* Generate a new randomized boot ID, so that each boot-up of
783          * the container gets a new one */
784
785         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
786         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
787         if (!from || !to)
788                 return log_oom();
789
790         r = sd_id128_randomize(&rnd);
791         if (r < 0) {
792                 log_error("Failed to generate random boot id: %s", strerror(-r));
793                 return r;
794         }
795
796         snprintf(as_uuid, sizeof(as_uuid),
797                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
798                  SD_ID128_FORMAT_VAL(rnd));
799         char_array_0(as_uuid);
800
801         r = write_string_file(from, as_uuid);
802         if (r < 0) {
803                 log_error("Failed to write boot id: %s", strerror(-r));
804                 return r;
805         }
806
807         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
808                 log_error("Failed to bind mount boot id: %m");
809                 r = -errno;
810         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
811                 log_warning("Failed to make boot id read-only: %m");
812
813         unlink(from);
814         return r;
815 }
816
817 static int copy_devnodes(const char *dest) {
818
819         static const char devnodes[] =
820                 "null\0"
821                 "zero\0"
822                 "full\0"
823                 "random\0"
824                 "urandom\0"
825                 "tty\0";
826
827         const char *d;
828         int r = 0;
829         _cleanup_umask_ mode_t u;
830
831         assert(dest);
832
833         u = umask(0000);
834
835         NULSTR_FOREACH(d, devnodes) {
836                 _cleanup_free_ char *from = NULL, *to = NULL;
837                 struct stat st;
838
839                 from = strappend("/dev/", d);
840                 to = strjoin(dest, "/dev/", d, NULL);
841                 if (!from || !to)
842                         return log_oom();
843
844                 if (stat(from, &st) < 0) {
845
846                         if (errno != ENOENT) {
847                                 log_error("Failed to stat %s: %m", from);
848                                 return -errno;
849                         }
850
851                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
852
853                         log_error("%s is not a char or block device, cannot copy", from);
854                         return -EIO;
855
856                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
857
858                         log_error("mknod(%s) failed: %m", dest);
859                         return  -errno;
860                 }
861         }
862
863         return r;
864 }
865
866 static int setup_ptmx(const char *dest) {
867         _cleanup_free_ char *p = NULL;
868
869         p = strappend(dest, "/dev/ptmx");
870         if (!p)
871                 return log_oom();
872
873         if (symlink("pts/ptmx", p) < 0) {
874                 log_error("Failed to create /dev/ptmx symlink: %m");
875                 return -errno;
876         }
877
878         return 0;
879 }
880
881 static int setup_dev_console(const char *dest, const char *console) {
882         struct stat st;
883         _cleanup_free_ char *to = NULL;
884         int r;
885         _cleanup_umask_ mode_t u;
886
887         assert(dest);
888         assert(console);
889
890         u = umask(0000);
891
892         if (stat(console, &st) < 0) {
893                 log_error("Failed to stat %s: %m", console);
894                 return -errno;
895
896         } else if (!S_ISCHR(st.st_mode)) {
897                 log_error("/dev/console is not a char device");
898                 return -EIO;
899         }
900
901         r = chmod_and_chown(console, 0600, 0, 0);
902         if (r < 0) {
903                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
904                 return r;
905         }
906
907         if (asprintf(&to, "%s/dev/console", dest) < 0)
908                 return log_oom();
909
910         /* We need to bind mount the right tty to /dev/console since
911          * ptys can only exist on pts file systems. To have something
912          * to bind mount things on we create a device node first, that
913          * has the right major/minor (note that the major minor
914          * doesn't actually matter here, since we mount it over
915          * anyway). */
916
917         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
918                 log_error("mknod() for /dev/console failed: %m");
919                 return -errno;
920         }
921
922         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
923                 log_error("Bind mount for /dev/console failed: %m");
924                 return -errno;
925         }
926
927         return 0;
928 }
929
930 static int setup_kmsg(const char *dest, int kmsg_socket) {
931         _cleanup_free_ char *from = NULL, *to = NULL;
932         int r, fd, k;
933         _cleanup_umask_ mode_t u;
934         union {
935                 struct cmsghdr cmsghdr;
936                 uint8_t buf[CMSG_SPACE(sizeof(int))];
937         } control = {};
938         struct msghdr mh = {
939                 .msg_control = &control,
940                 .msg_controllen = sizeof(control),
941         };
942         struct cmsghdr *cmsg;
943
944         assert(dest);
945         assert(kmsg_socket >= 0);
946
947         u = umask(0000);
948
949         /* We create the kmsg FIFO as /dev/kmsg, but immediately
950          * delete it after bind mounting it to /proc/kmsg. While FIFOs
951          * on the reading side behave very similar to /proc/kmsg,
952          * their writing side behaves differently from /dev/kmsg in
953          * that writing blocks when nothing is reading. In order to
954          * avoid any problems with containers deadlocking due to this
955          * we simply make /dev/kmsg unavailable to the container. */
956         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
957             asprintf(&to, "%s/proc/kmsg", dest) < 0)
958                 return log_oom();
959
960         if (mkfifo(from, 0600) < 0) {
961                 log_error("mkfifo() for /dev/kmsg failed: %m");
962                 return -errno;
963         }
964
965         r = chmod_and_chown(from, 0600, 0, 0);
966         if (r < 0) {
967                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
968                 return r;
969         }
970
971         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
972                 log_error("Bind mount for /proc/kmsg failed: %m");
973                 return -errno;
974         }
975
976         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
977         if (fd < 0) {
978                 log_error("Failed to open fifo: %m");
979                 return -errno;
980         }
981
982         cmsg = CMSG_FIRSTHDR(&mh);
983         cmsg->cmsg_level = SOL_SOCKET;
984         cmsg->cmsg_type = SCM_RIGHTS;
985         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
986         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
987
988         mh.msg_controllen = cmsg->cmsg_len;
989
990         /* Store away the fd in the socket, so that it stays open as
991          * long as we run the child */
992         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
993         close_nointr_nofail(fd);
994
995         if (k < 0) {
996                 log_error("Failed to send FIFO fd: %m");
997                 return -errno;
998         }
999
1000         /* And now make the FIFO unavailable as /dev/kmsg... */
1001         unlink(from);
1002         return 0;
1003 }
1004
1005 static int setup_hostname(void) {
1006
1007         if (arg_share_system)
1008                 return 0;
1009
1010         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
1011                 return -errno;
1012
1013         return 0;
1014 }
1015
1016 static int setup_journal(const char *directory) {
1017         sd_id128_t machine_id, this_id;
1018         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
1019         char *id;
1020         int r;
1021
1022         p = strappend(directory, "/etc/machine-id");
1023         if (!p)
1024                 return log_oom();
1025
1026         r = read_one_line_file(p, &b);
1027         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
1028                 return 0;
1029         else if (r < 0) {
1030                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
1031                 return r;
1032         }
1033
1034         id = strstrip(b);
1035         if (isempty(id) && arg_link_journal == LINK_AUTO)
1036                 return 0;
1037
1038         /* Verify validity */
1039         r = sd_id128_from_string(id, &machine_id);
1040         if (r < 0) {
1041                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1042                 return r;
1043         }
1044
1045         r = sd_id128_get_machine(&this_id);
1046         if (r < 0) {
1047                 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1048                 return r;
1049         }
1050
1051         if (sd_id128_equal(machine_id, this_id)) {
1052                 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1053                          "Host and machine ids are equal (%s): refusing to link journals", id);
1054                 if (arg_link_journal == LINK_AUTO)
1055                         return 0;
1056                 return
1057                         -EEXIST;
1058         }
1059
1060         if (arg_link_journal == LINK_NO)
1061                 return 0;
1062
1063         free(p);
1064         p = strappend("/var/log/journal/", id);
1065         q = strjoin(directory, "/var/log/journal/", id, NULL);
1066         if (!p || !q)
1067                 return log_oom();
1068
1069         if (path_is_mount_point(p, false) > 0) {
1070                 if (arg_link_journal != LINK_AUTO) {
1071                         log_error("%s: already a mount point, refusing to use for journal", p);
1072                         return -EEXIST;
1073                 }
1074
1075                 return 0;
1076         }
1077
1078         if (path_is_mount_point(q, false) > 0) {
1079                 if (arg_link_journal != LINK_AUTO) {
1080                         log_error("%s: already a mount point, refusing to use for journal", q);
1081                         return -EEXIST;
1082                 }
1083
1084                 return 0;
1085         }
1086
1087         r = readlink_and_make_absolute(p, &d);
1088         if (r >= 0) {
1089                 if ((arg_link_journal == LINK_GUEST ||
1090                      arg_link_journal == LINK_AUTO) &&
1091                     path_equal(d, q)) {
1092
1093                         r = mkdir_p(q, 0755);
1094                         if (r < 0)
1095                                 log_warning("failed to create directory %s: %m", q);
1096                         return 0;
1097                 }
1098
1099                 if (unlink(p) < 0) {
1100                         log_error("Failed to remove symlink %s: %m", p);
1101                         return -errno;
1102                 }
1103         } else if (r == -EINVAL) {
1104
1105                 if (arg_link_journal == LINK_GUEST &&
1106                     rmdir(p) < 0) {
1107
1108                         if (errno == ENOTDIR) {
1109                                 log_error("%s already exists and is neither a symlink nor a directory", p);
1110                                 return r;
1111                         } else {
1112                                 log_error("Failed to remove %s: %m", p);
1113                                 return -errno;
1114                         }
1115                 }
1116         } else if (r != -ENOENT) {
1117                 log_error("readlink(%s) failed: %m", p);
1118                 return r;
1119         }
1120
1121         if (arg_link_journal == LINK_GUEST) {
1122
1123                 if (symlink(q, p) < 0) {
1124                         log_error("Failed to symlink %s to %s: %m", q, p);
1125                         return -errno;
1126                 }
1127
1128                 r = mkdir_p(q, 0755);
1129                 if (r < 0)
1130                         log_warning("failed to create directory %s: %m", q);
1131                 return 0;
1132         }
1133
1134         if (arg_link_journal == LINK_HOST) {
1135                 r = mkdir_p(p, 0755);
1136                 if (r < 0) {
1137                         log_error("Failed to create %s: %m", p);
1138                         return r;
1139                 }
1140
1141         } else if (access(p, F_OK) < 0)
1142                 return 0;
1143
1144         if (dir_is_empty(q) == 0) {
1145                 log_error("%s not empty.", q);
1146                 return -ENOTEMPTY;
1147         }
1148
1149         r = mkdir_p(q, 0755);
1150         if (r < 0) {
1151                 log_error("Failed to create %s: %m", q);
1152                 return r;
1153         }
1154
1155         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1156                 log_error("Failed to bind mount journal from host into guest: %m");
1157                 return -errno;
1158         }
1159
1160         return 0;
1161 }
1162
1163 static int setup_kdbus(const char *dest, const char *path) {
1164         const char *p;
1165
1166         if (!path)
1167                 return 0;
1168
1169         p = strappenda(dest, "/dev/kdbus");
1170         if (mkdir(p, 0755) < 0) {
1171                 log_error("Failed to create kdbus path: %m");
1172                 return  -errno;
1173         }
1174
1175         if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1176                 log_error("Failed to mount kdbus domain path: %m");
1177                 return -errno;
1178         }
1179
1180         return 0;
1181 }
1182
1183 static int drop_capabilities(void) {
1184         return capability_bounding_set_drop(~arg_retain, false);
1185 }
1186
1187 static int register_machine(pid_t pid) {
1188         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1189         _cleanup_bus_unref_ sd_bus *bus = NULL;
1190         int r;
1191
1192         if (!arg_register)
1193                 return 0;
1194
1195         r = sd_bus_default_system(&bus);
1196         if (r < 0) {
1197                 log_error("Failed to open system bus: %s", strerror(-r));
1198                 return r;
1199         }
1200
1201         if (arg_keep_unit) {
1202                 r = sd_bus_call_method(
1203                                 bus,
1204                                 "org.freedesktop.machine1",
1205                                 "/org/freedesktop/machine1",
1206                                 "org.freedesktop.machine1.Manager",
1207                                 "RegisterMachine",
1208                                 &error,
1209                                 NULL,
1210                                 "sayssus",
1211                                 arg_machine,
1212                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1213                                 "nspawn",
1214                                 "container",
1215                                 (uint32_t) pid,
1216                                 strempty(arg_directory));
1217         } else {
1218                 _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
1219
1220                 r = sd_bus_message_new_method_call(
1221                                 bus,
1222                                 &m,
1223                                 "org.freedesktop.machine1",
1224                                 "/org/freedesktop/machine1",
1225                                 "org.freedesktop.machine1.Manager",
1226                                 "CreateMachine");
1227                 if (r < 0) {
1228                         log_error("Failed to create message: %s", strerror(-r));
1229                         return r;
1230                 }
1231
1232                 r = sd_bus_message_append(
1233                                 m,
1234                                 "sayssus",
1235                                 arg_machine,
1236                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1237                                 "nspawn",
1238                                 "container",
1239                                 (uint32_t) pid,
1240                                 strempty(arg_directory));
1241                 if (r < 0) {
1242                         log_error("Failed to append message arguments: %s", strerror(-r));
1243                         return r;
1244                 }
1245
1246                 r = sd_bus_message_open_container(m, 'a', "(sv)");
1247                 if (r < 0) {
1248                         log_error("Failed to open container: %s", strerror(-r));
1249                         return r;
1250                 }
1251
1252                 if (!isempty(arg_slice)) {
1253                         r = sd_bus_message_append(m, "(sv)", "Slice", "s", arg_slice);
1254                         if (r < 0) {
1255                                 log_error("Failed to append slice: %s", strerror(-r));
1256                                 return r;
1257                         }
1258                 }
1259
1260                 r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
1261                 if (r < 0) {
1262                         log_error("Failed to add device policy: %s", strerror(-r));
1263                         return r;
1264                 }
1265
1266                 r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 8,
1267                                           /* Allow the container to
1268                                            * access and create the API
1269                                            * device nodes, so that
1270                                            * PrivateDevices= in the
1271                                            * container can work
1272                                            * fine */
1273                                           "/dev/null", "rwm",
1274                                           "/dev/zero", "rwm",
1275                                           "/dev/full", "rwm",
1276                                           "/dev/random", "rwm",
1277                                           "/dev/urandom", "rwm",
1278                                           "/dev/tty", "rwm",
1279                                           /* Allow the container
1280                                            * access to ptys. However,
1281                                            * do not permit the
1282                                            * container to ever create
1283                                            * these device nodes. */
1284                                           "/dev/pts/ptmx", "rw",
1285                                           "char-pts", "rw");
1286                 if (r < 0) {
1287                         log_error("Failed to add device whitelist: %s", strerror(-r));
1288                         return r;
1289                 }
1290
1291                 r = sd_bus_message_close_container(m);
1292                 if (r < 0) {
1293                         log_error("Failed to close container: %s", strerror(-r));
1294                         return r;
1295                 }
1296
1297                 r = sd_bus_call(bus, m, 0, &error, NULL);
1298         }
1299
1300         if (r < 0) {
1301                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1302                 return r;
1303         }
1304
1305         return 0;
1306 }
1307
1308 static int terminate_machine(pid_t pid) {
1309         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1310         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1311         _cleanup_bus_unref_ sd_bus *bus = NULL;
1312         const char *path;
1313         int r;
1314
1315         if (!arg_register)
1316                 return 0;
1317
1318         r = sd_bus_default_system(&bus);
1319         if (r < 0) {
1320                 log_error("Failed to open system bus: %s", strerror(-r));
1321                 return r;
1322         }
1323
1324         r = sd_bus_call_method(
1325                         bus,
1326                         "org.freedesktop.machine1",
1327                         "/org/freedesktop/machine1",
1328                         "org.freedesktop.machine1.Manager",
1329                         "GetMachineByPID",
1330                         &error,
1331                         &reply,
1332                         "u",
1333                         (uint32_t) pid);
1334         if (r < 0) {
1335                 /* Note that the machine might already have been
1336                  * cleaned up automatically, hence don't consider it a
1337                  * failure if we cannot get the machine object. */
1338                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1339                 return 0;
1340         }
1341
1342         r = sd_bus_message_read(reply, "o", &path);
1343         if (r < 0)
1344                 return bus_log_parse_error(r);
1345
1346         r = sd_bus_call_method(
1347                         bus,
1348                         "org.freedesktop.machine1",
1349                         path,
1350                         "org.freedesktop.machine1.Machine",
1351                         "Terminate",
1352                         &error,
1353                         NULL,
1354                         NULL);
1355         if (r < 0) {
1356                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1357                 return 0;
1358         }
1359
1360         return 0;
1361 }
1362
1363 static int reset_audit_loginuid(void) {
1364         _cleanup_free_ char *p = NULL;
1365         int r;
1366
1367         if (arg_share_system)
1368                 return 0;
1369
1370         r = read_one_line_file("/proc/self/loginuid", &p);
1371         if (r == -ENOENT)
1372                 return 0;
1373         if (r < 0) {
1374                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1375                 return r;
1376         }
1377
1378         /* Already reset? */
1379         if (streq(p, "4294967295"))
1380                 return 0;
1381
1382         r = write_string_file("/proc/self/loginuid", "4294967295");
1383         if (r < 0) {
1384                 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1385                           "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1386                           "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1387                           "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1388                           "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1389
1390                 sleep(5);
1391         }
1392
1393         return 0;
1394 }
1395
1396 static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ]) {
1397         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1398         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1399         int r;
1400
1401         if (!arg_private_network)
1402                 return 0;
1403
1404         if (!arg_network_veth)
1405                 return 0;
1406
1407         /* Use two different interface name prefixes depending whether
1408          * we are in bridge mode or not. */
1409         if (arg_network_bridge)
1410                 memcpy(iface_name, "vb-", 3);
1411         else
1412                 memcpy(iface_name, "ve-", 3);
1413
1414         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1415
1416         r = sd_rtnl_open(&rtnl, 0);
1417         if (r < 0) {
1418                 log_error("Failed to connect to netlink: %s", strerror(-r));
1419                 return r;
1420         }
1421
1422         r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1423         if (r < 0) {
1424                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1425                 return r;
1426         }
1427
1428         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1429         if (r < 0) {
1430                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1431                 return r;
1432         }
1433
1434         r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1435         if (r < 0) {
1436                 log_error("Failed to open netlink container: %s", strerror(-r));
1437                 return r;
1438         }
1439
1440         r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1441         if (r < 0) {
1442                 log_error("Failed to append netlink kind: %s", strerror(-r));
1443                 return r;
1444         }
1445
1446         r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1447         if (r < 0) {
1448                 log_error("Failed to open netlink container: %s", strerror(-r));
1449                 return r;
1450         }
1451
1452         r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1453         if (r < 0) {
1454                 log_error("Failed to open netlink container: %s", strerror(-r));
1455                 return r;
1456         }
1457
1458         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1459         if (r < 0) {
1460                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1461                 return r;
1462         }
1463
1464         r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1465         if (r < 0) {
1466                 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1467                 return r;
1468         }
1469
1470         r = sd_rtnl_message_close_container(m);
1471         if (r < 0) {
1472                 log_error("Failed to close netlink container: %s", strerror(-r));
1473                 return r;
1474         }
1475
1476         r = sd_rtnl_message_close_container(m);
1477         if (r < 0) {
1478                 log_error("Failed to close netlink container: %s", strerror(-r));
1479                 return r;
1480         }
1481
1482         r = sd_rtnl_message_close_container(m);
1483         if (r < 0) {
1484                 log_error("Failed to close netlink container: %s", strerror(-r));
1485                 return r;
1486         }
1487
1488         r = sd_rtnl_call(rtnl, m, 0, NULL);
1489         if (r < 0) {
1490                 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1491                 return r;
1492         }
1493
1494         return 0;
1495 }
1496
1497 static int setup_bridge(const char veth_name[]) {
1498         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1499         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1500         int r, bridge;
1501
1502         if (!arg_private_network)
1503                 return 0;
1504
1505         if (!arg_network_veth)
1506                 return 0;
1507
1508         if (!arg_network_bridge)
1509                 return 0;
1510
1511         bridge = (int) if_nametoindex(arg_network_bridge);
1512         if (bridge <= 0) {
1513                 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1514                 return -errno;
1515         }
1516
1517         r = sd_rtnl_open(&rtnl, 0);
1518         if (r < 0) {
1519                 log_error("Failed to connect to netlink: %s", strerror(-r));
1520                 return r;
1521         }
1522
1523         r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
1524         if (r < 0) {
1525                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1526                 return r;
1527         }
1528
1529         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1530         if (r < 0) {
1531                 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1532                 return r;
1533         }
1534
1535         r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1536         if (r < 0) {
1537                 log_error("Failed to add netlink master field: %s", strerror(-r));
1538                 return r;
1539         }
1540
1541         r = sd_rtnl_call(rtnl, m, 0, NULL);
1542         if (r < 0) {
1543                 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
1544                 return r;
1545         }
1546
1547         return 0;
1548 }
1549
1550 static int parse_interface(struct udev *udev, const char *name) {
1551         _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1552         char ifi_str[2 + DECIMAL_STR_MAX(int)];
1553         int ifi;
1554
1555         ifi = (int) if_nametoindex(name);
1556         if (ifi <= 0) {
1557                 log_error("Failed to resolve interface %s: %m", name);
1558                 return -errno;
1559         }
1560
1561         sprintf(ifi_str, "n%i", ifi);
1562         d = udev_device_new_from_device_id(udev, ifi_str);
1563         if (!d) {
1564                 log_error("Failed to get udev device for interface %s: %m", name);
1565                 return -errno;
1566         }
1567
1568         if (udev_device_get_is_initialized(d) <= 0) {
1569                 log_error("Network interface %s is not initialized yet.", name);
1570                 return -EBUSY;
1571         }
1572
1573         return ifi;
1574 }
1575
1576 static int move_network_interfaces(pid_t pid) {
1577         _cleanup_udev_unref_ struct udev *udev = NULL;
1578         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1579         char **i;
1580         int r;
1581
1582         if (!arg_private_network)
1583                 return 0;
1584
1585         if (strv_isempty(arg_network_interfaces))
1586                 return 0;
1587
1588         r = sd_rtnl_open(&rtnl, 0);
1589         if (r < 0) {
1590                 log_error("Failed to connect to netlink: %s", strerror(-r));
1591                 return r;
1592         }
1593
1594         udev = udev_new();
1595         if (!udev) {
1596                 log_error("Failed to connect to udev.");
1597                 return -ENOMEM;
1598         }
1599
1600         STRV_FOREACH(i, arg_network_interfaces) {
1601                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1602                 int ifi;
1603
1604                 ifi = parse_interface(udev, *i);
1605                 if (ifi < 0)
1606                         return ifi;
1607
1608                 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
1609                 if (r < 0) {
1610                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1611                         return r;
1612                 }
1613
1614                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1615                 if (r < 0) {
1616                         log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1617                         return r;
1618                 }
1619
1620                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1621                 if (r < 0) {
1622                         log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
1623                         return r;
1624                 }
1625         }
1626
1627         return 0;
1628 }
1629
1630 static int setup_macvlan(pid_t pid) {
1631         _cleanup_udev_unref_ struct udev *udev = NULL;
1632         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1633         char **i;
1634         int r;
1635
1636         if (!arg_private_network)
1637                 return 0;
1638
1639         if (strv_isempty(arg_network_macvlan))
1640                 return 0;
1641
1642         r = sd_rtnl_open(&rtnl, 0);
1643         if (r < 0) {
1644                 log_error("Failed to connect to netlink: %s", strerror(-r));
1645                 return r;
1646         }
1647
1648         udev = udev_new();
1649         if (!udev) {
1650                 log_error("Failed to connect to udev.");
1651                 return -ENOMEM;
1652         }
1653
1654         STRV_FOREACH(i, arg_network_macvlan) {
1655                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1656                 _cleanup_free_ char *n = NULL;
1657                 int ifi;
1658
1659                 ifi = parse_interface(udev, *i);
1660                 if (ifi < 0)
1661                         return ifi;
1662
1663                 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1664                 if (r < 0) {
1665                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1666                         return r;
1667                 }
1668
1669                 r = sd_rtnl_message_append_u32(m, IFLA_LINK, ifi);
1670                 if (r < 0) {
1671                         log_error("Failed to add netlink interface index: %s", strerror(-r));
1672                         return r;
1673                 }
1674
1675                 n = strappend("mv-", *i);
1676                 if (!n)
1677                         return log_oom();
1678
1679                 strshorten(n, IFNAMSIZ-1);
1680
1681                 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, n);
1682                 if (r < 0) {
1683                         log_error("Failed to add netlink interface name: %s", strerror(-r));
1684                         return r;
1685                 }
1686
1687                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1688                 if (r < 0) {
1689                         log_error("Failed to add netlink namespace field: %s", strerror(-r));
1690                         return r;
1691                 }
1692
1693                 r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1694                 if (r < 0) {
1695                         log_error("Failed to open netlink container: %s", strerror(-r));
1696                         return r;
1697                 }
1698
1699                 r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "macvlan");
1700                 if (r < 0) {
1701                         log_error("Failed to append netlink kind: %s", strerror(-r));
1702                         return r;
1703                 }
1704
1705                 r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1706                 if (r < 0) {
1707                         log_error("Failed to open netlink container: %s", strerror(-r));
1708                         return r;
1709                 }
1710
1711                 r = sd_rtnl_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
1712                 if (r < 0) {
1713                         log_error("Failed to append macvlan mode: %s", strerror(-r));
1714                         return r;
1715                 }
1716
1717                 r = sd_rtnl_message_close_container(m);
1718                 if (r < 0) {
1719                         log_error("Failed to close netlink container: %s", strerror(-r));
1720                         return r;
1721                 }
1722
1723                 r = sd_rtnl_message_close_container(m);
1724                 if (r < 0) {
1725                         log_error("Failed to close netlink container: %s", strerror(-r));
1726                         return r;
1727                 }
1728
1729                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1730                 if (r < 0) {
1731                         log_error("Failed to add new macvlan interfaces: %s", strerror(-r));
1732                         return r;
1733                 }
1734         }
1735
1736         return 0;
1737 }
1738
/*
 * Audit is broken in containers; much of the userspace audit hookup fails
 * when running inside one. We don't care and simply turn off creation of
 * audit sockets.
 *
 * When built with seccomp support this installs a filter that allows
 * everything except socket(AF_NETLINK, *, NETLINK_AUDIT), which is made to
 * fail with EAFNOSUPPORT -- the error audit userspace takes as indication
 * that audit is disabled in the kernel.
 *
 * Returns the (negative on error) result of the last seccomp operation.
 * Without seccomp support this is a no-op returning 0.
 */
static int audit_still_doesnt_work_in_containers(void) {

#ifdef HAVE_SECCOMP
        scmp_filter_ctx ctx;
        int r;

        /* Default action: let every syscall through */
        ctx = seccomp_init(SCMP_ACT_ALLOW);
        if (!ctx)
                return log_oom();

        r = seccomp_add_secondary_archs(ctx);
        if (r < 0) {
                log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
                goto release;
        }

        /* Match on the first (domain) and third (protocol) socket() argument */
        r = seccomp_rule_add(
                        ctx,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0) {
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
                goto release;
        }

        /* Do not have libseccomp set NO_NEW_PRIVS when loading the filter */
        r = seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0) {
                log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                goto release;
        }

        r = seccomp_load(ctx);
        if (r < 0)
                log_error("Failed to install seccomp audit filter: %s", strerror(-r));

release:
        seccomp_release(ctx);
        return r;
#else
        return 0;
#endif

}
1795
1796 static int setup_image(char **device_path, int *loop_nr) {
1797         struct loop_info64 info = {
1798                 .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN
1799         };
1800         _cleanup_close_ int fd = -1, control = -1, loop = -1;
1801         _cleanup_free_ char* loopdev = NULL;
1802         struct stat st;
1803         int r, nr;
1804
1805         assert(device_path);
1806         assert(loop_nr);
1807
1808         fd = open(arg_image, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
1809         if (fd < 0) {
1810                 log_error("Failed to open %s: %m", arg_image);
1811                 return -errno;
1812         }
1813
1814         if (fstat(fd, &st) < 0) {
1815                 log_error("Failed to stat %s: %m", arg_image);
1816                 return -errno;
1817         }
1818
1819         if (S_ISBLK(st.st_mode)) {
1820                 char *p;
1821
1822                 p = strdup(arg_image);
1823                 if (!p)
1824                         return log_oom();
1825
1826                 *device_path = p;
1827
1828                 *loop_nr = -1;
1829
1830                 r = fd;
1831                 fd = -1;
1832
1833                 return r;
1834         }
1835
1836         if (!S_ISREG(st.st_mode)) {
1837                 log_error("%s is not a regular file or block device: %m", arg_image);
1838                 return -EINVAL;
1839         }
1840
1841         control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1842         if (control < 0) {
1843                 log_error("Failed to open /dev/loop-control: %m");
1844                 return -errno;
1845         }
1846
1847         nr = ioctl(control, LOOP_CTL_GET_FREE);
1848         if (nr < 0) {
1849                 log_error("Failed to allocate loop device: %m");
1850                 return -errno;
1851         }
1852
1853         if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
1854                 return log_oom();
1855
1856         loop = open(loopdev, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
1857         if (loop < 0) {
1858                 log_error("Failed to open loop device %s: %m", loopdev);
1859                 return -errno;
1860         }
1861
1862         if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
1863                 log_error("Failed to set loopback file descriptor on %s: %m", loopdev);
1864                 return -errno;
1865         }
1866
1867         if (arg_read_only)
1868                 info.lo_flags |= LO_FLAGS_READ_ONLY;
1869
1870         if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
1871                 log_error("Failed to set loopback settings on %s: %m", loopdev);
1872                 return -errno;
1873         }
1874
1875         *device_path = loopdev;
1876         loopdev = NULL;
1877
1878         *loop_nr = nr;
1879
1880         r = loop;
1881         loop = -1;
1882
1883         return r;
1884 }
1885
/*
 * Inspects the block device open on "fd" and figures out which of its
 * partitions to use for the container.
 *
 * The device must carry a GPT partition table. The partition devices are
 * enumerated through udev and matched against the partition list blkid
 * reports; for each recognised partition type (home, srv and -- where the
 * respective macros are defined -- native root and secondary root) the
 * device node of the partition with the lowest partition number wins.
 *
 * On success:
 *   *root_device  receives the native root partition node if there is one,
 *                 otherwise the secondary root partition node
 *   *secondary    tells which of the two was picked
 *   *home_device  is set only if a home partition was found
 *   *srv_device   is set only if a srv partition was found
 * All returned strings are heap-allocated and owned by the caller.
 *
 * Returns 0 on success, -EINVAL if the image has no (GPT) partition table or
 * no root partition, another negative value on other failures, and -ENOTSUP
 * if compiled without blkid support.
 */
static int dissect_image(
                int fd,
                char **root_device,
                char **home_device,
                char **srv_device,
                bool *secondary) {

#ifdef HAVE_BLKID
        int home_nr = -1, root_nr = -1, secondary_root_nr = -1, srv_nr = -1;
        _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL;
        _cleanup_udev_enumerate_unref_ struct udev_enumerate *enumerate = NULL;
        _cleanup_udev_device_unref_ struct udev_device *disk = NULL;
        _cleanup_blkid_free_probe_ blkid_probe probe = NULL;
        _cleanup_udev_unref_ struct udev *udev = NULL;
        struct udev_list_entry *first, *item;
        const char *pttype = NULL;
        blkid_partlist partlist;
        struct stat st;
        int r;

        assert(fd >= 0);
        assert(root_device);
        assert(home_device);
        assert(srv_device);
        assert(secondary);

        probe = blkid_new_probe();
        if (!probe)
                return log_oom();

        /* errno is cleared up front so that a failure which leaves it
         * untouched can be told apart and reported as OOM */
        errno = 0;
        r = blkid_probe_set_device(probe, fd, 0, 0);
        if (r != 0) {
                if (errno == 0)
                        return log_oom();

                log_error("Failed to set device on blkid probe: %m");
                return -errno;
        }

        blkid_probe_enable_partitions(probe, 1);
        blkid_probe_set_partitions_flags(probe, BLKID_PARTS_ENTRY_DETAILS);

        errno = 0;
        r = blkid_do_safeprobe(probe);
        if (r == -2 || r == 1) {
                log_error("Failed to identify any partition table on %s.\n"
                          "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
                return -EINVAL;
        } else if (r != 0) {
                if (errno == 0)
                        errno = EIO;
                log_error("Failed to probe: %m");
                return -errno;
        }

        /* pttype stays NULL if the lookup fails; streq_ptr() copes */
        blkid_probe_lookup_value(probe, "PTTYPE", &pttype, NULL);
        if (!streq_ptr(pttype, "gpt")) {
                log_error("Image %s does not carry a GUID Partition Table.\n"
                          "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
                return -EINVAL;
        }

        errno = 0;
        partlist = blkid_probe_get_partitions(probe);
        if (!partlist) {
                if (errno == 0)
                        return log_oom();

                log_error("Failed to list partitions of %s", arg_image);
                return -errno;
        }

        udev = udev_new();
        if (!udev)
                return log_oom();

        if (fstat(fd, &st) < 0) {
                log_error("Failed to stat block device: %m");
                return -errno;
        }

        /* The udev device of the whole disk; its children are the partitions */
        disk = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
        if (!disk)
                return log_oom();

        enumerate = udev_enumerate_new(udev);
        if (!enumerate)
                return log_oom();

        r = udev_enumerate_add_match_parent(enumerate, disk);
        if (r < 0)
                return log_oom();

        r = udev_enumerate_scan_devices(enumerate);
        if (r < 0) {
                log_error("Failed to scan for partition devices of %s: %s", arg_image, strerror(-r));
                return r;
        }

        first = udev_enumerate_get_list_entry(enumerate);
        udev_list_entry_foreach(item, first) {
                _cleanup_udev_device_unref_ struct udev_device *part_dev = NULL;
                const char *type_str, *node;
                char **slot = NULL;     /* which result string this partition competes for */
                int *slot_nr = NULL;    /* partition number currently stored in that slot */
                sd_id128_t type_id;
                blkid_partition part;
                dev_t devnum;
                int nr;

                errno = 0;
                part_dev = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
                if (!part_dev) {
                        if (!errno)
                                errno = ENOMEM;

                        log_error("Failed to get partition device of %s: %m", arg_image);
                        return -errno;
                }

                /* Skip entries without a device number, and the disk itself */
                devnum = udev_device_get_devnum(part_dev);
                if (major(devnum) == 0)
                        continue;

                if (st.st_rdev == devnum)
                        continue;

                node = udev_device_get_devnode(part_dev);
                if (!node)
                        continue;

                /* Map the udev device onto blkid's view of the partition table */
                part = blkid_partlist_devno_to_partition(partlist, devnum);
                if (!part)
                        continue;

                nr = blkid_partition_get_partno(part);
                if (nr < 0)
                        continue;

                type_str = blkid_partition_get_type_string(part);
                if (!type_str)
                        continue;

                if (sd_id128_from_string(type_str, &type_id) < 0)
                        continue;

                if (sd_id128_equal(type_id, GPT_HOME)) {
                        slot = &home;
                        slot_nr = &home_nr;
                } else if (sd_id128_equal(type_id, GPT_SRV)) {
                        slot = &srv;
                        slot_nr = &srv_nr;
                }
#ifdef GPT_ROOT_NATIVE
                else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE)) {
                        slot = &root;
                        slot_nr = &root_nr;
                }
#endif
#ifdef GPT_ROOT_SECONDARY
                else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY)) {
                        slot = &secondary_root;
                        slot_nr = &secondary_root_nr;
                }
#endif

                /* Not a partition type we care about */
                if (!slot)
                        continue;

                /* Keep what we have unless this partition has a lower number */
                if (*slot && nr >= *slot_nr)
                        continue;

                *slot_nr = nr;
                free(*slot);
                *slot = strdup(node);
                if (!*slot)
                        return log_oom();
        }

        if (!root && !secondary_root) {
                log_error("Failed to identify root partition in disk image %s.\n"
                          "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
                return -EINVAL;
        }

        /* Hand the strings over to the caller; resetting the locals keeps
         * the cleanup handlers from freeing them. The native root wins
         * over the secondary one. */
        if (root) {
                *root_device = root;
                root = NULL;
                *secondary = false;
        } else {
                *root_device = secondary_root;
                secondary_root = NULL;
                *secondary = true;
        }

        if (home) {
                *home_device = home;
                home = NULL;
        }

        if (srv) {
                *srv_device = srv;
                srv = NULL;
        }

        return 0;
#else
        log_error("--image= is not supported, compiled without blkid support.");
        return -ENOTSUP;
#endif
}
2112
/*
 * Mounts the block device "what" below the container root.
 *
 * what       device node to mount
 * where      root directory of the container tree
 * directory  optional path (e.g. "/home") appended to "where" to form the
 *            mount point; if NULL the device is mounted on "where" itself
 *
 * The file system type is detected with blkid. LUKS volumes are refused.
 * The mount is always done with MS_NODEV, and additionally with MS_RDONLY
 * if arg_read_only is set.
 *
 * Returns 0 on success, -EINVAL if the file system type cannot be
 * determined, -ENOTSUP for LUKS volumes or when compiled without blkid
 * support, or another negative errno-style value on failure.
 */
static int mount_device(const char *what, const char *where, const char *directory) {
#ifdef HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        const char *fstype = NULL, *p;
        int r;

        assert(what);
        assert(where);

        if (directory)
                p = strappenda(where, directory);
        else
                p = where;

        /* errno is cleared up front so that a failure which leaves it
         * untouched can be told apart and reported as OOM */
        errno = 0;
        b = blkid_new_probe_from_filename(what);
        if (!b) {
                if (errno == 0)
                        return log_oom();
                log_error("Failed to allocate prober for %s: %m", what);
                return -errno;
        }

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        /* blkid_do_safeprobe() returns 0 on success, 1 if nothing was
         * detected, -2 if the result is ambivalent and -1 on error. The
         * first two non-zero cases mean "type unknown"; only -1 is a real
         * probing failure. */
        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == -2 || r == 1) {
                log_error("Cannot determine file system type of %s", what);
                return -EINVAL;
        } else if (r != 0) {
                if (errno == 0)
                        errno = EIO;
                log_error("Failed to probe %s: %m", what);
                return -errno;
        }

        errno = 0;
        if (blkid_probe_lookup_value(b, "TYPE", &fstype, NULL) < 0) {
                if (errno == 0)
                        errno = EINVAL;
                log_error("Failed to determine file system type of %s", what);
                return -errno;
        }

        if (streq(fstype, "crypto_LUKS")) {
                log_error("nspawn currently does not support LUKS disk images.");
                return -ENOTSUP;
        }

        /* Device nodes inside the image must never be honoured, regardless
         * of whether the image is mounted writable or read-only. */
        if (mount(what, p, fstype, MS_NODEV|(arg_read_only ? MS_RDONLY : 0), NULL) < 0) {
                log_error("Failed to mount %s: %m", what);
                return -errno;
        }

        return 0;
#else
        log_error("--image= is not supported, compiled without blkid support.");
        return -ENOTSUP;
#endif
}
2175
/*
 * Mounts the partitions found in a disk image into the container tree
 * rooted at "where": the root partition on "where" itself, the home
 * partition on "<where>/home" and the srv partition on "<where>/srv".
 *
 * Each of root_device, home_device and srv_device may be NULL, in which
 * case the respective mount is skipped. The root partition is mounted
 * first so that the others end up on top of it.
 *
 * Returns 0 on success, or the negative value returned by the first
 * mount_device() call that failed.
 */
static int mount_devices(const char *where, const char *root_device, const char *home_device, const char *srv_device) {
        int r;

        assert(where);

        /* Note: mount below the directory we were passed, rather than
         * reaching for the global arg_directory behind the caller's back. */

        if (root_device) {
                r = mount_device(root_device, where, NULL);
                if (r < 0) {
                        log_error("Failed to mount root directory: %s", strerror(-r));
                        return r;
                }
        }

        if (home_device) {
                r = mount_device(home_device, where, "/home");
                if (r < 0) {
                        log_error("Failed to mount home directory: %s", strerror(-r));
                        return r;
                }
        }

        if (srv_device) {
                r = mount_device(srv_device, where, "/srv");
                if (r < 0) {
                        log_error("Failed to mount server data directory: %s", strerror(-r));
                        return r;
                }
        }

        return 0;
}
2207
2208 static void loop_remove(int nr, int *image_fd) {
2209         _cleanup_close_ int control = -1;
2210
2211         if (nr < 0)
2212                 return;
2213
2214         if (image_fd && *image_fd >= 0) {
2215                 ioctl(*image_fd, LOOP_CLR_FD);
2216                 close_nointr_nofail(*image_fd);
2217                 *image_fd = -1;
2218         }
2219
2220         control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
2221         if (control < 0)
2222                 return;
2223
2224         ioctl(control, LOOP_CTL_REMOVE, nr);
2225 }
2226
2227 int main(int argc, char *argv[]) {
2228
2229         _cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
2230         _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1, image_fd = -1;
2231         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
2232         _cleanup_fdset_free_ FDSet *fds = NULL;
2233         int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
2234         const char *console = NULL;
2235         char veth_name[IFNAMSIZ];
2236         bool secondary = false;
2237         pid_t pid = 0;
2238         sigset_t mask;
2239
2240         log_parse_environment();
2241         log_open();
2242
2243         k = parse_argv(argc, argv);
2244         if (k < 0)
2245                 goto finish;
2246         else if (k == 0) {
2247                 r = EXIT_SUCCESS;
2248                 goto finish;
2249         }
2250
2251         if (!arg_image) {
2252                 if (arg_directory) {
2253                         char *p;
2254
2255                         p = path_make_absolute_cwd(arg_directory);
2256                         free(arg_directory);
2257                         arg_directory = p;
2258                 } else
2259                         arg_directory = get_current_dir_name();
2260
2261                 if (!arg_directory) {
2262                         log_error("Failed to determine path, please use -D.");
2263                         goto finish;
2264                 }
2265                 path_kill_slashes(arg_directory);
2266         }
2267
2268         if (!arg_machine) {
2269                 arg_machine = strdup(basename(arg_image ? arg_image : arg_directory));
2270                 if (!arg_machine) {
2271                         log_oom();
2272                         goto finish;
2273                 }
2274
2275                 hostname_cleanup(arg_machine, false);
2276                 if (isempty(arg_machine)) {
2277                         log_error("Failed to determine machine name automatically, please use -M.");
2278                         goto finish;
2279                 }
2280         }
2281
2282         if (geteuid() != 0) {
2283                 log_error("Need to be root.");
2284                 goto finish;
2285         }
2286
2287         if (sd_booted() <= 0) {
2288                 log_error("Not running on a systemd system.");
2289                 goto finish;
2290         }
2291
2292         log_close();
2293         n_fd_passed = sd_listen_fds(false);
2294         if (n_fd_passed > 0) {
2295                 k = fdset_new_listen_fds(&fds, false);
2296                 if (k < 0) {
2297                         log_error("Failed to collect file descriptors: %s", strerror(-k));
2298                         goto finish;
2299                 }
2300         }
2301         fdset_close_others(fds);
2302         log_open();
2303
2304         if (arg_directory) {
2305                 if (path_equal(arg_directory, "/")) {
2306                         log_error("Spawning container on root directory not supported.");
2307                         goto finish;
2308                 }
2309
2310                 if (arg_boot) {
2311                         if (path_is_os_tree(arg_directory) <= 0) {
2312                                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
2313                                 goto finish;
2314                         }
2315                 } else {
2316                         const char *p;
2317
2318                         p = strappenda(arg_directory,
2319                                        argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
2320                         if (access(p, F_OK) < 0) {
2321                                 log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
2322                                 goto finish;
2323
2324                         }
2325                 }
2326         } else {
2327                 char template[] = "/tmp/nspawn-root-XXXXXX";
2328
2329                 if (!mkdtemp(template)) {
2330                         log_error("Failed to create temporary directory: %m");
2331                         r = -errno;
2332                         goto finish;
2333                 }
2334
2335                 arg_directory = strdup(template);
2336                 if (!arg_directory) {
2337                         r = log_oom();
2338                         goto finish;
2339                 }
2340
2341                 image_fd = setup_image(&device_path, &loop_nr);
2342                 if (image_fd < 0) {
2343                         r = image_fd;
2344                         goto finish;
2345                 }
2346
2347                 r = dissect_image(image_fd, &root_device, &home_device, &srv_device, &secondary);
2348                 if (r < 0)
2349                         goto finish;
2350         }
2351
2352         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
2353         if (master < 0) {
2354                 log_error("Failed to acquire pseudo tty: %m");
2355                 goto finish;
2356         }
2357
2358         console = ptsname(master);
2359         if (!console) {
2360                 log_error("Failed to determine tty name: %m");
2361                 goto finish;
2362         }
2363
2364         if (!arg_quiet)
2365                 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_image ? arg_image : arg_directory);
2366
2367         if (unlockpt(master) < 0) {
2368                 log_error("Failed to unlock tty: %m");
2369                 goto finish;
2370         }
2371
2372         if (access("/dev/kdbus/control", F_OK) >= 0) {
2373
2374                 if (arg_share_system) {
2375                         kdbus_domain = strdup("/dev/kdbus");
2376                         if (!kdbus_domain) {
2377                                 log_oom();
2378                                 goto finish;
2379                         }
2380                 } else {
2381                         const char *ns;
2382
2383                         ns = strappenda("machine-", arg_machine);
2384                         kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
2385                         if (r < 0)
2386                                 log_debug("Failed to create kdbus domain: %s", strerror(-r));
2387                         else
2388                                 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
2389                 }
2390         }
2391
2392         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
2393                 log_error("Failed to create kmsg socket pair: %m");
2394                 goto finish;
2395         }
2396
2397         sd_notify(0, "READY=1");
2398
2399         assert_se(sigemptyset(&mask) == 0);
2400         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
2401         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
2402
2403         for (;;) {
2404                 siginfo_t status;
2405
2406                 sync_fd = eventfd(0, EFD_CLOEXEC);
2407                 if (sync_fd < 0) {
2408                         log_error("Failed to create event fd: %m");
2409                         goto finish;
2410                 }
2411
2412                 pid = syscall(__NR_clone,
2413                               SIGCHLD|CLONE_NEWNS|
2414                               (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
2415                               (arg_private_network ? CLONE_NEWNET : 0), NULL);
2416                 if (pid < 0) {
2417                         if (errno == EINVAL)
2418                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
2419                         else
2420                                 log_error("clone() failed: %m");
2421
2422                         goto finish;
2423                 }
2424
2425                 if (pid == 0) {
2426                         /* child */
2427                         const char *home = NULL;
2428                         uid_t uid = (uid_t) -1;
2429                         gid_t gid = (gid_t) -1;
2430                         unsigned n_env = 2;
2431                         const char *envp[] = {
2432                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
2433                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
2434                                 NULL, /* TERM */
2435                                 NULL, /* HOME */
2436                                 NULL, /* USER */
2437                                 NULL, /* LOGNAME */
2438                                 NULL, /* container_uuid */
2439                                 NULL, /* LISTEN_FDS */
2440                                 NULL, /* LISTEN_PID */
2441                                 NULL
2442                         };
2443                         char **env_use;
2444                         eventfd_t x;
2445
2446                         envp[n_env] = strv_find_prefix(environ, "TERM=");
2447                         if (envp[n_env])
2448                                 n_env ++;
2449
2450                         close_nointr_nofail(master);
2451                         master = -1;
2452
2453                         close_nointr(STDIN_FILENO);
2454                         close_nointr(STDOUT_FILENO);
2455                         close_nointr(STDERR_FILENO);
2456
2457                         close_nointr_nofail(kmsg_socket_pair[0]);
2458                         kmsg_socket_pair[0] = -1;
2459
2460                         reset_all_signal_handlers();
2461
2462                         assert_se(sigemptyset(&mask) == 0);
2463                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2464
2465                         k = open_terminal(console, O_RDWR);
2466                         if (k != STDIN_FILENO) {
2467                                 if (k >= 0) {
2468                                         close_nointr_nofail(k);
2469                                         k = -EINVAL;
2470                                 }
2471
2472                                 log_error("Failed to open console: %s", strerror(-k));
2473                                 goto child_fail;
2474                         }
2475
2476                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
2477                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
2478                                 log_error("Failed to duplicate console: %m");
2479                                 goto child_fail;
2480                         }
2481
2482                         if (setsid() < 0) {
2483                                 log_error("setsid() failed: %m");
2484                                 goto child_fail;
2485                         }
2486
2487                         if (reset_audit_loginuid() < 0)
2488                                 goto child_fail;
2489
2490                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
2491                                 log_error("PR_SET_PDEATHSIG failed: %m");
2492                                 goto child_fail;
2493                         }
2494
2495                         /* Mark everything as slave, so that we still
2496                          * receive mounts from the real root, but don't
2497                          * propagate mounts to the real root. */
2498                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
2499                                 log_error("MS_SLAVE|MS_REC failed: %m");
2500                                 goto child_fail;
2501                         }
2502
2503                         if (mount_devices(arg_directory, root_device, home_device, srv_device) < 0)
2504                                 goto child_fail;
2505
2506                         /* Turn directory into bind mount */
2507                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
2508                                 log_error("Failed to make bind mount.");
2509                                 goto child_fail;
2510                         }
2511
2512                         if (arg_read_only)
2513                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
2514                                         log_error("Failed to make read-only.");
2515                                         goto child_fail;
2516                                 }
2517
2518                         if (mount_all(arg_directory) < 0)
2519                                 goto child_fail;
2520
2521                         if (copy_devnodes(arg_directory) < 0)
2522                                 goto child_fail;
2523
2524                         if (setup_ptmx(arg_directory) < 0)
2525                                 goto child_fail;
2526
2527                         dev_setup(arg_directory);
2528
2529                         if (audit_still_doesnt_work_in_containers() < 0)
2530                                 goto child_fail;
2531
2532                         if (setup_dev_console(arg_directory, console) < 0)
2533                                 goto child_fail;
2534
2535                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
2536                                 goto child_fail;
2537
2538                         close_nointr_nofail(kmsg_socket_pair[1]);
2539                         kmsg_socket_pair[1] = -1;
2540
2541                         if (setup_boot_id(arg_directory) < 0)
2542                                 goto child_fail;
2543
2544                         if (setup_timezone(arg_directory) < 0)
2545                                 goto child_fail;
2546
2547                         if (setup_resolv_conf(arg_directory) < 0)
2548                                 goto child_fail;
2549
2550                         if (setup_journal(arg_directory) < 0)
2551                                 goto child_fail;
2552
2553                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
2554                                 goto child_fail;
2555
2556                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
2557                                 goto child_fail;
2558
2559                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
2560                                 goto child_fail;
2561
2562                         if (chdir(arg_directory) < 0) {
2563                                 log_error("chdir(%s) failed: %m", arg_directory);
2564                                 goto child_fail;
2565                         }
2566
2567                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
2568                                 log_error("mount(MS_MOVE) failed: %m");
2569                                 goto child_fail;
2570                         }
2571
2572                         if (chroot(".") < 0) {
2573                                 log_error("chroot() failed: %m");
2574                                 goto child_fail;
2575                         }
2576
2577                         if (chdir("/") < 0) {
2578                                 log_error("chdir() failed: %m");
2579                                 goto child_fail;
2580                         }
2581
2582                         umask(0022);
2583
2584                         if (arg_private_network)
2585                                 loopback_setup();
2586
2587                         if (drop_capabilities() < 0) {
2588                                 log_error("drop_capabilities() failed: %m");
2589                                 goto child_fail;
2590                         }
2591
2592                         if (arg_user) {
2593
2594                                 /* Note that this resolves user names
2595                                  * inside the container, and hence
2596                                  * accesses the NSS modules from the
2597                                  * container and not the host. This is
2598                                  * a bit weird... */
2599
2600                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
2601                                         log_error("get_user_creds() failed: %m");
2602                                         goto child_fail;
2603                                 }
2604
2605                                 if (mkdir_parents_label(home, 0775) < 0) {
2606                                         log_error("mkdir_parents_label() failed: %m");
2607                                         goto child_fail;
2608                                 }
2609
2610                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
2611                                         log_error("mkdir_safe_label() failed: %m");
2612                                         goto child_fail;
2613                                 }
2614
2615                                 if (initgroups((const char*)arg_user, gid) < 0) {
2616                                         log_error("initgroups() failed: %m");
2617                                         goto child_fail;
2618                                 }
2619
2620                                 if (setresgid(gid, gid, gid) < 0) {
2621                                         log_error("setregid() failed: %m");
2622                                         goto child_fail;
2623                                 }
2624
2625                                 if (setresuid(uid, uid, uid) < 0) {
2626                                         log_error("setreuid() failed: %m");
2627                                         goto child_fail;
2628                                 }
2629                         } else {
2630                                 /* Reset everything fully to 0, just in case */
2631
2632                                 if (setgroups(0, NULL) < 0) {
2633                                         log_error("setgroups() failed: %m");
2634                                         goto child_fail;
2635                                 }
2636
2637                                 if (setresgid(0, 0, 0) < 0) {
2638                                         log_error("setregid() failed: %m");
2639                                         goto child_fail;
2640                                 }
2641
2642                                 if (setresuid(0, 0, 0) < 0) {
2643                                         log_error("setreuid() failed: %m");
2644                                         goto child_fail;
2645                                 }
2646                         }
2647
2648                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
2649                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
2650                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
2651                                 log_oom();
2652                                 goto child_fail;
2653                         }
2654
2655                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
2656                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
2657                                         log_oom();
2658                                         goto child_fail;
2659                                 }
2660                         }
2661
2662                         if (fdset_size(fds) > 0) {
2663                                 k = fdset_cloexec(fds, false);
2664                                 if (k < 0) {
2665                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
2666                                         goto child_fail;
2667                                 }
2668
2669                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
2670                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
2671                                         log_oom();
2672                                         goto child_fail;
2673                                 }
2674                         }
2675
2676                         setup_hostname();
2677
2678                         if (arg_personality != 0xffffffffLU) {
2679                                 if (personality(arg_personality) < 0) {
2680                                         log_error("personality() failed: %m");
2681                                         goto child_fail;
2682                                 }
2683                         } else if (secondary) {
2684                                 if (personality(PER_LINUX32) < 0) {
2685                                         log_error("personality() failed: %m");
2686                                         goto child_fail;
2687                                 }
2688                         }
2689
2690                         eventfd_read(sync_fd, &x);
2691                         close_nointr_nofail(sync_fd);
2692                         sync_fd = -1;
2693
2694                         if (!strv_isempty(arg_setenv)) {
2695                                 char **n;
2696
2697                                 n = strv_env_merge(2, envp, arg_setenv);
2698                                 if (!n) {
2699                                         log_oom();
2700                                         goto child_fail;
2701                                 }
2702
2703                                 env_use = n;
2704                         } else
2705                                 env_use = (char**) envp;
2706
2707 #ifdef HAVE_SELINUX
2708                         if (arg_selinux_context)
2709                                 if (setexeccon((security_context_t) arg_selinux_context) < 0)
2710                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2711 #endif
2712                         if (arg_boot) {
2713                                 char **a;
2714                                 size_t l;
2715
2716                                 /* Automatically search for the init system */
2717
2718                                 l = 1 + argc - optind;
2719                                 a = newa(char*, l + 1);
2720                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
2721
2722                                 a[0] = (char*) "/usr/lib/systemd/systemd";
2723                                 execve(a[0], a, env_use);
2724
2725                                 a[0] = (char*) "/lib/systemd/systemd";
2726                                 execve(a[0], a, env_use);
2727
2728                                 a[0] = (char*) "/sbin/init";
2729                                 execve(a[0], a, env_use);
2730                         } else if (argc > optind)
2731                                 execvpe(argv[optind], argv + optind, env_use);
2732                         else {
2733                                 chdir(home ? home : "/root");
2734                                 execle("/bin/bash", "-bash", NULL, env_use);
2735                                 execle("/bin/sh", "-sh", NULL, env_use);
2736                         }
2737
2738                         log_error("execv() failed: %m");
2739
2740                 child_fail:
2741                         _exit(EXIT_FAILURE);
2742                 }
2743
2744                 fdset_free(fds);
2745                 fds = NULL;
2746
2747                 r = register_machine(pid);
2748                 if (r < 0)
2749                         goto finish;
2750
2751                 r = move_network_interfaces(pid);
2752                 if (r < 0)
2753                         goto finish;
2754
2755                 r = setup_veth(pid, veth_name);
2756                 if (r < 0)
2757                         goto finish;
2758
2759                 r = setup_bridge(veth_name);
2760                 if (r < 0)
2761                         goto finish;
2762
2763                 r = setup_macvlan(pid);
2764                 if (r < 0)
2765                         goto finish;
2766
2767                 eventfd_write(sync_fd, 1);
2768                 close_nointr_nofail(sync_fd);
2769                 sync_fd = -1;
2770
2771                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2772                 if (k < 0) {
2773                         r = EXIT_FAILURE;
2774                         break;
2775                 }
2776
2777                 if (!arg_quiet)
2778                         putc('\n', stdout);
2779
2780                 /* Kill if it is not dead yet anyway */
2781                 terminate_machine(pid);
2782
2783                 /* Redundant, but better safe than sorry */
2784                 kill(pid, SIGKILL);
2785
2786                 k = wait_for_terminate(pid, &status);
2787                 pid = 0;
2788
2789                 if (k < 0) {
2790                         r = EXIT_FAILURE;
2791                         break;
2792                 }
2793
2794                 if (status.si_code == CLD_EXITED) {
2795                         r = status.si_status;
2796                         if (status.si_status != 0) {
2797                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2798                                 break;
2799                         }
2800
2801                         if (!arg_quiet)
2802                                 log_debug("Container %s exited successfully.", arg_machine);
2803                         break;
2804                 } else if (status.si_code == CLD_KILLED &&
2805                            status.si_status == SIGINT) {
2806
2807                         if (!arg_quiet)
2808                                 log_info("Container %s has been shut down.", arg_machine);
2809                         r = 0;
2810                         break;
2811                 } else if (status.si_code == CLD_KILLED &&
2812                            status.si_status == SIGHUP) {
2813
2814                         if (!arg_quiet)
2815                                 log_info("Container %s is being rebooted.", arg_machine);
2816                         continue;
2817                 } else if (status.si_code == CLD_KILLED ||
2818                            status.si_code == CLD_DUMPED) {
2819
2820                         log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2821                         r = EXIT_FAILURE;
2822                         break;
2823                 } else {
2824                         log_error("Container %s failed due to unknown reason.", arg_machine);
2825                         r = EXIT_FAILURE;
2826                         break;
2827                 }
2828         }
2829
2830 finish:
2831         loop_remove(loop_nr, &image_fd);
2832
2833         if (pid > 0)
2834                 kill(pid, SIGKILL);
2835
2836         free(arg_directory);
2837         free(arg_machine);
2838         free(arg_user);
2839         strv_free(arg_setenv);
2840         strv_free(arg_network_interfaces);
2841         strv_free(arg_network_macvlan);
2842         strv_free(arg_bind);
2843         strv_free(arg_bind_ro);
2844
2845         return r;
2846 }