chiark / gitweb /
nspawn: don't try mknod() of /dev/console with the correct major/minor
[elogind.git] / src / nspawn / nspawn.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2010 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <signal.h>
23 #include <sched.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/syscall.h>
27 #include <sys/mount.h>
28 #include <sys/wait.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdio.h>
32 #include <errno.h>
33 #include <sys/prctl.h>
34 #include <sys/capability.h>
35 #include <getopt.h>
36 #include <termios.h>
37 #include <sys/signalfd.h>
38 #include <grp.h>
39 #include <linux/fs.h>
40 #include <sys/un.h>
41 #include <sys/socket.h>
42 #include <linux/netlink.h>
43 #include <sys/eventfd.h>
44 #include <net/if.h>
45 #include <linux/veth.h>
46 #include <sys/personality.h>
47 #include <linux/loop.h>
48
49 #ifdef HAVE_SELINUX
50 #include <selinux/selinux.h>
51 #endif
52
53 #ifdef HAVE_SECCOMP
54 #include <seccomp.h>
55 #endif
56
57 #ifdef HAVE_BLKID
58 #include <blkid/blkid.h>
59 #endif
60
61 #include "sd-daemon.h"
62 #include "sd-bus.h"
63 #include "sd-id128.h"
64 #include "sd-rtnl.h"
65 #include "log.h"
66 #include "util.h"
67 #include "mkdir.h"
68 #include "macro.h"
69 #include "audit.h"
70 #include "missing.h"
71 #include "cgroup-util.h"
72 #include "strv.h"
73 #include "path-util.h"
74 #include "loopback-setup.h"
75 #include "dev-setup.h"
76 #include "fdset.h"
77 #include "build.h"
78 #include "fileio.h"
79 #include "bus-util.h"
80 #include "bus-error.h"
81 #include "ptyfwd.h"
82 #include "bus-kernel.h"
83 #include "env-util.h"
84 #include "def.h"
85 #include "rtnl-util.h"
86 #include "udev-util.h"
87 #include "blkid-util.h"
88 #include "gpt.h"
89
90 #ifdef HAVE_SECCOMP
91 #include "seccomp-util.h"
92 #endif
93
/* Journal linking mode selected with --link-journal= (or -j). parse_argv()
 * maps the strings "no", "auto", "host" and "guest" onto these constants;
 * LINK_AUTO is the default value of arg_link_journal. */
94 typedef enum LinkJournal {
95         LINK_NO,
96         LINK_AUTO,
97         LINK_HOST,
98         LINK_GUEST
99 } LinkJournal;
100
/* Command-line configuration. The defaults are set here; parse_argv()
 * overwrites them according to the options it sees. */
101 static char *arg_directory = NULL;
102 static char *arg_user = NULL;
103 static sd_id128_t arg_uuid = {};
104 static char *arg_machine = NULL;
105 static const char *arg_selinux_context = NULL;
106 static const char *arg_selinux_apifs_context = NULL;
107 static const char *arg_slice = NULL;
108 static bool arg_private_network = false;
109 static bool arg_read_only = false;
110 static bool arg_boot = false;
111 static LinkJournal arg_link_journal = LINK_AUTO;
/* Bit mask (1ULL << CAP_*) of the capabilities kept by default. parse_argv()
 * ORs in the --capability= values (plus CAP_NET_ADMIN when a private network
 * is requested) and then clears the --drop-capability= values. */
112 static uint64_t arg_retain =
113         (1ULL << CAP_CHOWN) |
114         (1ULL << CAP_DAC_OVERRIDE) |
115         (1ULL << CAP_DAC_READ_SEARCH) |
116         (1ULL << CAP_FOWNER) |
117         (1ULL << CAP_FSETID) |
118         (1ULL << CAP_IPC_OWNER) |
119         (1ULL << CAP_KILL) |
120         (1ULL << CAP_LEASE) |
121         (1ULL << CAP_LINUX_IMMUTABLE) |
122         (1ULL << CAP_NET_BIND_SERVICE) |
123         (1ULL << CAP_NET_BROADCAST) |
124         (1ULL << CAP_NET_RAW) |
125         (1ULL << CAP_SETGID) |
126         (1ULL << CAP_SETFCAP) |
127         (1ULL << CAP_SETPCAP) |
128         (1ULL << CAP_SETUID) |
129         (1ULL << CAP_SYS_ADMIN) |
130         (1ULL << CAP_SYS_CHROOT) |
131         (1ULL << CAP_SYS_NICE) |
132         (1ULL << CAP_SYS_PTRACE) |
133         (1ULL << CAP_SYS_TTY_CONFIG) |
134         (1ULL << CAP_SYS_RESOURCE) |
135         (1ULL << CAP_SYS_BOOT) |
136         (1ULL << CAP_AUDIT_WRITE) |
137         (1ULL << CAP_AUDIT_CONTROL) |
138         (1ULL << CAP_MKNOD);
/* --bind= / --bind-ro= store every mount as two consecutive entries: the
 * source path followed by the destination path (written by parse_argv(),
 * consumed pairwise by mount_binds()). */
139 static char **arg_bind = NULL;
140 static char **arg_bind_ro = NULL;
141 static char **arg_setenv = NULL;
142 static bool arg_quiet = false;
143 static bool arg_share_system = false;
144 static bool arg_register = true;
145 static bool arg_keep_unit = false;
146 static char **arg_network_interfaces = NULL;
147 static char **arg_network_macvlan = NULL;
148 static bool arg_network_veth = false;
149 static const char *arg_network_bridge = NULL;
/* 0xffffffffLU is the same value parse_argv() treats as "unknown" when it is
 * returned by personality_from_string(). */
150 static unsigned long arg_personality = 0xffffffffLU;
151 static const char *arg_image = NULL;
152
153 static int help(void) {
154
155         printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
156                "Spawn a minimal namespace container for debugging, testing and building.\n\n"
157                "  -h --help                 Show this help\n"
158                "     --version              Print version string\n"
159                "  -q --quiet                Do not show status information\n"
160                "  -D --directory=PATH       Root directory for the container\n"
161                "  -i --image=PATH           File system device or image for the container\n"
162                "  -b --boot                 Boot up full system (i.e. invoke init)\n"
163                "  -u --user=USER            Run the command under specified user or uid\n"
164                "  -M --machine=NAME         Set the machine name for the container\n"
165                "     --uuid=UUID            Set a specific machine UUID for the container\n"
166                "  -S --slice=SLICE          Place the container in the specified slice\n"
167                "     --private-network      Disable network in container\n"
168                "     --network-interface=INTERFACE\n"
169                "                            Assign an existing network interface to the\n"
170                "                            container\n"
171                "     --network-macvlan=INTERFACE\n"
172                "                            Create a macvlan network interface based on an\n"
173                "                            existing network interface to the container\n"
174                "     --network-veth         Add a virtual ethernet connection between host\n"
175                "                            and container\n"
176                "     --network-bridge=INTERFACE\n"
177                "                            Add a virtual ethernet connection between host\n"
178                "                            and container and add it to an existing bridge on\n"
179                "                            the host\n"
180                "  -Z --selinux-context=SECLABEL\n"
181                "                            Set the SELinux security context to be used by\n"
182                "                            processes in the container\n"
183                "  -L --selinux-apifs-context=SECLABEL\n"
184                "                            Set the SELinux security context to be used by\n"
185                "                            API/tmpfs file systems in the container\n"
186                "     --capability=CAP       In addition to the default, retain specified\n"
187                "                            capability\n"
188                "     --drop-capability=CAP  Drop the specified capability from the default set\n"
189                "     --link-journal=MODE    Link up guest journal, one of no, auto, guest, host\n"
190                "  -j                        Equivalent to --link-journal=host\n"
191                "     --read-only            Mount the root directory read-only\n"
192                "     --bind=PATH[:PATH]     Bind mount a file or directory from the host into\n"
193                "                            the container\n"
194                "     --bind-ro=PATH[:PATH]  Similar, but creates a read-only bind mount\n"
195                "     --setenv=NAME=VALUE    Pass an environment variable to PID 1\n"
196                "     --share-system         Share system namespaces with host\n"
197                "     --register=BOOLEAN     Register container as machine\n"
198                "     --keep-unit            Do not register a scope for the machine, reuse\n"
199                "                            the service unit nspawn is running in\n",
200                program_invocation_short_name);
201
202         return 0;
203 }
204
205 static int parse_argv(int argc, char *argv[]) {
206
207         enum {
208                 ARG_VERSION = 0x100,
209                 ARG_PRIVATE_NETWORK,
210                 ARG_UUID,
211                 ARG_READ_ONLY,
212                 ARG_CAPABILITY,
213                 ARG_DROP_CAPABILITY,
214                 ARG_LINK_JOURNAL,
215                 ARG_BIND,
216                 ARG_BIND_RO,
217                 ARG_SETENV,
218                 ARG_SHARE_SYSTEM,
219                 ARG_REGISTER,
220                 ARG_KEEP_UNIT,
221                 ARG_NETWORK_INTERFACE,
222                 ARG_NETWORK_MACVLAN,
223                 ARG_NETWORK_VETH,
224                 ARG_NETWORK_BRIDGE,
225                 ARG_PERSONALITY,
226         };
227
228         static const struct option options[] = {
229                 { "help",                  no_argument,       NULL, 'h'                   },
230                 { "version",               no_argument,       NULL, ARG_VERSION           },
231                 { "directory",             required_argument, NULL, 'D'                   },
232                 { "user",                  required_argument, NULL, 'u'                   },
233                 { "private-network",       no_argument,       NULL, ARG_PRIVATE_NETWORK   },
234                 { "boot",                  no_argument,       NULL, 'b'                   },
235                 { "uuid",                  required_argument, NULL, ARG_UUID              },
236                 { "read-only",             no_argument,       NULL, ARG_READ_ONLY         },
237                 { "capability",            required_argument, NULL, ARG_CAPABILITY        },
238                 { "drop-capability",       required_argument, NULL, ARG_DROP_CAPABILITY   },
239                 { "link-journal",          required_argument, NULL, ARG_LINK_JOURNAL      },
240                 { "bind",                  required_argument, NULL, ARG_BIND              },
241                 { "bind-ro",               required_argument, NULL, ARG_BIND_RO           },
242                 { "machine",               required_argument, NULL, 'M'                   },
243                 { "slice",                 required_argument, NULL, 'S'                   },
244                 { "setenv",                required_argument, NULL, ARG_SETENV            },
245                 { "selinux-context",       required_argument, NULL, 'Z'                   },
246                 { "selinux-apifs-context", required_argument, NULL, 'L'                   },
247                 { "quiet",                 no_argument,       NULL, 'q'                   },
248                 { "share-system",          no_argument,       NULL, ARG_SHARE_SYSTEM      },
249                 { "register",              required_argument, NULL, ARG_REGISTER          },
250                 { "keep-unit",             no_argument,       NULL, ARG_KEEP_UNIT         },
251                 { "network-interface",     required_argument, NULL, ARG_NETWORK_INTERFACE },
252                 { "network-macvlan",       required_argument, NULL, ARG_NETWORK_MACVLAN   },
253                 { "network-veth",          no_argument,       NULL, ARG_NETWORK_VETH      },
254                 { "network-bridge",        required_argument, NULL, ARG_NETWORK_BRIDGE    },
255                 { "personality",           required_argument, NULL, ARG_PERSONALITY       },
256                 { "image",                 required_argument, NULL, 'i'                   },
257                 {}
258         };
259
260         int c, r;
261         uint64_t plus = 0, minus = 0;
262
263         assert(argc >= 0);
264         assert(argv);
265
266         while ((c = getopt_long(argc, argv, "+hD:u:bL:M:jS:Z:qi:", options, NULL)) >= 0) {
267
268                 switch (c) {
269
270                 case 'h':
271                         return help();
272
273                 case ARG_VERSION:
274                         puts(PACKAGE_STRING);
275                         puts(SYSTEMD_FEATURES);
276                         return 0;
277
278                 case 'D':
279                         free(arg_directory);
280                         arg_directory = canonicalize_file_name(optarg);
281                         if (!arg_directory) {
282                                 log_error("Invalid root directory: %m");
283                                 return -ENOMEM;
284                         }
285
286                         break;
287
288                 case 'i':
289                         arg_image = optarg;
290                         break;
291
292                 case 'u':
293                         free(arg_user);
294                         arg_user = strdup(optarg);
295                         if (!arg_user)
296                                 return log_oom();
297
298                         break;
299
300                 case ARG_NETWORK_BRIDGE:
301                         arg_network_bridge = optarg;
302
303                         /* fall through */
304
305                 case ARG_NETWORK_VETH:
306                         arg_network_veth = true;
307                         arg_private_network = true;
308                         break;
309
310                 case ARG_NETWORK_INTERFACE:
311                         if (strv_extend(&arg_network_interfaces, optarg) < 0)
312                                 return log_oom();
313
314                         arg_private_network = true;
315                         break;
316
317                 case ARG_NETWORK_MACVLAN:
318                         if (strv_extend(&arg_network_macvlan, optarg) < 0)
319                                 return log_oom();
320
321                         /* fall through */
322
323                 case ARG_PRIVATE_NETWORK:
324                         arg_private_network = true;
325                         break;
326
327                 case 'b':
328                         arg_boot = true;
329                         break;
330
331                 case ARG_UUID:
332                         r = sd_id128_from_string(optarg, &arg_uuid);
333                         if (r < 0) {
334                                 log_error("Invalid UUID: %s", optarg);
335                                 return r;
336                         }
337                         break;
338
339                 case 'S':
340                         arg_slice = optarg;
341                         break;
342
343                 case 'M':
344                         if (isempty(optarg)) {
345                                 free(arg_machine);
346                                 arg_machine = NULL;
347                         } else {
348
349                                 if (!hostname_is_valid(optarg)) {
350                                         log_error("Invalid machine name: %s", optarg);
351                                         return -EINVAL;
352                                 }
353
354                                 free(arg_machine);
355                                 arg_machine = strdup(optarg);
356                                 if (!arg_machine)
357                                         return log_oom();
358
359                                 break;
360                         }
361
362                 case 'Z':
363                         arg_selinux_context = optarg;
364                         break;
365
366                 case 'L':
367                         arg_selinux_apifs_context = optarg;
368                         break;
369
370                 case ARG_READ_ONLY:
371                         arg_read_only = true;
372                         break;
373
374                 case ARG_CAPABILITY:
375                 case ARG_DROP_CAPABILITY: {
376                         char *state, *word;
377                         size_t length;
378
379                         FOREACH_WORD_SEPARATOR(word, length, optarg, ",", state) {
380                                 _cleanup_free_ char *t;
381                                 cap_value_t cap;
382
383                                 t = strndup(word, length);
384                                 if (!t)
385                                         return log_oom();
386
387                                 if (streq(t, "all")) {
388                                         if (c == ARG_CAPABILITY)
389                                                 plus = (uint64_t) -1;
390                                         else
391                                                 minus = (uint64_t) -1;
392                                 } else {
393                                         if (cap_from_name(t, &cap) < 0) {
394                                                 log_error("Failed to parse capability %s.", t);
395                                                 return -EINVAL;
396                                         }
397
398                                         if (c == ARG_CAPABILITY)
399                                                 plus |= 1ULL << (uint64_t) cap;
400                                         else
401                                                 minus |= 1ULL << (uint64_t) cap;
402                                 }
403                         }
404
405                         break;
406                 }
407
408                 case 'j':
409                         arg_link_journal = LINK_GUEST;
410                         break;
411
412                 case ARG_LINK_JOURNAL:
413                         if (streq(optarg, "auto"))
414                                 arg_link_journal = LINK_AUTO;
415                         else if (streq(optarg, "no"))
416                                 arg_link_journal = LINK_NO;
417                         else if (streq(optarg, "guest"))
418                                 arg_link_journal = LINK_GUEST;
419                         else if (streq(optarg, "host"))
420                                 arg_link_journal = LINK_HOST;
421                         else {
422                                 log_error("Failed to parse link journal mode %s", optarg);
423                                 return -EINVAL;
424                         }
425
426                         break;
427
428                 case ARG_BIND:
429                 case ARG_BIND_RO: {
430                         _cleanup_free_ char *a = NULL, *b = NULL;
431                         char *e;
432                         char ***x;
433
434                         x = c == ARG_BIND ? &arg_bind : &arg_bind_ro;
435
436                         e = strchr(optarg, ':');
437                         if (e) {
438                                 a = strndup(optarg, e - optarg);
439                                 b = strdup(e + 1);
440                         } else {
441                                 a = strdup(optarg);
442                                 b = strdup(optarg);
443                         }
444
445                         if (!a || !b)
446                                 return log_oom();
447
448                         if (!path_is_absolute(a) || !path_is_absolute(b)) {
449                                 log_error("Invalid bind mount specification: %s", optarg);
450                                 return -EINVAL;
451                         }
452
453                         r = strv_extend(x, a);
454                         if (r < 0)
455                                 return log_oom();
456
457                         r = strv_extend(x, b);
458                         if (r < 0)
459                                 return log_oom();
460
461                         break;
462                 }
463
464                 case ARG_SETENV: {
465                         char **n;
466
467                         if (!env_assignment_is_valid(optarg)) {
468                                 log_error("Environment variable assignment '%s' is not valid.", optarg);
469                                 return -EINVAL;
470                         }
471
472                         n = strv_env_set(arg_setenv, optarg);
473                         if (!n)
474                                 return log_oom();
475
476                         strv_free(arg_setenv);
477                         arg_setenv = n;
478                         break;
479                 }
480
481                 case 'q':
482                         arg_quiet = true;
483                         break;
484
485                 case ARG_SHARE_SYSTEM:
486                         arg_share_system = true;
487                         break;
488
489                 case ARG_REGISTER:
490                         r = parse_boolean(optarg);
491                         if (r < 0) {
492                                 log_error("Failed to parse --register= argument: %s", optarg);
493                                 return r;
494                         }
495
496                         arg_register = r;
497                         break;
498
499                 case ARG_KEEP_UNIT:
500                         arg_keep_unit = true;
501                         break;
502
503                 case ARG_PERSONALITY:
504
505                         arg_personality = personality_from_string(optarg);
506                         if (arg_personality == 0xffffffffLU) {
507                                 log_error("Unknown or unsupported personality '%s'.", optarg);
508                                 return -EINVAL;
509                         }
510
511                         break;
512
513                 case '?':
514                         return -EINVAL;
515
516                 default:
517                         assert_not_reached("Unhandled option");
518                 }
519         }
520
521         if (arg_share_system)
522                 arg_register = false;
523
524         if (arg_boot && arg_share_system) {
525                 log_error("--boot and --share-system may not be combined.");
526                 return -EINVAL;
527         }
528
529         if (arg_keep_unit && cg_pid_get_owner_uid(0, NULL) >= 0) {
530                 log_error("--keep-unit may not be used when invoked from a user session.");
531                 return -EINVAL;
532         }
533
534         if (arg_directory && arg_image) {
535                 log_error("--directory= and --image= may not be combined.");
536                 return -EINVAL;
537         }
538
539         arg_retain = (arg_retain | plus | (arg_private_network ? 1ULL << CAP_NET_ADMIN : 0)) & ~minus;
540
541         return 1;
542 }
543
544 static int mount_all(const char *dest) {
545
546         typedef struct MountPoint {
547                 const char *what;
548                 const char *where;
549                 const char *type;
550                 const char *options;
551                 unsigned long flags;
552                 bool fatal;
553         } MountPoint;
554
555         static const MountPoint mount_table[] = {
556                 { "proc",      "/proc",     "proc",  NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
557                 { "/proc/sys", "/proc/sys", NULL,    NULL,       MS_BIND, true                       },   /* Bind mount first */
558                 { NULL,        "/proc/sys", NULL,    NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT, true  },   /* Then, make it r/o */
559                 { "sysfs",     "/sys",      "sysfs", NULL,       MS_RDONLY|MS_NOSUID|MS_NOEXEC|MS_NODEV, true  },
560                 { "tmpfs",     "/dev",      "tmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,     true  },
561                 { "devpts",    "/dev/pts",  "devpts","newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, true },
562                 { "tmpfs",     "/dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
563                 { "tmpfs",     "/run",      "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true  },
564 #ifdef HAVE_SELINUX
565                 { "/sys/fs/selinux", "/sys/fs/selinux", NULL, NULL, MS_BIND,                      false },  /* Bind mount first */
566                 { NULL,              "/sys/fs/selinux", NULL, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, false },  /* Then, make it r/o */
567 #endif
568         };
569
570         unsigned k;
571         int r = 0;
572
573         for (k = 0; k < ELEMENTSOF(mount_table); k++) {
574                 _cleanup_free_ char *where = NULL;
575 #ifdef HAVE_SELINUX
576                 _cleanup_free_ char *options = NULL;
577 #endif
578                 const char *o;
579                 int t;
580
581                 where = strjoin(dest, "/", mount_table[k].where, NULL);
582                 if (!where)
583                         return log_oom();
584
585                 t = path_is_mount_point(where, true);
586                 if (t < 0) {
587                         log_error("Failed to detect whether %s is a mount point: %s", where, strerror(-t));
588
589                         if (r == 0)
590                                 r = t;
591
592                         continue;
593                 }
594
595                 /* Skip this entry if it is not a remount. */
596                 if (mount_table[k].what && t > 0)
597                         continue;
598
599                 mkdir_p(where, 0755);
600
601 #ifdef HAVE_SELINUX
602                 if (arg_selinux_apifs_context &&
603                     (streq_ptr(mount_table[k].what, "tmpfs") || streq_ptr(mount_table[k].what, "devpts"))) {
604                         options = strjoin(mount_table[k].options, ",context=\"", arg_selinux_apifs_context, "\"", NULL);
605                         if (!options)
606                                 return log_oom();
607
608                         o = options;
609                 } else
610 #endif
611                         o = mount_table[k].options;
612
613
614                 if (mount(mount_table[k].what,
615                           where,
616                           mount_table[k].type,
617                           mount_table[k].flags,
618                           o) < 0 &&
619                     mount_table[k].fatal) {
620
621                         log_error("mount(%s) failed: %m", where);
622
623                         if (r == 0)
624                                 r = -errno;
625                 }
626         }
627
628         return r;
629 }
630
631 static int mount_binds(const char *dest, char **l, unsigned long flags) {
632         char **x, **y;
633
634         STRV_FOREACH_PAIR(x, y, l) {
635                 char *where;
636                 struct stat source_st, dest_st;
637                 int r;
638
639                 if (stat(*x, &source_st) < 0) {
640                         log_error("Failed to stat %s: %m", *x);
641                         return -errno;
642                 }
643
644                 where = strappenda(dest, *y);
645                 r = stat(where, &dest_st);
646                 if (r == 0) {
647                         if ((source_st.st_mode & S_IFMT) != (dest_st.st_mode & S_IFMT)) {
648                                 log_error("The file types of %s and %s do not match. Refusing bind mount",
649                                                 *x, where);
650                                 return -EINVAL;
651                         }
652                 } else if (errno == ENOENT) {
653                         r = mkdir_parents_label(where, 0755);
654                         if (r < 0) {
655                                 log_error("Failed to bind mount %s: %s", *x, strerror(-r));
656                                 return r;
657                         }
658                 } else {
659                         log_error("Failed to bind mount %s: %s", *x, strerror(errno));
660                         return -errno;
661                 }
662                 /* Create the mount point, but be conservative -- refuse to create block
663                 * and char devices. */
664                 if (S_ISDIR(source_st.st_mode))
665                         mkdir_label(where, 0755);
666                 else if (S_ISFIFO(source_st.st_mode))
667                         mkfifo(where, 0644);
668                 else if (S_ISSOCK(source_st.st_mode))
669                         mknod(where, 0644 | S_IFSOCK, 0);
670                 else if (S_ISREG(source_st.st_mode))
671                         touch(where);
672                 else {
673                         log_error("Refusing to create mountpoint for file: %s", *x);
674                         return -ENOTSUP;
675                 }
676
677                 if (mount(*x, where, "bind", MS_BIND, NULL) < 0) {
678                         log_error("mount(%s) failed: %m", where);
679                         return -errno;
680                 }
681
682                 if (flags && mount(NULL, where, NULL, MS_REMOUNT|MS_BIND|flags, NULL) < 0) {
683                         log_error("mount(%s) failed: %m", where);
684                         return -errno;
685                 }
686         }
687
688         return 0;
689 }
690
691 static int setup_timezone(const char *dest) {
692         _cleanup_free_ char *where = NULL, *p = NULL, *q = NULL, *check = NULL, *what = NULL;
693         char *z, *y;
694         int r;
695
696         assert(dest);
697
698         /* Fix the timezone, if possible */
699         r = readlink_malloc("/etc/localtime", &p);
700         if (r < 0) {
701                 log_warning("/etc/localtime is not a symlink, not updating container timezone.");
702                 return 0;
703         }
704
705         z = path_startswith(p, "../usr/share/zoneinfo/");
706         if (!z)
707                 z = path_startswith(p, "/usr/share/zoneinfo/");
708         if (!z) {
709                 log_warning("/etc/localtime does not point into /usr/share/zoneinfo/, not updating container timezone.");
710                 return 0;
711         }
712
713         where = strappend(dest, "/etc/localtime");
714         if (!where)
715                 return log_oom();
716
717         r = readlink_malloc(where, &q);
718         if (r >= 0) {
719                 y = path_startswith(q, "../usr/share/zoneinfo/");
720                 if (!y)
721                         y = path_startswith(q, "/usr/share/zoneinfo/");
722
723
724                 /* Already pointing to the right place? Then do nothing .. */
725                 if (y && streq(y, z))
726                         return 0;
727         }
728
729         check = strjoin(dest, "/usr/share/zoneinfo/", z, NULL);
730         if (!check)
731                 return log_oom();
732
733         if (access(check, F_OK) < 0) {
734                 log_warning("Timezone %s does not exist in container, not updating container timezone.", z);
735                 return 0;
736         }
737
738         what = strappend("../usr/share/zoneinfo/", z);
739         if (!what)
740                 return log_oom();
741
742         unlink(where);
743         if (symlink(what, where) < 0) {
744                 log_error("Failed to correct timezone of container: %m");
745                 return 0;
746         }
747
748         return 0;
749 }
750
751 static int setup_resolv_conf(const char *dest) {
752         char _cleanup_free_ *where = NULL;
753
754         assert(dest);
755
756         if (arg_private_network)
757                 return 0;
758
759         /* Fix resolv.conf, if possible */
760         where = strappend(dest, "/etc/resolv.conf");
761         if (!where)
762                 return log_oom();
763
764         /* We don't really care for the results of this really. If it
765          * fails, it fails, but meh... */
766         copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW);
767
768         return 0;
769 }
770
771 static int setup_boot_id(const char *dest) {
772         _cleanup_free_ char *from = NULL, *to = NULL;
773         sd_id128_t rnd = {};
774         char as_uuid[37];
775         int r;
776
777         assert(dest);
778
779         if (arg_share_system)
780                 return 0;
781
782         /* Generate a new randomized boot ID, so that each boot-up of
783          * the container gets a new one */
784
785         from = strappend(dest, "/dev/proc-sys-kernel-random-boot-id");
786         to = strappend(dest, "/proc/sys/kernel/random/boot_id");
787         if (!from || !to)
788                 return log_oom();
789
790         r = sd_id128_randomize(&rnd);
791         if (r < 0) {
792                 log_error("Failed to generate random boot id: %s", strerror(-r));
793                 return r;
794         }
795
796         snprintf(as_uuid, sizeof(as_uuid),
797                  "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
798                  SD_ID128_FORMAT_VAL(rnd));
799         char_array_0(as_uuid);
800
801         r = write_string_file(from, as_uuid);
802         if (r < 0) {
803                 log_error("Failed to write boot id: %s", strerror(-r));
804                 return r;
805         }
806
807         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
808                 log_error("Failed to bind mount boot id: %m");
809                 r = -errno;
810         } else if (mount(from, to, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY, NULL))
811                 log_warning("Failed to make boot id read-only: %m");
812
813         unlink(from);
814         return r;
815 }
816
817 static int copy_devnodes(const char *dest) {
818
819         static const char devnodes[] =
820                 "null\0"
821                 "zero\0"
822                 "full\0"
823                 "random\0"
824                 "urandom\0"
825                 "tty\0";
826
827         const char *d;
828         int r = 0;
829         _cleanup_umask_ mode_t u;
830
831         assert(dest);
832
833         u = umask(0000);
834
835         NULSTR_FOREACH(d, devnodes) {
836                 _cleanup_free_ char *from = NULL, *to = NULL;
837                 struct stat st;
838
839                 from = strappend("/dev/", d);
840                 to = strjoin(dest, "/dev/", d, NULL);
841                 if (!from || !to)
842                         return log_oom();
843
844                 if (stat(from, &st) < 0) {
845
846                         if (errno != ENOENT) {
847                                 log_error("Failed to stat %s: %m", from);
848                                 return -errno;
849                         }
850
851                 } else if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
852
853                         log_error("%s is not a char or block device, cannot copy", from);
854                         return -EIO;
855
856                 } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
857
858                         log_error("mknod(%s) failed: %m", dest);
859                         return  -errno;
860                 }
861         }
862
863         return r;
864 }
865
866 static int setup_ptmx(const char *dest) {
867         _cleanup_free_ char *p = NULL;
868
869         p = strappend(dest, "/dev/ptmx");
870         if (!p)
871                 return log_oom();
872
873         if (symlink("pts/ptmx", p) < 0) {
874                 log_error("Failed to create /dev/ptmx symlink: %m");
875                 return -errno;
876         }
877
878         return 0;
879 }
880
881 static int setup_dev_console(const char *dest, const char *console) {
882         _cleanup_umask_ mode_t u;
883         const char *to;
884         struct stat st;
885         int r;
886
887         assert(dest);
888         assert(console);
889
890         u = umask(0000);
891
892         if (stat("/dev/null", &st) < 0) {
893                 log_error("Failed to stat /dev/null: %m");
894                 return -errno;
895         }
896
897         r = chmod_and_chown(console, 0600, 0, 0);
898         if (r < 0) {
899                 log_error("Failed to correct access mode for TTY: %s", strerror(-r));
900                 return r;
901         }
902
903         /* We need to bind mount the right tty to /dev/console since
904          * ptys can only exist on pts file systems. To have something
905          * to bind mount things on we create a device node first, and
906          * use /dev/null for that since we the cgroups device policy
907          * allows us to create that freely, while we cannot create
908          * /dev/console. (Note that the major minor doesn't actually
909          * matter here, since we mount it over anyway). */
910
911         to = strappenda(dest, "/dev/console");
912         if (mknod(to, (st.st_mode & ~07777) | 0600, st.st_rdev) < 0) {
913                 log_error("mknod() for /dev/console failed: %m");
914                 return -errno;
915         }
916
917         if (mount(console, to, "bind", MS_BIND, NULL) < 0) {
918                 log_error("Bind mount for /dev/console failed: %m");
919                 return -errno;
920         }
921
922         return 0;
923 }
924
925 static int setup_kmsg(const char *dest, int kmsg_socket) {
926         _cleanup_free_ char *from = NULL, *to = NULL;
927         int r, fd, k;
928         _cleanup_umask_ mode_t u;
929         union {
930                 struct cmsghdr cmsghdr;
931                 uint8_t buf[CMSG_SPACE(sizeof(int))];
932         } control = {};
933         struct msghdr mh = {
934                 .msg_control = &control,
935                 .msg_controllen = sizeof(control),
936         };
937         struct cmsghdr *cmsg;
938
939         assert(dest);
940         assert(kmsg_socket >= 0);
941
942         u = umask(0000);
943
944         /* We create the kmsg FIFO as /dev/kmsg, but immediately
945          * delete it after bind mounting it to /proc/kmsg. While FIFOs
946          * on the reading side behave very similar to /proc/kmsg,
947          * their writing side behaves differently from /dev/kmsg in
948          * that writing blocks when nothing is reading. In order to
949          * avoid any problems with containers deadlocking due to this
950          * we simply make /dev/kmsg unavailable to the container. */
951         if (asprintf(&from, "%s/dev/kmsg", dest) < 0 ||
952             asprintf(&to, "%s/proc/kmsg", dest) < 0)
953                 return log_oom();
954
955         if (mkfifo(from, 0600) < 0) {
956                 log_error("mkfifo() for /dev/kmsg failed: %m");
957                 return -errno;
958         }
959
960         r = chmod_and_chown(from, 0600, 0, 0);
961         if (r < 0) {
962                 log_error("Failed to correct access mode for /dev/kmsg: %s", strerror(-r));
963                 return r;
964         }
965
966         if (mount(from, to, "bind", MS_BIND, NULL) < 0) {
967                 log_error("Bind mount for /proc/kmsg failed: %m");
968                 return -errno;
969         }
970
971         fd = open(from, O_RDWR|O_NDELAY|O_CLOEXEC);
972         if (fd < 0) {
973                 log_error("Failed to open fifo: %m");
974                 return -errno;
975         }
976
977         cmsg = CMSG_FIRSTHDR(&mh);
978         cmsg->cmsg_level = SOL_SOCKET;
979         cmsg->cmsg_type = SCM_RIGHTS;
980         cmsg->cmsg_len = CMSG_LEN(sizeof(int));
981         memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
982
983         mh.msg_controllen = cmsg->cmsg_len;
984
985         /* Store away the fd in the socket, so that it stays open as
986          * long as we run the child */
987         k = sendmsg(kmsg_socket, &mh, MSG_DONTWAIT|MSG_NOSIGNAL);
988         close_nointr_nofail(fd);
989
990         if (k < 0) {
991                 log_error("Failed to send FIFO fd: %m");
992                 return -errno;
993         }
994
995         /* And now make the FIFO unavailable as /dev/kmsg... */
996         unlink(from);
997         return 0;
998 }
999
1000 static int setup_hostname(void) {
1001
1002         if (arg_share_system)
1003                 return 0;
1004
1005         if (sethostname(arg_machine, strlen(arg_machine)) < 0)
1006                 return -errno;
1007
1008         return 0;
1009 }
1010
1011 static int setup_journal(const char *directory) {
1012         sd_id128_t machine_id, this_id;
1013         _cleanup_free_ char *p = NULL, *b = NULL, *q = NULL, *d = NULL;
1014         char *id;
1015         int r;
1016
1017         p = strappend(directory, "/etc/machine-id");
1018         if (!p)
1019                 return log_oom();
1020
1021         r = read_one_line_file(p, &b);
1022         if (r == -ENOENT && arg_link_journal == LINK_AUTO)
1023                 return 0;
1024         else if (r < 0) {
1025                 log_error("Failed to read machine ID from %s: %s", p, strerror(-r));
1026                 return r;
1027         }
1028
1029         id = strstrip(b);
1030         if (isempty(id) && arg_link_journal == LINK_AUTO)
1031                 return 0;
1032
1033         /* Verify validity */
1034         r = sd_id128_from_string(id, &machine_id);
1035         if (r < 0) {
1036                 log_error("Failed to parse machine ID from %s: %s", p, strerror(-r));
1037                 return r;
1038         }
1039
1040         r = sd_id128_get_machine(&this_id);
1041         if (r < 0) {
1042                 log_error("Failed to retrieve machine ID: %s", strerror(-r));
1043                 return r;
1044         }
1045
1046         if (sd_id128_equal(machine_id, this_id)) {
1047                 log_full(arg_link_journal == LINK_AUTO ? LOG_WARNING : LOG_ERR,
1048                          "Host and machine ids are equal (%s): refusing to link journals", id);
1049                 if (arg_link_journal == LINK_AUTO)
1050                         return 0;
1051                 return
1052                         -EEXIST;
1053         }
1054
1055         if (arg_link_journal == LINK_NO)
1056                 return 0;
1057
1058         free(p);
1059         p = strappend("/var/log/journal/", id);
1060         q = strjoin(directory, "/var/log/journal/", id, NULL);
1061         if (!p || !q)
1062                 return log_oom();
1063
1064         if (path_is_mount_point(p, false) > 0) {
1065                 if (arg_link_journal != LINK_AUTO) {
1066                         log_error("%s: already a mount point, refusing to use for journal", p);
1067                         return -EEXIST;
1068                 }
1069
1070                 return 0;
1071         }
1072
1073         if (path_is_mount_point(q, false) > 0) {
1074                 if (arg_link_journal != LINK_AUTO) {
1075                         log_error("%s: already a mount point, refusing to use for journal", q);
1076                         return -EEXIST;
1077                 }
1078
1079                 return 0;
1080         }
1081
1082         r = readlink_and_make_absolute(p, &d);
1083         if (r >= 0) {
1084                 if ((arg_link_journal == LINK_GUEST ||
1085                      arg_link_journal == LINK_AUTO) &&
1086                     path_equal(d, q)) {
1087
1088                         r = mkdir_p(q, 0755);
1089                         if (r < 0)
1090                                 log_warning("failed to create directory %s: %m", q);
1091                         return 0;
1092                 }
1093
1094                 if (unlink(p) < 0) {
1095                         log_error("Failed to remove symlink %s: %m", p);
1096                         return -errno;
1097                 }
1098         } else if (r == -EINVAL) {
1099
1100                 if (arg_link_journal == LINK_GUEST &&
1101                     rmdir(p) < 0) {
1102
1103                         if (errno == ENOTDIR) {
1104                                 log_error("%s already exists and is neither a symlink nor a directory", p);
1105                                 return r;
1106                         } else {
1107                                 log_error("Failed to remove %s: %m", p);
1108                                 return -errno;
1109                         }
1110                 }
1111         } else if (r != -ENOENT) {
1112                 log_error("readlink(%s) failed: %m", p);
1113                 return r;
1114         }
1115
1116         if (arg_link_journal == LINK_GUEST) {
1117
1118                 if (symlink(q, p) < 0) {
1119                         log_error("Failed to symlink %s to %s: %m", q, p);
1120                         return -errno;
1121                 }
1122
1123                 r = mkdir_p(q, 0755);
1124                 if (r < 0)
1125                         log_warning("failed to create directory %s: %m", q);
1126                 return 0;
1127         }
1128
1129         if (arg_link_journal == LINK_HOST) {
1130                 r = mkdir_p(p, 0755);
1131                 if (r < 0) {
1132                         log_error("Failed to create %s: %m", p);
1133                         return r;
1134                 }
1135
1136         } else if (access(p, F_OK) < 0)
1137                 return 0;
1138
1139         if (dir_is_empty(q) == 0) {
1140                 log_error("%s not empty.", q);
1141                 return -ENOTEMPTY;
1142         }
1143
1144         r = mkdir_p(q, 0755);
1145         if (r < 0) {
1146                 log_error("Failed to create %s: %m", q);
1147                 return r;
1148         }
1149
1150         if (mount(p, q, "bind", MS_BIND, NULL) < 0) {
1151                 log_error("Failed to bind mount journal from host into guest: %m");
1152                 return -errno;
1153         }
1154
1155         return 0;
1156 }
1157
1158 static int setup_kdbus(const char *dest, const char *path) {
1159         const char *p;
1160
1161         if (!path)
1162                 return 0;
1163
1164         p = strappenda(dest, "/dev/kdbus");
1165         if (mkdir(p, 0755) < 0) {
1166                 log_error("Failed to create kdbus path: %m");
1167                 return  -errno;
1168         }
1169
1170         if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
1171                 log_error("Failed to mount kdbus domain path: %m");
1172                 return -errno;
1173         }
1174
1175         return 0;
1176 }
1177
1178 static int drop_capabilities(void) {
1179         return capability_bounding_set_drop(~arg_retain, false);
1180 }
1181
1182 static int register_machine(pid_t pid) {
1183         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1184         _cleanup_bus_unref_ sd_bus *bus = NULL;
1185         int r;
1186
1187         if (!arg_register)
1188                 return 0;
1189
1190         r = sd_bus_default_system(&bus);
1191         if (r < 0) {
1192                 log_error("Failed to open system bus: %s", strerror(-r));
1193                 return r;
1194         }
1195
1196         if (arg_keep_unit) {
1197                 r = sd_bus_call_method(
1198                                 bus,
1199                                 "org.freedesktop.machine1",
1200                                 "/org/freedesktop/machine1",
1201                                 "org.freedesktop.machine1.Manager",
1202                                 "RegisterMachine",
1203                                 &error,
1204                                 NULL,
1205                                 "sayssus",
1206                                 arg_machine,
1207                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1208                                 "nspawn",
1209                                 "container",
1210                                 (uint32_t) pid,
1211                                 strempty(arg_directory));
1212         } else {
1213                 _cleanup_bus_message_unref_ sd_bus_message *m = NULL;
1214
1215                 r = sd_bus_message_new_method_call(
1216                                 bus,
1217                                 &m,
1218                                 "org.freedesktop.machine1",
1219                                 "/org/freedesktop/machine1",
1220                                 "org.freedesktop.machine1.Manager",
1221                                 "CreateMachine");
1222                 if (r < 0) {
1223                         log_error("Failed to create message: %s", strerror(-r));
1224                         return r;
1225                 }
1226
1227                 r = sd_bus_message_append(
1228                                 m,
1229                                 "sayssus",
1230                                 arg_machine,
1231                                 SD_BUS_MESSAGE_APPEND_ID128(arg_uuid),
1232                                 "nspawn",
1233                                 "container",
1234                                 (uint32_t) pid,
1235                                 strempty(arg_directory));
1236                 if (r < 0) {
1237                         log_error("Failed to append message arguments: %s", strerror(-r));
1238                         return r;
1239                 }
1240
1241                 r = sd_bus_message_open_container(m, 'a', "(sv)");
1242                 if (r < 0) {
1243                         log_error("Failed to open container: %s", strerror(-r));
1244                         return r;
1245                 }
1246
1247                 if (!isempty(arg_slice)) {
1248                         r = sd_bus_message_append(m, "(sv)", "Slice", "s", arg_slice);
1249                         if (r < 0) {
1250                                 log_error("Failed to append slice: %s", strerror(-r));
1251                                 return r;
1252                         }
1253                 }
1254
1255                 r = sd_bus_message_append(m, "(sv)", "DevicePolicy", "s", "strict");
1256                 if (r < 0) {
1257                         log_error("Failed to add device policy: %s", strerror(-r));
1258                         return r;
1259                 }
1260
1261                 r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 8,
1262                                           /* Allow the container to
1263                                            * access and create the API
1264                                            * device nodes, so that
1265                                            * PrivateDevices= in the
1266                                            * container can work
1267                                            * fine */
1268                                           "/dev/null", "rwm",
1269                                           "/dev/zero", "rwm",
1270                                           "/dev/full", "rwm",
1271                                           "/dev/random", "rwm",
1272                                           "/dev/urandom", "rwm",
1273                                           "/dev/tty", "rwm",
1274                                           /* Allow the container
1275                                            * access to ptys. However,
1276                                            * do not permit the
1277                                            * container to ever create
1278                                            * these device nodes. */
1279                                           "/dev/pts/ptmx", "rw",
1280                                           "char-pts", "rw");
1281                 if (r < 0) {
1282                         log_error("Failed to add device whitelist: %s", strerror(-r));
1283                         return r;
1284                 }
1285
1286                 r = sd_bus_message_close_container(m);
1287                 if (r < 0) {
1288                         log_error("Failed to close container: %s", strerror(-r));
1289                         return r;
1290                 }
1291
1292                 r = sd_bus_call(bus, m, 0, &error, NULL);
1293         }
1294
1295         if (r < 0) {
1296                 log_error("Failed to register machine: %s", bus_error_message(&error, r));
1297                 return r;
1298         }
1299
1300         return 0;
1301 }
1302
1303 static int terminate_machine(pid_t pid) {
1304         _cleanup_bus_error_free_ sd_bus_error error = SD_BUS_ERROR_NULL;
1305         _cleanup_bus_message_unref_ sd_bus_message *reply = NULL;
1306         _cleanup_bus_unref_ sd_bus *bus = NULL;
1307         const char *path;
1308         int r;
1309
1310         if (!arg_register)
1311                 return 0;
1312
1313         r = sd_bus_default_system(&bus);
1314         if (r < 0) {
1315                 log_error("Failed to open system bus: %s", strerror(-r));
1316                 return r;
1317         }
1318
1319         r = sd_bus_call_method(
1320                         bus,
1321                         "org.freedesktop.machine1",
1322                         "/org/freedesktop/machine1",
1323                         "org.freedesktop.machine1.Manager",
1324                         "GetMachineByPID",
1325                         &error,
1326                         &reply,
1327                         "u",
1328                         (uint32_t) pid);
1329         if (r < 0) {
1330                 /* Note that the machine might already have been
1331                  * cleaned up automatically, hence don't consider it a
1332                  * failure if we cannot get the machine object. */
1333                 log_debug("Failed to get machine: %s", bus_error_message(&error, r));
1334                 return 0;
1335         }
1336
1337         r = sd_bus_message_read(reply, "o", &path);
1338         if (r < 0)
1339                 return bus_log_parse_error(r);
1340
1341         r = sd_bus_call_method(
1342                         bus,
1343                         "org.freedesktop.machine1",
1344                         path,
1345                         "org.freedesktop.machine1.Machine",
1346                         "Terminate",
1347                         &error,
1348                         NULL,
1349                         NULL);
1350         if (r < 0) {
1351                 log_debug("Failed to terminate machine: %s", bus_error_message(&error, r));
1352                 return 0;
1353         }
1354
1355         return 0;
1356 }
1357
1358 static int reset_audit_loginuid(void) {
1359         _cleanup_free_ char *p = NULL;
1360         int r;
1361
1362         if (arg_share_system)
1363                 return 0;
1364
1365         r = read_one_line_file("/proc/self/loginuid", &p);
1366         if (r == -ENOENT)
1367                 return 0;
1368         if (r < 0) {
1369                 log_error("Failed to read /proc/self/loginuid: %s", strerror(-r));
1370                 return r;
1371         }
1372
1373         /* Already reset? */
1374         if (streq(p, "4294967295"))
1375                 return 0;
1376
1377         r = write_string_file("/proc/self/loginuid", "4294967295");
1378         if (r < 0) {
1379                 log_error("Failed to reset audit login UID. This probably means that your kernel is too\n"
1380                           "old and you have audit enabled. Note that the auditing subsystem is known to\n"
1381                           "be incompatible with containers on old kernels. Please make sure to upgrade\n"
1382                           "your kernel or to off auditing with 'audit=0' on the kernel command line before\n"
1383                           "using systemd-nspawn. Sleeping for 5s... (%s)\n", strerror(-r));
1384
1385                 sleep(5);
1386         }
1387
1388         return 0;
1389 }
1390
1391 static int setup_veth(pid_t pid, char iface_name[IFNAMSIZ]) {
1392         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1393         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1394         int r;
1395
1396         if (!arg_private_network)
1397                 return 0;
1398
1399         if (!arg_network_veth)
1400                 return 0;
1401
1402         /* Use two different interface name prefixes depending whether
1403          * we are in bridge mode or not. */
1404         if (arg_network_bridge)
1405                 memcpy(iface_name, "vb-", 3);
1406         else
1407                 memcpy(iface_name, "ve-", 3);
1408
1409         strncpy(iface_name+3, arg_machine, IFNAMSIZ - 3);
1410
1411         r = sd_rtnl_open(&rtnl, 0);
1412         if (r < 0) {
1413                 log_error("Failed to connect to netlink: %s", strerror(-r));
1414                 return r;
1415         }
1416
1417         r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1418         if (r < 0) {
1419                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1420                 return r;
1421         }
1422
1423         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, iface_name);
1424         if (r < 0) {
1425                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1426                 return r;
1427         }
1428
1429         r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1430         if (r < 0) {
1431                 log_error("Failed to open netlink container: %s", strerror(-r));
1432                 return r;
1433         }
1434
1435         r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "veth");
1436         if (r < 0) {
1437                 log_error("Failed to append netlink kind: %s", strerror(-r));
1438                 return r;
1439         }
1440
1441         r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1442         if (r < 0) {
1443                 log_error("Failed to open netlink container: %s", strerror(-r));
1444                 return r;
1445         }
1446
1447         r = sd_rtnl_message_open_container(m, VETH_INFO_PEER);
1448         if (r < 0) {
1449                 log_error("Failed to open netlink container: %s", strerror(-r));
1450                 return r;
1451         }
1452
1453         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, "host0");
1454         if (r < 0) {
1455                 log_error("Failed to add netlink interface name: %s", strerror(-r));
1456                 return r;
1457         }
1458
1459         r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1460         if (r < 0) {
1461                 log_error("Failed to add netlink namespace field: %s", strerror(-r));
1462                 return r;
1463         }
1464
1465         r = sd_rtnl_message_close_container(m);
1466         if (r < 0) {
1467                 log_error("Failed to close netlink container: %s", strerror(-r));
1468                 return r;
1469         }
1470
1471         r = sd_rtnl_message_close_container(m);
1472         if (r < 0) {
1473                 log_error("Failed to close netlink container: %s", strerror(-r));
1474                 return r;
1475         }
1476
1477         r = sd_rtnl_message_close_container(m);
1478         if (r < 0) {
1479                 log_error("Failed to close netlink container: %s", strerror(-r));
1480                 return r;
1481         }
1482
1483         r = sd_rtnl_call(rtnl, m, 0, NULL);
1484         if (r < 0) {
1485                 log_error("Failed to add new veth interfaces: %s", strerror(-r));
1486                 return r;
1487         }
1488
1489         return 0;
1490 }
1491
1492 static int setup_bridge(const char veth_name[]) {
1493         _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1494         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1495         int r, bridge;
1496
1497         if (!arg_private_network)
1498                 return 0;
1499
1500         if (!arg_network_veth)
1501                 return 0;
1502
1503         if (!arg_network_bridge)
1504                 return 0;
1505
1506         bridge = (int) if_nametoindex(arg_network_bridge);
1507         if (bridge <= 0) {
1508                 log_error("Failed to resolve interface %s: %m", arg_network_bridge);
1509                 return -errno;
1510         }
1511
1512         r = sd_rtnl_open(&rtnl, 0);
1513         if (r < 0) {
1514                 log_error("Failed to connect to netlink: %s", strerror(-r));
1515                 return r;
1516         }
1517
1518         r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, 0);
1519         if (r < 0) {
1520                 log_error("Failed to allocate netlink message: %s", strerror(-r));
1521                 return r;
1522         }
1523
1524         r = sd_rtnl_message_append_string(m, IFLA_IFNAME, veth_name);
1525         if (r < 0) {
1526                 log_error("Failed to add netlink interface name field: %s", strerror(-r));
1527                 return r;
1528         }
1529
1530         r = sd_rtnl_message_append_u32(m, IFLA_MASTER, bridge);
1531         if (r < 0) {
1532                 log_error("Failed to add netlink master field: %s", strerror(-r));
1533                 return r;
1534         }
1535
1536         r = sd_rtnl_call(rtnl, m, 0, NULL);
1537         if (r < 0) {
1538                 log_error("Failed to add veth interface to bridge: %s", strerror(-r));
1539                 return r;
1540         }
1541
1542         return 0;
1543 }
1544
1545 static int parse_interface(struct udev *udev, const char *name) {
1546         _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1547         char ifi_str[2 + DECIMAL_STR_MAX(int)];
1548         int ifi;
1549
1550         ifi = (int) if_nametoindex(name);
1551         if (ifi <= 0) {
1552                 log_error("Failed to resolve interface %s: %m", name);
1553                 return -errno;
1554         }
1555
1556         sprintf(ifi_str, "n%i", ifi);
1557         d = udev_device_new_from_device_id(udev, ifi_str);
1558         if (!d) {
1559                 log_error("Failed to get udev device for interface %s: %m", name);
1560                 return -errno;
1561         }
1562
1563         if (udev_device_get_is_initialized(d) <= 0) {
1564                 log_error("Network interface %s is not initialized yet.", name);
1565                 return -EBUSY;
1566         }
1567
1568         return ifi;
1569 }
1570
1571 static int move_network_interfaces(pid_t pid) {
1572         _cleanup_udev_unref_ struct udev *udev = NULL;
1573         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1574         char **i;
1575         int r;
1576
1577         if (!arg_private_network)
1578                 return 0;
1579
1580         if (strv_isempty(arg_network_interfaces))
1581                 return 0;
1582
1583         r = sd_rtnl_open(&rtnl, 0);
1584         if (r < 0) {
1585                 log_error("Failed to connect to netlink: %s", strerror(-r));
1586                 return r;
1587         }
1588
1589         udev = udev_new();
1590         if (!udev) {
1591                 log_error("Failed to connect to udev.");
1592                 return -ENOMEM;
1593         }
1594
1595         STRV_FOREACH(i, arg_network_interfaces) {
1596                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1597                 int ifi;
1598
1599                 ifi = parse_interface(udev, *i);
1600                 if (ifi < 0)
1601                         return ifi;
1602
1603                 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
1604                 if (r < 0) {
1605                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1606                         return r;
1607                 }
1608
1609                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1610                 if (r < 0) {
1611                         log_error("Failed to append namespace PID to netlink message: %s", strerror(-r));
1612                         return r;
1613                 }
1614
1615                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1616                 if (r < 0) {
1617                         log_error("Failed to move interface %s to namespace: %s", *i, strerror(-r));
1618                         return r;
1619                 }
1620         }
1621
1622         return 0;
1623 }
1624
1625 static int setup_macvlan(pid_t pid) {
1626         _cleanup_udev_unref_ struct udev *udev = NULL;
1627         _cleanup_rtnl_unref_ sd_rtnl *rtnl = NULL;
1628         char **i;
1629         int r;
1630
1631         if (!arg_private_network)
1632                 return 0;
1633
1634         if (strv_isempty(arg_network_macvlan))
1635                 return 0;
1636
1637         r = sd_rtnl_open(&rtnl, 0);
1638         if (r < 0) {
1639                 log_error("Failed to connect to netlink: %s", strerror(-r));
1640                 return r;
1641         }
1642
1643         udev = udev_new();
1644         if (!udev) {
1645                 log_error("Failed to connect to udev.");
1646                 return -ENOMEM;
1647         }
1648
1649         STRV_FOREACH(i, arg_network_macvlan) {
1650                 _cleanup_rtnl_message_unref_ sd_rtnl_message *m = NULL;
1651                 _cleanup_free_ char *n = NULL;
1652                 int ifi;
1653
1654                 ifi = parse_interface(udev, *i);
1655                 if (ifi < 0)
1656                         return ifi;
1657
1658                 r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, 0);
1659                 if (r < 0) {
1660                         log_error("Failed to allocate netlink message: %s", strerror(-r));
1661                         return r;
1662                 }
1663
1664                 r = sd_rtnl_message_append_u32(m, IFLA_LINK, ifi);
1665                 if (r < 0) {
1666                         log_error("Failed to add netlink interface index: %s", strerror(-r));
1667                         return r;
1668                 }
1669
1670                 n = strappend("mv-", *i);
1671                 if (!n)
1672                         return log_oom();
1673
1674                 strshorten(n, IFNAMSIZ-1);
1675
1676                 r = sd_rtnl_message_append_string(m, IFLA_IFNAME, n);
1677                 if (r < 0) {
1678                         log_error("Failed to add netlink interface name: %s", strerror(-r));
1679                         return r;
1680                 }
1681
1682                 r = sd_rtnl_message_append_u32(m, IFLA_NET_NS_PID, pid);
1683                 if (r < 0) {
1684                         log_error("Failed to add netlink namespace field: %s", strerror(-r));
1685                         return r;
1686                 }
1687
1688                 r = sd_rtnl_message_open_container(m, IFLA_LINKINFO);
1689                 if (r < 0) {
1690                         log_error("Failed to open netlink container: %s", strerror(-r));
1691                         return r;
1692                 }
1693
1694                 r = sd_rtnl_message_append_string(m, IFLA_INFO_KIND, "macvlan");
1695                 if (r < 0) {
1696                         log_error("Failed to append netlink kind: %s", strerror(-r));
1697                         return r;
1698                 }
1699
1700                 r = sd_rtnl_message_open_container(m, IFLA_INFO_DATA);
1701                 if (r < 0) {
1702                         log_error("Failed to open netlink container: %s", strerror(-r));
1703                         return r;
1704                 }
1705
1706                 r = sd_rtnl_message_append_u32(m, IFLA_MACVLAN_MODE, MACVLAN_MODE_BRIDGE);
1707                 if (r < 0) {
1708                         log_error("Failed to append macvlan mode: %s", strerror(-r));
1709                         return r;
1710                 }
1711
1712                 r = sd_rtnl_message_close_container(m);
1713                 if (r < 0) {
1714                         log_error("Failed to close netlink container: %s", strerror(-r));
1715                         return r;
1716                 }
1717
1718                 r = sd_rtnl_message_close_container(m);
1719                 if (r < 0) {
1720                         log_error("Failed to close netlink container: %s", strerror(-r));
1721                         return r;
1722                 }
1723
1724                 r = sd_rtnl_call(rtnl, m, 0, NULL);
1725                 if (r < 0) {
1726                         log_error("Failed to add new macvlan interfaces: %s", strerror(-r));
1727                         return r;
1728                 }
1729         }
1730
1731         return 0;
1732 }
1733
/* Installs a seccomp filter that allows everything except creating
 * audit netlink sockets. Without seccomp support compiled in this is
 * a no-op returning 0. Otherwise returns the result of the last
 * seccomp call: negative on failure (after logging). */
static int audit_still_doesnt_work_in_containers(void) {

#ifndef HAVE_SECCOMP
        return 0;
#else
        scmp_filter_ctx filter;
        int r;

        /* The audit subsystem does not work properly inside
         * containers, and much of the userspace audit hookup fails
         * there. Rather than cope with that we simply forbid creating
         * audit sockets: socket(AF_NETLINK, *, NETLINK_AUDIT) is made
         * to fail with EAFNOSUPPORT, which audit userspace treats as
         * "audit is disabled in the kernel". */

        filter = seccomp_init(SCMP_ACT_ALLOW);
        if (!filter)
                return log_oom();

        r = seccomp_add_secondary_archs(filter);
        if (r < 0) {
                log_error("Failed to add secondary archs to seccomp filter: %s", strerror(-r));
                goto release;
        }

        /* Match on the first (domain) and third (protocol) argument of socket() */
        r = seccomp_rule_add(
                        filter,
                        SCMP_ACT_ERRNO(EAFNOSUPPORT),
                        SCMP_SYS(socket),
                        2,
                        SCMP_A0(SCMP_CMP_EQ, AF_NETLINK),
                        SCMP_A2(SCMP_CMP_EQ, NETLINK_AUDIT));
        if (r < 0) {
                log_error("Failed to add audit seccomp rule: %s", strerror(-r));
                goto release;
        }

        r = seccomp_attr_set(filter, SCMP_FLTATR_CTL_NNP, 0);
        if (r < 0) {
                log_error("Failed to unset NO_NEW_PRIVS: %s", strerror(-r));
                goto release;
        }

        r = seccomp_load(filter);
        if (r < 0)
                log_error("Failed to install seccomp audit filter: %s", strerror(-r));

release:
        seccomp_release(filter);
        return r;
#endif

}
1790
1791 static int setup_image(char **device_path, int *loop_nr) {
1792         struct loop_info64 info = {
1793                 .lo_flags = LO_FLAGS_AUTOCLEAR|LO_FLAGS_PARTSCAN
1794         };
1795         _cleanup_close_ int fd = -1, control = -1, loop = -1;
1796         _cleanup_free_ char* loopdev = NULL;
1797         struct stat st;
1798         int r, nr;
1799
1800         assert(device_path);
1801         assert(loop_nr);
1802
1803         fd = open(arg_image, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
1804         if (fd < 0) {
1805                 log_error("Failed to open %s: %m", arg_image);
1806                 return -errno;
1807         }
1808
1809         if (fstat(fd, &st) < 0) {
1810                 log_error("Failed to stat %s: %m", arg_image);
1811                 return -errno;
1812         }
1813
1814         if (S_ISBLK(st.st_mode)) {
1815                 char *p;
1816
1817                 p = strdup(arg_image);
1818                 if (!p)
1819                         return log_oom();
1820
1821                 *device_path = p;
1822
1823                 *loop_nr = -1;
1824
1825                 r = fd;
1826                 fd = -1;
1827
1828                 return r;
1829         }
1830
1831         if (!S_ISREG(st.st_mode)) {
1832                 log_error("%s is not a regular file or block device: %m", arg_image);
1833                 return -EINVAL;
1834         }
1835
1836         control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
1837         if (control < 0) {
1838                 log_error("Failed to open /dev/loop-control: %m");
1839                 return -errno;
1840         }
1841
1842         nr = ioctl(control, LOOP_CTL_GET_FREE);
1843         if (nr < 0) {
1844                 log_error("Failed to allocate loop device: %m");
1845                 return -errno;
1846         }
1847
1848         if (asprintf(&loopdev, "/dev/loop%i", nr) < 0)
1849                 return log_oom();
1850
1851         loop = open(loopdev, O_CLOEXEC|(arg_read_only ? O_RDONLY : O_RDWR)|O_NONBLOCK|O_NOCTTY);
1852         if (loop < 0) {
1853                 log_error("Failed to open loop device %s: %m", loopdev);
1854                 return -errno;
1855         }
1856
1857         if (ioctl(loop, LOOP_SET_FD, fd) < 0) {
1858                 log_error("Failed to set loopback file descriptor on %s: %m", loopdev);
1859                 return -errno;
1860         }
1861
1862         if (arg_read_only)
1863                 info.lo_flags |= LO_FLAGS_READ_ONLY;
1864
1865         if (ioctl(loop, LOOP_SET_STATUS64, &info) < 0) {
1866                 log_error("Failed to set loopback settings on %s: %m", loopdev);
1867                 return -errno;
1868         }
1869
1870         *device_path = loopdev;
1871         loopdev = NULL;
1872
1873         *loop_nr = nr;
1874
1875         r = loop;
1876         loop = -1;
1877
1878         return r;
1879 }
1880
#ifdef HAVE_BLKID
/* Records 'node' (partition number 'nr') in *best / *best_nr unless a
 * candidate with a lower or equal partition number is stored already.
 * *best is owned by the caller and replaced by a fresh copy of 'node'.
 * Returns 0 on success (whether or not the candidate was taken), or
 * the result of log_oom() if the copy cannot be allocated. */
static int dissect_pick_lowest(char **best, int *best_nr, const char *node, int nr) {
        char *copy;

        assert(best);
        assert(best_nr);
        assert(node);

        if (*best && nr >= *best_nr)
                return 0;

        copy = strdup(node);
        if (!copy)
                return log_oom();

        free(*best);
        *best = copy;
        *best_nr = nr;

        return 0;
}
#endif

/* Looks for well-known partitions in the GPT disk image open as 'fd'.
 *
 * The partition table is probed with libblkid, then the partition
 * block devices are enumerated through udev and matched against the
 * GPT type IDs GPT_HOME, GPT_SRV and (where defined) GPT_ROOT_NATIVE
 * and GPT_ROOT_SECONDARY. If several partitions carry the same type,
 * the one with the lowest partition number wins.
 *
 * On success returns 0 and stores newly allocated device node paths:
 * *root_device is always set (to the native root if there is one,
 * otherwise the secondary root, with *secondary telling which);
 * *home_device and *srv_device are only written if such a partition
 * exists. On failure the problem is logged and a negative errno-style
 * value is returned. Without blkid support this always fails with
 * -ENOTSUP. */
static int dissect_image(
                int fd,
                char **root_device,
                char **home_device,
                char **srv_device,
                bool *secondary) {

#ifdef HAVE_BLKID
        int home_nr = -1, root_nr = -1, secondary_root_nr = -1, srv_nr = -1;
        _cleanup_free_ char *home = NULL, *root = NULL, *secondary_root = NULL, *srv = NULL;
        _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
        _cleanup_udev_device_unref_ struct udev_device *d = NULL;
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        _cleanup_udev_unref_ struct udev *udev = NULL;
        struct udev_list_entry *first, *item;
        const char *pttype = NULL;
        blkid_partlist pl;
        struct stat st;
        int r;

        assert(fd >= 0);
        assert(root_device);
        assert(home_device);
        assert(srv_device);
        assert(secondary);

        b = blkid_new_probe();
        if (!b)
                return log_oom();

        errno = 0;
        r = blkid_probe_set_device(b, fd, 0, 0);
        if (r != 0) {
                /* Failure without errno is treated as out-of-memory */
                if (errno == 0)
                        return log_oom();

                log_error("Failed to set device on blkid probe: %m");
                return -errno;
        }

        blkid_probe_enable_partitions(b, 1);
        blkid_probe_set_partitions_flags(b, BLKID_PARTS_ENTRY_DETAILS);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == -2 || r == 1) {
                /* Ambivalent result, or nothing detected */
                log_error("Failed to identify any partition table on %s.\n"
                          "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
                return -EINVAL;
        } else if (r != 0) {
                if (errno == 0)
                        errno = EIO;
                log_error("Failed to probe: %m");
                return -errno;
        }

        blkid_probe_lookup_value(b, "PTTYPE", &pttype, NULL);
        if (!streq_ptr(pttype, "gpt")) {
                log_error("Image %s does not carry a GUID Partition Table.\n"
                          "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
                return -EINVAL;
        }

        errno = 0;
        pl = blkid_probe_get_partitions(b);
        if (!pl) {
                if (errno == 0)
                        return log_oom();

                log_error("Failed to list partitions of %s", arg_image);
                return -errno;
        }

        udev = udev_new();
        if (!udev)
                return log_oom();

        if (fstat(fd, &st) < 0) {
                log_error("Failed to stat block device: %m");
                return -errno;
        }

        /* Enumerate all udev devices below the whole-disk device */
        d = udev_device_new_from_devnum(udev, 'b', st.st_rdev);
        if (!d)
                return log_oom();

        e = udev_enumerate_new(udev);
        if (!e)
                return log_oom();

        r = udev_enumerate_add_match_parent(e, d);
        if (r < 0)
                return log_oom();

        r = udev_enumerate_scan_devices(e);
        if (r < 0) {
                log_error("Failed to scan for partition devices of %s: %s", arg_image, strerror(-r));
                return r;
        }

        first = udev_enumerate_get_list_entry(e);
        udev_list_entry_foreach(item, first) {
                /* Initialized so that the cleanup handler never sees garbage */
                _cleanup_udev_device_unref_ struct udev_device *q = NULL;
                const char *stype, *node;
                sd_id128_t type_id;
                blkid_partition pp;
                dev_t qn;
                int nr;

                errno = 0;
                q = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
                if (!q) {
                        if (!errno)
                                errno = ENOMEM;

                        log_error("Failed to get partition device of %s: %m", arg_image);
                        return -errno;
                }

                /* Skip entries without a device number ... */
                qn = udev_device_get_devnum(q);
                if (major(qn) == 0)
                        continue;

                /* ... the whole-disk device itself ... */
                if (st.st_rdev == qn)
                        continue;

                /* ... and anything without a device node */
                node = udev_device_get_devnode(q);
                if (!node)
                        continue;

                pp = blkid_partlist_devno_to_partition(pl, qn);
                if (!pp)
                        continue;

                nr = blkid_partition_get_partno(pp);
                if (nr < 0)
                        continue;

                stype = blkid_partition_get_type_string(pp);
                if (!stype)
                        continue;

                if (sd_id128_from_string(stype, &type_id) < 0)
                        continue;

                if (sd_id128_equal(type_id, GPT_HOME))
                        r = dissect_pick_lowest(&home, &home_nr, node, nr);
                else if (sd_id128_equal(type_id, GPT_SRV))
                        r = dissect_pick_lowest(&srv, &srv_nr, node, nr);
#ifdef GPT_ROOT_NATIVE
                else if (sd_id128_equal(type_id, GPT_ROOT_NATIVE))
                        r = dissect_pick_lowest(&root, &root_nr, node, nr);
#endif
#ifdef GPT_ROOT_SECONDARY
                else if (sd_id128_equal(type_id, GPT_ROOT_SECONDARY))
                        r = dissect_pick_lowest(&secondary_root, &secondary_root_nr, node, nr);
#endif
                else
                        r = 0; /* partition type we do not care about */

                if (r < 0)
                        return r;
        }

        if (!root && !secondary_root) {
                log_error("Failed to identify root partition in disk image %s.\n"
                          "Note that the disk image needs to follow http://www.freedesktop.org/wiki/Specifications/DiscoverablePartitionsSpec/ to be supported by systemd-nspawn.", arg_image);
                return -EINVAL;
        }

        /* Hand the strings over to the caller; setting our pointers
         * to NULL keeps the cleanup handlers from freeing them. */
        if (root) {
                *root_device = root;
                root = NULL;
                *secondary = false;
        } else if (secondary_root) {
                *root_device = secondary_root;
                secondary_root = NULL;
                *secondary = true;
        }

        if (home) {
                *home_device = home;
                home = NULL;
        }

        if (srv) {
                *srv_device = srv;
                srv = NULL;
        }

        return 0;
#else
        log_error("--image= is not supported, compiled without blkid support.");
        return -ENOTSUP;
#endif
}
2107
/* Mounts the block device 'what' below 'where'.
 *
 * The mount point is 'where' itself, or 'where' with 'directory'
 * appended if 'directory' is non-NULL. The file system type is
 * detected with libblkid; LUKS containers are refused with -ENOTSUP.
 * The mount is always MS_NODEV, and additionally MS_RDONLY when
 * arg_read_only is set.
 *
 * Returns 0 on success. On failure the problem is logged and a
 * negative errno-style value is returned. Without blkid support this
 * always fails with -ENOTSUP. */
static int mount_device(const char *what, const char *where, const char *directory) {
#ifdef HAVE_BLKID
        _cleanup_blkid_free_probe_ blkid_probe b = NULL;
        const char *fstype = NULL, *p;
        int r;

        assert(what);
        assert(where);

        if (directory)
                p = strappenda(where, directory);
        else
                p = where;

        errno = 0;
        b = blkid_new_probe_from_filename(what);
        if (!b) {
                /* Failure without errno is treated as out-of-memory */
                if (errno == 0)
                        return log_oom();
                log_error("Failed to allocate prober for %s: %m", what);
                return -errno;
        }

        blkid_probe_enable_superblocks(b, 1);
        blkid_probe_set_superblocks_flags(b, BLKID_SUBLKS_TYPE);

        errno = 0;
        r = blkid_do_safeprobe(b);
        if (r == -2 || r == 1) {
                /* -2: ambivalent result, 1: nothing detected. A plain
                 * -1 is a real probing error and is reported with its
                 * errno below. */
                log_error("Cannot determine file system type of %s", what);
                return -EINVAL;
        } else if (r != 0) {
                if (errno == 0)
                        errno = EIO;
                log_error("Failed to probe %s: %m", what);
                return -errno;
        }

        errno = 0;
        if (blkid_probe_lookup_value(b, "TYPE", &fstype, NULL) < 0) {
                if (errno == 0)
                        errno = EINVAL;
                log_error("Failed to determine file system type of %s", what);
                return -errno;
        }

        if (streq(fstype, "crypto_LUKS")) {
                log_error("nspawn currently does not support LUKS disk images.");
                return -ENOTSUP;
        }

        /* Never honour device nodes stored inside the image, regardless
         * of whether it is mounted read-only or writable. */
        if (mount(what, p, fstype, MS_NODEV|(arg_read_only ? MS_RDONLY : 0), NULL) < 0) {
                log_error("Failed to mount %s: %m", what);
                return -errno;
        }

        return 0;
#else
        log_error("--image= is not supported, compiled without blkid support.");
        return -ENOTSUP;
#endif
}
2170
/* Mounts the partitions found in a disk image below 'where': the root
 * partition on 'where' itself, the home partition on 'where'/home and
 * the server data partition on 'where'/srv. Any of the device
 * arguments may be NULL, in which case that mount is skipped.
 *
 * Returns 0 on success, or the negative error of the first
 * mount_device() call that failed, after logging it. */
static int mount_devices(const char *where, const char *root_device, const char *home_device, const char *srv_device) {
        int r;

        assert(where);

        /* Root goes first, the other mount points live inside of it */
        if (root_device) {
                r = mount_device(root_device, where, NULL);
                if (r < 0) {
                        log_error("Failed to mount root directory: %s", strerror(-r));
                        return r;
                }
        }

        if (home_device) {
                r = mount_device(home_device, where, "/home");
                if (r < 0) {
                        log_error("Failed to mount home directory: %s", strerror(-r));
                        return r;
                }
        }

        if (srv_device) {
                r = mount_device(srv_device, where, "/srv");
                if (r < 0) {
                        log_error("Failed to mount server data directory: %s", strerror(-r));
                        return r;
                }
        }

        return 0;
}
2202
2203 static void loop_remove(int nr, int *image_fd) {
2204         _cleanup_close_ int control = -1;
2205
2206         if (nr < 0)
2207                 return;
2208
2209         if (image_fd && *image_fd >= 0) {
2210                 ioctl(*image_fd, LOOP_CLR_FD);
2211                 close_nointr_nofail(*image_fd);
2212                 *image_fd = -1;
2213         }
2214
2215         control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
2216         if (control < 0)
2217                 return;
2218
2219         ioctl(control, LOOP_CTL_REMOVE, nr);
2220 }
2221
2222 int main(int argc, char *argv[]) {
2223
2224         _cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
2225         _cleanup_close_ int master = -1, kdbus_fd = -1, sync_fd = -1, image_fd = -1;
2226         _cleanup_close_pipe_ int kmsg_socket_pair[2] = { -1, -1 };
2227         _cleanup_fdset_free_ FDSet *fds = NULL;
2228         int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
2229         const char *console = NULL;
2230         char veth_name[IFNAMSIZ];
2231         bool secondary = false;
2232         pid_t pid = 0;
2233         sigset_t mask;
2234
2235         log_parse_environment();
2236         log_open();
2237
2238         k = parse_argv(argc, argv);
2239         if (k < 0)
2240                 goto finish;
2241         else if (k == 0) {
2242                 r = EXIT_SUCCESS;
2243                 goto finish;
2244         }
2245
2246         if (!arg_image) {
2247                 if (arg_directory) {
2248                         char *p;
2249
2250                         p = path_make_absolute_cwd(arg_directory);
2251                         free(arg_directory);
2252                         arg_directory = p;
2253                 } else
2254                         arg_directory = get_current_dir_name();
2255
2256                 if (!arg_directory) {
2257                         log_error("Failed to determine path, please use -D.");
2258                         goto finish;
2259                 }
2260                 path_kill_slashes(arg_directory);
2261         }
2262
2263         if (!arg_machine) {
2264                 arg_machine = strdup(basename(arg_image ? arg_image : arg_directory));
2265                 if (!arg_machine) {
2266                         log_oom();
2267                         goto finish;
2268                 }
2269
2270                 hostname_cleanup(arg_machine, false);
2271                 if (isempty(arg_machine)) {
2272                         log_error("Failed to determine machine name automatically, please use -M.");
2273                         goto finish;
2274                 }
2275         }
2276
2277         if (geteuid() != 0) {
2278                 log_error("Need to be root.");
2279                 goto finish;
2280         }
2281
2282         if (sd_booted() <= 0) {
2283                 log_error("Not running on a systemd system.");
2284                 goto finish;
2285         }
2286
2287         log_close();
2288         n_fd_passed = sd_listen_fds(false);
2289         if (n_fd_passed > 0) {
2290                 k = fdset_new_listen_fds(&fds, false);
2291                 if (k < 0) {
2292                         log_error("Failed to collect file descriptors: %s", strerror(-k));
2293                         goto finish;
2294                 }
2295         }
2296         fdset_close_others(fds);
2297         log_open();
2298
2299         if (arg_directory) {
2300                 if (path_equal(arg_directory, "/")) {
2301                         log_error("Spawning container on root directory not supported.");
2302                         goto finish;
2303                 }
2304
2305                 if (arg_boot) {
2306                         if (path_is_os_tree(arg_directory) <= 0) {
2307                                 log_error("Directory %s doesn't look like an OS root directory (/etc/os-release is missing). Refusing.", arg_directory);
2308                                 goto finish;
2309                         }
2310                 } else {
2311                         const char *p;
2312
2313                         p = strappenda(arg_directory,
2314                                        argc > optind && path_is_absolute(argv[optind]) ? argv[optind] : "/usr/bin/");
2315                         if (access(p, F_OK) < 0) {
2316                                 log_error("Directory %s lacks the binary to execute or doesn't look like a binary tree. Refusing.", arg_directory);
2317                                 goto finish;
2318
2319                         }
2320                 }
2321         } else {
2322                 char template[] = "/tmp/nspawn-root-XXXXXX";
2323
2324                 if (!mkdtemp(template)) {
2325                         log_error("Failed to create temporary directory: %m");
2326                         r = -errno;
2327                         goto finish;
2328                 }
2329
2330                 arg_directory = strdup(template);
2331                 if (!arg_directory) {
2332                         r = log_oom();
2333                         goto finish;
2334                 }
2335
2336                 image_fd = setup_image(&device_path, &loop_nr);
2337                 if (image_fd < 0) {
2338                         r = image_fd;
2339                         goto finish;
2340                 }
2341
2342                 r = dissect_image(image_fd, &root_device, &home_device, &srv_device, &secondary);
2343                 if (r < 0)
2344                         goto finish;
2345         }
2346
2347         master = posix_openpt(O_RDWR|O_NOCTTY|O_CLOEXEC|O_NDELAY);
2348         if (master < 0) {
2349                 log_error("Failed to acquire pseudo tty: %m");
2350                 goto finish;
2351         }
2352
2353         console = ptsname(master);
2354         if (!console) {
2355                 log_error("Failed to determine tty name: %m");
2356                 goto finish;
2357         }
2358
2359         if (!arg_quiet)
2360                 log_info("Spawning container %s on %s. Press ^] three times within 1s to abort execution.", arg_machine, arg_image ? arg_image : arg_directory);
2361
2362         if (unlockpt(master) < 0) {
2363                 log_error("Failed to unlock tty: %m");
2364                 goto finish;
2365         }
2366
2367         if (access("/dev/kdbus/control", F_OK) >= 0) {
2368
2369                 if (arg_share_system) {
2370                         kdbus_domain = strdup("/dev/kdbus");
2371                         if (!kdbus_domain) {
2372                                 log_oom();
2373                                 goto finish;
2374                         }
2375                 } else {
2376                         const char *ns;
2377
2378                         ns = strappenda("machine-", arg_machine);
2379                         kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
2380                         if (r < 0)
2381                                 log_debug("Failed to create kdbus domain: %s", strerror(-r));
2382                         else
2383                                 log_debug("Successfully created kdbus domain as %s", kdbus_domain);
2384                 }
2385         }
2386
2387         if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
2388                 log_error("Failed to create kmsg socket pair: %m");
2389                 goto finish;
2390         }
2391
2392         sd_notify(0, "READY=1");
2393
2394         assert_se(sigemptyset(&mask) == 0);
2395         sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
2396         assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
2397
2398         for (;;) {
2399                 siginfo_t status;
2400
2401                 sync_fd = eventfd(0, EFD_CLOEXEC);
2402                 if (sync_fd < 0) {
2403                         log_error("Failed to create event fd: %m");
2404                         goto finish;
2405                 }
2406
2407                 pid = syscall(__NR_clone,
2408                               SIGCHLD|CLONE_NEWNS|
2409                               (arg_share_system ? 0 : CLONE_NEWIPC|CLONE_NEWPID|CLONE_NEWUTS)|
2410                               (arg_private_network ? CLONE_NEWNET : 0), NULL);
2411                 if (pid < 0) {
2412                         if (errno == EINVAL)
2413                                 log_error("clone() failed, do you have namespace support enabled in your kernel? (You need UTS, IPC, PID and NET namespacing built in): %m");
2414                         else
2415                                 log_error("clone() failed: %m");
2416
2417                         goto finish;
2418                 }
2419
2420                 if (pid == 0) {
2421                         /* child */
2422                         const char *home = NULL;
2423                         uid_t uid = (uid_t) -1;
2424                         gid_t gid = (gid_t) -1;
2425                         unsigned n_env = 2;
2426                         const char *envp[] = {
2427                                 "PATH=" DEFAULT_PATH_SPLIT_USR,
2428                                 "container=systemd-nspawn", /* LXC sets container=lxc, so follow the scheme here */
2429                                 NULL, /* TERM */
2430                                 NULL, /* HOME */
2431                                 NULL, /* USER */
2432                                 NULL, /* LOGNAME */
2433                                 NULL, /* container_uuid */
2434                                 NULL, /* LISTEN_FDS */
2435                                 NULL, /* LISTEN_PID */
2436                                 NULL
2437                         };
2438                         char **env_use;
2439                         eventfd_t x;
2440
2441                         envp[n_env] = strv_find_prefix(environ, "TERM=");
2442                         if (envp[n_env])
2443                                 n_env ++;
2444
2445                         close_nointr_nofail(master);
2446                         master = -1;
2447
2448                         close_nointr(STDIN_FILENO);
2449                         close_nointr(STDOUT_FILENO);
2450                         close_nointr(STDERR_FILENO);
2451
2452                         close_nointr_nofail(kmsg_socket_pair[0]);
2453                         kmsg_socket_pair[0] = -1;
2454
2455                         reset_all_signal_handlers();
2456
2457                         assert_se(sigemptyset(&mask) == 0);
2458                         assert_se(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
2459
2460                         k = open_terminal(console, O_RDWR);
2461                         if (k != STDIN_FILENO) {
2462                                 if (k >= 0) {
2463                                         close_nointr_nofail(k);
2464                                         k = -EINVAL;
2465                                 }
2466
2467                                 log_error("Failed to open console: %s", strerror(-k));
2468                                 goto child_fail;
2469                         }
2470
2471                         if (dup2(STDIN_FILENO, STDOUT_FILENO) != STDOUT_FILENO ||
2472                             dup2(STDIN_FILENO, STDERR_FILENO) != STDERR_FILENO) {
2473                                 log_error("Failed to duplicate console: %m");
2474                                 goto child_fail;
2475                         }
2476
2477                         if (setsid() < 0) {
2478                                 log_error("setsid() failed: %m");
2479                                 goto child_fail;
2480                         }
2481
2482                         if (reset_audit_loginuid() < 0)
2483                                 goto child_fail;
2484
2485                         if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) {
2486                                 log_error("PR_SET_PDEATHSIG failed: %m");
2487                                 goto child_fail;
2488                         }
2489
2490                         /* Mark everything as slave, so that we still
2491                          * receive mounts from the real root, but don't
2492                          * propagate mounts to the real root. */
2493                         if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
2494                                 log_error("MS_SLAVE|MS_REC failed: %m");
2495                                 goto child_fail;
2496                         }
2497
2498                         if (mount_devices(arg_directory, root_device, home_device, srv_device) < 0)
2499                                 goto child_fail;
2500
2501                         /* Turn directory into bind mount */
2502                         if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REC, NULL) < 0) {
2503                                 log_error("Failed to make bind mount.");
2504                                 goto child_fail;
2505                         }
2506
2507                         if (arg_read_only)
2508                                 if (mount(arg_directory, arg_directory, "bind", MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL) < 0) {
2509                                         log_error("Failed to make read-only.");
2510                                         goto child_fail;
2511                                 }
2512
2513                         if (mount_all(arg_directory) < 0)
2514                                 goto child_fail;
2515
2516                         if (copy_devnodes(arg_directory) < 0)
2517                                 goto child_fail;
2518
2519                         if (setup_ptmx(arg_directory) < 0)
2520                                 goto child_fail;
2521
2522                         dev_setup(arg_directory);
2523
2524                         if (audit_still_doesnt_work_in_containers() < 0)
2525                                 goto child_fail;
2526
2527                         if (setup_dev_console(arg_directory, console) < 0)
2528                                 goto child_fail;
2529
2530                         if (setup_kmsg(arg_directory, kmsg_socket_pair[1]) < 0)
2531                                 goto child_fail;
2532
2533                         close_nointr_nofail(kmsg_socket_pair[1]);
2534                         kmsg_socket_pair[1] = -1;
2535
2536                         if (setup_boot_id(arg_directory) < 0)
2537                                 goto child_fail;
2538
2539                         if (setup_timezone(arg_directory) < 0)
2540                                 goto child_fail;
2541
2542                         if (setup_resolv_conf(arg_directory) < 0)
2543                                 goto child_fail;
2544
2545                         if (setup_journal(arg_directory) < 0)
2546                                 goto child_fail;
2547
2548                         if (mount_binds(arg_directory, arg_bind, 0) < 0)
2549                                 goto child_fail;
2550
2551                         if (mount_binds(arg_directory, arg_bind_ro, MS_RDONLY) < 0)
2552                                 goto child_fail;
2553
2554                         if (setup_kdbus(arg_directory, kdbus_domain) < 0)
2555                                 goto child_fail;
2556
2557                         if (chdir(arg_directory) < 0) {
2558                                 log_error("chdir(%s) failed: %m", arg_directory);
2559                                 goto child_fail;
2560                         }
2561
2562                         if (mount(arg_directory, "/", NULL, MS_MOVE, NULL) < 0) {
2563                                 log_error("mount(MS_MOVE) failed: %m");
2564                                 goto child_fail;
2565                         }
2566
2567                         if (chroot(".") < 0) {
2568                                 log_error("chroot() failed: %m");
2569                                 goto child_fail;
2570                         }
2571
2572                         if (chdir("/") < 0) {
2573                                 log_error("chdir() failed: %m");
2574                                 goto child_fail;
2575                         }
2576
2577                         umask(0022);
2578
2579                         if (arg_private_network)
2580                                 loopback_setup();
2581
2582                         if (drop_capabilities() < 0) {
2583                                 log_error("drop_capabilities() failed: %m");
2584                                 goto child_fail;
2585                         }
2586
2587                         if (arg_user) {
2588
2589                                 /* Note that this resolves user names
2590                                  * inside the container, and hence
2591                                  * accesses the NSS modules from the
2592                                  * container and not the host. This is
2593                                  * a bit weird... */
2594
2595                                 if (get_user_creds((const char**)&arg_user, &uid, &gid, &home, NULL) < 0) {
2596                                         log_error("get_user_creds() failed: %m");
2597                                         goto child_fail;
2598                                 }
2599
2600                                 if (mkdir_parents_label(home, 0775) < 0) {
2601                                         log_error("mkdir_parents_label() failed: %m");
2602                                         goto child_fail;
2603                                 }
2604
2605                                 if (mkdir_safe_label(home, 0775, uid, gid) < 0) {
2606                                         log_error("mkdir_safe_label() failed: %m");
2607                                         goto child_fail;
2608                                 }
2609
2610                                 if (initgroups((const char*)arg_user, gid) < 0) {
2611                                         log_error("initgroups() failed: %m");
2612                                         goto child_fail;
2613                                 }
2614
2615                                 if (setresgid(gid, gid, gid) < 0) {
2616                                         log_error("setregid() failed: %m");
2617                                         goto child_fail;
2618                                 }
2619
2620                                 if (setresuid(uid, uid, uid) < 0) {
2621                                         log_error("setreuid() failed: %m");
2622                                         goto child_fail;
2623                                 }
2624                         } else {
2625                                 /* Reset everything fully to 0, just in case */
2626
2627                                 if (setgroups(0, NULL) < 0) {
2628                                         log_error("setgroups() failed: %m");
2629                                         goto child_fail;
2630                                 }
2631
2632                                 if (setresgid(0, 0, 0) < 0) {
2633                                         log_error("setregid() failed: %m");
2634                                         goto child_fail;
2635                                 }
2636
2637                                 if (setresuid(0, 0, 0) < 0) {
2638                                         log_error("setreuid() failed: %m");
2639                                         goto child_fail;
2640                                 }
2641                         }
2642
2643                         if ((asprintf((char**)(envp + n_env++), "HOME=%s", home ? home: "/root") < 0) ||
2644                             (asprintf((char**)(envp + n_env++), "USER=%s", arg_user ? arg_user : "root") < 0) ||
2645                             (asprintf((char**)(envp + n_env++), "LOGNAME=%s", arg_user ? arg_user : "root") < 0)) {
2646                                 log_oom();
2647                                 goto child_fail;
2648                         }
2649
2650                         if (!sd_id128_equal(arg_uuid, SD_ID128_NULL)) {
2651                                 if (asprintf((char**)(envp + n_env++), "container_uuid=" SD_ID128_FORMAT_STR, SD_ID128_FORMAT_VAL(arg_uuid)) < 0) {
2652                                         log_oom();
2653                                         goto child_fail;
2654                                 }
2655                         }
2656
2657                         if (fdset_size(fds) > 0) {
2658                                 k = fdset_cloexec(fds, false);
2659                                 if (k < 0) {
2660                                         log_error("Failed to unset O_CLOEXEC for file descriptors.");
2661                                         goto child_fail;
2662                                 }
2663
2664                                 if ((asprintf((char **)(envp + n_env++), "LISTEN_FDS=%u", n_fd_passed) < 0) ||
2665                                     (asprintf((char **)(envp + n_env++), "LISTEN_PID=1") < 0)) {
2666                                         log_oom();
2667                                         goto child_fail;
2668                                 }
2669                         }
2670
2671                         setup_hostname();
2672
2673                         if (arg_personality != 0xffffffffLU) {
2674                                 if (personality(arg_personality) < 0) {
2675                                         log_error("personality() failed: %m");
2676                                         goto child_fail;
2677                                 }
2678                         } else if (secondary) {
2679                                 if (personality(PER_LINUX32) < 0) {
2680                                         log_error("personality() failed: %m");
2681                                         goto child_fail;
2682                                 }
2683                         }
2684
2685                         eventfd_read(sync_fd, &x);
2686                         close_nointr_nofail(sync_fd);
2687                         sync_fd = -1;
2688
2689                         if (!strv_isempty(arg_setenv)) {
2690                                 char **n;
2691
2692                                 n = strv_env_merge(2, envp, arg_setenv);
2693                                 if (!n) {
2694                                         log_oom();
2695                                         goto child_fail;
2696                                 }
2697
2698                                 env_use = n;
2699                         } else
2700                                 env_use = (char**) envp;
2701
2702 #ifdef HAVE_SELINUX
2703                         if (arg_selinux_context)
2704                                 if (setexeccon((security_context_t) arg_selinux_context) < 0)
2705                                         log_error("setexeccon(\"%s\") failed: %m", arg_selinux_context);
2706 #endif
2707                         if (arg_boot) {
2708                                 char **a;
2709                                 size_t l;
2710
2711                                 /* Automatically search for the init system */
2712
2713                                 l = 1 + argc - optind;
2714                                 a = newa(char*, l + 1);
2715                                 memcpy(a + 1, argv + optind, l * sizeof(char*));
2716
2717                                 a[0] = (char*) "/usr/lib/systemd/systemd";
2718                                 execve(a[0], a, env_use);
2719
2720                                 a[0] = (char*) "/lib/systemd/systemd";
2721                                 execve(a[0], a, env_use);
2722
2723                                 a[0] = (char*) "/sbin/init";
2724                                 execve(a[0], a, env_use);
2725                         } else if (argc > optind)
2726                                 execvpe(argv[optind], argv + optind, env_use);
2727                         else {
2728                                 chdir(home ? home : "/root");
2729                                 execle("/bin/bash", "-bash", NULL, env_use);
2730                                 execle("/bin/sh", "-sh", NULL, env_use);
2731                         }
2732
2733                         log_error("execv() failed: %m");
2734
2735                 child_fail:
2736                         _exit(EXIT_FAILURE);
2737                 }
2738
2739                 fdset_free(fds);
2740                 fds = NULL;
2741
2742                 r = register_machine(pid);
2743                 if (r < 0)
2744                         goto finish;
2745
2746                 r = move_network_interfaces(pid);
2747                 if (r < 0)
2748                         goto finish;
2749
2750                 r = setup_veth(pid, veth_name);
2751                 if (r < 0)
2752                         goto finish;
2753
2754                 r = setup_bridge(veth_name);
2755                 if (r < 0)
2756                         goto finish;
2757
2758                 r = setup_macvlan(pid);
2759                 if (r < 0)
2760                         goto finish;
2761
2762                 eventfd_write(sync_fd, 1);
2763                 close_nointr_nofail(sync_fd);
2764                 sync_fd = -1;
2765
2766                 k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
2767                 if (k < 0) {
2768                         r = EXIT_FAILURE;
2769                         break;
2770                 }
2771
2772                 if (!arg_quiet)
2773                         putc('\n', stdout);
2774
2775                 /* Kill if it is not dead yet anyway */
2776                 terminate_machine(pid);
2777
2778                 /* Redundant, but better safe than sorry */
2779                 kill(pid, SIGKILL);
2780
2781                 k = wait_for_terminate(pid, &status);
2782                 pid = 0;
2783
2784                 if (k < 0) {
2785                         r = EXIT_FAILURE;
2786                         break;
2787                 }
2788
2789                 if (status.si_code == CLD_EXITED) {
2790                         r = status.si_status;
2791                         if (status.si_status != 0) {
2792                                 log_error("Container %s failed with error code %i.", arg_machine, status.si_status);
2793                                 break;
2794                         }
2795
2796                         if (!arg_quiet)
2797                                 log_debug("Container %s exited successfully.", arg_machine);
2798                         break;
2799                 } else if (status.si_code == CLD_KILLED &&
2800                            status.si_status == SIGINT) {
2801
2802                         if (!arg_quiet)
2803                                 log_info("Container %s has been shut down.", arg_machine);
2804                         r = 0;
2805                         break;
2806                 } else if (status.si_code == CLD_KILLED &&
2807                            status.si_status == SIGHUP) {
2808
2809                         if (!arg_quiet)
2810                                 log_info("Container %s is being rebooted.", arg_machine);
2811                         continue;
2812                 } else if (status.si_code == CLD_KILLED ||
2813                            status.si_code == CLD_DUMPED) {
2814
2815                         log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
2816                         r = EXIT_FAILURE;
2817                         break;
2818                 } else {
2819                         log_error("Container %s failed due to unknown reason.", arg_machine);
2820                         r = EXIT_FAILURE;
2821                         break;
2822                 }
2823         }
2824
2825 finish:
2826         loop_remove(loop_nr, &image_fd);
2827
2828         if (pid > 0)
2829                 kill(pid, SIGKILL);
2830
2831         free(arg_directory);
2832         free(arg_machine);
2833         free(arg_user);
2834         strv_free(arg_setenv);
2835         strv_free(arg_network_interfaces);
2836         strv_free(arg_network_macvlan);
2837         strv_free(arg_bind);
2838         strv_free(arg_bind_ro);
2839
2840         return r;
2841 }