#include "copy.h"
#include "base-filesystem.h"
#include "barrier.h"
+#include "event-util.h"
#ifdef HAVE_SECCOMP
#include "seccomp-util.h"
static bool arg_read_only = false;
static bool arg_boot = false;
static LinkJournal arg_link_journal = LINK_AUTO;
+static bool arg_link_journal_try = false;
static uint64_t arg_retain =
(1ULL << CAP_CHOWN) |
(1ULL << CAP_DAC_OVERRIDE) |
" --capability=CAP In addition to the default, retain specified\n"
" capability\n"
" --drop-capability=CAP Drop the specified capability from the default set\n"
- " --link-journal=MODE Link up guest journal, one of no, auto, guest, host\n"
- " -j Equivalent to --link-journal=host\n"
+ " --link-journal=MODE Link up guest journal, one of no, auto, guest, host,\n"
+ " try-guest, try-host\n"
+ " -j Equivalent to --link-journal=try-guest\n"
" --read-only Mount the root directory read-only\n"
" --bind=PATH[:PATH] Bind mount a file or directory from the host into\n"
" the container\n"
case 'j':
arg_link_journal = LINK_GUEST;
+ arg_link_journal_try = true;
break;
case ARG_LINK_JOURNAL:
arg_link_journal = LINK_GUEST;
else if (streq(optarg, "host"))
arg_link_journal = LINK_HOST;
- else {
+ else if (streq(optarg, "try-guest")) {
+ arg_link_journal = LINK_GUEST;
+ arg_link_journal_try = true;
+ } else if (streq(optarg, "try-host")) {
+ arg_link_journal = LINK_HOST;
+ arg_link_journal_try = true;
+ } else {
log_error("Failed to parse link journal mode %s", optarg);
return -EINVAL;
}
if (mount_table[k].what && t > 0)
continue;
- mkdir_p(where, 0755);
+ t = mkdir_p(where, 0755);
+ if (t < 0) {
+ if (mount_table[k].fatal) {
+ log_error("Failed to create directory %s: %s", where, strerror(-t));
+
+ if (r == 0)
+ r = t;
+ } else
+ log_warning("Failed to create directory %s: %s", where, strerror(-t));
+
+ continue;
+ }
#ifdef HAVE_SELINUX
if (arg_selinux_apifs_context &&
where,
mount_table[k].type,
mount_table[k].flags,
- o) < 0 &&
- mount_table[k].fatal) {
+ o) < 0) {
- log_error("mount(%s) failed: %m", where);
+ if (mount_table[k].fatal) {
+ log_error("mount(%s) failed: %m", where);
- if (r == 0)
- r = -errno;
+ if (r == 0)
+ r = -errno;
+ } else
+ log_warning("mount(%s) failed: %m", where);
}
}
/* Create the mount point, but be conservative -- refuse to create block
* and char devices. */
- if (S_ISDIR(source_st.st_mode))
- mkdir_label(where, 0755);
- else if (S_ISFIFO(source_st.st_mode))
- mkfifo(where, 0644);
- else if (S_ISSOCK(source_st.st_mode))
- mknod(where, 0644 | S_IFSOCK, 0);
- else if (S_ISREG(source_st.st_mode))
- touch(where);
- else {
+ if (S_ISDIR(source_st.st_mode)) {
+ /* r is handled as a negative errno-style code here (it is fed to
+ * strerror(-r) and returned as is), so an already existing
+ * directory must be recognized through r, not through errno. */
+ r = mkdir_label(where, 0755);
+ if (r < 0 && r != -EEXIST) {
+ log_error("Failed to create mount point %s: %s", where, strerror(-r));
+
+ return r;
+ }
+ } else if (S_ISFIFO(source_st.st_mode)) {
+ /* mkfifo() and mknod() are plain libc calls: -1 plus errno */
+ r = mkfifo(where, 0644);
+ if (r < 0 && errno != EEXIST) {
+ log_error("Failed to create mount point %s: %m", where);
+
+ return -errno;
+ }
+ } else if (S_ISSOCK(source_st.st_mode)) {
+ r = mknod(where, 0644 | S_IFSOCK, 0);
+ if (r < 0 && errno != EEXIST) {
+ log_error("Failed to create mount point %s: %m", where);
+
+ return -errno;
+ }
+ } else if (S_ISREG(source_st.st_mode)) {
+ r = touch(where);
+ if (r < 0) {
+ log_error("Failed to create mount point %s: %s", where, strerror(-r));
+
+ return r;
+ }
+ } else {
log_error("Refusing to create mountpoint for file: %s", *x);
return -ENOTSUP;
}
STRV_FOREACH_PAIR(i, o, arg_tmpfs) {
_cleanup_free_ char *where = NULL;
+ int r;
where = strappend(dest, *i);
if (!where)
return log_oom();
- mkdir_label(where, 0755);
+ /* r is used as a negative errno-style code below (strerror(-r),
+ * return r), so check it directly for "already exists" instead of
+ * consulting errno. */
+ r = mkdir_label(where, 0755);
+ if (r < 0 && r != -EEXIST) {
+ log_error("creating mount point for tmpfs %s failed: %s", where, strerror(-r));
+
+ return r;
+ }
if (mount("tmpfs", where, "tmpfs", MS_NODEV|MS_STRICTATIME, *o) < 0) {
log_error("tmpfs mount to %s failed: %m", where);
if (!what)
return log_oom();
- mkdir_parents(where, 0755);
- unlink(where);
+ r = mkdir_parents(where, 0755);
+ if (r < 0) {
+ log_error("Failed to create directory for timezone info %s in container: %s", where, strerror(-r));
+
+ return 0;
+ }
+
+ r = unlink(where);
+ if (r < 0 && errno != ENOENT) {
+ log_error("Failed to remove existing timezone info %s in container: %m", where);
+
+ return 0;
+ }
if (symlink(what, where) < 0) {
log_error("Failed to correct timezone of container: %m");
static int setup_resolv_conf(const char *dest) {
_cleanup_free_ char *where = NULL;
+ int r;
assert(dest);
/* We don't really care for the results of this really. If it
* fails, it fails, but meh... */
- mkdir_parents(where, 0755);
- copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW, 0644);
+ r = mkdir_parents(where, 0755);
+ if (r < 0) {
+ log_warning("Failed to create parent directory for resolv.conf %s: %s", where, strerror(-r));
+
+ return 0;
+ }
+
+ r = copy_file("/etc/resolv.conf", where, O_TRUNC|O_NOFOLLOW, 0644);
+ if (r < 0) {
+ log_warning("Failed to copy /etc/resolv.conf to %s: %s", where, strerror(-r));
+
+ return 0;
+ }
return 0;
}
}
p = strappenda(directory, "/var");
- mkdir(p, 0755);
+ r = mkdir(p, 0755);
+ if (r < 0 && errno != EEXIST) {
+ /* Report the path mkdir() was actually called on. */
+ log_error("Failed to create %s: %m", p);
+ return -errno;
+ }
if (mount("tmpfs", p, "tmpfs", MS_STRICTATIME, "mode=755") < 0) {
log_error("Failed to mount tmpfs to /var: %m");
f = strappenda(directory, "/usr");
t = strappenda(template, "/usr");
- mkdir(t, 0755);
+ r = mkdir(t, 0755);
+ if (r < 0 && errno != EEXIST) {
+ log_error("Failed to create %s: %m", t);
+ r = -errno;
+ goto fail;
+ }
+
if (mount(f, t, "bind", MS_BIND|MS_REC, NULL) < 0) {
log_error("Failed to create /usr bind mount: %m");
r = -errno;
"full\0"
"random\0"
"urandom\0"
- "tty\0";
+ "tty\0"
+ "net/tun\0";
const char *d;
int r = 0;
log_error("%s is not a char or block device, cannot copy", from);
return -EIO;
- } else if (mknod(to, st.st_mode, st.st_rdev) < 0) {
+ } else {
+ /* Make sure the directory holding the node exists first.
+ * 0755 matches the other mkdir_parents() calls; r is already
+ * a negative error code, so it is returned without negation. */
+ r = mkdir_parents(to, 0755);
+ if (r < 0) {
+ log_error("Failed to create parent directory of %s: %s", to, strerror(-r));
+ return r;
+ }
- log_error("mknod(%s) failed: %m", dest);
- return -errno;
+ if (mknod(to, st.st_mode, st.st_rdev) < 0) {
+ /* Name the node that mknod() was asked to create. */
+ log_error("mknod(%s) failed: %m", to);
+ return -errno;
+ }
}
}
if (arg_share_system)
return 0;
- if (sethostname(arg_machine, strlen(arg_machine)) < 0)
+ if (sethostname_idempotent(arg_machine) < 0)
return -errno;
return 0;
r = mkdir_p(q, 0755);
if (r < 0)
- log_warning("failed to create directory %s: %m", q);
+ log_warning("Failed to create directory %s: %m", q);
return 0;
}
if (arg_link_journal == LINK_GUEST) {
if (symlink(q, p) < 0) {
- log_error("Failed to symlink %s to %s: %m", q, p);
- return -errno;
+ if (arg_link_journal_try) {
+ log_debug("Failed to symlink %s to %s, skipping journal setup: %m", q, p);
+ return 0;
+ } else {
+ log_error("Failed to symlink %s to %s: %m", q, p);
+ return -errno;
+ }
}
r = mkdir_p(q, 0755);
if (r < 0)
- log_warning("failed to create directory %s: %m", q);
+ log_warning("Failed to create directory %s: %m", q);
return 0;
}
if (arg_link_journal == LINK_HOST) {
- r = mkdir_p(p, 0755);
+ /* don't create parents here -- if the host doesn't have
+ * permanent journal set up, don't force it here */
+ r = mkdir(p, 0755);
- if (r < 0) {
+ /* Plain mkdir() fails with EEXIST when the directory is already
+ * present; an existing directory is all we need, so only other
+ * errors count as failure. */
+ if (r < 0 && errno != EEXIST) {
- log_error("Failed to create %s: %m", p);
- return r;
+ if (arg_link_journal_try) {
+ log_debug("Failed to create %s, skipping journal setup: %m", p);
+ return 0;
+ } else {
+ log_error("Failed to create %s: %m", p);
+ /* mkdir() itself only yields -1; pass on the real error code */
+ return -errno;
+ }
}
} else if (access(p, F_OK) < 0)
return 0;
}
-static int setup_kdbus(const char *dest, const char *path) {
- const char *p;
-
- if (!path)
- return 0;
-
- p = strappenda(dest, "/dev/kdbus");
- if (mkdir(p, 0755) < 0) {
- log_error("Failed to create kdbus path: %m");
- return -errno;
- }
-
- if (mount(path, p, "bind", MS_BIND, NULL) < 0) {
- log_error("Failed to mount kdbus domain path: %m");
- return -errno;
- }
-
- return 0;
-}
-
/* Hands the bitwise complement of arg_retain to capability_bounding_set_drop()
 * and returns that call's result unchanged. */
static int drop_capabilities(void) {
        uint64_t not_retained = ~arg_retain;

        return capability_bounding_set_drop(not_retained, false);
}
return r;
}
- r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 10,
+ r = sd_bus_message_append(m, "(sv)", "DeviceAllow", "a(ss)", 9,
/* Allow the container to
* access and create the API
* device nodes, so that
"/dev/random", "rwm",
"/dev/urandom", "rwm",
"/dev/tty", "rwm",
+ "/dev/net/tun", "rwm",
/* Allow the container
* access to ptys. However,
* do not permit the
* container to ever create
* these device nodes. */
"/dev/pts/ptmx", "rw",
- "char-pts", "rw",
- /* Allow the container
- * access to all kdbus
- * devices. Again, the
- * container cannot create
- * these nodes, only use
- * them. We use a pretty
- * open match here, so that
- * the kernel API can still
- * change. */
- "char-kdbus", "rw",
- "char-kdbus/*", "rw");
+ "char-pts", "rw");
if (r < 0) {
log_error("Failed to add device whitelist: %s", strerror(-r));
return r;
/* Use two different interface name prefixes depending whether
* we are in bridge mode or not. */
- snprintf(iface_name, IFNAMSIZ, "%s-%s",
+ snprintf(iface_name, IFNAMSIZ - 1, "%s-%s",
arg_network_bridge ? "vb" : "ve", arg_machine);
r = generate_mac(&mac_container, CONTAINER_HASH_KEY);
if (ifi < 0)
return ifi;
- r = sd_rtnl_message_new_link(rtnl, &m, RTM_NEWLINK, ifi);
+ r = sd_rtnl_message_new_link(rtnl, &m, RTM_SETLINK, ifi);
if (r < 0) {
log_error("Failed to allocate netlink message: %s", strerror(-r));
return r;
static void loop_remove(int nr, int *image_fd) {
_cleanup_close_ int control = -1;
+ int r;
if (nr < 0)
return;
if (image_fd && *image_fd >= 0) {
- ioctl(*image_fd, LOOP_CLR_FD);
+ r = ioctl(*image_fd, LOOP_CLR_FD);
+ if (r < 0)
+ log_warning("Failed to close loop image: %m");
*image_fd = safe_close(*image_fd);
}
control = open("/dev/loop-control", O_RDWR|O_CLOEXEC|O_NOCTTY|O_NONBLOCK);
- if (control < 0)
+ if (control < 0) {
+ log_warning("Failed to open /dev/loop-control: %m");
return;
+ }
- ioctl(control, LOOP_CTL_REMOVE, nr);
+ r = ioctl(control, LOOP_CTL_REMOVE, nr);
+ if (r < 0)
+ log_warning("Failed to remove loop %d: %m", nr);
}
static int spawn_getent(const char *database, const char *key, pid_t *rpid) {
* container argument.
* > 0 : The program executed in the container terminated with an
* error. The exit code of the program executed in the
- * container is returned. No change is made to the container
- * argument.
+ * container is returned. The container argument has been set
+ * to CONTAINER_TERMINATED.
* 0 : The container is being rebooted, has been shut down or exited
* successfully. The container argument has been set to either
* CONTAINER_TERMINATED or CONTAINER_REBOOTED.
* error is indicated by a non-zero value.
*/
static int wait_for_container(pid_t pid, ContainerStatus *container) {
- int r;
siginfo_t status;
+ int r;
r = wait_for_terminate(pid, &status);
if (r < 0) {
}
switch (status.si_code) {
+
case CLD_EXITED:
- r = status.si_status;
- if (r == 0) {
- if (!arg_quiet)
- log_debug("Container %s exited successfully.",
- arg_machine);
+ if (status.si_status == 0) {
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s exited successfully.", arg_machine);
- *container = CONTAINER_TERMINATED;
- } else {
- log_error("Container %s failed with error code %i.",
- arg_machine, status.si_status);
- }
- break;
+ } else
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s failed with error code %i.", arg_machine, status.si_status);
+
+ *container = CONTAINER_TERMINATED;
+ return status.si_status;
case CLD_KILLED:
if (status.si_status == SIGINT) {
- if (!arg_quiet)
- log_info("Container %s has been shut down.",
- arg_machine);
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s has been shut down.", arg_machine);
*container = CONTAINER_TERMINATED;
- r = 0;
- break;
+ return 0;
+
} else if (status.si_status == SIGHUP) {
- if (!arg_quiet)
- log_info("Container %s is being rebooted.",
- arg_machine);
+ log_full(arg_quiet ? LOG_DEBUG : LOG_INFO, "Container %s is being rebooted.", arg_machine);
*container = CONTAINER_REBOOTED;
- r = 0;
- break;
+ return 0;
}
+
/* CLD_KILLED fallthrough */
case CLD_DUMPED:
- log_error("Container %s terminated by signal %s.",
- arg_machine, signal_to_string(status.si_status));
- r = -1;
- break;
+ log_error("Container %s terminated by signal %s.", arg_machine, signal_to_string(status.si_status));
+ return -EIO;
default:
- log_error("Container %s failed due to unknown reason.",
- arg_machine);
- r = -1;
- break;
+ log_error("Container %s failed due to unknown reason.", arg_machine);
+ return -EIO;
}
return r;
/* Signal handler that deliberately does nothing with the signal it receives. */
static void nop_handler(int sig) {
        (void) sig;
}
+/* Signal callback for the event loop. userdata carries a PID encoded as a
+ * pointer. While that PID is positive and SIGRTMIN+3 can be delivered to it,
+ * the callback logs a hint, clears the source's userdata and keeps the loop
+ * running; otherwise it asks the event loop to exit with code 0.
+ * Always returns 0. */
+static int on_orderly_shutdown(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
+        pid_t target = PTR_TO_UINT32(userdata);
+
+        /* Nothing left to signal, or signalling failed: leave the loop. */
+        if (target <= 0 || kill(target, SIGRTMIN+3) < 0) {
+                sd_event_exit(sd_event_source_get_event(s), 0);
+                return 0;
+        }
+
+        log_info("Trying to halt container. Send SIGTERM again to trigger immediate termination.");
+        sd_event_source_set_userdata(s, NULL);
+        return 0;
+}
+
int main(int argc, char *argv[]) {
- _cleanup_free_ char *kdbus_domain = NULL, *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
+ _cleanup_free_ char *device_path = NULL, *root_device = NULL, *home_device = NULL, *srv_device = NULL;
bool root_device_rw = true, home_device_rw = true, srv_device_rw = true;
- _cleanup_close_ int master = -1, kdbus_fd = -1, image_fd = -1;
+ _cleanup_close_ int master = -1, image_fd = -1;
_cleanup_close_pair_ int kmsg_socket_pair[2] = { -1, -1 };
_cleanup_fdset_free_ FDSet *fds = NULL;
int r = EXIT_FAILURE, k, n_fd_passed, loop_nr = -1;
goto finish;
}
- if (access("/dev/kdbus/control", F_OK) >= 0) {
-
- if (arg_share_system) {
- kdbus_domain = strdup("/dev/kdbus");
- if (!kdbus_domain) {
- log_oom();
- goto finish;
- }
- } else {
- const char *ns;
-
- ns = strappenda("machine-", arg_machine);
- kdbus_fd = bus_kernel_create_domain(ns, &kdbus_domain);
- if (r < 0)
- log_debug("Failed to create kdbus domain: %s", strerror(-r));
- else
- log_debug("Successfully created kdbus domain as %s", kdbus_domain);
- }
- }
-
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_NONBLOCK|SOCK_CLOEXEC, 0, kmsg_socket_pair) < 0) {
log_error("Failed to create kmsg socket pair: %m");
goto finish;
"STATUS=Container running.");
assert_se(sigemptyset(&mask) == 0);
- assert_se(sigemptyset(&mask_chld) == 0);
- sigaddset(&mask_chld, SIGCHLD);
sigset_add_many(&mask, SIGCHLD, SIGWINCH, SIGTERM, SIGINT, -1);
assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) == 0);
+ assert_se(sigemptyset(&mask_chld) == 0);
+ assert_se(sigaddset(&mask_chld, SIGCHLD) == 0);
+
for (;;) {
ContainerStatus container_status;
_cleanup_(barrier_destroy) Barrier barrier = BARRIER_NULL;
if (mount_tmpfs(arg_directory) < 0)
_exit(EXIT_FAILURE);
- if (setup_kdbus(arg_directory, kdbus_domain) < 0)
- _exit(EXIT_FAILURE);
-
/* Tell the parent that we are ready, and that
* it can cgroupify us so that we lack access
* to certain devices and resources. */
- barrier_place(&barrier);
+ (void)barrier_place(&barrier);
if (chdir(arg_directory) < 0) {
log_error("chdir(%s) failed: %m", arg_directory);
/* wait for child-setup to be done */
if (barrier_place_and_sync(&barrier)) {
+ _cleanup_event_unref_ sd_event *event = NULL;
+ _cleanup_(pty_forward_freep) PTYForward *forward = NULL;
int ifi = 0;
r = move_network_interfaces(pid);
/* Notify the child that the parent is ready with all
* its setup, and that the child can now hand over
* control to the code to run inside the container. */
- barrier_place(&barrier);
+ (void)barrier_place(&barrier);
- k = process_pty(master, &mask, arg_boot ? pid : 0, SIGRTMIN+3);
- if (k < 0) {
- r = EXIT_FAILURE;
- break;
+ r = sd_event_new(&event);
+ if (r < 0) {
+ log_error("Failed to get default event source: %s", strerror(-r));
+ goto finish;
+ }
+
+ if (arg_boot) {
+ /* Try to kill the init system on SIGINT or SIGTERM */
+ sd_event_add_signal(event, NULL, SIGINT, on_orderly_shutdown, UINT32_TO_PTR(pid));
+ sd_event_add_signal(event, NULL, SIGTERM, on_orderly_shutdown, UINT32_TO_PTR(pid));
+ } else {
+ /* Immediately exit */
+ sd_event_add_signal(event, NULL, SIGINT, NULL, NULL);
+ sd_event_add_signal(event, NULL, SIGTERM, NULL, NULL);
+ }
+
+ /* simply exit on sigchld */
+ sd_event_add_signal(event, NULL, SIGCHLD, NULL, NULL);
+
+ r = pty_forward_new(event, master, &forward);
+ if (r < 0) {
+ log_error("Failed to create PTY forwarder: %s", strerror(-r));
+ goto finish;
}
+ r = sd_event_loop(event);
+ if (r < 0) {
+ log_error("Failed to run event loop: %s", strerror(-r));
+ return r;
+ }
+
+ forward = pty_forward_free(forward);
+
if (!arg_quiet)
putc('\n', stdout);