#include "set.h"
#include "strv.h"
#include "mkdir.h"
+#include "path-util.h"
+#include "missing.h"
+#include "virt.h"
+#include "efivars.h"
#ifndef TTY_GID
#define TTY_GID 5
#endif
+typedef enum MountMode {
+ MNT_NONE = 0,
+ MNT_FATAL = 1 << 0,
+ MNT_IN_CONTAINER = 1 << 1,
+} MountMode;
+
typedef struct MountPoint {
const char *what;
const char *where;
const char *type;
const char *options;
unsigned long flags;
- bool fatal;
+ bool (*condition_fn)(void);
+ MountMode mode;
} MountPoint;
/* The first three entries we might need before SELinux is up. The
#define N_EARLY_MOUNT 4
static const MountPoint mount_table[] = {
- { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
- { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
- { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
- { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
- { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, false },
- { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
- { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, false },
- { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+ { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ is_efiboot, MNT_NONE },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
+ NULL, MNT_IN_CONTAINER },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_IN_CONTAINER },
};
/* These are API file systems that might be mounted by other software,
/* Container bind mounts */
"/proc/sys\0"
"/dev/console\0"
- "/proc/kmsg\0"
- "/etc/localtime\0"
- "/etc/timezone\0"
- "/etc/machine-id\0";
+ "/proc/kmsg\0";
bool mount_point_is_api(const char *path) {
unsigned i;
assert(p);
+ if (p->condition_fn && !p->condition_fn())
+ return 0;
+
/* Relabel first, just in case */
if (relabel)
- label_fix(p->where, true);
+ label_fix(p->where, true, true);
- if ((r = path_is_mount_point(p->where, true)) < 0)
+ r = path_is_mount_point(p->where, true);
+ if (r < 0)
return r;
if (r > 0)
return 0;
+ /* Skip securityfs in a container */
+ if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
+ return 0;
+
/* The access mode here doesn't really matter too much, since
* the mounted file system will take precedence anyway. */
- mkdir_p(p->where, 0755);
+ mkdir_p_label(p->where, 0755);
log_debug("Mounting %s to %s of type %s with options %s.",
p->what,
p->type,
p->flags,
p->options) < 0) {
- log_full(p->fatal ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
- return p->fatal ? -errno : 0;
+ log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
+ return (p->mode & MNT_FATAL) ? -errno : 0;
}
/* Relabel again, since we now mounted something fresh here */
if (relabel)
- label_fix(p->where, false);
+ label_fix(p->where, false, false);
return 1;
}
controllers = set_new(string_hash_func, string_compare_func);
if (!controllers) {
- r = -ENOMEM;
- log_error("Failed to allocate controller set.");
+ r = log_oom();
goto finish;
}
options = strv_join(*k, ",");
if (!options) {
- log_error("Failed to join options");
free(controller);
- r = -ENOMEM;
+ r = log_oom();
goto finish;
}
where = strappend("/sys/fs/cgroup/", options);
if (!where) {
- log_error("Failed to build path");
free(options);
- r = -ENOMEM;
+ r = log_oom();
goto finish;
}
p.type = "cgroup";
p.options = options;
p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
- p.fatal = false;
r = mount_one(&p, true);
free(controller);
t = strappend("/sys/fs/cgroup/", *i);
if (!t) {
- log_error("Failed to build path");
- r = -ENOMEM;
+ r = log_oom();
free(options);
goto finish;
}
if (_unlikely_(ftwbuf->level == 0))
return FTW_CONTINUE;
- label_fix(fpath, true);
+ label_fix(fpath, false, false);
/* /run/initramfs is static data and big, no need to
* dynamically relabel its contents at boot... */
/* Explicitly relabel these */
NULSTR_FOREACH(j, relabel)
- label_fix(j, true);
+ label_fix(j, true, false);
after_relabel = now(CLOCK_MONOTONIC);
/* Create a few default symlinks, which are normally created
* by udevd, but some scripts might need them before we start
* udevd. */
- dev_setup();
+ dev_setup(NULL);
+
+ /* Mark the root directory as shared in regards to mount
+ * propagation. The kernel defaults to "private", but we think
+ * it makes more sense to have a default of "shared" so that
+ * nspawn and the container tools work out of the box. If
+ * specific setups need other settings they can reset the
+ * propagation mode to private if needed. */
+ if (detect_container(NULL) <= 0)
+ if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
+ log_warning("Failed to set up the root directory for shared mount propagation: %m");
/* Create a few directories we always want around */
- label_mkdir("/run/systemd", 0755);
- label_mkdir("/run/systemd/system", 0755);
+ mkdir_label("/run/systemd", 0755);
+ mkdir_label("/run/systemd/system", 0755);
return 0;
}