#include "mkdir.h"
#include "path-util.h"
#include "missing.h"
+#include "virt.h"
+#include "efivars.h"
#ifndef TTY_GID
#define TTY_GID 5
#endif
+typedef enum MountMode {
+ MNT_NONE = 0,
+ MNT_FATAL = 1 << 0,
+ MNT_IN_CONTAINER = 1 << 1,
+} MountMode;
+
typedef struct MountPoint {
const char *what;
const char *where;
const char *type;
const char *options;
unsigned long flags;
- bool fatal;
+ bool (*condition_fn)(void);
+ MountMode mode;
} MountPoint;
/* The first three entries we might need before SELinux is up. The
* fourth (securityfs) is needed by IMA to load a custom policy. The
* other ones we can delay until SELinux and IMA are loaded. */
-#define N_EARLY_MOUNT 4
+#define N_EARLY_MOUNT 5
static const MountPoint mount_table[] = {
- { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
- { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
- { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME, true },
- { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
- { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
- { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, false },
- { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME, true },
- { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME, false },
- { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+ { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+ { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_NONE },
+ { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
+ NULL, MNT_IN_CONTAINER },
+ { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_FATAL|MNT_IN_CONTAINER },
+ { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
+ NULL, MNT_IN_CONTAINER },
+ { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_IN_CONTAINER },
+ { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ NULL, MNT_NONE },
+#ifdef ENABLE_EFI
+ { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
+ is_efi_boot, MNT_NONE },
+#endif
};
/* These are API file systems that might be mounted by other software,
/* Container bind mounts */
"/proc/sys\0"
"/dev/console\0"
- "/proc/kmsg\0"
- "/etc/localtime\0"
- "/etc/timezone\0"
- "/etc/machine-id\0";
+ "/proc/kmsg\0";
bool mount_point_is_api(const char *path) {
unsigned i;
assert(p);
+ if (p->condition_fn && !p->condition_fn())
+ return 0;
+
/* Relabel first, just in case */
if (relabel)
label_fix(p->where, true, true);
- if ((r = path_is_mount_point(p->where, true)) < 0)
+ r = path_is_mount_point(p->where, true);
+ if (r < 0)
return r;
if (r > 0)
return 0;
+ /* Skip securityfs in a container */
+ if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
+ return 0;
+
/* The access mode here doesn't really matter too much, since
* the mounted file system will take precedence anyway. */
mkdir_p_label(p->where, 0755);
p->type,
p->flags,
p->options) < 0) {
- log_full(p->fatal ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
- return p->fatal ? -errno : 0;
+ log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
+ return (p->mode & MNT_FATAL) ? -errno : 0;
}
/* Relabel again, since we now mounted something fresh here */
p.type = "cgroup";
p.options = options;
p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
- p.fatal = false;
+ p.mode = MNT_IN_CONTAINER;
r = mount_one(&p, true);
free(controller);
after_relabel = now(CLOCK_MONOTONIC);
log_info("Relabelled /dev and /run in %s.",
- format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
+ format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
}
/* Create a few default symlinks, which are normally created
* by udevd, but some scripts might need them before we start
* udevd. */
- dev_setup();
+ dev_setup(NULL);
/* Mark the root directory as shared in regards to mount
* propagation. The kernel defaults to "private", but we think
* nspawn and the container tools work out of the box. If
* specific setups need other settings they can reset the
* propagation mode to private if needed. */
- if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
- log_warning("Failed to set up the root directory for shared mount propagation: %m");
-
- /* Create a few directories we always want around */
+ if (detect_container(NULL) <= 0)
+ if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
+ log_warning("Failed to set up the root directory for shared mount propagation: %m");
+
+ /* Create a few directories we always want around, Note that
+ * sd_booted() checks for /run/systemd/system, so this mkdir
+ * really needs to stay for good, otherwise software that
+ * copied sd-daemon.c into their sources will misdetect
+ * systemd. */
mkdir_label("/run/systemd", 0755);
mkdir_label("/run/systemd/system", 0755);
+ mkdir_label("/run/systemd/inaccessible", 0000);
return 0;
}