1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <sys/mount.h>
32 #include "mount-setup.h"
33 #include "dev-setup.h"
41 #include "path-util.h"
/* Behaviour flags kept in the mode member of a MountPoint and tested by
 * mount_one(). */
50 typedef enum MountMode {
/* When this bit is absent from an entry's mode, mount_one() additionally
 * consults detect_container() before mounting that entry. */
53 MNT_IN_CONTAINER = 1 << 1,
/* Describes one API file system; mount_table is an array of these and
 * mount_one() consumes a pointer to one. */
56 typedef struct MountPoint {
/* Optional predicate taking no arguments. mount_one() calls it first
 * whenever it is non-NULL and acts on a false result. */
62 bool (*condition_fn)(void);
66 /* The first three entries we might need before SELinux is up. The
67 * fourth (securityfs) is needed by IMA to load a custom policy. The
68 * fifth (smackfs) is counted as an early mount as well. The other
 * ones we can delay until SELinux and IMA are loaded.
 *
 * mount_setup_early() mounts exactly the first N_EARLY_MOUNT entries
 * of mount_table, so this value must match the table order. */
69 #define N_EARLY_MOUNT 5
/* Suffix concatenated onto the option string of the name=systemd cgroup
 * entry in mount_table: either ",xattr" or the empty string, depending on
 * which of the two definitions the enclosing preprocessor conditional
 * selects. */
72 # define FS_XATTR_OPT ",xattr"
74 # define FS_XATTR_OPT ""
/* Table of the API file systems handled by this file.
 *
 * Each initializer lists, in order: the mount source, the mount point (the
 * "where" member, which mount_point_is_api() compares paths against), the
 * file system type, the option string or NULL, the mount flags (MS_*), an
 * optional condition callback or NULL, and the MountMode flags (MNT_*).
 * NOTE(review): the member names for source, type and options are inferred
 * from the debug message in mount_one() -- confirm against the struct.
 *
 * Order matters: mount_setup_early() mounts only the first N_EARLY_MOUNT
 * entries, while mount_setup() walks the whole table. */
77 static const MountPoint mount_table[] = {
78 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
79 NULL, MNT_FATAL|MNT_IN_CONTAINER },
80 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
81 NULL, MNT_FATAL|MNT_IN_CONTAINER },
82 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
83 NULL, MNT_FATAL|MNT_IN_CONTAINER },
84 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
86 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
88 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
89 NULL, MNT_FATAL|MNT_IN_CONTAINER },
90 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
91 NULL, MNT_IN_CONTAINER },
92 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
93 NULL, MNT_FATAL|MNT_IN_CONTAINER },
94 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
95 NULL, MNT_IN_CONTAINER },
96 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd" FS_XATTR_OPT, MS_NOSUID|MS_NOEXEC|MS_NODEV,
97 NULL, MNT_IN_CONTAINER },
98 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
101 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 is_efi_boot, MNT_NONE },
106 /* These are API file systems that might be mounted by other software;
107 * we just list them here so that we know that we should ignore them.
 * The list is a single NUL-separated string that mount_point_ignore()
 * walks with NULSTR_FOREACH. */
109 static const char ignore_paths[] =
110 /* SELinux file systems */
113 /* Legacy cgroup mount points */
116 /* Legacy kernel file system */
118 /* Container bind mounts */
/* Tells whether path is one of the API mount points.
 *
 * path: the mount point path to test.
 *
 * The path is compared with path_equal() against the "where" member of
 * every mount_table entry; the final fallback is a prefix test for
 * anything below /sys/fs/cgroup/. */
123 bool mount_point_is_api(const char *path) {
126 /* Checks if this mount point is considered "API", and hence
127 * should be ignored */
129 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
130 if (path_equal(path, mount_table[i].where))
/* Not a table entry: everything below the cgroup tree counts as API too. */
133 return path_startswith(path, "/sys/fs/cgroup/");
/* Tests path against the ignore_paths list.
 *
 * path: the mount point path to test.
 *
 * Each NUL-separated entry of ignore_paths is compared with path_equal(). */
136 bool mount_point_ignore(const char *path) {
139 NULSTR_FOREACH(i, ignore_paths)
140 if (path_equal(path, i))
/* Mounts the single file system described by p.
 *
 * p:       entry to mount; p->where is the mount point directory.
 * relabel: NOTE(review): presumably controls whether the label_fix() calls
 *          below are made -- confirm against the full body.
 *
 * When the mount fails, the failure is logged at LOG_ERR and -errno is
 * returned for entries flagged MNT_FATAL; for all other entries it is logged
 * at LOG_DEBUG and 0 is returned. */
146 static int mount_one(const MountPoint *p, bool relabel) {
/* Consult the entry's optional condition callback before anything else. */
151 if (p->condition_fn && !p->condition_fn())
154 /* Relabel first, just in case */
156 label_fix(p->where, true, true);
/* Ask whether something is already mounted on p->where. */
158 r = path_is_mount_point(p->where, true);
165 /* Skip securityfs in a container -- more generally, any entry whose
 * mode lacks MNT_IN_CONTAINER */
166 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
169 /* The access mode here doesn't really matter too much, since
170 * the mounted file system will take precedence anyway. */
171 mkdir_p_label(p->where, 0755);
173 log_debug("Mounting %s to %s of type %s with options %s.",
/* NOTE(review): errno is read a second time for the return value after
 * log_full() has already run. If logging can modify errno, the wrong code
 * is returned -- consider saving errno in a local before logging. */
184 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s: %s", p->where, strerror(errno));
185 return (p->mode & MNT_FATAL) ? -errno : 0;
188 /* Relabel again, since we now mounted something fresh here */
190 label_fix(p->where, false, false);
/* Mounts only the first N_EARLY_MOUNT entries of mount_table, calling
 * mount_one() with relabel set to false for each of them. */
195 int mount_setup_early(void) {
/* Compile-time guard: the early count may never exceed the table size. */
199 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
201 /* Do a minimal mount of /proc and friends to enable the most
202 * basic stuff, such as SELinux */
203 for (i = 0; i < N_EARLY_MOUNT; i ++) {
206 j = mount_one(mount_table + i, false);
/* Mounts the cgroup controllers listed in /proc/cgroups below
 * /sys/fs/cgroup/.
 *
 * join_controllers: may be NULL; otherwise a NULL-terminated array of string
 *                   lists. A controller found in one of these lists is
 *                   mounted on a hierarchy whose options and directory name
 *                   are the whole list joined with ","; a controller found
 *                   in no list uses its own name.
 *
 * For a joined hierarchy, a symlink named after each member of the list is
 * created under /sys/fs/cgroup/ with the joined name as its target. */
214 int mount_cgroup_controllers(char ***join_controllers) {
220 /* Mount all available cgroup controllers that are built into the kernel. */
222 f = fopen("/proc/cgroups", "re");
224 log_error("Failed to enumerate cgroup controllers: %m");
/* Set of controller name strings, filled from /proc/cgroups below. */
228 controllers = set_new(string_hash_func, string_compare_func);
234 /* Ignore the header line */
235 (void) fgets(buf, sizeof(buf), f);
/* Each record yields the controller name (allocated by fscanf through the
 * "m" modifier), two integers that are skipped, and the enabled flag. */
241 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
246 log_error("Failed to parse /proc/cgroups.");
256 r = set_put(controllers, controller);
258 log_error("Failed to add controller to set.");
266 char *controller, *where, *options;
/* Take the next pending controller out of the set. */
269 controller = set_steal_first(controllers);
/* Look for a join group that contains this controller. */
273 if (join_controllers)
274 for (k = join_controllers; *k; k++)
275 if (strv_find(*k, controller))
/* Walk the matching group and drop its other members from the pending set,
 * presumably so they are not mounted a second time on their own. */
281 for (i = *k, j = *k; *i; i++) {
283 if (!streq(*i, controller)) {
286 t = set_remove(controllers, *i);
/* Joined case: the option string is the whole group, comma-separated. */
299 options = strv_join(*k, ",");
/* Stand-alone case: the controller name is the option string. */
307 options = controller;
311 where = strappend("/sys/fs/cgroup/", options);
323 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
324 p.mode = MNT_IN_CONTAINER;
326 r = mount_one(&p, true);
/* Only for a positive mount_one() result with a matched join group: create
 * one symlink per group member pointing at the joined directory name. */
335 if (r > 0 && k && *k) {
338 for (i = *k; *i; i++) {
341 t = strappend("/sys/fs/cgroup/", *i);
348 r = symlink(options, t);
/* A symlink that already exists is not treated as an error. */
351 if (r < 0 && errno != EEXIST) {
352 log_error("Failed to create symlink: %m");
366 set_free_free(controllers);
375 const struct stat *sb,
377 struct FTW *ftwbuf) {
/* Tree-walk callback handed to nftw() by mount_setup() for /dev and /run.
 * It calls label_fix() on the visited path and returns FTW_SKIP_SUBTREE
 * for /run/initramfs so that its contents are not descended into. */
379 /* No need to label /dev twice in a row... */
380 if (_unlikely_(ftwbuf->level == 0))
383 label_fix(fpath, false, false);
385 /* /run/initramfs is static data and big, no need to
386 * dynamically relabel its contents at boot... */
387 if (_unlikely_(ftwbuf->level == 1 &&
389 streq(fpath, "/run/initramfs")))
390 return FTW_SKIP_SUBTREE;
/* Performs the full API file system setup.
 *
 * loaded_policy: NOTE(review): presumably decides whether the /dev and /run
 *                relabelling pass below runs -- confirm against the full body.
 *
 * Steps visible here: mount every mount_table entry through mount_one()
 * with relabel set to true, relabel /dev and /run plus a short list of
 * explicit paths, switch the root directory to shared mount propagation
 * unless running in a container, and create the /run/systemd directories. */
395 int mount_setup(bool loaded_policy) {
/* NUL-separated list of extra paths that get an explicit label_fix(). */
397 static const char relabel[] =
398 "/run/initramfs/root-fsck\0"
399 "/run/initramfs/shutdown\0";
405 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
406 r = mount_one(mount_table + i, true);
412 /* Nodes in devtmpfs and /run need to be manually updated for
413 * the appropriate labels, after mounting. The other virtual
414 * API file systems like /sys and /proc do not need that, they
415 * use the same label for all their files. */
417 usec_t before_relabel, after_relabel;
418 char timespan[FORMAT_TIMESPAN_MAX];
420 before_relabel = now(CLOCK_MONOTONIC);
/* FTW_MOUNT keeps each walk on a single file system, FTW_PHYS stops nftw()
 * from following symlinks, and FTW_ACTIONRETVAL is what allows nftw_cb to
 * return FTW_SKIP_SUBTREE. 64 caps the directory descriptors nftw() may
 * hold open at once. The return values of both walks are ignored. */
422 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
423 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
425 /* Explicitly relabel these */
426 NULSTR_FOREACH(j, relabel)
427 label_fix(j, true, false);
429 after_relabel = now(CLOCK_MONOTONIC);
431 log_info("Relabelled /dev and /run in %s.",
432 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
435 /* Create a few default symlinks, which are normally created
436 * by udevd, but some scripts might need them before we start
440 /* Mark the root directory as shared in regards to mount
441 * propagation. The kernel defaults to "private", but we think
442 * it makes more sense to have a default of "shared" so that
443 * nspawn and the container tools work out of the box. If
444 * specific setups need other settings they can reset the
445 * propagation mode to private if needed. */
446 if (detect_container(NULL) <= 0)
447 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
448 log_warning("Failed to set up the root directory for shared mount propagation: %m");
450 /* Create a few directories we always want around, Note that
451 * sd_booted() checks for /run/systemd/system, so this mkdir
452 * really needs to stay for good, otherwise software that
453 * copied sd-daemon.c into their sources will misdetect
455 mkdir_label("/run/systemd", 0755);
456 mkdir_label("/run/systemd/system", 0755);
457 mkdir_label("/run/systemd/inaccessible", 0000);