chiark / gitweb /
cgroup: optionally mount specific cgroup controllers together, and add cpu+cpuacct...
[elogind.git] / src / mount-setup.c
index cb91e181bf35ef62cfef9c23201a6b80a331cb58..abb0c19d2543e110478831b5ee9a5e944497b87f 100644 (file)
@@ -1,4 +1,4 @@
-/*-*- Mode: C; c-basic-offset: 8 -*-*/
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
 
 /***
   This file is part of systemd.
 #include <string.h>
 #include <libgen.h>
 #include <assert.h>
+#include <unistd.h>
+#include <ftw.h>
 
 #include "mount-setup.h"
 #include "log.h"
 #include "macro.h"
 #include "util.h"
+#include "label.h"
+#include "set.h"
+#include "strv.h"
+
+#ifndef TTY_GID
+#define TTY_GID 5
+#endif
 
 typedef struct MountPoint {
         const char *what;
@@ -41,16 +50,28 @@ typedef struct MountPoint {
         bool fatal;
 } MountPoint;
 
+/* The first three entries we might need before SELinux is up. The
+ * other ones we can delay until SELinux is loaded. */
+#define N_EARLY_MOUNT 3
+
 static const MountPoint mount_table[] = {
-        { "proc",        "/proc",                    "proc",        NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
-        { "sysfs",       "/sys",                     "sysfs",       NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
-        { "devtmps",     "/dev",                     "devtmpfs",    "mode=755",  MS_NOSUID,                    true },
-        { "tmpfs",       "/dev/shm",                 "tmpfs",       "mode=1777", MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
-        { "devpts",      "/dev/pts",                 "devpts",      NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
-        { "cgroup",      "/cgroup/debug",            "cgroup",      "debug",     MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
-        { "debugfs",     "/sys/kernel/debug",        "debugfs",     NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
-        { "binfmt_misc", "/proc/sys/fs/binfmt_misc", "binfmt_misc", NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
-        { "mqueue",      "/dev/mqueue",              "mqueue",      NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+        { "proc",     "/proc",                  "proc",     NULL,                MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
+        { "sysfs",    "/sys",                   "sysfs",    NULL,                MS_NOSUID|MS_NOEXEC|MS_NODEV, true },
+        { "devtmpfs", "/dev",                   "devtmpfs", "mode=755",          MS_NOSUID,                    true }, /* last of the N_EARLY_MOUNT (3) entries that mount_setup_early() handles */
+        { "tmpfs",    "/dev/shm",               "tmpfs",    "mode=1777",         MS_NOSUID|MS_NODEV,           true },
+        { "devpts",   "/dev/pts",               "devpts",   "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC, false }, /* TTY_GID falls back to 5 unless it was defined before this point */
+        { "tmpfs",    "/run",                   "tmpfs",    "mode=755",          MS_NOSUID|MS_NODEV, true },
+        { "tmpfs",    "/sys/fs/cgroup",         "tmpfs",    "mode=755",          MS_NOSUID|MS_NOEXEC|MS_NODEV, false },
+        { "cgroup",   "/sys/fs/cgroup/systemd", "cgroup",   "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV, false }, /* the other cgroup hierarchies are not listed here; mount_cgroup_controllers() builds their entries at run time */
+};
+
+/* These are API file systems that might be mounted by other software;
+ * we just list them here so that we know that we should ignore them. */
+
+static const char * const ignore_paths[] = {
+        "/sys/fs/selinux",
+        "/selinux",
+        "/proc/bus/usb"
 };
 
 bool mount_point_is_api(const char *path) {
@@ -60,18 +81,32 @@ bool mount_point_is_api(const char *path) {
          * should be ignored */
 
         for (i = 0; i < ELEMENTSOF(mount_table); i ++)
-                if (path_startswith(path, mount_table[i].where))
+                if (path_equal(path, mount_table[i].where))
+                        return true;
+
+        return path_startswith(path, "/sys/fs/cgroup/");
+}
+
+bool mount_point_ignore(const char *path) {
+        unsigned i;
+        /* Returns true if path_equal() reports a match between path
+         * and any entry of ignore_paths, false otherwise. Entries are
+         * tested with path_equal() only; there is no
+         * path_startswith() prefix fallback in this function. */
+        for (i = 0; i < ELEMENTSOF(ignore_paths); i++)
+                if (path_equal(path, ignore_paths[i]))
                         return true;
 
-        return path_startswith(path, "/cgroup/");
+        return false;
 }
 
-static int mount_one(const MountPoint *p) {
+static int mount_one(const MountPoint *p, bool relabel) {
         int r;
 
         assert(p);
 
-        if ((r = path_is_mount_point(p->where)) < 0)
+        /* Relabel first, just in case */
+        if (relabel)
+                label_fix(p->where, true);
+
+        if ((r = path_is_mount_point(p->where, true)) < 0)
                 return r;
 
         if (r > 0)
@@ -96,27 +131,61 @@ static int mount_one(const MountPoint *p) {
                 return p->fatal ? -errno : 0;
         }
 
-        return 0;
+        /* Relabel again, since we now mounted something fresh here */
+        if (relabel)
+                label_fix(p->where, false);
+
+        return 1;
 }
 
-static int mount_cgroup_controllers(void) {
+int mount_setup_early(void) {
+        unsigned i;
+        int r = 0;
+        /* Runs mount_one() on the first N_EARLY_MOUNT entries of
+         * mount_table only. Returns the first non-zero value that
+         * mount_one() produced for those entries, or 0 if every call
+         * returned 0. */
+        assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
+
+        /* Do a minimal mount of /proc and friends to enable the most
+         * basic stuff, such as SELinux */
+        for (i = 0; i < N_EARLY_MOUNT; i ++)  {
+                int j;
+                /* Every entry is attempted, with relabelling switched
+                 * off (second argument false). Only the first non-zero
+                 * result is kept in r.
+                 * NOTE(review): mount_one() returns 1 after mounting
+                 * something fresh, so a positive value from an earlier
+                 * entry is latched as well and a negative error from a
+                 * later entry is then not reported -- confirm this is
+                 * intended. */
+                j = mount_one(mount_table + i, false);
+                if (r == 0)
+                        r = j;
+        }
+
+        return r;
+}
+
+int mount_cgroup_controllers(char ***join_controllers) {
         int r;
         FILE *f;
-        char buf [256];
+        char buf[LINE_MAX];
+        Set *controllers;
 
-        /* Mount all available cgroup controllers. */
+        /* Mount all available cgroup controllers that are built into the kernel. */
+
+        f = fopen("/proc/cgroups", "re");
+        if (!f) {
+                log_error("Failed to enumerate cgroup controllers: %m");
+                return 0;
+        }
 
-        if (!(f = fopen("/proc/cgroups", "re")))
-                return -ENOENT;
+        controllers = set_new(string_hash_func, string_compare_func);
+        if (!controllers) {
+                r = -ENOMEM;
+                log_error("Failed to allocate controller set.");
+                goto finish;
+        }
 
         /* Ignore the header line */
         (void) fgets(buf, sizeof(buf), f);
 
         for (;;) {
-                MountPoint p;
-                char *controller, *where;
+                char *controller;
+                int enabled = 0;
 
-                if (fscanf(f, "%ms %*i %*i %*i", &controller) != 1) {
+                if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
 
                         if (feof(f))
                                 break;
@@ -126,8 +195,71 @@ static int mount_cgroup_controllers(void) {
                         goto finish;
                 }
 
-                if (asprintf(&where, "/cgroup/%s", controller) < 0) {
+                if (!enabled) {
                         free(controller);
+                        continue;
+                }
+
+                r = set_put(controllers, controller);
+                if (r < 0) {
+                        log_error("Failed to add controller to set.");
+                        free(controller);
+                        goto finish;
+                }
+        }
+
+        for (;;) {
+                MountPoint p;
+                char *controller, *where, *options;
+                char ***k = NULL;
+
+                controller = set_steal_first(controllers);
+                if (!controller)
+                        break;
+
+                if (join_controllers)
+                        for (k = join_controllers; *k; k++)
+                                if (strv_find(*k, controller))
+                                        break;
+
+                if (k && *k) {
+                        char **i, **j;
+
+                        for (i = *k, j = *k; *i; i++) {
+
+                                if (!streq(*i, controller)) {
+                                        char *t;
+
+                                        t = set_remove(controllers, *i);
+                                        if (!t) {
+                                                free(*i);
+                                                continue;
+                                        }
+                                        free(t);
+                                }
+
+                                *(j++) = *i;
+                        }
+
+                        *j = NULL;
+
+                        options = strv_join(*k, ",");
+                        if (!options) {
+                                log_error("Failed to join options");
+                                free(controller);
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                } else {
+                        options = controller;
+                        controller = NULL;
+                }
+
+                where = strappend("/sys/fs/cgroup/", options);
+                if (!where) {
+                        log_error("Failed to build path");
+                        free(options);
                         r = -ENOMEM;
                         goto finish;
                 }
@@ -136,33 +268,138 @@ static int mount_cgroup_controllers(void) {
                 p.what = "cgroup";
                 p.where = where;
                 p.type = "cgroup";
-                p.options = controller;
+                p.options = options;
                 p.flags = MS_NOSUID|MS_NOEXEC|MS_NODEV;
                 p.fatal = false;
 
-                r = mount_one(&p);
+                r = mount_one(&p, true);
                 free(controller);
                 free(where);
 
-                if (r < 0)
+                if (r < 0) {
+                        free(options);
                         goto finish;
+                }
+
+                if (r > 0 && k && *k) {
+                        char **i;
+
+                        for (i = *k; *i; i++) {
+                                char *t;
+
+                                t = strappend("/sys/fs/cgroup/", *i);
+                                if (!t) {
+                                        log_error("Failed to build path");
+                                        r = -ENOMEM;
+                                        free(options);
+                                        goto finish;
+                                }
+
+                                r = symlink(options, t);
+                                free(t);
+
+                                if (r < 0 && errno != EEXIST) {
+                                        log_error("Failed to create symlink: %m");
+                                        r = -errno;
+                                        free(options);
+                                        goto finish;
+                                }
+                        }
+                }
+
+                free(options);
         }
 
         r = 0;
 
 finish:
+        set_free_free(controllers);
+
         fclose(f);
 
         return r;
 }
 
-int mount_setup(void) {
+static int symlink_and_label(const char *old_path, const char *new_path) {
+        int r;
+        /* Creates new_path as a symbolic link whose target is
+         * old_path, bracketed by label_symlinkfile_set() and
+         * label_file_clear(). Both arguments must be non-NULL.
+         * Returns a negative value on failure: either the result of
+         * label_symlinkfile_set() or -errno from symlink(). On
+         * success the (non-negative) result of
+         * label_symlinkfile_set() is returned. */
+        assert(old_path);
+        assert(new_path);
+        /* Presumably arranges the security label for the link that is
+         * about to be created -- confirm against label.h. If it
+         * fails, no link is created. */
+        if ((r = label_symlinkfile_set(new_path)) < 0)
+                return r;
+        /* errno is captured into r right away, before
+         * label_file_clear() runs. */
+        if (symlink(old_path, new_path) < 0)
+                r = -errno;
+        /* Reached whether or not symlink() succeeded. */
+        label_file_clear();
+
+        return r;
+}
+
+static int nftw_cb(
+                const char *fpath,
+                const struct stat *sb,
+                int tflag,
+                struct FTW *ftwbuf) {
+        /* nftw() callback used by mount_setup(): calls label_fix() on
+         * every path the walk hands in, except the walk root. sb and
+         * tflag are not looked at. Always returns 0, so this callback
+         * never asks nftw() to stop the walk. */
+        /* No need to label /dev twice in a row... Level 0 is the
+         * path the walk was started on (/dev or /run in
+         * mount_setup()); mount_one() already called label_fix() on
+         * it when invoked with relabel set. */
+        if (ftwbuf->level == 0)
+                return 0;
+
+        label_fix(fpath, true);
+        return 0;
+}; /* NOTE(review): the ';' after the closing brace is a stray empty declaration and can be dropped */
+
+int mount_setup(bool loaded_policy) {
+        /* Mounts every entry of mount_table (with relabelling), then,
+         * if loaded_policy is true, relabels everything below /dev
+         * and /run, and finally creates a few /dev symlinks and the
+         * /run/systemd directories. Returns the negative result of
+         * the first mount_one() call that fails, otherwise 0. This
+         * version no longer calls mount_cgroup_controllers() itself. */
+        const char symlinks[] =
+                "/proc/kcore\0"      "/dev/core\0"
+                "/proc/self/fd\0"    "/dev/fd\0"
+                "/proc/self/fd/0\0"  "/dev/stdin\0"
+                "/proc/self/fd/1\0"  "/dev/stdout\0"
+                "/proc/self/fd/2\0"  "/dev/stderr\0";
+        /* The strings above are consumed pairwise below: link target
+         * first, path of the link to create second (they are passed
+         * to symlink_and_label() as old_path and new_path). */
         int r;
         unsigned i;
+        const char *j, *k;
 
-        for (i = 0; i < ELEMENTSOF(mount_table); i ++)
-                if ((r = mount_one(mount_table+i)) < 0)
+        for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
+                r = mount_one(mount_table + i, true);
+                /* Stop at the first entry that reports an error;
+                 * entries mounted before it are left in place. */
+                if (r < 0)
                         return r;
+        }
+
+        /* Nodes in devtmpfs and /run need to be manually updated for
+         * the appropriate labels, after mounting. The other virtual
+         * API file systems like /sys and /proc do not need that, they
+         * use the same label for all their files. */
+        if (loaded_policy) {
+                usec_t before_relabel, after_relabel;
+                char timespan[FORMAT_TIMESPAN_MAX];
 
-        return mount_cgroup_controllers();
+                before_relabel = now(CLOCK_MONOTONIC);
+                /* FTW_PHYS: do not follow symbolic links. FTW_MOUNT:
+                 * stay on the file system of the start path. 64 is
+                 * the upper bound on file descriptors nftw() may keep
+                 * open. The nftw() return values are not checked. */
+                nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS);
+                nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS);
+
+                after_relabel = now(CLOCK_MONOTONIC);
+
+                log_info("Relabelled /dev and /run in %s.",
+                         format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel));
+
+        }
+
+        /* Create a few default symlinks, which are normally created
+         * by udevd, but some scripts might need them before we start
+         * udevd. */
+        NULSTR_FOREACH_PAIR(j, k, symlinks)
+                symlink_and_label(j, k); /* result ignored: a link that cannot be created does not fail the setup */
+
+        /* Create a few directories we always want around. The
+         * mkdir() results are not checked. */
+        mkdir("/run/systemd", 0755);
+        mkdir("/run/systemd/system", 0755);
+
+        return 0;
 }