1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
28 #include <sys/types.h>
30 #include <sys/syscall.h>
37 #include "path-util.h"
38 #include "namespace.h"
41 #include "loopback-setup.h"
43 #include "dev-setup.h"
/* Kinds of mount operation that can be attached to a path. The enumerator
 * order is significant: entries for the same path are compared by mode when
 * the mount list is sorted, and duplicate removal keeps only the first one. */
46 typedef enum MountMode {
47 /* This is ordered by priority! */
/* One entry of the mount list. The code in this file reads the fields
 * "path" (the mount target), "mode" (a MountMode) and "ignore" (when set,
 * certain failures for this entry are tolerated instead of being reported). */
56 typedef struct BindMount {
/* Walks the string list strv and adds entries for it, tagged with mode, at the
 * cursor *p. The caller counts the list lengths up front and asserts
 * afterwards that the cursor ended exactly at the end of its array.
 * NOTE(review): presumably one entry per string and a negative errno-style
 * value on failure -- confirm against the full body. */
63 static int append_mounts(BindMount **p, char **strv, MountMode mode) {
68 STRV_FOREACH(i, strv) {
/* A leading '-' gets special treatment for these three modes.
 * NOTE(review): presumably it marks the entry as ignorable (see the
 * "ignore" field) and is stripped from the path -- confirm. */
72 if ((mode == INACCESSIBLE || mode == READONLY || mode == READWRITE) && (*i)[0] == '-') {
/* Each entry is checked for being an absolute path. */
77 if (!path_is_absolute(*i))
/* Comparison callback handed to qsort() for arrays of BindMount. Entries with
 * equal paths are compared by mode; for differing paths, one path being a
 * prefix of the other decides the order.
 * NOTE(review): confirm the sign returned by each branch before relying on a
 * particular direction. */
88 static int mount_path_compare(const void *a, const void *b) {
89 const BindMount *p = a, *q = b;
91 if (path_equal(p->path, q->path)) {
93 /* If the paths are equal, check the mode */
94 if (p->mode < q->mode)
97 if (p->mode > q->mode)
103 /* If the paths are not equal, then order prefixes first */
/* Here q->path is a prefix of p->path ... */
104 if (path_startswith(p->path, q->path))
/* ... and here p->path is a prefix of q->path. */
107 if (path_startswith(q->path, p->path))
/* Removes entries from the array m (of *n elements) whose path equals that of
 * the entry tracked in "previous". The caller sorts the array with
 * mount_path_compare() first, so entries with equal paths are adjacent.
 * NOTE(review): presumably compacts in place and stores the new count in
 * *n -- confirm. */
113 static void drop_duplicates(BindMount *m, unsigned *n) {
114 BindMount *f, *t, *previous;
/* f visits every element of the array.
 * NOTE(review): t looks like the write cursor for kept entries -- confirm. */
119 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
121 /* The first one wins */
122 if (previous && path_equal(f->path, previous->path))
/* Sets up a private /dev: records the mode and device number of the nodes
 * listed in devnodes, mounts a fresh tmpfs on /dev with a devpts instance on
 * /dev/pts and a tmpfs on /dev/shm, and then recreates the recorded nodes
 * with mknod(). When m->ignore is set, a failure of these steps yields 0
 * instead of -errno. */
136 static int mount_dev(BindMount *m) {
137 static const char devnodes[] =
/* One stat slot per devnodes entry; the assert after the first loop checks
 * that the number of entries walked equals the size of this array. */
145 struct stat devnodes_stat[6] = {};
148 _cleanup_umask_ mode_t u;
155 /* First: record device mode_t and dev_t */
156 NULSTR_FOREACH(d, devnodes) {
157 r = stat(d, &devnodes_stat[n]);
/* Check whether the node is something other than a block or character
 * device. */
162 if (!S_ISBLK(devnodes_stat[n].st_mode) &&
163 !S_ISCHR(devnodes_stat[n].st_mode))
170 assert(n == ELEMENTSOF(devnodes_stat));
172 r = mount("tmpfs", "/dev", "tmpfs", MS_NOSUID|MS_STRICTATIME, "mode=755");
174 return m->ignore ? 0 : -errno;
/* NOTE(review): the result of mkdir_p() is not checked here; a failure would
 * only show up through the mount() that follows. */
177 mkdir_p("/dev/pts", 0755);
179 r = mount("devpts", "/dev/pts", "devpts", MS_NOSUID|MS_NOEXEC, "newinstance,ptmxmode=0666,mode=620,gid=" STRINGIFY(TTY_GID));
181 return m->ignore ? 0 : -errno;
/* NOTE(review): same as above, the mkdir_p() result is not checked. */
183 mkdir_p("/dev/shm", 0755);
185 r = mount("tmpfs", "/dev/shm", "tmpfs", MS_NOSUID|MS_NODEV|MS_STRICTATIME, "mode=1777");
187 return m->ignore ? 0 : -errno;
189 /* Second: actually create it */
191 NULSTR_FOREACH(d, devnodes) {
/* A slot whose st_rdev is 0 gets special treatment.
 * NOTE(review): presumably it is skipped because no device number was
 * recorded for it -- confirm. */
192 if (devnodes_stat[n].st_rdev == 0)
/* Recreate the node with the mode and device number recorded earlier. */
195 r = mknod(d, devnodes_stat[n].st_mode, devnodes_stat[n].st_rdev);
197 return m->ignore ? 0 : -errno;
/* Performs the mount for one entry: a source ("what") is selected per mode and
 * then bind-mounted recursively (MS_BIND|MS_REC) onto m->path. */
207 static int apply_mount(
210 const char *var_tmp_dir) {
/* NOTE(review): presumably the source used for INACCESSIBLE entries --
 * confirm which case this assignment belongs to. */
223 what = "/run/systemd/inaccessible";
235 case PRIVATE_VAR_TMP:
240 assert_not_reached("Unknown mode");
245 r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
247 log_debug("Successfully mounted %s to %s", what, m->path);
/* A missing path (ENOENT) is handled separately when the entry is marked
 * as ignorable. */
248 else if (m->ignore && errno == ENOENT)
/* Remounts m->path read-only with a recursive bind remount. Only entries whose
 * mode is INACCESSIBLE or READONLY reach the mount() call; all other modes
 * take the branch below (NOTE(review): presumably an early success return --
 * confirm). */
254 static int make_read_only(BindMount *m) {
259 if (m->mode != INACCESSIBLE && m->mode != READONLY)
262 r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
/* A failure counts here unless the entry is ignorable and the path does not
 * exist (ENOENT). */
263 if (r < 0 && !(m->ignore && errno == ENOENT))
270 char** read_write_dirs,
271 char** read_only_dirs,
272 char** inaccessible_dirs,
276 unsigned mount_flags) {
278 BindMount *m, *mounts = NULL;
/* A mount_flags value of 0 selects MS_SHARED as the default. */
282 if (mount_flags == 0)
283 mount_flags = MS_SHARED;
/* Move this process into a new mount namespace. */
285 if (unshare(CLONE_NEWNS) < 0)
/* Count the entries that will be needed: one for each private tmp directory
 * that was passed in plus one per configured directory. */
288 n = !!tmp_dir + !!var_tmp_dir +
289 strv_length(read_write_dirs) +
290 strv_length(read_only_dirs) +
291 strv_length(inaccessible_dirs) +
/* The entry array is allocated on the stack.
 * NOTE(review): n is derived from the caller-supplied lists, so the stack
 * usage grows with their length. */
295 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
296 r = append_mounts(&m, read_write_dirs, READWRITE);
300 r = append_mounts(&m, read_only_dirs, READONLY);
304 r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
310 m->mode = PRIVATE_TMP;
315 m->path = "/var/tmp";
316 m->mode = PRIVATE_VAR_TMP;
322 m->mode = PRIVATE_DEV;
/* Every counted slot must have been filled in by now. */
326 assert(mounts + n == m);
/* Sort the entries, then drop those with duplicate paths. */
328 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
329 drop_duplicates(mounts, &n);
332 /* Remount / as SLAVE so that nothing now mounted in the namespace
333 shows up in the parent */
334 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
/* First pass: perform the mount for every entry. */
337 for (m = mounts; m < mounts + n; ++m) {
338 r = apply_mount(m, tmp_dir, var_tmp_dir);
/* Second pass, run only after all mounts are in place: apply the read-only
 * remounts. */
343 for (m = mounts; m < mounts + n; ++m) {
344 r = make_read_only(m);
349 /* Remount / as the desired mode */
350 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
/* NOTE(review): this loop lazily detaches (MNT_DETACH) entry paths and looks
 * like the clean-up path taken after a failure -- confirm. The result of
 * umount2() is not checked. */
358 for (m = mounts; m < mounts + n; ++m)
360 umount2(m->path, MNT_DETACH);
/* Prepares one private temporary directory below prefix. The directory name is
 * built from the template
 *   <prefix>/systemd-private-<boot id>-<id>-XXXXXX
 * and a "tmp" subdirectory with mode 0777 plus the sticky bit is created
 * inside it.
 * NOTE(review): presumably the XXXXXX part is filled in by a mkdtemp()-style
 * call and the resulting name is handed back through *path -- confirm. */
365 static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
366 _cleanup_free_ char *x = NULL;
367 char bid[SD_ID128_STRING_MAX];
375 /* We include the boot id in the directory so that after a
376 * reboot we can easily identify obsolete directories. */
378 r = sd_id128_get_boot(&boot_id);
382 x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
/* Run the enclosed code with umask 0000 so that the mode passed to mkdir()
 * below is applied without any bits masked out. */
390 RUN_WITH_UMASK(0000) {
393 y = strappenda(x, "/tmp");
395 if (mkdir(y, 0777 | S_ISVTX) < 0)
/* Sets up the pair of private temporary directories for id: one below /tmp
 * and one below /var/tmp, each via setup_one_tmp_dir().
 * NOTE(review): presumably the two results are returned through *tmp_dir and
 * *var_tmp_dir -- confirm. */
405 int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
413 r = setup_one_tmp_dir(id, "/tmp", &a);
417 r = setup_one_tmp_dir(id, "/var/tmp", &b);
/* Path of the "tmp" subdirectory inside the first directory.
 * NOTE(review): presumably used to remove the first directory again when
 * setting up the second one failed -- confirm. */
421 t = strappenda(a, "/tmp");
/* Puts the calling process into the network namespace that is shared through
 * netns_storage_socket. If no namespace reference is stored in the socket
 * pair yet, a new network namespace is created and a file descriptor for it
 * is opened; otherwise the stored descriptor is received and joined with
 * setns(). In both cases a descriptor is then sent into the socket pair. */
435 int setup_netns(int netns_storage_socket[2]) {
436 _cleanup_close_ int netns = -1;
/* Control-message buffer sized for exactly one file descriptor. */
438 struct cmsghdr cmsghdr;
439 uint8_t buf[CMSG_SPACE(sizeof(int))];
442 .msg_control = &control,
443 .msg_controllen = sizeof(control),
445 struct cmsghdr *cmsg;
448 assert(netns_storage_socket);
449 assert(netns_storage_socket[0] >= 0);
450 assert(netns_storage_socket[1] >= 0);
452 /* We use the passed socketpair as a storage buffer for our
453 * namespace reference fd. Whatever process runs this first
454 * shall create a new namespace, all others should just join
455 * it. To serialize that we use a file lock on the socket
458 * It's a bit crazy, but hey, works great! */
460 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
/* Non-blocking receive: EAGAIN is the "nothing stored yet" case, any other
 * errno is handled as a failure. */
463 if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) {
464 if (errno != EAGAIN) {
469 /* Nothing stored yet, so let's create a new namespace */
471 if (unshare(CLONE_NEWNET) < 0) {
/* Open a descriptor that refers to the namespace this process is now in. */
478 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
486 /* Yay, found something, so let's join the namespace */
/* Look through the received control messages for the SCM_RIGHTS one that
 * carries the descriptor. */
488 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
489 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
490 assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
/* NOTE(review): the descriptor is read through a pointer cast of
 * CMSG_DATA(), while the send path below copies it with memcpy(). */
491 netns = *(int*) CMSG_DATA(cmsg);
495 if (setns(netns, CLONE_NEWNET) < 0) {
/* Build an SCM_RIGHTS control message holding the namespace descriptor and
 * send it into the socket pair, where a later caller can receive it. */
503 cmsg = CMSG_FIRSTHDR(&mh);
504 cmsg->cmsg_level = SOL_SOCKET;
505 cmsg->cmsg_type = SCM_RIGHTS;
506 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
507 memcpy(CMSG_DATA(cmsg), &netns, sizeof(int));
508 mh.msg_controllen = cmsg->cmsg_len;
510 if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) {
/* Release the lock taken at the start.
 * NOTE(review): the result of the unlock call is not checked. */
516 lockf(netns_storage_socket[0], F_ULOCK, 0);