1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
28 #include <sys/types.h>
30 #include <sys/syscall.h>
37 #include "path-util.h"
38 #include "namespace.h"
41 #include "loopback-setup.h"
43 typedef enum MountMode {
44 /* This is ordered by priority! */
52 typedef struct BindMount {
59 static int append_mounts(BindMount **p, char **strv, MountMode mode) {
64 STRV_FOREACH(i, strv) {
68 if ((mode == INACCESSIBLE || mode == READONLY) && (*i)[0] == '-') {
73 if (!path_is_absolute(*i))
84 static int mount_path_compare(const void *a, const void *b) {
85 const BindMount *p = a, *q = b;
87 if (path_equal(p->path, q->path)) {
89 /* If the paths are equal, check the mode */
90 if (p->mode < q->mode)
93 if (p->mode > q->mode)
99 /* If the paths are not equal, then order prefixes first */
100 if (path_startswith(p->path, q->path))
103 if (path_startswith(q->path, p->path))
109 static void drop_duplicates(BindMount *m, unsigned *n) {
110 BindMount *f, *t, *previous;
115 for (f = m, t = m, previous = NULL; f < m+*n; f++) {
117 /* The first one wins */
118 if (previous && path_equal(f->path, previous->path))
132 static int apply_mount(
135 const char *var_tmp_dir) {
145 what = "/run/systemd/inaccessible";
157 case PRIVATE_VAR_TMP:
162 assert_not_reached("Unknown mode");
167 r = mount(what, m->path, NULL, MS_BIND|MS_REC, NULL);
169 log_debug("Successfully mounted %s to %s", what, m->path);
170 else if (m->ignore && errno == ENOENT)
176 static int make_read_only(BindMount *m) {
181 if (m->mode != INACCESSIBLE && m->mode != READONLY)
184 r = mount(NULL, m->path, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY|MS_REC, NULL);
185 if (r < 0 && !(m->ignore && errno == ENOENT))
192 char** read_write_dirs,
193 char** read_only_dirs,
194 char** inaccessible_dirs,
197 unsigned mount_flags) {
199 BindMount *m, *mounts = NULL;
203 if (mount_flags == 0)
204 mount_flags = MS_SHARED;
206 if (unshare(CLONE_NEWNS) < 0)
209 n = !!tmp_dir + !!var_tmp_dir +
210 strv_length(read_write_dirs) +
211 strv_length(read_only_dirs) +
212 strv_length(inaccessible_dirs);
215 m = mounts = (BindMount *) alloca(n * sizeof(BindMount));
216 r = append_mounts(&m, read_write_dirs, READWRITE);
220 r = append_mounts(&m, read_only_dirs, READONLY);
224 r = append_mounts(&m, inaccessible_dirs, INACCESSIBLE);
230 m->mode = PRIVATE_TMP;
235 m->path = "/var/tmp";
236 m->mode = PRIVATE_VAR_TMP;
240 assert(mounts + n == m);
242 qsort(mounts, n, sizeof(BindMount), mount_path_compare);
243 drop_duplicates(mounts, &n);
246 /* Remount / as SLAVE so that nothing now mounted in the namespace
247 shows up in the parent */
248 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
251 for (m = mounts; m < mounts + n; ++m) {
252 r = apply_mount(m, tmp_dir, var_tmp_dir);
257 for (m = mounts; m < mounts + n; ++m) {
258 r = make_read_only(m);
263 /* Remount / as the desired mode */
264 if (mount(NULL, "/", NULL, mount_flags | MS_REC, NULL) < 0) {
272 for (m = mounts; m < mounts + n; ++m)
274 umount2(m->path, MNT_DETACH);
279 static int setup_one_tmp_dir(const char *id, const char *prefix, char **path) {
280 _cleanup_free_ char *x = NULL;
281 char bid[SD_ID128_STRING_MAX];
289 /* We include the boot id in the directory so that after a
290 * reboot we can easily identify obsolete directories. */
292 r = sd_id128_get_boot(&boot_id);
296 x = strjoin(prefix, "/systemd-private-", sd_id128_to_string(boot_id, bid), "-", id, "-XXXXXX", NULL);
304 RUN_WITH_UMASK(0000) {
307 y = strappenda(x, "/tmp");
309 if (mkdir(y, 0777 | S_ISVTX) < 0)
319 int setup_tmp_dirs(const char *id, char **tmp_dir, char **var_tmp_dir) {
327 r = setup_one_tmp_dir(id, "/tmp", &a);
331 r = setup_one_tmp_dir(id, "/var/tmp", &b);
335 t = strappenda(a, "/tmp");
349 int setup_netns(int netns_storage_socket[2]) {
350 _cleanup_close_ int netns = -1;
352 struct cmsghdr cmsghdr;
353 uint8_t buf[CMSG_SPACE(sizeof(int))];
356 .msg_control = &control,
357 .msg_controllen = sizeof(control),
359 struct cmsghdr *cmsg;
362 assert(netns_storage_socket);
363 assert(netns_storage_socket[0] >= 0);
364 assert(netns_storage_socket[1] >= 0);
366 /* We use the passed socketpair as a storage buffer for our
367 * namespace reference fd. Whatever process runs this first
368 * shall create a new namespace, all others should just join
369 * it. To serialize that we use a file lock on the socket
372 * It's a bit crazy, but hey, works great! */
374 if (lockf(netns_storage_socket[0], F_LOCK, 0) < 0)
377 if (recvmsg(netns_storage_socket[0], &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) < 0) {
378 if (errno != EAGAIN) {
383 /* Nothing stored yet, so let's create a new namespace */
385 if (unshare(CLONE_NEWNET) < 0) {
392 netns = open("/proc/self/ns/net", O_RDONLY|O_CLOEXEC|O_NOCTTY);
400 /* Yay, found something, so let's join the namespace */
402 for (cmsg = CMSG_FIRSTHDR(&mh); cmsg; cmsg = CMSG_NXTHDR(&mh, cmsg)) {
403 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
404 assert(cmsg->cmsg_len == CMSG_LEN(sizeof(int)));
405 netns = *(int*) CMSG_DATA(cmsg);
409 if (setns(netns, CLONE_NEWNET) < 0) {
417 cmsg = CMSG_FIRSTHDR(&mh);
418 cmsg->cmsg_level = SOL_SOCKET;
419 cmsg->cmsg_type = SCM_RIGHTS;
420 cmsg->cmsg_len = CMSG_LEN(sizeof(int));
421 memcpy(CMSG_DATA(cmsg), &netns, sizeof(int));
422 mh.msg_controllen = cmsg->cmsg_len;
424 if (sendmsg(netns_storage_socket[1], &mh, MSG_DONTWAIT|MSG_NOSIGNAL) < 0) {
430 lockf(netns_storage_socket[0], F_ULOCK, 0);