1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
24 #include <sys/statvfs.h>
26 #include "alloc-util.h"
30 #include "mount-util.h"
31 #include "parse-util.h"
32 #include "path-util.h"
34 #include "stdio-util.h"
35 #include "string-util.h"
/* Determines a mount ID by parsing the "mnt_id:" field out of
 * /proc/self/fdinfo/<fd>.
 *
 * If AT_EMPTY_PATH is set in flags and filename is empty, the fdinfo entry
 * of fd itself is read. For the other case, filename is opened relative to
 * fd with O_PATH and the fdinfo entry of that new descriptor (subfd) is
 * read.
 *
 * The value found after "mnt_id:" is converted with safe_atoi() and stored
 * in *mnt_id; the return value of safe_atoi() is returned. */
38 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
39 char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
40 _cleanup_free_ char *fdinfo = NULL;
41 _cleanup_close_ int subfd = -1;
45 if ((flags & AT_EMPTY_PATH) && isempty(filename))
46 xsprintf(path, "/proc/self/fdinfo/%i", fd);
48 subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
52 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
55 r = read_full_file(path, &fdinfo, NULL);
56 if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
61 p = startswith(fdinfo, "mnt_id:");
63 p = strstr(fdinfo, "\nmnt_id:");
64 if (!p) /* The mnt_id field is a relatively new addition */
/* Skip whitespace in front of the value, then cut the string at the first
 * whitespace character after it so that only the number is parsed. */
70 p += strspn(p, WHITESPACE);
71 p[strcspn(p, WHITESPACE)] = 0;
73 return safe_atoi(p, mnt_id);
/* Checks whether the entry "filename", looked up relative to the directory
 * file descriptor fd, is a mount point, by comparing it against fd itself
 * (queried with an empty path and AT_EMPTY_PATH).
 *
 * Three strategies are used, as described in the comment below:
 * name_to_handle_at() mount IDs and file handles, then mount IDs obtained
 * via fd_fdinfo_mnt_id(), and finally fstatat() st_dev/st_ino comparisons.
 * flags is passed on to name_to_handle_at() and fd_fdinfo_mnt_id(), and is
 * adjusted (AT_SYMLINK_FOLLOW cleared / AT_SYMLINK_NOFOLLOW set) before
 * being handed to fstatat(). */
77 int fd_is_mount_point(int fd, const char *filename, int flags) {
78 union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
79 int mount_id = -1, mount_id_parent = -1;
80 bool nosupp = false, check_st_dev = true;
87 /* First we will try the name_to_handle_at() syscall, which
88 * tells us the mount id and an opaque file "handle". It is
89 * not supported everywhere though (kernel compile-time
90 * option, not all file systems are hooked up). If it works
91 * the mount id is usually good enough to tell us whether
92 * something is a mount point.
94 * If that didn't work we will try to read the mount id from
95 * /proc/self/fdinfo/<fd>. This is almost as good as
96 * name_to_handle_at(), however, does not return the
97 * opaque file handle. The opaque file handle is pretty useful
98 * to detect the root directory, which we should always
99 * consider a mount point. Hence we use this only as
100 * fallback. Exporting the mnt_id in fdinfo is a pretty recent addition.
103 * As last fallback we do traditional fstat() based st_dev
104 * comparisons. This is how things were traditionally done,
105 * but unionfs breaks this since it exposes file
106 * systems with a variety of st_dev reported. Also, btrfs
107 * subvolumes have different st_dev, even though they aren't
108 * real mounts of their own. */
110 r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
113 /* This kernel does not support name_to_handle_at()
114 * fall back to simpler logic. */
115 goto fallback_fdinfo;
116 else if (errno == EOPNOTSUPP)
117 /* This kernel or file system does not support
118 * name_to_handle_at(), hence let's see if the
119 * upper fs supports it (in which case it is a
120 * mount point), otherwise fallback to the
121 * traditional stat() logic */
127 r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
129 if (errno == EOPNOTSUPP) {
131 /* Neither parent nor child do name_to_handle_at()?
132 We have no choice but to fall back. */
133 goto fallback_fdinfo;
135 /* The parent can't do name_to_handle_at() but the
136 * directory we are interested in can?
137 * If so, it must be a mount point. */
143 /* The parent can do name_to_handle_at() but the
144 * directory we are interested in can't? If so, it
145 * must be a mount point. */
149 /* If the file handle for the directory we are
150 * interested in and its parent are identical, we
151 * assume this is the root directory, which is a mount
154 if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
155 h.handle.handle_type == h_parent.handle.handle_type &&
156 memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
159 return mount_id != mount_id_parent;
162 r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
163 if (r == -EOPNOTSUPP)
168 r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
172 if (mount_id != mount_id_parent)
175 /* Hmm, so, the mount ids are the same. This leaves one
176 * special case though for the root file system. For that,
177 * let's see if the parent directory has the same inode as we
178 * are interested in. Hence, let's also do fstat() checks now,
179 * too, but avoid the st_dev comparisons, since they aren't
180 * that useful on unionfs mounts. */
181 check_st_dev = false;
184 /* yay for fstatat() taking a different set of flags than the other
186 if (flags & AT_SYMLINK_FOLLOW)
187 flags &= ~AT_SYMLINK_FOLLOW;
189 flags |= AT_SYMLINK_NOFOLLOW;
190 if (fstatat(fd, filename, &a, flags) < 0)
193 if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
196 /* A directory with same device and inode as its parent? Must
197 * be the root directory */
198 if (a.st_dev == b.st_dev &&
199 a.st_ino == b.st_ino)
202 return check_st_dev && (a.st_dev != b.st_dev);
/* Path-based front end for fd_is_mount_point(): opens the parent directory
 * of t (as computed by dirname_malloc()) with O_PATH|O_DIRECTORY and asks
 * fd_is_mount_point() about basename(t) relative to it, passing flags
 * through. The path "/" is special-cased before any lookup is done. When
 * AT_SYMLINK_FOLLOW is set, t is first resolved with
 * canonicalize_file_name(), for the reason given in the comment below. */
205 /* flags can be AT_SYMLINK_FOLLOW or 0 */
206 int path_is_mount_point(const char *t, int flags) {
207 _cleanup_close_ int fd = -1;
208 _cleanup_free_ char *canonical = NULL, *parent = NULL;
212 if (path_equal(t, "/"))
215 /* we need to resolve symlinks manually, we can't just rely on
216 * fd_is_mount_point() to do that for us; if we have a structure like
217 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
218 * look at needs to be /usr, not /. */
219 if (flags & AT_SYMLINK_FOLLOW) {
220 canonical = canonicalize_file_name(t);
227 parent = dirname_malloc(t);
231 fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
235 return fd_is_mount_point(fd, basename(t), flags);
238 #if 0 /// UNNEEDED by elogind
/* Unmounts the mounts located below prefix.
 *
 * Mount points are read from /proc/self/mountinfo (field 5 of each line),
 * unescaped with cunescape(), filtered with path_startswith() against
 * prefix, and then passed to umount2() together with flags. */
239 int umount_recursive(const char *prefix, int flags) {
243 /* Try to umount everything recursively below a
244 * directory. Also, take care of stacked mounts, and keep
245 * unmounting them until they are gone. */
248 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
253 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
254 if (!proc_self_mountinfo)
258 _cleanup_free_ char *path = NULL, *p = NULL;
261 k = fscanf(proc_self_mountinfo,
262 "%*s " /* (1) mount id */
263 "%*s " /* (2) parent id */
264 "%*s " /* (3) major:minor */
265 "%*s " /* (4) root */
266 "%ms " /* (5) mount point */
267 "%*s" /* (6) mount options */
268 "%*[^-]" /* (7) optional fields */
269 "- " /* (8) separator */
270 "%*s " /* (9) file system type */
271 "%*s" /* (10) mount source */
272 "%*s" /* (11) mount options 2 */
273 "%*[^\n]", /* some rubbish at the end */
282 r = cunescape(path, UNESCAPE_RELAX, &p);
286 if (!path_startswith(p, prefix))
289 if (umount2(p, flags) < 0) {
/* Queries the file system containing path with statvfs().
 * NOTE(review): presumably the mount flags reported by statvfs() are handed
 * back through *flags -- confirm against the full function body. */
305 static int get_mount_flags(const char *path, unsigned long *flags) {
308 if (statvfs(path, &buf) < 0)
/* Remounts prefix and the mounts found below it read-only (ro == true) or
 * read-write (ro == false) by issuing MS_BIND|MS_REMOUNT mount() calls.
 *
 * A copy of prefix normalized with path_kill_slashes() ("cleaned") is what
 * gets compared against the mount points parsed from /proc/self/mountinfo.
 * Two sets of path strings are kept: "done" holds mount points that were
 * already handled, "todo" collects the ones found in the current pass. The
 * flags reported by get_mount_flags(), with MS_RDONLY cleared, are OR-ed
 * into each remount call. */
314 int bind_remount_recursive(const char *prefix, bool ro) {
315 _cleanup_set_free_free_ Set *done = NULL;
316 _cleanup_free_ char *cleaned = NULL;
319 /* Recursively remount a directory (and all its submounts)
320 * read-only or read-write. If the directory is already
321 * mounted, we reuse the mount and simply mark it
322 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
323 * operation). If it isn't we first make it one. Afterwards we
324 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
325 * submounts we can access, too. When mounts are stacked on
326 * the same mount point we only care for each individual
327 * "top-level" mount on each point, as we cannot
328 * influence/access the underlying mounts anyway. We do not
329 * have any effect on future submounts that might get
330 * propagated, they might be writable. This includes future
331 * submounts that have been triggered via autofs. */
333 cleaned = strdup(prefix);
337 path_kill_slashes(cleaned);
339 done = set_new(&string_hash_ops);
344 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
345 _cleanup_set_free_free_ Set *todo = NULL;
346 bool top_autofs = false;
348 unsigned long orig_flags;
350 todo = set_new(&string_hash_ops);
354 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
355 if (!proc_self_mountinfo)
359 _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
362 k = fscanf(proc_self_mountinfo,
363 "%*s " /* (1) mount id */
364 "%*s " /* (2) parent id */
365 "%*s " /* (3) major:minor */
366 "%*s " /* (4) root */
367 "%ms " /* (5) mount point */
368 "%*s" /* (6) mount options (superblock) */
369 "%*[^-]" /* (7) optional fields */
370 "- " /* (8) separator */
371 "%ms " /* (9) file system type */
372 "%*s" /* (10) mount source */
373 "%*s" /* (11) mount options (bind mount) */
374 "%*[^\n]", /* some rubbish at the end */
384 r = cunescape(path, UNESCAPE_RELAX, &p);
388 /* Let's ignore autofs mounts. If they aren't
389 * triggered yet, we want to avoid triggering
390 * them, as we don't make any guarantees for
391 * future submounts anyway. If they are
392 * already triggered, then we will find
393 * another entry for this. */
394 if (streq(type, "autofs")) {
395 top_autofs = top_autofs || path_equal(cleaned, p);
399 if (path_startswith(p, cleaned) &&
400 !set_contains(done, p)) {
402 r = set_consume(todo, p);
412 /* If we have no submounts to process anymore and if
413 * the root is either already done, or an autofs, we
415 if (set_isempty(todo) &&
416 (top_autofs || set_contains(done, cleaned)))
419 if (!set_contains(done, cleaned) &&
420 !set_contains(todo, cleaned)) {
421 /* The prefix directory itself is not yet a
422 * mount, make it one. */
423 if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
427 (void) get_mount_flags(cleaned, &orig_flags);
428 orig_flags &= ~MS_RDONLY;
/* NOTE(review): the bind mount above targets "cleaned", while this remount
 * targets the unnormalized "prefix" -- confirm that this is intended. */
430 if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
437 r = set_consume(done, x);
442 while ((x = set_steal_first(todo))) {
444 r = set_consume(done, x);
445 if (r == -EEXIST || r == 0)
450 /* Try to reuse the original flag set, but
451 * don't care for errors, in case of
452 * obstructed mounts */
454 (void) get_mount_flags(x, &orig_flags);
455 orig_flags &= ~MS_RDONLY;
457 if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
459 /* Deal with mount points that are
460 * obstructed by a later mount */
/* Moves the mount at path on top of "/" by calling mount() with MS_MOVE. */
470 int mount_move_root(const char *path) {
476 if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
/* Tells whether the file system type name fstype is a network file system,
 * by looking it up in the static table with nulstr_contains(). A "fuse."
 * prefix is detected with startswith() before the lookup.
 * NOTE(review): presumably that prefix is skipped so that the remainder of
 * the name is what gets looked up -- confirm. */
488 bool fstype_is_network(const char *fstype) {
489 static const char table[] =
504 x = startswith(fstype, "fuse.");
508 return nulstr_contains(table, fstype);
511 int repeat_unmount(const char *path, int flags) {
516 /* If there are multiple mounts on a mount point, this
517 * removes them all */
520 if (umount2(path, flags) < 0) {