src/basic/mount-util.c

   1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
   2
   3 /***
   4   This file is part of systemd.
   5
   6   Copyright 2010 Lennart Poettering
   7
   8   systemd is free software; you can redistribute it and/or modify it
   9   under the terms of the GNU Lesser General Public License as published by
  10   the Free Software Foundation; either version 2.1 of the License, or
  11   (at your option) any later version.
  12
  13   systemd is distributed in the hope that it will be useful, but
  14   WITHOUT ANY WARRANTY; without even the implied warranty of
  15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16   Lesser General Public License for more details.
  17
  18   You should have received a copy of the GNU Lesser General Public License
  19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  20 ***/
  21
  22 #include <string.h>
  23 #include <sys/mount.h>
  24 #include <sys/statvfs.h>
  25
  26 #include "alloc-util.h"
  27 #include "escape.h"
  28 #include "fd-util.h"
  29 #include "fileio.h"
  30 #include "mount-util.h"
  31 #include "parse-util.h"
  32 #include "path-util.h"
  33 #include "set.h"
  34 #include "stdio-util.h"
  35 #include "string-util.h"
  36 #include "util.h"
  37
  38 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
  39         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
  40         _cleanup_free_ char *fdinfo = NULL;
  41         _cleanup_close_ int subfd = -1;
  42         char *p;
  43         int r;
  44
  45         if ((flags & AT_EMPTY_PATH) && isempty(filename))
  46                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
  47         else {
  48                 subfd = openat(fd, filename, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_PATH);
  49                 if (subfd < 0)
  50                         return -errno;
  51
  52                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
  53         }
  54
  55         r = read_full_file(path, &fdinfo, NULL);
  56         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
  57                 return -EOPNOTSUPP;
  58         if (r < 0)
  59                 return -errno;
  60
  61         p = startswith(fdinfo, "mnt_id:");
  62         if (!p) {
  63                 p = strstr(fdinfo, "\nmnt_id:");
  64                 if (!p) /* The mnt_id field is a relatively new addition */
  65                         return -EOPNOTSUPP;
  66
  67                 p += 8;
  68         }
  69
  70         p += strspn(p, WHITESPACE);
  71         p[strcspn(p, WHITESPACE)] = 0;
  72
  73         return safe_atoi(p, mnt_id);
  74 }
  75
  76
  77 int fd_is_mount_point(int fd, const char *filename, int flags) {
  78         union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
  79         int mount_id = -1, mount_id_parent = -1;
  80         bool nosupp = false, check_st_dev = true;
  81         struct stat a, b;
  82         int r;
  83
  84         assert(fd >= 0);
  85         assert(filename);
  86
  87         /* First we will try the name_to_handle_at() syscall, which
  88          * tells us the mount id and an opaque file "handle". It is
  89          * not supported everywhere though (kernel compile-time
  90          * option, not all file systems are hooked up). If it works
  91          * the mount id is usually good enough to tell us whether
  92          * something is a mount point.
  93          *
  94          * If that didn't work we will try to read the mount id from
  95          * /proc/self/fdinfo/<fd>. This is almost as good as
  96          * name_to_handle_at(), however, does not return the
  97          * opaque file handle. The opaque file handle is pretty useful
  98          * to detect the root directory, which we should always
  99          * consider a mount point. Hence we use this only as
 100          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 101          * kernel addition.
 102          *
 103          * As last fallback we do traditional fstat() based st_dev
 104          * comparisons. This is how things were traditionally done,
 105          * but unionfs breaks breaks this since it exposes file
 106          * systems with a variety of st_dev reported. Also, btrfs
 107          * subvolumes have different st_dev, even though they aren't
 108          * real mounts of their own. */
 109
 110         r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
 111         if (r < 0) {
 112                 if (errno == ENOSYS)
 113                         /* This kernel does not support name_to_handle_at()
 114                          * fall back to simpler logic. */
 115                         goto fallback_fdinfo;
 116                 else if (errno == EOPNOTSUPP)
 117                         /* This kernel or file system does not support
 118                          * name_to_handle_at(), hence let's see if the
 119                          * upper fs supports it (in which case it is a
 120                          * mount point), otherwise fallback to the
 121                          * traditional stat() logic */
 122                         nosupp = true;
 123                 else
 124                         return -errno;
 125         }
 126
 127         r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
 128         if (r < 0) {
 129                 if (errno == EOPNOTSUPP) {
 130                         if (nosupp)
 131                                 /* Neither parent nor child do name_to_handle_at()?
 132                                    We have no choice but to fall back. */
 133                                 goto fallback_fdinfo;
 134                         else
 135                                 /* The parent can't do name_to_handle_at() but the
 136                                  * directory we are interested in can?
 137                                  * If so, it must be a mount point. */
 138                                 return 1;
 139                 } else
 140                         return -errno;
 141         }
 142
 143         /* The parent can do name_to_handle_at() but the
 144          * directory we are interested in can't? If so, it
 145          * must be a mount point. */
 146         if (nosupp)
 147                 return 1;
 148
 149         /* If the file handle for the directory we are
 150          * interested in and its parent are identical, we
 151          * assume this is the root directory, which is a mount
 152          * point. */
 153
 154         if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
 155             h.handle.handle_type == h_parent.handle.handle_type &&
 156             memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
 157                 return 1;
 158
 159         return mount_id != mount_id_parent;
 160
 161 fallback_fdinfo:
 162         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 163         if (r == -EOPNOTSUPP)
 164                 goto fallback_fstat;
 165         if (r < 0)
 166                 return r;
 167
 168         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 169         if (r < 0)
 170                 return r;
 171
 172         if (mount_id != mount_id_parent)
 173                 return 1;
 174
 175         /* Hmm, so, the mount ids are the same. This leaves one
 176          * special case though for the root file system. For that,
 177          * let's see if the parent directory has the same inode as we
 178          * are interested in. Hence, let's also do fstat() checks now,
 179          * too, but avoid the st_dev comparisons, since they aren't
 180          * that useful on unionfs mounts. */
 181         check_st_dev = false;
 182
 183 fallback_fstat:
 184         /* yay for fstatat() taking a different set of flags than the other
 185          * _at() above */
 186         if (flags & AT_SYMLINK_FOLLOW)
 187                 flags &= ~AT_SYMLINK_FOLLOW;
 188         else
 189                 flags |= AT_SYMLINK_NOFOLLOW;
 190         if (fstatat(fd, filename, &a, flags) < 0)
 191                 return -errno;
 192
 193         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 194                 return -errno;
 195
 196         /* A directory with same device and inode as its parent? Must
 197          * be the root directory */
 198         if (a.st_dev == b.st_dev &&
 199             a.st_ino == b.st_ino)
 200                 return 1;
 201
 202         return check_st_dev && (a.st_dev != b.st_dev);
 203 }
 204
 205 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 206 int path_is_mount_point(const char *t, int flags) {
 207         _cleanup_close_ int fd = -1;
 208         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 209
 210         assert(t);
 211
 212         if (path_equal(t, "/"))
 213                 return 1;
 214
 215         /* we need to resolve symlinks manually, we can't just rely on
 216          * fd_is_mount_point() to do that for us; if we have a structure like
 217          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 218          * look at needs to be /usr, not /. */
 219         if (flags & AT_SYMLINK_FOLLOW) {
 220                 canonical = canonicalize_file_name(t);
 221                 if (!canonical)
 222                         return -errno;
 223
 224                 t = canonical;
 225         }
 226
 227         parent = dirname_malloc(t);
 228         if (!parent)
 229                 return -ENOMEM;
 230
 231         fd = openat(AT_FDCWD, parent, O_RDONLY|O_NONBLOCK|O_DIRECTORY|O_CLOEXEC|O_PATH);
 232         if (fd < 0)
 233                 return -errno;
 234
 235         return fd_is_mount_point(fd, basename(t), flags);
 236 }
 237
 238 #if 0 /// UNNEEDED by elogind
 239 int umount_recursive(const char *prefix, int flags) {
 240         bool again;
 241         int n = 0, r;
 242
 243         /* Try to umount everything recursively below a
 244          * directory. Also, take care of stacked mounts, and keep
 245          * unmounting them until they are gone. */
 246
 247         do {
 248                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 249
 250                 again = false;
 251                 r = 0;
 252
 253                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 254                 if (!proc_self_mountinfo)
 255                         return -errno;
 256
 257                 for (;;) {
 258                         _cleanup_free_ char *path = NULL, *p = NULL;
 259                         int k;
 260
 261                         k = fscanf(proc_self_mountinfo,
 262                                    "%*s "       /* (1) mount id */
 263                                    "%*s "       /* (2) parent id */
 264                                    "%*s "       /* (3) major:minor */
 265                                    "%*s "       /* (4) root */
 266                                    "%ms "       /* (5) mount point */
 267                                    "%*s"        /* (6) mount options */
 268                                    "%*[^-]"     /* (7) optional fields */
 269                                    "- "         /* (8) separator */
 270                                    "%*s "       /* (9) file system type */
 271                                    "%*s"        /* (10) mount source */
 272                                    "%*s"        /* (11) mount options 2 */
 273                                    "%*[^\n]",   /* some rubbish at the end */
 274                                    &path);
 275                         if (k != 1) {
 276                                 if (k == EOF)
 277                                         break;
 278
 279                                 continue;
 280                         }
 281
 282                         r = cunescape(path, UNESCAPE_RELAX, &p);
 283                         if (r < 0)
 284                                 return r;
 285
 286                         if (!path_startswith(p, prefix))
 287                                 continue;
 288
 289                         if (umount2(p, flags) < 0) {
 290                                 r = -errno;
 291                                 continue;
 292                         }
 293
 294                         again = true;
 295                         n++;
 296
 297                         break;
 298                 }
 299
 300         } while (again);
 301
 302         return r ? r : n;
 303 }
 304
 305 static int get_mount_flags(const char *path, unsigned long *flags) {
 306         struct statvfs buf;
 307
 308         if (statvfs(path, &buf) < 0)
 309                 return -errno;
 310         *flags = buf.f_flag;
 311         return 0;
 312 }
 313
 314 int bind_remount_recursive(const char *prefix, bool ro) {
 315         _cleanup_set_free_free_ Set *done = NULL;
 316         _cleanup_free_ char *cleaned = NULL;
 317         int r;
 318
 319         /* Recursively remount a directory (and all its submounts)
 320          * read-only or read-write. If the directory is already
 321          * mounted, we reuse the mount and simply mark it
 322          * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 323          * operation). If it isn't we first make it one. Afterwards we
 324          * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
 325          * submounts we can access, too. When mounts are stacked on
 326          * the same mount point we only care for each individual
 327          * "top-level" mount on each point, as we cannot
 328          * influence/access the underlying mounts anyway. We do not
 329          * have any effect on future submounts that might get
 330          * propagated, they migt be writable. This includes future
 331          * submounts that have been triggered via autofs. */
 332
 333         cleaned = strdup(prefix);
 334         if (!cleaned)
 335                 return -ENOMEM;
 336
 337         path_kill_slashes(cleaned);
 338
 339         done = set_new(&string_hash_ops);
 340         if (!done)
 341                 return -ENOMEM;
 342
 343         for (;;) {
 344                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 345                 _cleanup_set_free_free_ Set *todo = NULL;
 346                 bool top_autofs = false;
 347                 char *x;
 348                 unsigned long orig_flags;
 349
 350                 todo = set_new(&string_hash_ops);
 351                 if (!todo)
 352                         return -ENOMEM;
 353
 354                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 355                 if (!proc_self_mountinfo)
 356                         return -errno;
 357
 358                 for (;;) {
 359                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 360                         int k;
 361
 362                         k = fscanf(proc_self_mountinfo,
 363                                    "%*s "       /* (1) mount id */
 364                                    "%*s "       /* (2) parent id */
 365                                    "%*s "       /* (3) major:minor */
 366                                    "%*s "       /* (4) root */
 367                                    "%ms "       /* (5) mount point */
 368                                    "%*s"        /* (6) mount options (superblock) */
 369                                    "%*[^-]"     /* (7) optional fields */
 370                                    "- "         /* (8) separator */
 371                                    "%ms "       /* (9) file system type */
 372                                    "%*s"        /* (10) mount source */
 373                                    "%*s"        /* (11) mount options (bind mount) */
 374                                    "%*[^\n]",   /* some rubbish at the end */
 375                                    &path,
 376                                    &type);
 377                         if (k != 2) {
 378                                 if (k == EOF)
 379                                         break;
 380
 381                                 continue;
 382                         }
 383
 384                         r = cunescape(path, UNESCAPE_RELAX, &p);
 385                         if (r < 0)
 386                                 return r;
 387
 388                         /* Let's ignore autofs mounts.  If they aren't
 389                          * triggered yet, we want to avoid triggering
 390                          * them, as we don't make any guarantees for
 391                          * future submounts anyway.  If they are
 392                          * already triggered, then we will find
 393                          * another entry for this. */
 394                         if (streq(type, "autofs")) {
 395                                 top_autofs = top_autofs || path_equal(cleaned, p);
 396                                 continue;
 397                         }
 398
 399                         if (path_startswith(p, cleaned) &&
 400                             !set_contains(done, p)) {
 401
 402                                 r = set_consume(todo, p);
 403                                 p = NULL;
 404
 405                                 if (r == -EEXIST)
 406                                         continue;
 407                                 if (r < 0)
 408                                         return r;
 409                         }
 410                 }
 411
 412                 /* If we have no submounts to process anymore and if
 413                  * the root is either already done, or an autofs, we
 414                  * are done */
 415                 if (set_isempty(todo) &&
 416                     (top_autofs || set_contains(done, cleaned)))
 417                         return 0;
 418
 419                 if (!set_contains(done, cleaned) &&
 420                     !set_contains(todo, cleaned)) {
 421                         /* The prefix directory itself is not yet a
 422                          * mount, make it one. */
 423                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 424                                 return -errno;
 425
 426                         orig_flags = 0;
 427                         (void) get_mount_flags(cleaned, &orig_flags);
 428                         orig_flags &= ~MS_RDONLY;
 429
 430                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 431                                 return -errno;
 432
 433                         x = strdup(cleaned);
 434                         if (!x)
 435                                 return -ENOMEM;
 436
 437                         r = set_consume(done, x);
 438                         if (r < 0)
 439                                 return r;
 440                 }
 441
 442                 while ((x = set_steal_first(todo))) {
 443
 444                         r = set_consume(done, x);
 445                         if (r == -EEXIST || r == 0)
 446                                 continue;
 447                         if (r < 0)
 448                                 return r;
 449
 450                         /* Try to reuse the original flag set, but
 451                          * don't care for errors, in case of
 452                          * obstructed mounts */
 453                         orig_flags = 0;
 454                         (void) get_mount_flags(x, &orig_flags);
 455                         orig_flags &= ~MS_RDONLY;
 456
 457                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
 458
 459                                 /* Deal with mount points that are
 460                                  * obstructed by a later mount */
 461
 462                                 if (errno != ENOENT)
 463                                         return -errno;
 464                         }
 465
 466                 }
 467         }
 468 }
 469
 470 int mount_move_root(const char *path) {
 471         assert(path);
 472
 473         if (chdir(path) < 0)
 474                 return -errno;
 475
 476         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 477                 return -errno;
 478
 479         if (chroot(".") < 0)
 480                 return -errno;
 481
 482         if (chdir("/") < 0)
 483                 return -errno;
 484
 485         return 0;
 486 }
 487
 488 bool fstype_is_network(const char *fstype) {
 489         static const char table[] =
 490                 "afs\0"
 491                 "cifs\0"
 492                 "smbfs\0"
 493                 "sshfs\0"
 494                 "ncpfs\0"
 495                 "ncp\0"
 496                 "nfs\0"
 497                 "nfs4\0"
 498                 "gfs\0"
 499                 "gfs2\0"
 500                 "glusterfs\0";
 501
 502         const char *x;
 503
 504         x = startswith(fstype, "fuse.");
 505         if (x)
 506                 fstype = x;
 507
 508         return nulstr_contains(table, fstype);
 509 }
 510
 511 int repeat_unmount(const char *path, int flags) {
 512         bool done = false;
 513
 514         assert(path);
 515
 516         /* If there are multiple mounts on a mount point, this
 517          * removes them all */
 518
 519         for (;;) {
 520                 if (umount2(path, flags) < 0) {
 521
 522                         if (errno == EINVAL)
 523                                 return done;
 524
 525                         return -errno;
 526                 }
 527
 528                 done = true;
 529         }
 530 }
 531 #endif // 0