src/basic/mount-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <sys/mount.h>
  24 #include <sys/stat.h>
  25 #include <sys/statvfs.h>
  26 #include <unistd.h>
  27
  28 #include "alloc-util.h"
  29 #include "escape.h"
  30 #include "fd-util.h"
  31 #include "fileio.h"
  32 #include "hashmap.h"
  33 #include "mount-util.h"
  34 #include "parse-util.h"
  35 #include "path-util.h"
  36 #include "set.h"
  37 #include "stdio-util.h"
  38 #include "string-util.h"
  39
  40 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
  41         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
  42         _cleanup_free_ char *fdinfo = NULL;
  43         _cleanup_close_ int subfd = -1;
  44         char *p;
  45         int r;
  46
  47         if ((flags & AT_EMPTY_PATH) && isempty(filename))
  48                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
  49         else {
  50                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
  51                 if (subfd < 0)
  52                         return -errno;
  53
  54                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
  55         }
  56
  57         r = read_full_file(path, &fdinfo, NULL);
  58         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
  59                 return -EOPNOTSUPP;
  60         if (r < 0)
  61                 return -errno;
  62
  63         p = startswith(fdinfo, "mnt_id:");
  64         if (!p) {
  65                 p = strstr(fdinfo, "\nmnt_id:");
  66                 if (!p) /* The mnt_id field is a relatively new addition */
  67                         return -EOPNOTSUPP;
  68
  69                 p += 8;
  70         }
  71
  72         p += strspn(p, WHITESPACE);
  73         p[strcspn(p, WHITESPACE)] = 0;
  74
  75         return safe_atoi(p, mnt_id);
  76 }
  77
  78
  79 int fd_is_mount_point(int fd, const char *filename, int flags) {
  80         union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
  81         int mount_id = -1, mount_id_parent = -1;
  82         bool nosupp = false, check_st_dev = true;
  83         struct stat a, b;
  84         int r;
  85
  86         assert(fd >= 0);
  87         assert(filename);
  88
  89         /* First we will try the name_to_handle_at() syscall, which
  90          * tells us the mount id and an opaque file "handle". It is
  91          * not supported everywhere though (kernel compile-time
  92          * option, not all file systems are hooked up). If it works
  93          * the mount id is usually good enough to tell us whether
  94          * something is a mount point.
  95          *
  96          * If that didn't work we will try to read the mount id from
  97          * /proc/self/fdinfo/<fd>. This is almost as good as
  98          * name_to_handle_at(), however, does not return the
  99          * opaque file handle. The opaque file handle is pretty useful
 100          * to detect the root directory, which we should always
 101          * consider a mount point. Hence we use this only as
 102          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 103          * kernel addition.
 104          *
 105          * As last fallback we do traditional fstat() based st_dev
 106          * comparisons. This is how things were traditionally done,
 107          * but unionfs breaks this since it exposes file
 108          * systems with a variety of st_dev reported. Also, btrfs
 109          * subvolumes have different st_dev, even though they aren't
 110          * real mounts of their own. */
 111
 112         r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
 113         if (r < 0) {
 114                 if (errno == ENOSYS)
 115                         /* This kernel does not support name_to_handle_at()
 116                          * fall back to simpler logic. */
 117                         goto fallback_fdinfo;
 118                 else if (errno == EOPNOTSUPP)
 119                         /* This kernel or file system does not support
 120                          * name_to_handle_at(), hence let's see if the
 121                          * upper fs supports it (in which case it is a
 122                          * mount point), otherwise fallback to the
 123                          * traditional stat() logic */
 124                         nosupp = true;
 125                 else
 126                         return -errno;
 127         }
 128
 129         r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
 130         if (r < 0) {
 131                 if (errno == EOPNOTSUPP) {
 132                         if (nosupp)
 133                                 /* Neither parent nor child do name_to_handle_at()?
 134                                    We have no choice but to fall back. */
 135                                 goto fallback_fdinfo;
 136                         else
 137                                 /* The parent can't do name_to_handle_at() but the
 138                                  * directory we are interested in can?
 139                                  * If so, it must be a mount point. */
 140                                 return 1;
 141                 } else
 142                         return -errno;
 143         }
 144
 145         /* The parent can do name_to_handle_at() but the
 146          * directory we are interested in can't? If so, it
 147          * must be a mount point. */
 148         if (nosupp)
 149                 return 1;
 150
 151         /* If the file handle for the directory we are
 152          * interested in and its parent are identical, we
 153          * assume this is the root directory, which is a mount
 154          * point. */
 155
 156         if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
 157             h.handle.handle_type == h_parent.handle.handle_type &&
 158             memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
 159                 return 1;
 160
 161         return mount_id != mount_id_parent;
 162
 163 fallback_fdinfo:
 164         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 165         if (r == -EOPNOTSUPP)
 166                 goto fallback_fstat;
 167         if (r < 0)
 168                 return r;
 169
 170         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 171         if (r < 0)
 172                 return r;
 173
 174         if (mount_id != mount_id_parent)
 175                 return 1;
 176
 177         /* Hmm, so, the mount ids are the same. This leaves one
 178          * special case though for the root file system. For that,
 179          * let's see if the parent directory has the same inode as we
 180          * are interested in. Hence, let's also do fstat() checks now,
 181          * too, but avoid the st_dev comparisons, since they aren't
 182          * that useful on unionfs mounts. */
 183         check_st_dev = false;
 184
 185 fallback_fstat:
 186         /* yay for fstatat() taking a different set of flags than the other
 187          * _at() above */
 188         if (flags & AT_SYMLINK_FOLLOW)
 189                 flags &= ~AT_SYMLINK_FOLLOW;
 190         else
 191                 flags |= AT_SYMLINK_NOFOLLOW;
 192         if (fstatat(fd, filename, &a, flags) < 0)
 193                 return -errno;
 194
 195         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 196                 return -errno;
 197
 198         /* A directory with same device and inode as its parent? Must
 199          * be the root directory */
 200         if (a.st_dev == b.st_dev &&
 201             a.st_ino == b.st_ino)
 202                 return 1;
 203
 204         return check_st_dev && (a.st_dev != b.st_dev);
 205 }
 206
 207 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 208 int path_is_mount_point(const char *t, int flags) {
 209         _cleanup_close_ int fd = -1;
 210         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 211
 212         assert(t);
 213
 214         if (path_equal(t, "/"))
 215                 return 1;
 216
 217         /* we need to resolve symlinks manually, we can't just rely on
 218          * fd_is_mount_point() to do that for us; if we have a structure like
 219          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 220          * look at needs to be /usr, not /. */
 221         if (flags & AT_SYMLINK_FOLLOW) {
 222                 canonical = canonicalize_file_name(t);
 223                 if (!canonical)
 224                         return -errno;
 225
 226                 t = canonical;
 227         }
 228
 229         parent = dirname_malloc(t);
 230         if (!parent)
 231                 return -ENOMEM;
 232
 233         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 234         if (fd < 0)
 235                 return -errno;
 236
 237         return fd_is_mount_point(fd, basename(t), flags);
 238 }
 239
 240 #if 0 /// UNNEEDED by elogind
 241 int umount_recursive(const char *prefix, int flags) {
 242         bool again;
 243         int n = 0, r;
 244
 245         /* Try to umount everything recursively below a
 246          * directory. Also, take care of stacked mounts, and keep
 247          * unmounting them until they are gone. */
 248
 249         do {
 250                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 251
 252                 again = false;
 253                 r = 0;
 254
 255                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 256                 if (!proc_self_mountinfo)
 257                         return -errno;
 258
 259                 for (;;) {
 260                         _cleanup_free_ char *path = NULL, *p = NULL;
 261                         int k;
 262
 263                         k = fscanf(proc_self_mountinfo,
 264                                    "%*s "       /* (1) mount id */
 265                                    "%*s "       /* (2) parent id */
 266                                    "%*s "       /* (3) major:minor */
 267                                    "%*s "       /* (4) root */
 268                                    "%ms "       /* (5) mount point */
 269                                    "%*s"        /* (6) mount options */
 270                                    "%*[^-]"     /* (7) optional fields */
 271                                    "- "         /* (8) separator */
 272                                    "%*s "       /* (9) file system type */
 273                                    "%*s"        /* (10) mount source */
 274                                    "%*s"        /* (11) mount options 2 */
 275                                    "%*[^\n]",   /* some rubbish at the end */
 276                                    &path);
 277                         if (k != 1) {
 278                                 if (k == EOF)
 279                                         break;
 280
 281                                 continue;
 282                         }
 283
 284                         r = cunescape(path, UNESCAPE_RELAX, &p);
 285                         if (r < 0)
 286                                 return r;
 287
 288                         if (!path_startswith(p, prefix))
 289                                 continue;
 290
 291                         if (umount2(p, flags) < 0) {
 292                                 r = -errno;
 293                                 continue;
 294                         }
 295
 296                         again = true;
 297                         n++;
 298
 299                         break;
 300                 }
 301
 302         } while (again);
 303
 304         return r ? r : n;
 305 }
 306
 307 static int get_mount_flags(const char *path, unsigned long *flags) {
 308         struct statvfs buf;
 309
 310         if (statvfs(path, &buf) < 0)
 311                 return -errno;
 312         *flags = buf.f_flag;
 313         return 0;
 314 }
 315
 316 int bind_remount_recursive(const char *prefix, bool ro) {
 317         _cleanup_set_free_free_ Set *done = NULL;
 318         _cleanup_free_ char *cleaned = NULL;
 319         int r;
 320
 321         /* Recursively remount a directory (and all its submounts)
 322          * read-only or read-write. If the directory is already
 323          * mounted, we reuse the mount and simply mark it
 324          * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 325          * operation). If it isn't we first make it one. Afterwards we
 326          * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
 327          * submounts we can access, too. When mounts are stacked on
 328          * the same mount point we only care for each individual
 329          * "top-level" mount on each point, as we cannot
 330          * influence/access the underlying mounts anyway. We do not
 331          * have any effect on future submounts that might get
 332          * propagated, they migt be writable. This includes future
 333          * submounts that have been triggered via autofs. */
 334
 335         cleaned = strdup(prefix);
 336         if (!cleaned)
 337                 return -ENOMEM;
 338
 339         path_kill_slashes(cleaned);
 340
 341         done = set_new(&string_hash_ops);
 342         if (!done)
 343                 return -ENOMEM;
 344
 345         for (;;) {
 346                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 347                 _cleanup_set_free_free_ Set *todo = NULL;
 348                 bool top_autofs = false;
 349                 char *x;
 350                 unsigned long orig_flags;
 351
 352                 todo = set_new(&string_hash_ops);
 353                 if (!todo)
 354                         return -ENOMEM;
 355
 356                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 357                 if (!proc_self_mountinfo)
 358                         return -errno;
 359
 360                 for (;;) {
 361                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 362                         int k;
 363
 364                         k = fscanf(proc_self_mountinfo,
 365                                    "%*s "       /* (1) mount id */
 366                                    "%*s "       /* (2) parent id */
 367                                    "%*s "       /* (3) major:minor */
 368                                    "%*s "       /* (4) root */
 369                                    "%ms "       /* (5) mount point */
 370                                    "%*s"        /* (6) mount options (superblock) */
 371                                    "%*[^-]"     /* (7) optional fields */
 372                                    "- "         /* (8) separator */
 373                                    "%ms "       /* (9) file system type */
 374                                    "%*s"        /* (10) mount source */
 375                                    "%*s"        /* (11) mount options (bind mount) */
 376                                    "%*[^\n]",   /* some rubbish at the end */
 377                                    &path,
 378                                    &type);
 379                         if (k != 2) {
 380                                 if (k == EOF)
 381                                         break;
 382
 383                                 continue;
 384                         }
 385
 386                         r = cunescape(path, UNESCAPE_RELAX, &p);
 387                         if (r < 0)
 388                                 return r;
 389
 390                         /* Let's ignore autofs mounts.  If they aren't
 391                          * triggered yet, we want to avoid triggering
 392                          * them, as we don't make any guarantees for
 393                          * future submounts anyway.  If they are
 394                          * already triggered, then we will find
 395                          * another entry for this. */
 396                         if (streq(type, "autofs")) {
 397                                 top_autofs = top_autofs || path_equal(cleaned, p);
 398                                 continue;
 399                         }
 400
 401                         if (path_startswith(p, cleaned) &&
 402                             !set_contains(done, p)) {
 403
 404                                 r = set_consume(todo, p);
 405                                 p = NULL;
 406
 407                                 if (r == -EEXIST)
 408                                         continue;
 409                                 if (r < 0)
 410                                         return r;
 411                         }
 412                 }
 413
 414                 /* If we have no submounts to process anymore and if
 415                  * the root is either already done, or an autofs, we
 416                  * are done */
 417                 if (set_isempty(todo) &&
 418                     (top_autofs || set_contains(done, cleaned)))
 419                         return 0;
 420
 421                 if (!set_contains(done, cleaned) &&
 422                     !set_contains(todo, cleaned)) {
 423                         /* The prefix directory itself is not yet a
 424                          * mount, make it one. */
 425                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 426                                 return -errno;
 427
 428                         orig_flags = 0;
 429                         (void) get_mount_flags(cleaned, &orig_flags);
 430                         orig_flags &= ~MS_RDONLY;
 431
 432                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 433                                 return -errno;
 434
 435                         x = strdup(cleaned);
 436                         if (!x)
 437                                 return -ENOMEM;
 438
 439                         r = set_consume(done, x);
 440                         if (r < 0)
 441                                 return r;
 442                 }
 443
 444                 while ((x = set_steal_first(todo))) {
 445
 446                         r = set_consume(done, x);
 447                         if (r == -EEXIST || r == 0)
 448                                 continue;
 449                         if (r < 0)
 450                                 return r;
 451
 452                         /* Try to reuse the original flag set, but
 453                          * don't care for errors, in case of
 454                          * obstructed mounts */
 455                         orig_flags = 0;
 456                         (void) get_mount_flags(x, &orig_flags);
 457                         orig_flags &= ~MS_RDONLY;
 458
 459                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0) {
 460
 461                                 /* Deal with mount points that are
 462                                  * obstructed by a later mount */
 463
 464                                 if (errno != ENOENT)
 465                                         return -errno;
 466                         }
 467
 468                 }
 469         }
 470 }
 471
 472 int mount_move_root(const char *path) {
 473         assert(path);
 474
 475         if (chdir(path) < 0)
 476                 return -errno;
 477
 478         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 479                 return -errno;
 480
 481         if (chroot(".") < 0)
 482                 return -errno;
 483
 484         if (chdir("/") < 0)
 485                 return -errno;
 486
 487         return 0;
 488 }
 489
 490 bool fstype_is_network(const char *fstype) {
 491         static const char table[] =
 492                 "afs\0"
 493                 "cifs\0"
 494                 "smbfs\0"
 495                 "sshfs\0"
 496                 "ncpfs\0"
 497                 "ncp\0"
 498                 "nfs\0"
 499                 "nfs4\0"
 500                 "gfs\0"
 501                 "gfs2\0"
 502                 "glusterfs\0"
 503                 "pvfs2\0" /* OrangeFS */
 504                 "ocfs2\0"
 505                 ;
 506
 507         const char *x;
 508
 509         x = startswith(fstype, "fuse.");
 510         if (x)
 511                 fstype = x;
 512
 513         return nulstr_contains(table, fstype);
 514 }
 515
 516 int repeat_unmount(const char *path, int flags) {
 517         bool done = false;
 518
 519         assert(path);
 520
 521         /* If there are multiple mounts on a mount point, this
 522          * removes them all */
 523
 524         for (;;) {
 525                 if (umount2(path, flags) < 0) {
 526
 527                         if (errno == EINVAL)
 528                                 return done;
 529
 530                         return -errno;
 531                 }
 532
 533                 done = true;
 534         }
 535 }
 536 #endif // 0