src/basic/mount-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   Copyright 2010 Lennart Poettering
   4 ***/
   5
   6 #include <errno.h>
   7 //#include <stdio_ext.h>
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <sys/mount.h>
  11 #include <sys/stat.h>
  12 #include <sys/statvfs.h>
  13 #include <unistd.h>
  14
  15 /* Include later */
  16 //#include <libmount.h>
  17
  18 #include "alloc-util.h"
  19 #include "escape.h"
  20 //#include "extract-word.h"
  21 #include "fd-util.h"
  22 #include "fileio.h"
  23 #include "fs-util.h"
  24 #include "hashmap.h"
  25 #include "mount-util.h"
  26 #include "parse-util.h"
  27 #include "path-util.h"
  28 #include "set.h"
  29 #include "stdio-util.h"
  30 #include "string-util.h"
  31 #include "strv.h"
  32
  33 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
  34  * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
  35  * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
  36  * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
  37  * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
  38  * with large file handles anyway. */
  39 #define ORIGINAL_MAX_HANDLE_SZ 128
  40
  41 int name_to_handle_at_loop(
  42                 int fd,
  43                 const char *path,
  44                 struct file_handle **ret_handle,
  45                 int *ret_mnt_id,
  46                 int flags) {
  47
  48         _cleanup_free_ struct file_handle *h = NULL;
  49         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  50
  51         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  52          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  53          * start value, it is not an upper bound on the buffer size required.
  54          *
  55          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  56          * as NULL if there's no interest in either. */
  57
  58         for (;;) {
  59                 int mnt_id = -1;
  60
  61                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  62                 if (!h)
  63                         return -ENOMEM;
  64
  65                 h->handle_bytes = n;
  66
  67                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  68
  69                         if (ret_handle)
  70                                 *ret_handle = TAKE_PTR(h);
  71
  72                         if (ret_mnt_id)
  73                                 *ret_mnt_id = mnt_id;
  74
  75                         return 0;
  76                 }
  77                 if (errno != EOVERFLOW)
  78                         return -errno;
  79
  80                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  81
  82                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  83                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  84                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
  85
  86                         *ret_mnt_id = mnt_id;
  87                         return 0;
  88                 }
  89
  90                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
  91                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
  92                  * buffer. In that case propagate EOVERFLOW */
  93                 if (h->handle_bytes <= n)
  94                         return -EOVERFLOW;
  95
  96                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
  97                 n = h->handle_bytes;
  98                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
  99                         return -EOVERFLOW;
 100
 101                 h = mfree(h);
 102         }
 103 }
 104
 105 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
 106         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 107         _cleanup_free_ char *fdinfo = NULL;
 108         _cleanup_close_ int subfd = -1;
 109         char *p;
 110         int r;
 111
 112         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 113                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 114         else {
 115                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
 116                 if (subfd < 0)
 117                         return -errno;
 118
 119                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 120         }
 121
 122         r = read_full_file(path, &fdinfo, NULL);
 123         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 124                 return -EOPNOTSUPP;
 125         if (r < 0)
 126                 return r;
 127
 128         p = startswith(fdinfo, "mnt_id:");
 129         if (!p) {
 130                 p = strstr(fdinfo, "\nmnt_id:");
 131                 if (!p) /* The mnt_id field is a relatively new addition */
 132                         return -EOPNOTSUPP;
 133
 134                 p += 8;
 135         }
 136
 137         p += strspn(p, WHITESPACE);
 138         p[strcspn(p, WHITESPACE)] = 0;
 139
 140         return safe_atoi(p, mnt_id);
 141 }
 142
 143 int fd_is_mount_point(int fd, const char *filename, int flags) {
 144         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 145         int mount_id = -1, mount_id_parent = -1;
 146         bool nosupp = false, check_st_dev = true;
 147         struct stat a, b;
 148         int r;
 149
 150         assert(fd >= 0);
 151         assert(filename);
 152
 153         /* First we will try the name_to_handle_at() syscall, which
 154          * tells us the mount id and an opaque file "handle". It is
 155          * not supported everywhere though (kernel compile-time
 156          * option, not all file systems are hooked up). If it works
 157          * the mount id is usually good enough to tell us whether
 158          * something is a mount point.
 159          *
 160          * If that didn't work we will try to read the mount id from
 161          * /proc/self/fdinfo/<fd>. This is almost as good as
 162          * name_to_handle_at(), however, does not return the
 163          * opaque file handle. The opaque file handle is pretty useful
 164          * to detect the root directory, which we should always
 165          * consider a mount point. Hence we use this only as
 166          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 167          * kernel addition.
 168          *
 169          * As last fallback we do traditional fstat() based st_dev
 170          * comparisons. This is how things were traditionally done,
 171          * but unionfs breaks this since it exposes file
 172          * systems with a variety of st_dev reported. Also, btrfs
 173          * subvolumes have different st_dev, even though they aren't
 174          * real mounts of their own. */
 175
 176         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 177         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 178                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 179                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 180                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 181                  * (EINVAL): fall back to simpler logic. */
 182                 goto fallback_fdinfo;
 183         else if (r == -EOPNOTSUPP)
 184                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 185                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 186                  * logic */
 187                 nosupp = true;
 188         else if (r < 0)
 189                 return r;
 190
 191         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 192         if (r == -EOPNOTSUPP) {
 193                 if (nosupp)
 194                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 195                         goto fallback_fdinfo;
 196                 else
 197                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 198                          * it must be a mount point. */
 199                         return 1;
 200         } else if (r < 0)
 201                 return r;
 202
 203         /* The parent can do name_to_handle_at() but the
 204          * directory we are interested in can't? If so, it
 205          * must be a mount point. */
 206         if (nosupp)
 207                 return 1;
 208
 209         /* If the file handle for the directory we are
 210          * interested in and its parent are identical, we
 211          * assume this is the root directory, which is a mount
 212          * point. */
 213
 214         if (h->handle_bytes == h_parent->handle_bytes &&
 215             h->handle_type == h_parent->handle_type &&
 216             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 217                 return 1;
 218
 219         return mount_id != mount_id_parent;
 220
 221 fallback_fdinfo:
 222         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 223         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 224                 goto fallback_fstat;
 225         if (r < 0)
 226                 return r;
 227
 228         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 229         if (r < 0)
 230                 return r;
 231
 232         if (mount_id != mount_id_parent)
 233                 return 1;
 234
 235         /* Hmm, so, the mount ids are the same. This leaves one
 236          * special case though for the root file system. For that,
 237          * let's see if the parent directory has the same inode as we
 238          * are interested in. Hence, let's also do fstat() checks now,
 239          * too, but avoid the st_dev comparisons, since they aren't
 240          * that useful on unionfs mounts. */
 241         check_st_dev = false;
 242
 243 fallback_fstat:
 244         /* yay for fstatat() taking a different set of flags than the other
 245          * _at() above */
 246         if (flags & AT_SYMLINK_FOLLOW)
 247                 flags &= ~AT_SYMLINK_FOLLOW;
 248         else
 249                 flags |= AT_SYMLINK_NOFOLLOW;
 250         if (fstatat(fd, filename, &a, flags) < 0)
 251                 return -errno;
 252
 253         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 254                 return -errno;
 255
 256         /* A directory with same device and inode as its parent? Must
 257          * be the root directory */
 258         if (a.st_dev == b.st_dev &&
 259             a.st_ino == b.st_ino)
 260                 return 1;
 261
 262         return check_st_dev && (a.st_dev != b.st_dev);
 263 }
 264
 265 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 266 int path_is_mount_point(const char *t, const char *root, int flags) {
 267         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 268         _cleanup_close_ int fd = -1;
 269         int r;
 270
 271         assert(t);
 272         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 273
 274         if (path_equal(t, "/"))
 275                 return 1;
 276
 277         /* we need to resolve symlinks manually, we can't just rely on
 278          * fd_is_mount_point() to do that for us; if we have a structure like
 279          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 280          * look at needs to be /usr, not /. */
 281         if (flags & AT_SYMLINK_FOLLOW) {
 282                 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
 283                 if (r < 0)
 284                         return r;
 285
 286                 t = canonical;
 287         }
 288
 289         parent = dirname_malloc(t);
 290         if (!parent)
 291                 return -ENOMEM;
 292
 293         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 294         if (fd < 0)
 295                 return -errno;
 296
 297         return fd_is_mount_point(fd, last_path_component(t), flags);
 298 }
 299
 300 int path_get_mnt_id(const char *path, int *ret) {
 301         int r;
 302
 303         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 304         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 305                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 306
 307         return r;
 308 }
 309
 310 #if 0 /// UNNEEDED by elogind
 311 int umount_recursive(const char *prefix, int flags) {
 312         bool again;
 313         int n = 0, r;
 314
 315         /* Try to umount everything recursively below a
 316          * directory. Also, take care of stacked mounts, and keep
 317          * unmounting them until they are gone. */
 318
 319         do {
 320                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 321
 322                 again = false;
 323                 r = 0;
 324
 325                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 326                 if (!proc_self_mountinfo)
 327                         return -errno;
 328
 329                 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 330
 331                 for (;;) {
 332                         _cleanup_free_ char *path = NULL, *p = NULL;
 333                         int k;
 334
 335                         k = fscanf(proc_self_mountinfo,
 336                                    "%*s "       /* (1) mount id */
 337                                    "%*s "       /* (2) parent id */
 338                                    "%*s "       /* (3) major:minor */
 339                                    "%*s "       /* (4) root */
 340                                    "%ms "       /* (5) mount point */
 341                                    "%*s"        /* (6) mount options */
 342                                    "%*[^-]"     /* (7) optional fields */
 343                                    "- "         /* (8) separator */
 344                                    "%*s "       /* (9) file system type */
 345                                    "%*s"        /* (10) mount source */
 346                                    "%*s"        /* (11) mount options 2 */
 347                                    "%*[^\n]",   /* some rubbish at the end */
 348                                    &path);
 349                         if (k != 1) {
 350                                 if (k == EOF)
 351                                         break;
 352
 353                                 continue;
 354                         }
 355
 356                         r = cunescape(path, UNESCAPE_RELAX, &p);
 357                         if (r < 0)
 358                                 return r;
 359
 360                         if (!path_startswith(p, prefix))
 361                                 continue;
 362
 363                         if (umount2(p, flags) < 0) {
 364                                 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
 365                                 continue;
 366                         }
 367
 368                         log_debug("Successfully unmounted %s", p);
 369
 370                         again = true;
 371                         n++;
 372
 373                         break;
 374                 }
 375
 376         } while (again);
 377
 378         return r ? r : n;
 379 }
 380
 381 static int get_mount_flags(const char *path, unsigned long *flags) {
 382         struct statvfs buf;
 383
 384         if (statvfs(path, &buf) < 0)
 385                 return -errno;
 386         *flags = buf.f_flag;
 387         return 0;
 388 }
 389
 390 /* Use this function only if do you have direct access to /proc/self/mountinfo
 391  * and need the caller to open it for you. This is the case when /proc is
 392  * masked or not mounted. Otherwise, use bind_remount_recursive. */
 393 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
 394         _cleanup_set_free_free_ Set *done = NULL;
 395         _cleanup_free_ char *cleaned = NULL;
 396         int r;
 397
 398         assert(proc_self_mountinfo);
 399
 400         /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
 401          * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 402          * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
 403          * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
 404          * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
 405          * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
 406          * future submounts that have been triggered via autofs.
 407          *
 408          * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
 409          * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 410
 411         cleaned = strdup(prefix);
 412         if (!cleaned)
 413                 return -ENOMEM;
 414
 415         path_simplify(cleaned, false);
 416
 417         done = set_new(&path_hash_ops);
 418         if (!done)
 419                 return -ENOMEM;
 420
 421         for (;;) {
 422                 _cleanup_set_free_free_ Set *todo = NULL;
 423                 bool top_autofs = false;
 424                 char *x;
 425                 unsigned long orig_flags;
 426
 427                 todo = set_new(&path_hash_ops);
 428                 if (!todo)
 429                         return -ENOMEM;
 430
 431                 rewind(proc_self_mountinfo);
 432
 433                 for (;;) {
 434                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 435                         int k;
 436
 437                         k = fscanf(proc_self_mountinfo,
 438                                    "%*s "       /* (1) mount id */
 439                                    "%*s "       /* (2) parent id */
 440                                    "%*s "       /* (3) major:minor */
 441                                    "%*s "       /* (4) root */
 442                                    "%ms "       /* (5) mount point */
 443                                    "%*s"        /* (6) mount options (superblock) */
 444                                    "%*[^-]"     /* (7) optional fields */
 445                                    "- "         /* (8) separator */
 446                                    "%ms "       /* (9) file system type */
 447                                    "%*s"        /* (10) mount source */
 448                                    "%*s"        /* (11) mount options (bind mount) */
 449                                    "%*[^\n]",   /* some rubbish at the end */
 450                                    &path,
 451                                    &type);
 452                         if (k != 2) {
 453                                 if (k == EOF)
 454                                         break;
 455
 456                                 continue;
 457                         }
 458
 459                         r = cunescape(path, UNESCAPE_RELAX, &p);
 460                         if (r < 0)
 461                                 return r;
 462
 463                         if (!path_startswith(p, cleaned))
 464                                 continue;
 465
 466                         /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
 467                          * operate on. */
 468                         if (!path_equal(cleaned, p)) {
 469                                 bool blacklisted = false;
 470                                 char **i;
 471
 472                                 STRV_FOREACH(i, blacklist) {
 473
 474                                         if (path_equal(*i, cleaned))
 475                                                 continue;
 476
 477                                         if (!path_startswith(*i, cleaned))
 478                                                 continue;
 479
 480                                         if (path_startswith(p, *i)) {
 481                                                 blacklisted = true;
 482                                                 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
 483                                                 break;
 484                                         }
 485                                 }
 486                                 if (blacklisted)
 487                                         continue;
 488                         }
 489
 490                         /* Let's ignore autofs mounts.  If they aren't
 491                          * triggered yet, we want to avoid triggering
 492                          * them, as we don't make any guarantees for
 493                          * future submounts anyway.  If they are
 494                          * already triggered, then we will find
 495                          * another entry for this. */
 496                         if (streq(type, "autofs")) {
 497                                 top_autofs = top_autofs || path_equal(cleaned, p);
 498                                 continue;
 499                         }
 500
 501                         if (!set_contains(done, p)) {
 502                                 r = set_consume(todo, p);
 503                                 p = NULL;
 504                                 if (r == -EEXIST)
 505                                         continue;
 506                                 if (r < 0)
 507                                         return r;
 508                         }
 509                 }
 510
 511                 /* If we have no submounts to process anymore and if
 512                  * the root is either already done, or an autofs, we
 513                  * are done */
 514                 if (set_isempty(todo) &&
 515                     (top_autofs || set_contains(done, cleaned)))
 516                         return 0;
 517
 518                 if (!set_contains(done, cleaned) &&
 519                     !set_contains(todo, cleaned)) {
 520                         /* The prefix directory itself is not yet a mount, make it one. */
 521                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 522                                 return -errno;
 523
 524                         orig_flags = 0;
 525                         (void) get_mount_flags(cleaned, &orig_flags);
 526                         orig_flags &= ~MS_RDONLY;
 527
 528                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 529                                 return -errno;
 530
 531                         log_debug("Made top-level directory %s a mount point.", prefix);
 532
 533                         x = strdup(cleaned);
 534                         if (!x)
 535                                 return -ENOMEM;
 536
 537                         r = set_consume(done, x);
 538                         if (r < 0)
 539                                 return r;
 540                 }
 541
 542                 while ((x = set_steal_first(todo))) {
 543
 544                         r = set_consume(done, x);
 545                         if (IN_SET(r, 0, -EEXIST))
 546                                 continue;
 547                         if (r < 0)
 548                                 return r;
 549
 550                         /* Deal with mount points that are obstructed by a later mount */
 551                         r = path_is_mount_point(x, NULL, 0);
 552                         if (IN_SET(r, 0, -ENOENT))
 553                                 continue;
 554                         if (r < 0)
 555                                 return r;
 556
 557                         /* Try to reuse the original flag set */
 558                         orig_flags = 0;
 559                         (void) get_mount_flags(x, &orig_flags);
 560                         orig_flags &= ~MS_RDONLY;
 561
 562                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 563                                 return -errno;
 564
 565                         log_debug("Remounted %s read-only.", x);
 566                 }
 567         }
 568 }
 569
 570 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
 571         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 572
 573         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 574         if (!proc_self_mountinfo)
 575                 return -errno;
 576
 577         (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 578
 579         return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
 580 }
 581
 582 int mount_move_root(const char *path) {
 583         assert(path);
 584
 585         if (chdir(path) < 0)
 586                 return -errno;
 587
 588         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 589                 return -errno;
 590
 591         if (chroot(".") < 0)
 592                 return -errno;
 593
 594         if (chdir("/") < 0)
 595                 return -errno;
 596
 597         return 0;
 598 }
 599
 600 bool fstype_is_network(const char *fstype) {
 601         const char *x;
 602
 603         x = startswith(fstype, "fuse.");
 604         if (x)
 605                 fstype = x;
 606
 607         return STR_IN_SET(fstype,
 608                           "afs",
 609                           "cifs",
 610                           "smbfs",
 611                           "sshfs",
 612                           "ncpfs",
 613                           "ncp",
 614                           "nfs",
 615                           "nfs4",
 616                           "gfs",
 617                           "gfs2",
 618                           "glusterfs",
 619                           "pvfs2", /* OrangeFS */
 620                           "ocfs2",
 621                           "lustre");
 622 }
 623
 624 bool fstype_is_api_vfs(const char *fstype) {
 625         return STR_IN_SET(fstype,
 626                           "autofs",
 627                           "bpf",
 628                           "cgroup",
 629                           "cgroup2",
 630                           "configfs",
 631                           "cpuset",
 632                           "debugfs",
 633                           "devpts",
 634                           "devtmpfs",
 635                           "efivarfs",
 636                           "fusectl",
 637                           "hugetlbfs",
 638                           "mqueue",
 639                           "proc",
 640                           "pstore",
 641                           "ramfs",
 642                           "securityfs",
 643                           "sysfs",
 644                           "tmpfs",
 645                           "tracefs");
 646 }
 647
 648 bool fstype_is_ro(const char *fstype) {
 649         /* All Linux file systems that are necessarily read-only */
 650         return STR_IN_SET(fstype,
 651                           "DM_verity_hash",
 652                           "iso9660",
 653                           "squashfs");
 654 }
 655
 656 bool fstype_can_discard(const char *fstype) {
 657         return STR_IN_SET(fstype,
 658                           "btrfs",
 659                           "ext4",
 660                           "vfat",
 661                           "xfs");
 662 }
 663
 664 bool fstype_can_uid_gid(const char *fstype) {
 665
 666         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 667          * current and future. */
 668
 669         return STR_IN_SET(fstype,
 670                           "adfs",
 671                           "fat",
 672                           "hfs",
 673                           "hpfs",
 674                           "iso9660",
 675                           "msdos",
 676                           "ntfs",
 677                           "vfat");
 678 }
 679
 680 int repeat_unmount(const char *path, int flags) {
 681         bool done = false;
 682
 683         assert(path);
 684
 685         /* If there are multiple mounts on a mount point, this
 686          * removes them all */
 687
 688         for (;;) {
 689                 if (umount2(path, flags) < 0) {
 690
 691                         if (errno == EINVAL)
 692                                 return done;
 693
 694                         return -errno;
 695                 }
 696
 697                 done = true;
 698         }
 699 }
 700 #endif // 0
 701
 702 const char* mode_to_inaccessible_node(mode_t mode) {
 703         /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
 704          * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
 705          * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
 706          * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
 707          * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
 708          * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
 709
 710         switch(mode & S_IFMT) {
 711                 case S_IFREG:
 712                         return "/run/systemd/inaccessible/reg";
 713
 714                 case S_IFDIR:
 715                         return "/run/systemd/inaccessible/dir";
 716
 717                 case S_IFCHR:
 718                         if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
 719                                 return "/run/systemd/inaccessible/chr";
 720                         return "/run/systemd/inaccessible/sock";
 721
 722                 case S_IFBLK:
 723                         if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
 724                                 return "/run/systemd/inaccessible/blk";
 725                         return "/run/systemd/inaccessible/sock";
 726
 727                 case S_IFIFO:
 728                         return "/run/systemd/inaccessible/fifo";
 729
 730                 case S_IFSOCK:
 731                         return "/run/systemd/inaccessible/sock";
 732         }
 733         return NULL;
 734 }
 735
 736 #if 0 /// UNNEEDED by elogind
 737 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
 738 static char* mount_flags_to_string(long unsigned flags) {
 739         char *x;
 740         _cleanup_free_ char *y = NULL;
 741         long unsigned overflow;
 742
 743         overflow = flags & ~(MS_RDONLY |
 744                              MS_NOSUID |
 745                              MS_NODEV |
 746                              MS_NOEXEC |
 747                              MS_SYNCHRONOUS |
 748                              MS_REMOUNT |
 749                              MS_MANDLOCK |
 750                              MS_DIRSYNC |
 751                              MS_NOATIME |
 752                              MS_NODIRATIME |
 753                              MS_BIND |
 754                              MS_MOVE |
 755                              MS_REC |
 756                              MS_SILENT |
 757                              MS_POSIXACL |
 758                              MS_UNBINDABLE |
 759                              MS_PRIVATE |
 760                              MS_SLAVE |
 761                              MS_SHARED |
 762                              MS_RELATIME |
 763                              MS_KERNMOUNT |
 764                              MS_I_VERSION |
 765                              MS_STRICTATIME |
 766                              MS_LAZYTIME);
 767
 768         if (flags == 0 || overflow != 0)
 769                 if (asprintf(&y, "%lx", overflow) < 0)
 770                         return NULL;
 771
 772         x = strjoin(FLAG(MS_RDONLY),
 773                     FLAG(MS_NOSUID),
 774                     FLAG(MS_NODEV),
 775                     FLAG(MS_NOEXEC),
 776                     FLAG(MS_SYNCHRONOUS),
 777                     FLAG(MS_REMOUNT),
 778                     FLAG(MS_MANDLOCK),
 779                     FLAG(MS_DIRSYNC),
 780                     FLAG(MS_NOATIME),
 781                     FLAG(MS_NODIRATIME),
 782                     FLAG(MS_BIND),
 783                     FLAG(MS_MOVE),
 784                     FLAG(MS_REC),
 785                     FLAG(MS_SILENT),
 786                     FLAG(MS_POSIXACL),
 787                     FLAG(MS_UNBINDABLE),
 788                     FLAG(MS_PRIVATE),
 789                     FLAG(MS_SLAVE),
 790                     FLAG(MS_SHARED),
 791                     FLAG(MS_RELATIME),
 792                     FLAG(MS_KERNMOUNT),
 793                     FLAG(MS_I_VERSION),
 794                     FLAG(MS_STRICTATIME),
 795                     FLAG(MS_LAZYTIME),
 796                     y);
 797         if (!x)
 798                 return NULL;
 799         if (!y)
 800                 x[strlen(x) - 1] = '\0'; /* truncate the last | */
 801         return x;
 802 }
 803
 804 int mount_verbose(
 805                 int error_log_level,
 806                 const char *what,
 807                 const char *where,
 808                 const char *type,
 809                 unsigned long flags,
 810                 const char *options) {
 811
 812         _cleanup_free_ char *fl = NULL, *o = NULL;
 813         unsigned long f;
 814         int r;
 815
 816         r = mount_option_mangle(options, flags, &f, &o);
 817         if (r < 0)
 818                 return log_full_errno(error_log_level, r,
 819                                       "Failed to mangle mount options %s: %m",
 820                                       strempty(options));
 821
 822         fl = mount_flags_to_string(f);
 823
 824         if ((f & MS_REMOUNT) && !what && !type)
 825                 log_debug("Remounting %s (%s \"%s\")...",
 826                           where, strnull(fl), strempty(o));
 827         else if (!what && !type)
 828                 log_debug("Mounting %s (%s \"%s\")...",
 829                           where, strnull(fl), strempty(o));
 830         else if ((f & MS_BIND) && !type)
 831                 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
 832                           what, where, strnull(fl), strempty(o));
 833         else if (f & MS_MOVE)
 834                 log_debug("Moving mount %s → %s (%s \"%s\")...",
 835                           what, where, strnull(fl), strempty(o));
 836         else
 837                 log_debug("Mounting %s on %s (%s \"%s\")...",
 838                           strna(type), where, strnull(fl), strempty(o));
 839         if (mount(what, where, type, f, o) < 0)
 840                 return log_full_errno(error_log_level, errno,
 841                                       "Failed to mount %s on %s (%s \"%s\"): %m",
 842                                       strna(type), where, strnull(fl), strempty(o));
 843         return 0;
 844 }
 845
 846 int umount_verbose(const char *what) {
 847         log_debug("Umounting %s...", what);
 848         if (umount(what) < 0)
 849                 return log_error_errno(errno, "Failed to unmount %s: %m", what);
 850         return 0;
 851 }
 852 #endif // 0
 853
 854 const char *mount_propagation_flags_to_string(unsigned long flags) {
 855
 856         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 857         case 0:
 858                 return "";
 859         case MS_SHARED:
 860                 return "shared";
 861         case MS_SLAVE:
 862                 return "slave";
 863         case MS_PRIVATE:
 864                 return "private";
 865         }
 866
 867         return NULL;
 868 }
 869
 870 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 871
 872         if (isempty(name))
 873                 *ret = 0;
 874         else if (streq(name, "shared"))
 875                 *ret = MS_SHARED;
 876         else if (streq(name, "slave"))
 877                 *ret = MS_SLAVE;
 878         else if (streq(name, "private"))
 879                 *ret = MS_PRIVATE;
 880         else
 881                 return -EINVAL;
 882         return 0;
 883 }
 884
 885 #if 0 /// UNNEEDED by elogind
 886 int mount_option_mangle(
 887                 const char *options,
 888                 unsigned long mount_flags,
 889                 unsigned long *ret_mount_flags,
 890                 char **ret_remaining_options) {
 891
 892         const struct libmnt_optmap *map;
 893         _cleanup_free_ char *ret = NULL;
 894         const char *p;
 895         int r;
 896
 897         /* This extracts mount flags from the mount options, and store
 898          * non-mount-flag options to '*ret_remaining_options'.
 899          * E.g.,
 900          * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
 901          * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
 902          * "size=1630748k,mode=700,uid=1000,gid=1000".
 903          * See more examples in test-mount-utils.c.
 904          *
 905          * Note that if 'options' does not contain any non-mount-flag options,
 906          * then '*ret_remaining_options' is set to NULL instread of empty string.
 907          * Note that this does not check validity of options stored in
 908          * '*ret_remaining_options'.
 909          * Note that if 'options' is NULL, then this just copies 'mount_flags'
 910          * to '*ret_mount_flags'. */
 911
 912         assert(ret_mount_flags);
 913         assert(ret_remaining_options);
 914
 915         map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
 916         if (!map)
 917                 return -EINVAL;
 918
 919         p = options;
 920         for (;;) {
 921                 _cleanup_free_ char *word = NULL;
 922                 const struct libmnt_optmap *ent;
 923
 924                 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
 925                 if (r < 0)
 926                         return r;
 927                 if (r == 0)
 928                         break;
 929
 930                 for (ent = map; ent->name; ent++) {
 931                         /* All entries in MNT_LINUX_MAP do not take any argument.
 932                          * Thus, ent->name does not contain "=" or "[=]". */
 933                         if (!streq(word, ent->name))
 934                                 continue;
 935
 936                         if (!(ent->mask & MNT_INVERT))
 937                                 mount_flags |= ent->id;
 938                         else if (mount_flags & ent->id)
 939                                 mount_flags ^= ent->id;
 940
 941                         break;
 942                 }
 943
 944                 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
 945                 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
 946                         return -ENOMEM;
 947         }
 948
 949         *ret_mount_flags = mount_flags;
 950         *ret_remaining_options = TAKE_PTR(ret);
 951
 952         return 0;
 953 }
 954 #endif // 0