src/basic/mount-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6 ***/
   7
   8 #include <errno.h>
   9 //#include <stdio_ext.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <sys/mount.h>
  13 #include <sys/stat.h>
  14 #include <sys/statvfs.h>
  15 #include <unistd.h>
  16
  17 /* Include later */
  18 //#include <libmount.h>
  19
  20 #include "alloc-util.h"
  21 #include "escape.h"
  22 //#include "extract-word.h"
  23 #include "fd-util.h"
  24 #include "fileio.h"
  25 #include "fs-util.h"
  26 #include "hashmap.h"
  27 #include "mount-util.h"
  28 #include "parse-util.h"
  29 #include "path-util.h"
  30 #include "set.h"
  31 #include "stdio-util.h"
  32 #include "string-util.h"
  33 #include "strv.h"
  34
  35 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
  36  * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
  37  * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
  38  * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
  39  * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
  40  * with large file handles anyway. */
  41 #define ORIGINAL_MAX_HANDLE_SZ 128
  42
  43 int name_to_handle_at_loop(
  44                 int fd,
  45                 const char *path,
  46                 struct file_handle **ret_handle,
  47                 int *ret_mnt_id,
  48                 int flags) {
  49
  50         _cleanup_free_ struct file_handle *h = NULL;
  51         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  52
  53         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  54          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  55          * start value, it is not an upper bound on the buffer size required.
  56          *
  57          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  58          * as NULL if there's no interest in either. */
  59
  60         for (;;) {
  61                 int mnt_id = -1;
  62
  63                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  64                 if (!h)
  65                         return -ENOMEM;
  66
  67                 h->handle_bytes = n;
  68
  69                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  70
  71                         if (ret_handle)
  72                                 *ret_handle = TAKE_PTR(h);
  73
  74                         if (ret_mnt_id)
  75                                 *ret_mnt_id = mnt_id;
  76
  77                         return 0;
  78                 }
  79                 if (errno != EOVERFLOW)
  80                         return -errno;
  81
  82                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  83
  84                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  85                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  86                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
  87
  88                         *ret_mnt_id = mnt_id;
  89                         return 0;
  90                 }
  91
  92                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
  93                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
  94                  * buffer. In that case propagate EOVERFLOW */
  95                 if (h->handle_bytes <= n)
  96                         return -EOVERFLOW;
  97
  98                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
  99                 n = h->handle_bytes;
 100                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
 101                         return -EOVERFLOW;
 102
 103                 h = mfree(h);
 104         }
 105 }
 106
 107 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
 108         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 109         _cleanup_free_ char *fdinfo = NULL;
 110         _cleanup_close_ int subfd = -1;
 111         char *p;
 112         int r;
 113
 114         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 115                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 116         else {
 117                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
 118                 if (subfd < 0)
 119                         return -errno;
 120
 121                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 122         }
 123
 124         r = read_full_file(path, &fdinfo, NULL);
 125         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 126                 return -EOPNOTSUPP;
 127         if (r < 0)
 128                 return r;
 129
 130         p = startswith(fdinfo, "mnt_id:");
 131         if (!p) {
 132                 p = strstr(fdinfo, "\nmnt_id:");
 133                 if (!p) /* The mnt_id field is a relatively new addition */
 134                         return -EOPNOTSUPP;
 135
 136                 p += 8;
 137         }
 138
 139         p += strspn(p, WHITESPACE);
 140         p[strcspn(p, WHITESPACE)] = 0;
 141
 142         return safe_atoi(p, mnt_id);
 143 }
 144
 145 int fd_is_mount_point(int fd, const char *filename, int flags) {
 146         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 147         int mount_id = -1, mount_id_parent = -1;
 148         bool nosupp = false, check_st_dev = true;
 149         struct stat a, b;
 150         int r;
 151
 152         assert(fd >= 0);
 153         assert(filename);
 154
 155         /* First we will try the name_to_handle_at() syscall, which
 156          * tells us the mount id and an opaque file "handle". It is
 157          * not supported everywhere though (kernel compile-time
 158          * option, not all file systems are hooked up). If it works
 159          * the mount id is usually good enough to tell us whether
 160          * something is a mount point.
 161          *
 162          * If that didn't work we will try to read the mount id from
 163          * /proc/self/fdinfo/<fd>. This is almost as good as
 164          * name_to_handle_at(), however, does not return the
 165          * opaque file handle. The opaque file handle is pretty useful
 166          * to detect the root directory, which we should always
 167          * consider a mount point. Hence we use this only as
 168          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 169          * kernel addition.
 170          *
 171          * As last fallback we do traditional fstat() based st_dev
 172          * comparisons. This is how things were traditionally done,
 173          * but unionfs breaks this since it exposes file
 174          * systems with a variety of st_dev reported. Also, btrfs
 175          * subvolumes have different st_dev, even though they aren't
 176          * real mounts of their own. */
 177
 178         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 179         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 180                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 181                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 182                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 183                  * (EINVAL): fall back to simpler logic. */
 184                 goto fallback_fdinfo;
 185         else if (r == -EOPNOTSUPP)
 186                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 187                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 188                  * logic */
 189                 nosupp = true;
 190         else if (r < 0)
 191                 return r;
 192
 193         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 194         if (r == -EOPNOTSUPP) {
 195                 if (nosupp)
 196                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 197                         goto fallback_fdinfo;
 198                 else
 199                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 200                          * it must be a mount point. */
 201                         return 1;
 202         } else if (r < 0)
 203                 return r;
 204
 205         /* The parent can do name_to_handle_at() but the
 206          * directory we are interested in can't? If so, it
 207          * must be a mount point. */
 208         if (nosupp)
 209                 return 1;
 210
 211         /* If the file handle for the directory we are
 212          * interested in and its parent are identical, we
 213          * assume this is the root directory, which is a mount
 214          * point. */
 215
 216         if (h->handle_bytes == h_parent->handle_bytes &&
 217             h->handle_type == h_parent->handle_type &&
 218             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 219                 return 1;
 220
 221         return mount_id != mount_id_parent;
 222
 223 fallback_fdinfo:
 224         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 225         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 226                 goto fallback_fstat;
 227         if (r < 0)
 228                 return r;
 229
 230         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 231         if (r < 0)
 232                 return r;
 233
 234         if (mount_id != mount_id_parent)
 235                 return 1;
 236
 237         /* Hmm, so, the mount ids are the same. This leaves one
 238          * special case though for the root file system. For that,
 239          * let's see if the parent directory has the same inode as we
 240          * are interested in. Hence, let's also do fstat() checks now,
 241          * too, but avoid the st_dev comparisons, since they aren't
 242          * that useful on unionfs mounts. */
 243         check_st_dev = false;
 244
 245 fallback_fstat:
 246         /* yay for fstatat() taking a different set of flags than the other
 247          * _at() above */
 248         if (flags & AT_SYMLINK_FOLLOW)
 249                 flags &= ~AT_SYMLINK_FOLLOW;
 250         else
 251                 flags |= AT_SYMLINK_NOFOLLOW;
 252         if (fstatat(fd, filename, &a, flags) < 0)
 253                 return -errno;
 254
 255         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 256                 return -errno;
 257
 258         /* A directory with same device and inode as its parent? Must
 259          * be the root directory */
 260         if (a.st_dev == b.st_dev &&
 261             a.st_ino == b.st_ino)
 262                 return 1;
 263
 264         return check_st_dev && (a.st_dev != b.st_dev);
 265 }
 266
 267 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 268 int path_is_mount_point(const char *t, const char *root, int flags) {
 269         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 270         _cleanup_close_ int fd = -1;
 271         int r;
 272
 273         assert(t);
 274         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 275
 276         if (path_equal(t, "/"))
 277                 return 1;
 278
 279         /* we need to resolve symlinks manually, we can't just rely on
 280          * fd_is_mount_point() to do that for us; if we have a structure like
 281          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 282          * look at needs to be /usr, not /. */
 283         if (flags & AT_SYMLINK_FOLLOW) {
 284                 r = chase_symlinks(t, root, CHASE_TRAIL_SLASH, &canonical);
 285                 if (r < 0)
 286                         return r;
 287
 288                 t = canonical;
 289         }
 290
 291         parent = dirname_malloc(t);
 292         if (!parent)
 293                 return -ENOMEM;
 294
 295         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 296         if (fd < 0)
 297                 return -errno;
 298
 299         return fd_is_mount_point(fd, last_path_component(t), flags);
 300 }
 301
 302 int path_get_mnt_id(const char *path, int *ret) {
 303         int r;
 304
 305         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 306         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 307                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 308
 309         return r;
 310 }
 311
 312 #if 0 /// UNNEEDED by elogind
 313 int umount_recursive(const char *prefix, int flags) {
 314         bool again;
 315         int n = 0, r;
 316
 317         /* Try to umount everything recursively below a
 318          * directory. Also, take care of stacked mounts, and keep
 319          * unmounting them until they are gone. */
 320
 321         do {
 322                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 323
 324                 again = false;
 325                 r = 0;
 326
 327                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 328                 if (!proc_self_mountinfo)
 329                         return -errno;
 330
 331                 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 332
 333                 for (;;) {
 334                         _cleanup_free_ char *path = NULL, *p = NULL;
 335                         int k;
 336
 337                         k = fscanf(proc_self_mountinfo,
 338                                    "%*s "       /* (1) mount id */
 339                                    "%*s "       /* (2) parent id */
 340                                    "%*s "       /* (3) major:minor */
 341                                    "%*s "       /* (4) root */
 342                                    "%ms "       /* (5) mount point */
 343                                    "%*s"        /* (6) mount options */
 344                                    "%*[^-]"     /* (7) optional fields */
 345                                    "- "         /* (8) separator */
 346                                    "%*s "       /* (9) file system type */
 347                                    "%*s"        /* (10) mount source */
 348                                    "%*s"        /* (11) mount options 2 */
 349                                    "%*[^\n]",   /* some rubbish at the end */
 350                                    &path);
 351                         if (k != 1) {
 352                                 if (k == EOF)
 353                                         break;
 354
 355                                 continue;
 356                         }
 357
 358                         r = cunescape(path, UNESCAPE_RELAX, &p);
 359                         if (r < 0)
 360                                 return r;
 361
 362                         if (!path_startswith(p, prefix))
 363                                 continue;
 364
 365                         if (umount2(p, flags) < 0) {
 366                                 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
 367                                 continue;
 368                         }
 369
 370                         log_debug("Successfully unmounted %s", p);
 371
 372                         again = true;
 373                         n++;
 374
 375                         break;
 376                 }
 377
 378         } while (again);
 379
 380         return r ? r : n;
 381 }
 382
 383 static int get_mount_flags(const char *path, unsigned long *flags) {
 384         struct statvfs buf;
 385
 386         if (statvfs(path, &buf) < 0)
 387                 return -errno;
 388         *flags = buf.f_flag;
 389         return 0;
 390 }
 391
 392 /* Use this function only if do you have direct access to /proc/self/mountinfo
 393  * and need the caller to open it for you. This is the case when /proc is
 394  * masked or not mounted. Otherwise, use bind_remount_recursive. */
 395 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
 396         _cleanup_set_free_free_ Set *done = NULL;
 397         _cleanup_free_ char *cleaned = NULL;
 398         int r;
 399
 400         assert(proc_self_mountinfo);
 401
 402         /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
 403          * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 404          * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
 405          * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
 406          * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
 407          * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
 408          * future submounts that have been triggered via autofs.
 409          *
 410          * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
 411          * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 412
 413         cleaned = strdup(prefix);
 414         if (!cleaned)
 415                 return -ENOMEM;
 416
 417         path_simplify(cleaned, false);
 418
 419         done = set_new(&path_hash_ops);
 420         if (!done)
 421                 return -ENOMEM;
 422
 423         for (;;) {
 424                 _cleanup_set_free_free_ Set *todo = NULL;
 425                 bool top_autofs = false;
 426                 char *x;
 427                 unsigned long orig_flags;
 428
 429                 todo = set_new(&path_hash_ops);
 430                 if (!todo)
 431                         return -ENOMEM;
 432
 433                 rewind(proc_self_mountinfo);
 434
 435                 for (;;) {
 436                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 437                         int k;
 438
 439                         k = fscanf(proc_self_mountinfo,
 440                                    "%*s "       /* (1) mount id */
 441                                    "%*s "       /* (2) parent id */
 442                                    "%*s "       /* (3) major:minor */
 443                                    "%*s "       /* (4) root */
 444                                    "%ms "       /* (5) mount point */
 445                                    "%*s"        /* (6) mount options (superblock) */
 446                                    "%*[^-]"     /* (7) optional fields */
 447                                    "- "         /* (8) separator */
 448                                    "%ms "       /* (9) file system type */
 449                                    "%*s"        /* (10) mount source */
 450                                    "%*s"        /* (11) mount options (bind mount) */
 451                                    "%*[^\n]",   /* some rubbish at the end */
 452                                    &path,
 453                                    &type);
 454                         if (k != 2) {
 455                                 if (k == EOF)
 456                                         break;
 457
 458                                 continue;
 459                         }
 460
 461                         r = cunescape(path, UNESCAPE_RELAX, &p);
 462                         if (r < 0)
 463                                 return r;
 464
 465                         if (!path_startswith(p, cleaned))
 466                                 continue;
 467
 468                         /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
 469                          * operate on. */
 470                         if (!path_equal(cleaned, p)) {
 471                                 bool blacklisted = false;
 472                                 char **i;
 473
 474                                 STRV_FOREACH(i, blacklist) {
 475
 476                                         if (path_equal(*i, cleaned))
 477                                                 continue;
 478
 479                                         if (!path_startswith(*i, cleaned))
 480                                                 continue;
 481
 482                                         if (path_startswith(p, *i)) {
 483                                                 blacklisted = true;
 484                                                 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
 485                                                 break;
 486                                         }
 487                                 }
 488                                 if (blacklisted)
 489                                         continue;
 490                         }
 491
 492                         /* Let's ignore autofs mounts.  If they aren't
 493                          * triggered yet, we want to avoid triggering
 494                          * them, as we don't make any guarantees for
 495                          * future submounts anyway.  If they are
 496                          * already triggered, then we will find
 497                          * another entry for this. */
 498                         if (streq(type, "autofs")) {
 499                                 top_autofs = top_autofs || path_equal(cleaned, p);
 500                                 continue;
 501                         }
 502
 503                         if (!set_contains(done, p)) {
 504                                 r = set_consume(todo, p);
 505                                 p = NULL;
 506                                 if (r == -EEXIST)
 507                                         continue;
 508                                 if (r < 0)
 509                                         return r;
 510                         }
 511                 }
 512
 513                 /* If we have no submounts to process anymore and if
 514                  * the root is either already done, or an autofs, we
 515                  * are done */
 516                 if (set_isempty(todo) &&
 517                     (top_autofs || set_contains(done, cleaned)))
 518                         return 0;
 519
 520                 if (!set_contains(done, cleaned) &&
 521                     !set_contains(todo, cleaned)) {
 522                         /* The prefix directory itself is not yet a mount, make it one. */
 523                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 524                                 return -errno;
 525
 526                         orig_flags = 0;
 527                         (void) get_mount_flags(cleaned, &orig_flags);
 528                         orig_flags &= ~MS_RDONLY;
 529
 530                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 531                                 return -errno;
 532
 533                         log_debug("Made top-level directory %s a mount point.", prefix);
 534
 535                         x = strdup(cleaned);
 536                         if (!x)
 537                                 return -ENOMEM;
 538
 539                         r = set_consume(done, x);
 540                         if (r < 0)
 541                                 return r;
 542                 }
 543
 544                 while ((x = set_steal_first(todo))) {
 545
 546                         r = set_consume(done, x);
 547                         if (IN_SET(r, 0, -EEXIST))
 548                                 continue;
 549                         if (r < 0)
 550                                 return r;
 551
 552                         /* Deal with mount points that are obstructed by a later mount */
 553                         r = path_is_mount_point(x, NULL, 0);
 554                         if (IN_SET(r, 0, -ENOENT))
 555                                 continue;
 556                         if (r < 0)
 557                                 return r;
 558
 559                         /* Try to reuse the original flag set */
 560                         orig_flags = 0;
 561                         (void) get_mount_flags(x, &orig_flags);
 562                         orig_flags &= ~MS_RDONLY;
 563
 564                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 565                                 return -errno;
 566
 567                         log_debug("Remounted %s read-only.", x);
 568                 }
 569         }
 570 }
 571
 572 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
 573         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 574
 575         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 576         if (!proc_self_mountinfo)
 577                 return -errno;
 578
 579         (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 580
 581         return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
 582 }
 583
 584 int mount_move_root(const char *path) {
 585         assert(path);
 586
 587         if (chdir(path) < 0)
 588                 return -errno;
 589
 590         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 591                 return -errno;
 592
 593         if (chroot(".") < 0)
 594                 return -errno;
 595
 596         if (chdir("/") < 0)
 597                 return -errno;
 598
 599         return 0;
 600 }
 601
 602 bool fstype_is_network(const char *fstype) {
 603         const char *x;
 604
 605         x = startswith(fstype, "fuse.");
 606         if (x)
 607                 fstype = x;
 608
 609         return STR_IN_SET(fstype,
 610                           "afs",
 611                           "cifs",
 612                           "smbfs",
 613                           "sshfs",
 614                           "ncpfs",
 615                           "ncp",
 616                           "nfs",
 617                           "nfs4",
 618                           "gfs",
 619                           "gfs2",
 620                           "glusterfs",
 621                           "pvfs2", /* OrangeFS */
 622                           "ocfs2",
 623                           "lustre");
 624 }
 625
 626 bool fstype_is_api_vfs(const char *fstype) {
 627         return STR_IN_SET(fstype,
 628                           "autofs",
 629                           "bpf",
 630                           "cgroup",
 631                           "cgroup2",
 632                           "configfs",
 633                           "cpuset",
 634                           "debugfs",
 635                           "devpts",
 636                           "devtmpfs",
 637                           "efivarfs",
 638                           "fusectl",
 639                           "hugetlbfs",
 640                           "mqueue",
 641                           "proc",
 642                           "pstore",
 643                           "ramfs",
 644                           "securityfs",
 645                           "sysfs",
 646                           "tmpfs",
 647                           "tracefs");
 648 }
 649
 650 bool fstype_is_ro(const char *fstype) {
 651         /* All Linux file systems that are necessarily read-only */
 652         return STR_IN_SET(fstype,
 653                           "DM_verity_hash",
 654                           "iso9660",
 655                           "squashfs");
 656 }
 657
 658 bool fstype_can_discard(const char *fstype) {
 659         return STR_IN_SET(fstype,
 660                           "btrfs",
 661                           "ext4",
 662                           "vfat",
 663                           "xfs");
 664 }
 665
 666 bool fstype_can_uid_gid(const char *fstype) {
 667
 668         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 669          * current and future. */
 670
 671         return STR_IN_SET(fstype,
 672                           "adfs",
 673                           "fat",
 674                           "hfs",
 675                           "hpfs",
 676                           "iso9660",
 677                           "msdos",
 678                           "ntfs",
 679                           "vfat");
 680 }
 681
 682 int repeat_unmount(const char *path, int flags) {
 683         bool done = false;
 684
 685         assert(path);
 686
 687         /* If there are multiple mounts on a mount point, this
 688          * removes them all */
 689
 690         for (;;) {
 691                 if (umount2(path, flags) < 0) {
 692
 693                         if (errno == EINVAL)
 694                                 return done;
 695
 696                         return -errno;
 697                 }
 698
 699                 done = true;
 700         }
 701 }
 702 #endif // 0
 703
 704 const char* mode_to_inaccessible_node(mode_t mode) {
 705         /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during
 706          * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe
 707          * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes
 708          * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead,
 709          * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts
 710          * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. */
 711
 712         switch(mode & S_IFMT) {
 713                 case S_IFREG:
 714                         return "/run/systemd/inaccessible/reg";
 715
 716                 case S_IFDIR:
 717                         return "/run/systemd/inaccessible/dir";
 718
 719                 case S_IFCHR:
 720                         if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
 721                                 return "/run/systemd/inaccessible/chr";
 722                         return "/run/systemd/inaccessible/sock";
 723
 724                 case S_IFBLK:
 725                         if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
 726                                 return "/run/systemd/inaccessible/blk";
 727                         return "/run/systemd/inaccessible/sock";
 728
 729                 case S_IFIFO:
 730                         return "/run/systemd/inaccessible/fifo";
 731
 732                 case S_IFSOCK:
 733                         return "/run/systemd/inaccessible/sock";
 734         }
 735         return NULL;
 736 }
 737
 738 #if 0 /// UNNEEDED by elogind
 739 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
 740 static char* mount_flags_to_string(long unsigned flags) {
 741         char *x;
 742         _cleanup_free_ char *y = NULL;
 743         long unsigned overflow;
 744
 745         overflow = flags & ~(MS_RDONLY |
 746                              MS_NOSUID |
 747                              MS_NODEV |
 748                              MS_NOEXEC |
 749                              MS_SYNCHRONOUS |
 750                              MS_REMOUNT |
 751                              MS_MANDLOCK |
 752                              MS_DIRSYNC |
 753                              MS_NOATIME |
 754                              MS_NODIRATIME |
 755                              MS_BIND |
 756                              MS_MOVE |
 757                              MS_REC |
 758                              MS_SILENT |
 759                              MS_POSIXACL |
 760                              MS_UNBINDABLE |
 761                              MS_PRIVATE |
 762                              MS_SLAVE |
 763                              MS_SHARED |
 764                              MS_RELATIME |
 765                              MS_KERNMOUNT |
 766                              MS_I_VERSION |
 767                              MS_STRICTATIME |
 768                              MS_LAZYTIME);
 769
 770         if (flags == 0 || overflow != 0)
 771                 if (asprintf(&y, "%lx", overflow) < 0)
 772                         return NULL;
 773
 774         x = strjoin(FLAG(MS_RDONLY),
 775                     FLAG(MS_NOSUID),
 776                     FLAG(MS_NODEV),
 777                     FLAG(MS_NOEXEC),
 778                     FLAG(MS_SYNCHRONOUS),
 779                     FLAG(MS_REMOUNT),
 780                     FLAG(MS_MANDLOCK),
 781                     FLAG(MS_DIRSYNC),
 782                     FLAG(MS_NOATIME),
 783                     FLAG(MS_NODIRATIME),
 784                     FLAG(MS_BIND),
 785                     FLAG(MS_MOVE),
 786                     FLAG(MS_REC),
 787                     FLAG(MS_SILENT),
 788                     FLAG(MS_POSIXACL),
 789                     FLAG(MS_UNBINDABLE),
 790                     FLAG(MS_PRIVATE),
 791                     FLAG(MS_SLAVE),
 792                     FLAG(MS_SHARED),
 793                     FLAG(MS_RELATIME),
 794                     FLAG(MS_KERNMOUNT),
 795                     FLAG(MS_I_VERSION),
 796                     FLAG(MS_STRICTATIME),
 797                     FLAG(MS_LAZYTIME),
 798                     y);
 799         if (!x)
 800                 return NULL;
 801         if (!y)
 802                 x[strlen(x) - 1] = '\0'; /* truncate the last | */
 803         return x;
 804 }
 805
 806 int mount_verbose(
 807                 int error_log_level,
 808                 const char *what,
 809                 const char *where,
 810                 const char *type,
 811                 unsigned long flags,
 812                 const char *options) {
 813
 814         _cleanup_free_ char *fl = NULL, *o = NULL;
 815         unsigned long f;
 816         int r;
 817
 818         r = mount_option_mangle(options, flags, &f, &o);
 819         if (r < 0)
 820                 return log_full_errno(error_log_level, r,
 821                                       "Failed to mangle mount options %s: %m",
 822                                       strempty(options));
 823
 824         fl = mount_flags_to_string(f);
 825
 826         if ((f & MS_REMOUNT) && !what && !type)
 827                 log_debug("Remounting %s (%s \"%s\")...",
 828                           where, strnull(fl), strempty(o));
 829         else if (!what && !type)
 830                 log_debug("Mounting %s (%s \"%s\")...",
 831                           where, strnull(fl), strempty(o));
 832         else if ((f & MS_BIND) && !type)
 833                 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
 834                           what, where, strnull(fl), strempty(o));
 835         else if (f & MS_MOVE)
 836                 log_debug("Moving mount %s → %s (%s \"%s\")...",
 837                           what, where, strnull(fl), strempty(o));
 838         else
 839                 log_debug("Mounting %s on %s (%s \"%s\")...",
 840                           strna(type), where, strnull(fl), strempty(o));
 841         if (mount(what, where, type, f, o) < 0)
 842                 return log_full_errno(error_log_level, errno,
 843                                       "Failed to mount %s on %s (%s \"%s\"): %m",
 844                                       strna(type), where, strnull(fl), strempty(o));
 845         return 0;
 846 }
 847
 848 int umount_verbose(const char *what) {
 849         log_debug("Umounting %s...", what);
 850         if (umount(what) < 0)
 851                 return log_error_errno(errno, "Failed to unmount %s: %m", what);
 852         return 0;
 853 }
 854 #endif // 0
 855
 856 const char *mount_propagation_flags_to_string(unsigned long flags) {
 857
 858         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 859         case 0:
 860                 return "";
 861         case MS_SHARED:
 862                 return "shared";
 863         case MS_SLAVE:
 864                 return "slave";
 865         case MS_PRIVATE:
 866                 return "private";
 867         }
 868
 869         return NULL;
 870 }
 871
 872 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 873
 874         if (isempty(name))
 875                 *ret = 0;
 876         else if (streq(name, "shared"))
 877                 *ret = MS_SHARED;
 878         else if (streq(name, "slave"))
 879                 *ret = MS_SLAVE;
 880         else if (streq(name, "private"))
 881                 *ret = MS_PRIVATE;
 882         else
 883                 return -EINVAL;
 884         return 0;
 885 }
 886
 887 #if 0 /// UNNEEDED by elogind
 888 int mount_option_mangle(
 889                 const char *options,
 890                 unsigned long mount_flags,
 891                 unsigned long *ret_mount_flags,
 892                 char **ret_remaining_options) {
 893
 894         const struct libmnt_optmap *map;
 895         _cleanup_free_ char *ret = NULL;
 896         const char *p;
 897         int r;
 898
 899         /* This extracts mount flags from the mount options, and store
 900          * non-mount-flag options to '*ret_remaining_options'.
 901          * E.g.,
 902          * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
 903          * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
 904          * "size=1630748k,mode=700,uid=1000,gid=1000".
 905          * See more examples in test-mount-utils.c.
 906          *
 907          * Note that if 'options' does not contain any non-mount-flag options,
 908          * then '*ret_remaining_options' is set to NULL instread of empty string.
 909          * Note that this does not check validity of options stored in
 910          * '*ret_remaining_options'.
 911          * Note that if 'options' is NULL, then this just copies 'mount_flags'
 912          * to '*ret_mount_flags'. */
 913
 914         assert(ret_mount_flags);
 915         assert(ret_remaining_options);
 916
 917         map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
 918         if (!map)
 919                 return -EINVAL;
 920
 921         p = options;
 922         for (;;) {
 923                 _cleanup_free_ char *word = NULL;
 924                 const struct libmnt_optmap *ent;
 925
 926                 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
 927                 if (r < 0)
 928                         return r;
 929                 if (r == 0)
 930                         break;
 931
 932                 for (ent = map; ent->name; ent++) {
 933                         /* All entries in MNT_LINUX_MAP do not take any argument.
 934                          * Thus, ent->name does not contain "=" or "[=]". */
 935                         if (!streq(word, ent->name))
 936                                 continue;
 937
 938                         if (!(ent->mask & MNT_INVERT))
 939                                 mount_flags |= ent->id;
 940                         else if (mount_flags & ent->id)
 941                                 mount_flags ^= ent->id;
 942
 943                         break;
 944                 }
 945
 946                 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
 947                 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
 948                         return -ENOMEM;
 949         }
 950
 951         *ret_mount_flags = mount_flags;
 952         *ret_remaining_options = TAKE_PTR(ret);
 953
 954         return 0;
 955 }
 956 #endif // 0