src/basic/mount-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 //#include <stdio_ext.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <sys/mount.h>
  26 #include <sys/stat.h>
  27 #include <sys/statvfs.h>
  28 #include <unistd.h>
  29
  30 /* Include later */
  31 //#include <libmount.h>
  32
  33 #include "alloc-util.h"
  34 #include "escape.h"
  35 //#include "extract-word.h"
  36 #include "fd-util.h"
  37 #include "fileio.h"
  38 #include "fs-util.h"
  39 #include "hashmap.h"
  40 #include "mount-util.h"
  41 #include "parse-util.h"
  42 #include "path-util.h"
  43 #include "set.h"
  44 #include "stdio-util.h"
  45 #include "string-util.h"
  46 #include "strv.h"
  47
  48 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
  49  * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
  50  * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
  51  * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
  52  * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
  53  * with large file handles anyway. */
  54 #define ORIGINAL_MAX_HANDLE_SZ 128
  55
  56 int name_to_handle_at_loop(
  57                 int fd,
  58                 const char *path,
  59                 struct file_handle **ret_handle,
  60                 int *ret_mnt_id,
  61                 int flags) {
  62
  63         _cleanup_free_ struct file_handle *h = NULL;
  64         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  65
  66         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  67          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  68          * start value, it is not an upper bound on the buffer size required.
  69          *
  70          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  71          * as NULL if there's no interest in either. */
  72
  73         for (;;) {
  74                 int mnt_id = -1;
  75
  76                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  77                 if (!h)
  78                         return -ENOMEM;
  79
  80                 h->handle_bytes = n;
  81
  82                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  83
  84                         if (ret_handle) {
  85                                 *ret_handle = h;
  86                                 h = NULL;
  87                         }
  88
  89                         if (ret_mnt_id)
  90                                 *ret_mnt_id = mnt_id;
  91
  92                         return 0;
  93                 }
  94                 if (errno != EOVERFLOW)
  95                         return -errno;
  96
  97                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  98
  99                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
 100                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
 101                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
 102
 103                         *ret_mnt_id = mnt_id;
 104                         return 0;
 105                 }
 106
 107                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
 108                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
 109                  * buffer. In that case propagate EOVERFLOW */
 110                 if (h->handle_bytes <= n)
 111                         return -EOVERFLOW;
 112
 113                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
 114                 n = h->handle_bytes;
 115                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
 116                         return -EOVERFLOW;
 117
 118                 h = mfree(h);
 119         }
 120 }
 121
 122 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
 123         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 124         _cleanup_free_ char *fdinfo = NULL;
 125         _cleanup_close_ int subfd = -1;
 126         char *p;
 127         int r;
 128
 129         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 130                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 131         else {
 132                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
 133                 if (subfd < 0)
 134                         return -errno;
 135
 136                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 137         }
 138
 139         r = read_full_file(path, &fdinfo, NULL);
 140         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 141                 return -EOPNOTSUPP;
 142         if (r < 0)
 143                 return r;
 144
 145         p = startswith(fdinfo, "mnt_id:");
 146         if (!p) {
 147                 p = strstr(fdinfo, "\nmnt_id:");
 148                 if (!p) /* The mnt_id field is a relatively new addition */
 149                         return -EOPNOTSUPP;
 150
 151                 p += 8;
 152         }
 153
 154         p += strspn(p, WHITESPACE);
 155         p[strcspn(p, WHITESPACE)] = 0;
 156
 157         return safe_atoi(p, mnt_id);
 158 }
 159
 160 int fd_is_mount_point(int fd, const char *filename, int flags) {
 161         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 162         int mount_id = -1, mount_id_parent = -1;
 163         bool nosupp = false, check_st_dev = true;
 164         struct stat a, b;
 165         int r;
 166
 167         assert(fd >= 0);
 168         assert(filename);
 169
 170         /* First we will try the name_to_handle_at() syscall, which
 171          * tells us the mount id and an opaque file "handle". It is
 172          * not supported everywhere though (kernel compile-time
 173          * option, not all file systems are hooked up). If it works
 174          * the mount id is usually good enough to tell us whether
 175          * something is a mount point.
 176          *
 177          * If that didn't work we will try to read the mount id from
 178          * /proc/self/fdinfo/<fd>. This is almost as good as
 179          * name_to_handle_at(), however, does not return the
 180          * opaque file handle. The opaque file handle is pretty useful
 181          * to detect the root directory, which we should always
 182          * consider a mount point. Hence we use this only as
 183          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 184          * kernel addition.
 185          *
 186          * As last fallback we do traditional fstat() based st_dev
 187          * comparisons. This is how things were traditionally done,
 188          * but unionfs breaks this since it exposes file
 189          * systems with a variety of st_dev reported. Also, btrfs
 190          * subvolumes have different st_dev, even though they aren't
 191          * real mounts of their own. */
 192
 193         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 194         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 195                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 196                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 197                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 198                  * (EINVAL): fall back to simpler logic. */
 199                 goto fallback_fdinfo;
 200         else if (r == -EOPNOTSUPP)
 201                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 202                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 203                  * logic */
 204                 nosupp = true;
 205         else if (r < 0)
 206                 return r;
 207
 208         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 209         if (r == -EOPNOTSUPP) {
 210                 if (nosupp)
 211                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 212                         goto fallback_fdinfo;
 213                 else
 214                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 215                          * it must be a mount point. */
 216                         return 1;
 217         } else if (r < 0)
 218                 return r;
 219
 220         /* The parent can do name_to_handle_at() but the
 221          * directory we are interested in can't? If so, it
 222          * must be a mount point. */
 223         if (nosupp)
 224                 return 1;
 225
 226         /* If the file handle for the directory we are
 227          * interested in and its parent are identical, we
 228          * assume this is the root directory, which is a mount
 229          * point. */
 230
 231         if (h->handle_bytes == h_parent->handle_bytes &&
 232             h->handle_type == h_parent->handle_type &&
 233             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 234                 return 1;
 235
 236         return mount_id != mount_id_parent;
 237
 238 fallback_fdinfo:
 239         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 240         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 241                 goto fallback_fstat;
 242         if (r < 0)
 243                 return r;
 244
 245         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 246         if (r < 0)
 247                 return r;
 248
 249         if (mount_id != mount_id_parent)
 250                 return 1;
 251
 252         /* Hmm, so, the mount ids are the same. This leaves one
 253          * special case though for the root file system. For that,
 254          * let's see if the parent directory has the same inode as we
 255          * are interested in. Hence, let's also do fstat() checks now,
 256          * too, but avoid the st_dev comparisons, since they aren't
 257          * that useful on unionfs mounts. */
 258         check_st_dev = false;
 259
 260 fallback_fstat:
 261         /* yay for fstatat() taking a different set of flags than the other
 262          * _at() above */
 263         if (flags & AT_SYMLINK_FOLLOW)
 264                 flags &= ~AT_SYMLINK_FOLLOW;
 265         else
 266                 flags |= AT_SYMLINK_NOFOLLOW;
 267         if (fstatat(fd, filename, &a, flags) < 0)
 268                 return -errno;
 269
 270         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 271                 return -errno;
 272
 273         /* A directory with same device and inode as its parent? Must
 274          * be the root directory */
 275         if (a.st_dev == b.st_dev &&
 276             a.st_ino == b.st_ino)
 277                 return 1;
 278
 279         return check_st_dev && (a.st_dev != b.st_dev);
 280 }
 281
 282 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 283 int path_is_mount_point(const char *t, const char *root, int flags) {
 284         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 285         _cleanup_close_ int fd = -1;
 286         int r;
 287
 288         assert(t);
 289         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 290
 291         if (path_equal(t, "/"))
 292                 return 1;
 293
 294         /* we need to resolve symlinks manually, we can't just rely on
 295          * fd_is_mount_point() to do that for us; if we have a structure like
 296          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 297          * look at needs to be /usr, not /. */
 298         if (flags & AT_SYMLINK_FOLLOW) {
 299                 r = chase_symlinks(t, root, 0, &canonical);
 300                 if (r < 0)
 301                         return r;
 302
 303                 t = canonical;
 304         }
 305
 306         parent = dirname_malloc(t);
 307         if (!parent)
 308                 return -ENOMEM;
 309
 310         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 311         if (fd < 0)
 312                 return -errno;
 313
 314         return fd_is_mount_point(fd, last_path_component(t), flags);
 315 }
 316
 317 int path_get_mnt_id(const char *path, int *ret) {
 318         int r;
 319
 320         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 321         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 322                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 323
 324         return r;
 325 }
 326
 327 #if 0 /// UNNEEDED by elogind
 328 int umount_recursive(const char *prefix, int flags) {
 329         bool again;
 330         int n = 0, r;
 331
 332         /* Try to umount everything recursively below a
 333          * directory. Also, take care of stacked mounts, and keep
 334          * unmounting them until they are gone. */
 335
 336         do {
 337                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 338
 339                 again = false;
 340                 r = 0;
 341
 342                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 343                 if (!proc_self_mountinfo)
 344                         return -errno;
 345
 346                 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 347
 348                 for (;;) {
 349                         _cleanup_free_ char *path = NULL, *p = NULL;
 350                         int k;
 351
 352                         k = fscanf(proc_self_mountinfo,
 353                                    "%*s "       /* (1) mount id */
 354                                    "%*s "       /* (2) parent id */
 355                                    "%*s "       /* (3) major:minor */
 356                                    "%*s "       /* (4) root */
 357                                    "%ms "       /* (5) mount point */
 358                                    "%*s"        /* (6) mount options */
 359                                    "%*[^-]"     /* (7) optional fields */
 360                                    "- "         /* (8) separator */
 361                                    "%*s "       /* (9) file system type */
 362                                    "%*s"        /* (10) mount source */
 363                                    "%*s"        /* (11) mount options 2 */
 364                                    "%*[^\n]",   /* some rubbish at the end */
 365                                    &path);
 366                         if (k != 1) {
 367                                 if (k == EOF)
 368                                         break;
 369
 370                                 continue;
 371                         }
 372
 373                         r = cunescape(path, UNESCAPE_RELAX, &p);
 374                         if (r < 0)
 375                                 return r;
 376
 377                         if (!path_startswith(p, prefix))
 378                                 continue;
 379
 380                         if (umount2(p, flags) < 0) {
 381                                 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
 382                                 continue;
 383                         }
 384
 385                         log_debug("Successfully unmounted %s", p);
 386
 387                         again = true;
 388                         n++;
 389
 390                         break;
 391                 }
 392
 393         } while (again);
 394
 395         return r ? r : n;
 396 }
 397
 398 static int get_mount_flags(const char *path, unsigned long *flags) {
 399         struct statvfs buf;
 400
 401         if (statvfs(path, &buf) < 0)
 402                 return -errno;
 403         *flags = buf.f_flag;
 404         return 0;
 405 }
 406
 407 /* Use this function only if do you have direct access to /proc/self/mountinfo
 408  * and need the caller to open it for you. This is the case when /proc is
 409  * masked or not mounted. Otherwise, use bind_remount_recursive. */
 410 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
 411         _cleanup_set_free_free_ Set *done = NULL;
 412         _cleanup_free_ char *cleaned = NULL;
 413         int r;
 414
 415         assert(proc_self_mountinfo);
 416
 417         /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
 418          * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 419          * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
 420          * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
 421          * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
 422          * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
 423          * future submounts that have been triggered via autofs.
 424          *
 425          * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
 426          * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 427
 428         cleaned = strdup(prefix);
 429         if (!cleaned)
 430                 return -ENOMEM;
 431
 432         path_kill_slashes(cleaned);
 433
 434         done = set_new(&path_hash_ops);
 435         if (!done)
 436                 return -ENOMEM;
 437
 438         for (;;) {
 439                 _cleanup_set_free_free_ Set *todo = NULL;
 440                 bool top_autofs = false;
 441                 char *x;
 442                 unsigned long orig_flags;
 443
 444                 todo = set_new(&path_hash_ops);
 445                 if (!todo)
 446                         return -ENOMEM;
 447
 448                 rewind(proc_self_mountinfo);
 449
 450                 for (;;) {
 451                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 452                         int k;
 453
 454                         k = fscanf(proc_self_mountinfo,
 455                                    "%*s "       /* (1) mount id */
 456                                    "%*s "       /* (2) parent id */
 457                                    "%*s "       /* (3) major:minor */
 458                                    "%*s "       /* (4) root */
 459                                    "%ms "       /* (5) mount point */
 460                                    "%*s"        /* (6) mount options (superblock) */
 461                                    "%*[^-]"     /* (7) optional fields */
 462                                    "- "         /* (8) separator */
 463                                    "%ms "       /* (9) file system type */
 464                                    "%*s"        /* (10) mount source */
 465                                    "%*s"        /* (11) mount options (bind mount) */
 466                                    "%*[^\n]",   /* some rubbish at the end */
 467                                    &path,
 468                                    &type);
 469                         if (k != 2) {
 470                                 if (k == EOF)
 471                                         break;
 472
 473                                 continue;
 474                         }
 475
 476                         r = cunescape(path, UNESCAPE_RELAX, &p);
 477                         if (r < 0)
 478                                 return r;
 479
 480                         if (!path_startswith(p, cleaned))
 481                                 continue;
 482
 483                         /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
 484                          * operate on. */
 485                         if (!path_equal(cleaned, p)) {
 486                                 bool blacklisted = false;
 487                                 char **i;
 488
 489                                 STRV_FOREACH(i, blacklist) {
 490
 491                                         if (path_equal(*i, cleaned))
 492                                                 continue;
 493
 494                                         if (!path_startswith(*i, cleaned))
 495                                                 continue;
 496
 497                                         if (path_startswith(p, *i)) {
 498                                                 blacklisted = true;
 499                                                 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
 500                                                 break;
 501                                         }
 502                                 }
 503                                 if (blacklisted)
 504                                         continue;
 505                         }
 506
 507                         /* Let's ignore autofs mounts.  If they aren't
 508                          * triggered yet, we want to avoid triggering
 509                          * them, as we don't make any guarantees for
 510                          * future submounts anyway.  If they are
 511                          * already triggered, then we will find
 512                          * another entry for this. */
 513                         if (streq(type, "autofs")) {
 514                                 top_autofs = top_autofs || path_equal(cleaned, p);
 515                                 continue;
 516                         }
 517
 518                         if (!set_contains(done, p)) {
 519                                 r = set_consume(todo, p);
 520                                 p = NULL;
 521                                 if (r == -EEXIST)
 522                                         continue;
 523                                 if (r < 0)
 524                                         return r;
 525                         }
 526                 }
 527
 528                 /* If we have no submounts to process anymore and if
 529                  * the root is either already done, or an autofs, we
 530                  * are done */
 531                 if (set_isempty(todo) &&
 532                     (top_autofs || set_contains(done, cleaned)))
 533                         return 0;
 534
 535                 if (!set_contains(done, cleaned) &&
 536                     !set_contains(todo, cleaned)) {
 537                         /* The prefix directory itself is not yet a mount, make it one. */
 538                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 539                                 return -errno;
 540
 541                         orig_flags = 0;
 542                         (void) get_mount_flags(cleaned, &orig_flags);
 543                         orig_flags &= ~MS_RDONLY;
 544
 545                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 546                                 return -errno;
 547
 548                         log_debug("Made top-level directory %s a mount point.", prefix);
 549
 550                         x = strdup(cleaned);
 551                         if (!x)
 552                                 return -ENOMEM;
 553
 554                         r = set_consume(done, x);
 555                         if (r < 0)
 556                                 return r;
 557                 }
 558
 559                 while ((x = set_steal_first(todo))) {
 560
 561                         r = set_consume(done, x);
 562                         if (IN_SET(r, 0, -EEXIST))
 563                                 continue;
 564                         if (r < 0)
 565                                 return r;
 566
 567                         /* Deal with mount points that are obstructed by a later mount */
 568                         r = path_is_mount_point(x, NULL, 0);
 569                         if (IN_SET(r, 0, -ENOENT))
 570                                 continue;
 571                         if (r < 0)
 572                                 return r;
 573
 574                         /* Try to reuse the original flag set */
 575                         orig_flags = 0;
 576                         (void) get_mount_flags(x, &orig_flags);
 577                         orig_flags &= ~MS_RDONLY;
 578
 579                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 580                                 return -errno;
 581
 582                         log_debug("Remounted %s read-only.", x);
 583                 }
 584         }
 585 }
 586
 587 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
 588         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 589
 590         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 591         if (!proc_self_mountinfo)
 592                 return -errno;
 593
 594         (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 595
 596         return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
 597 }
 598
 599 int mount_move_root(const char *path) {
 600         assert(path);
 601
 602         if (chdir(path) < 0)
 603                 return -errno;
 604
 605         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 606                 return -errno;
 607
 608         if (chroot(".") < 0)
 609                 return -errno;
 610
 611         if (chdir("/") < 0)
 612                 return -errno;
 613
 614         return 0;
 615 }
 616
 617 bool fstype_is_network(const char *fstype) {
 618         const char *x;
 619
 620         x = startswith(fstype, "fuse.");
 621         if (x)
 622                 fstype = x;
 623
 624         return STR_IN_SET(fstype,
 625                           "afs",
 626                           "cifs",
 627                           "smbfs",
 628                           "sshfs",
 629                           "ncpfs",
 630                           "ncp",
 631                           "nfs",
 632                           "nfs4",
 633                           "gfs",
 634                           "gfs2",
 635                           "glusterfs",
 636                           "pvfs2", /* OrangeFS */
 637                           "ocfs2",
 638                           "lustre");
 639 }
 640
 641 bool fstype_is_api_vfs(const char *fstype) {
 642         return STR_IN_SET(fstype,
 643                           "autofs",
 644                           "bpf",
 645                           "cgroup",
 646                           "cgroup2",
 647                           "configfs",
 648                           "cpuset",
 649                           "debugfs",
 650                           "devpts",
 651                           "devtmpfs",
 652                           "efivarfs",
 653                           "fusectl",
 654                           "hugetlbfs",
 655                           "mqueue",
 656                           "proc",
 657                           "pstore",
 658                           "ramfs",
 659                           "securityfs",
 660                           "sysfs",
 661                           "tmpfs",
 662                           "tracefs");
 663 }
 664
 665 bool fstype_is_ro(const char *fstype) {
 666         /* All Linux file systems that are necessarily read-only */
 667         return STR_IN_SET(fstype,
 668                           "DM_verity_hash",
 669                           "iso9660",
 670                           "squashfs");
 671 }
 672
 673 bool fstype_can_discard(const char *fstype) {
 674         return STR_IN_SET(fstype,
 675                           "btrfs",
 676                           "ext4",
 677                           "vfat",
 678                           "xfs");
 679 }
 680
 681 bool fstype_can_uid_gid(const char *fstype) {
 682
 683         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 684          * current and future. */
 685
 686         return STR_IN_SET(fstype,
 687                           "adfs",
 688                           "fat",
 689                           "hfs",
 690                           "hpfs",
 691                           "iso9660",
 692                           "msdos",
 693                           "ntfs",
 694                           "vfat");
 695 }
 696
 697 int repeat_unmount(const char *path, int flags) {
 698         bool done = false;
 699
 700         assert(path);
 701
 702         /* If there are multiple mounts on a mount point, this
 703          * removes them all */
 704
 705         for (;;) {
 706                 if (umount2(path, flags) < 0) {
 707
 708                         if (errno == EINVAL)
 709                                 return done;
 710
 711                         return -errno;
 712                 }
 713
 714                 done = true;
 715         }
 716 }
 717 #endif // 0
 718
 719 const char* mode_to_inaccessible_node(mode_t mode) {
 720         /* This function maps a node type to the correspondent inaccessible node type.
 721          * Character and block inaccessible devices may not be created (because major=0 and minor=0),
 722          * in such case we map character and block devices to the inaccessible node type socket. */
 723         switch(mode & S_IFMT) {
 724                 case S_IFREG:
 725                         return "/run/systemd/inaccessible/reg";
 726                 case S_IFDIR:
 727                         return "/run/systemd/inaccessible/dir";
 728                 case S_IFCHR:
 729                         if (access("/run/systemd/inaccessible/chr", F_OK) == 0)
 730                                 return "/run/systemd/inaccessible/chr";
 731                         return "/run/systemd/inaccessible/sock";
 732                 case S_IFBLK:
 733                         if (access("/run/systemd/inaccessible/blk", F_OK) == 0)
 734                                 return "/run/systemd/inaccessible/blk";
 735                         return "/run/systemd/inaccessible/sock";
 736                 case S_IFIFO:
 737                         return "/run/systemd/inaccessible/fifo";
 738                 case S_IFSOCK:
 739                         return "/run/systemd/inaccessible/sock";
 740         }
 741         return NULL;
 742 }
 743
 744 #if 0 /// UNNEEDED by elogind
 745 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
 746 static char* mount_flags_to_string(long unsigned flags) {
 747         char *x;
 748         _cleanup_free_ char *y = NULL;
 749         long unsigned overflow;
 750
 751         overflow = flags & ~(MS_RDONLY |
 752                              MS_NOSUID |
 753                              MS_NODEV |
 754                              MS_NOEXEC |
 755                              MS_SYNCHRONOUS |
 756                              MS_REMOUNT |
 757                              MS_MANDLOCK |
 758                              MS_DIRSYNC |
 759                              MS_NOATIME |
 760                              MS_NODIRATIME |
 761                              MS_BIND |
 762                              MS_MOVE |
 763                              MS_REC |
 764                              MS_SILENT |
 765                              MS_POSIXACL |
 766                              MS_UNBINDABLE |
 767                              MS_PRIVATE |
 768                              MS_SLAVE |
 769                              MS_SHARED |
 770                              MS_RELATIME |
 771                              MS_KERNMOUNT |
 772                              MS_I_VERSION |
 773                              MS_STRICTATIME |
 774                              MS_LAZYTIME);
 775
 776         if (flags == 0 || overflow != 0)
 777                 if (asprintf(&y, "%lx", overflow) < 0)
 778                         return NULL;
 779
 780         x = strjoin(FLAG(MS_RDONLY),
 781                     FLAG(MS_NOSUID),
 782                     FLAG(MS_NODEV),
 783                     FLAG(MS_NOEXEC),
 784                     FLAG(MS_SYNCHRONOUS),
 785                     FLAG(MS_REMOUNT),
 786                     FLAG(MS_MANDLOCK),
 787                     FLAG(MS_DIRSYNC),
 788                     FLAG(MS_NOATIME),
 789                     FLAG(MS_NODIRATIME),
 790                     FLAG(MS_BIND),
 791                     FLAG(MS_MOVE),
 792                     FLAG(MS_REC),
 793                     FLAG(MS_SILENT),
 794                     FLAG(MS_POSIXACL),
 795                     FLAG(MS_UNBINDABLE),
 796                     FLAG(MS_PRIVATE),
 797                     FLAG(MS_SLAVE),
 798                     FLAG(MS_SHARED),
 799                     FLAG(MS_RELATIME),
 800                     FLAG(MS_KERNMOUNT),
 801                     FLAG(MS_I_VERSION),
 802                     FLAG(MS_STRICTATIME),
 803                     FLAG(MS_LAZYTIME),
 804                     y);
 805         if (!x)
 806                 return NULL;
 807         if (!y)
 808                 x[strlen(x) - 1] = '\0'; /* truncate the last | */
 809         return x;
 810 }
 811
 812 int mount_verbose(
 813                 int error_log_level,
 814                 const char *what,
 815                 const char *where,
 816                 const char *type,
 817                 unsigned long flags,
 818                 const char *options) {
 819
 820         _cleanup_free_ char *fl = NULL, *o = NULL;
 821         unsigned long f;
 822         int r;
 823
 824         r = mount_option_mangle(options, flags, &f, &o);
 825         if (r < 0)
 826                 return log_full_errno(error_log_level, r,
 827                                       "Failed to mangle mount options %s: %m",
 828                                       strempty(options));
 829
 830         fl = mount_flags_to_string(f);
 831
 832         if ((f & MS_REMOUNT) && !what && !type)
 833                 log_debug("Remounting %s (%s \"%s\")...",
 834                           where, strnull(fl), strempty(o));
 835         else if (!what && !type)
 836                 log_debug("Mounting %s (%s \"%s\")...",
 837                           where, strnull(fl), strempty(o));
 838         else if ((f & MS_BIND) && !type)
 839                 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
 840                           what, where, strnull(fl), strempty(o));
 841         else if (f & MS_MOVE)
 842                 log_debug("Moving mount %s → %s (%s \"%s\")...",
 843                           what, where, strnull(fl), strempty(o));
 844         else
 845                 log_debug("Mounting %s on %s (%s \"%s\")...",
 846                           strna(type), where, strnull(fl), strempty(o));
 847         if (mount(what, where, type, f, o) < 0)
 848                 return log_full_errno(error_log_level, errno,
 849                                       "Failed to mount %s on %s (%s \"%s\"): %m",
 850                                       strna(type), where, strnull(fl), strempty(o));
 851         return 0;
 852 }
 853
 854 int umount_verbose(const char *what) {
 855         log_debug("Umounting %s...", what);
 856         if (umount(what) < 0)
 857                 return log_error_errno(errno, "Failed to unmount %s: %m", what);
 858         return 0;
 859 }
 860 #endif // 0
 861
 862 const char *mount_propagation_flags_to_string(unsigned long flags) {
 863
 864         switch (flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE)) {
 865         case 0:
 866                 return "";
 867         case MS_SHARED:
 868                 return "shared";
 869         case MS_SLAVE:
 870                 return "slave";
 871         case MS_PRIVATE:
 872                 return "private";
 873         }
 874
 875         return NULL;
 876 }
 877
 878
 879 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
 880
 881         if (isempty(name))
 882                 *ret = 0;
 883         else if (streq(name, "shared"))
 884                 *ret = MS_SHARED;
 885         else if (streq(name, "slave"))
 886                 *ret = MS_SLAVE;
 887         else if (streq(name, "private"))
 888                 *ret = MS_PRIVATE;
 889         else
 890                 return -EINVAL;
 891         return 0;
 892 }
 893
 894 int mount_option_mangle(
 895                 const char *options,
 896                 unsigned long mount_flags,
 897                 unsigned long *ret_mount_flags,
 898                 char **ret_remaining_options) {
 899
 900         const struct libmnt_optmap *map;
 901         _cleanup_free_ char *ret = NULL;
 902         const char *p;
 903         int r;
 904
 905         /* This extracts mount flags from the mount options, and store
 906          * non-mount-flag options to '*ret_remaining_options'.
 907          * E.g.,
 908          * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
 909          * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
 910          * "size=1630748k,mode=700,uid=1000,gid=1000".
 911          * See more examples in test-mount-utils.c.
 912          *
 913          * Note that if 'options' does not contain any non-mount-flag options,
 914          * then '*ret_remaining_options' is set to NULL instread of empty string.
 915          * Note that this does not check validity of options stored in
 916          * '*ret_remaining_options'.
 917          * Note that if 'options' is NULL, then this just copies 'mount_flags'
 918          * to '*ret_mount_flags'. */
 919
 920         assert(ret_mount_flags);
 921         assert(ret_remaining_options);
 922
 923         map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
 924         if (!map)
 925                 return -EINVAL;
 926
 927         p = options;
 928         for (;;) {
 929                 _cleanup_free_ char *word = NULL;
 930                 const struct libmnt_optmap *ent;
 931
 932                 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
 933                 if (r < 0)
 934                         return r;
 935                 if (r == 0)
 936                         break;
 937
 938                 for (ent = map; ent->name; ent++) {
 939                         /* All entries in MNT_LINUX_MAP do not take any argument.
 940                          * Thus, ent->name does not contain "=" or "[=]". */
 941                         if (!streq(word, ent->name))
 942                                 continue;
 943
 944                         if (!(ent->mask & MNT_INVERT))
 945                                 mount_flags |= ent->id;
 946                         else if (mount_flags & ent->id)
 947                                 mount_flags ^= ent->id;
 948
 949                         break;
 950                 }
 951
 952                 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
 953                 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
 954                         return -ENOMEM;
 955         }
 956
 957         *ret_mount_flags = mount_flags;
 958         *ret_remaining_options = ret;
 959         ret = NULL;
 960
 961         return 0;
 962 }