src/basic/mount-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 //#include <stdio_ext.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <sys/mount.h>
  26 #include <sys/stat.h>
  27 #include <sys/statvfs.h>
  28 #include <unistd.h>
  29
  30 /* Include later */
  31 //#include <libmount.h>
  32
  33 #include "alloc-util.h"
  34 #include "escape.h"
  35 //#include "extract-word.h"
  36 #include "fd-util.h"
  37 #include "fileio.h"
  38 #include "fs-util.h"
  39 #include "hashmap.h"
  40 #include "mount-util.h"
  41 #include "parse-util.h"
  42 #include "path-util.h"
  43 #include "set.h"
  44 #include "stdio-util.h"
  45 #include "string-util.h"
  46 #include "strv.h"
  47
  48 /* This is the original MAX_HANDLE_SZ definition from the kernel, when the API was introduced. We use that in place of
  49  * any more currently defined value to future-proof things: if the size is increased in the API headers, and our code
  50  * is recompiled then it would cease working on old kernels, as those refuse any sizes larger than this value with
  51  * EINVAL right-away. Hence, let's disconnect ourselves from any such API changes, and stick to the original definition
  52  * from when it was introduced. We use it as a start value only anyway (see below), and hence should be able to deal
  53  * with large file handles anyway. */
  54 #define ORIGINAL_MAX_HANDLE_SZ 128
  55
  56 int name_to_handle_at_loop(
  57                 int fd,
  58                 const char *path,
  59                 struct file_handle **ret_handle,
  60                 int *ret_mnt_id,
  61                 int flags) {
  62
  63         _cleanup_free_ struct file_handle *h = NULL;
  64         size_t n = ORIGINAL_MAX_HANDLE_SZ;
  65
  66         /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
  67          * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
  68          * start value, it is not an upper bound on the buffer size required.
  69          *
  70          * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
  71          * as NULL if there's no interest in either. */
  72
  73         for (;;) {
  74                 int mnt_id = -1;
  75
  76                 h = malloc0(offsetof(struct file_handle, f_handle) + n);
  77                 if (!h)
  78                         return -ENOMEM;
  79
  80                 h->handle_bytes = n;
  81
  82                 if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
  83
  84                         if (ret_handle)
  85                                 *ret_handle = TAKE_PTR(h);
  86
  87                         if (ret_mnt_id)
  88                                 *ret_mnt_id = mnt_id;
  89
  90                         return 0;
  91                 }
  92                 if (errno != EOVERFLOW)
  93                         return -errno;
  94
  95                 if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
  96
  97                         /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW when the
  98                          * buffer is too small, but that's undocumented. Hence, let's make use of this if it appears to
  99                          * be filled in, and the caller was interested in only the mount ID an nothing else. */
 100
 101                         *ret_mnt_id = mnt_id;
 102                         return 0;
 103                 }
 104
 105                 /* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
 106                  * else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
 107                  * buffer. In that case propagate EOVERFLOW */
 108                 if (h->handle_bytes <= n)
 109                         return -EOVERFLOW;
 110
 111                 /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned. */
 112                 n = h->handle_bytes;
 113                 if (offsetof(struct file_handle, f_handle) + n < n) /* check for addition overflow */
 114                         return -EOVERFLOW;
 115
 116                 h = mfree(h);
 117         }
 118 }
 119
 120 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
 121         char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
 122         _cleanup_free_ char *fdinfo = NULL;
 123         _cleanup_close_ int subfd = -1;
 124         char *p;
 125         int r;
 126
 127         if ((flags & AT_EMPTY_PATH) && isempty(filename))
 128                 xsprintf(path, "/proc/self/fdinfo/%i", fd);
 129         else {
 130                 subfd = openat(fd, filename, O_CLOEXEC|O_PATH);
 131                 if (subfd < 0)
 132                         return -errno;
 133
 134                 xsprintf(path, "/proc/self/fdinfo/%i", subfd);
 135         }
 136
 137         r = read_full_file(path, &fdinfo, NULL);
 138         if (r == -ENOENT) /* The fdinfo directory is a relatively new addition */
 139                 return -EOPNOTSUPP;
 140         if (r < 0)
 141                 return r;
 142
 143         p = startswith(fdinfo, "mnt_id:");
 144         if (!p) {
 145                 p = strstr(fdinfo, "\nmnt_id:");
 146                 if (!p) /* The mnt_id field is a relatively new addition */
 147                         return -EOPNOTSUPP;
 148
 149                 p += 8;
 150         }
 151
 152         p += strspn(p, WHITESPACE);
 153         p[strcspn(p, WHITESPACE)] = 0;
 154
 155         return safe_atoi(p, mnt_id);
 156 }
 157
 158 int fd_is_mount_point(int fd, const char *filename, int flags) {
 159         _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
 160         int mount_id = -1, mount_id_parent = -1;
 161         bool nosupp = false, check_st_dev = true;
 162         struct stat a, b;
 163         int r;
 164
 165         assert(fd >= 0);
 166         assert(filename);
 167
 168         /* First we will try the name_to_handle_at() syscall, which
 169          * tells us the mount id and an opaque file "handle". It is
 170          * not supported everywhere though (kernel compile-time
 171          * option, not all file systems are hooked up). If it works
 172          * the mount id is usually good enough to tell us whether
 173          * something is a mount point.
 174          *
 175          * If that didn't work we will try to read the mount id from
 176          * /proc/self/fdinfo/<fd>. This is almost as good as
 177          * name_to_handle_at(), however, does not return the
 178          * opaque file handle. The opaque file handle is pretty useful
 179          * to detect the root directory, which we should always
 180          * consider a mount point. Hence we use this only as
 181          * fallback. Exporting the mnt_id in fdinfo is a pretty recent
 182          * kernel addition.
 183          *
 184          * As last fallback we do traditional fstat() based st_dev
 185          * comparisons. This is how things were traditionally done,
 186          * but unionfs breaks this since it exposes file
 187          * systems with a variety of st_dev reported. Also, btrfs
 188          * subvolumes have different st_dev, even though they aren't
 189          * real mounts of their own. */
 190
 191         r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
 192         if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
 193                 /* This kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall was blocked
 194                  * (EACCES/EPERM; maybe through seccomp, because we are running inside of a container?), or the mount
 195                  * point is not triggered yet (EOVERFLOW, think nfs4), or some general name_to_handle_at() flakiness
 196                  * (EINVAL): fall back to simpler logic. */
 197                 goto fallback_fdinfo;
 198         else if (r == -EOPNOTSUPP)
 199                 /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
 200                  * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
 201                  * logic */
 202                 nosupp = true;
 203         else if (r < 0)
 204                 return r;
 205
 206         r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
 207         if (r == -EOPNOTSUPP) {
 208                 if (nosupp)
 209                         /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
 210                         goto fallback_fdinfo;
 211                 else
 212                         /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
 213                          * it must be a mount point. */
 214                         return 1;
 215         } else if (r < 0)
 216                 return r;
 217
 218         /* The parent can do name_to_handle_at() but the
 219          * directory we are interested in can't? If so, it
 220          * must be a mount point. */
 221         if (nosupp)
 222                 return 1;
 223
 224         /* If the file handle for the directory we are
 225          * interested in and its parent are identical, we
 226          * assume this is the root directory, which is a mount
 227          * point. */
 228
 229         if (h->handle_bytes == h_parent->handle_bytes &&
 230             h->handle_type == h_parent->handle_type &&
 231             memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
 232                 return 1;
 233
 234         return mount_id != mount_id_parent;
 235
 236 fallback_fdinfo:
 237         r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
 238         if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM))
 239                 goto fallback_fstat;
 240         if (r < 0)
 241                 return r;
 242
 243         r = fd_fdinfo_mnt_id(fd, "", AT_EMPTY_PATH, &mount_id_parent);
 244         if (r < 0)
 245                 return r;
 246
 247         if (mount_id != mount_id_parent)
 248                 return 1;
 249
 250         /* Hmm, so, the mount ids are the same. This leaves one
 251          * special case though for the root file system. For that,
 252          * let's see if the parent directory has the same inode as we
 253          * are interested in. Hence, let's also do fstat() checks now,
 254          * too, but avoid the st_dev comparisons, since they aren't
 255          * that useful on unionfs mounts. */
 256         check_st_dev = false;
 257
 258 fallback_fstat:
 259         /* yay for fstatat() taking a different set of flags than the other
 260          * _at() above */
 261         if (flags & AT_SYMLINK_FOLLOW)
 262                 flags &= ~AT_SYMLINK_FOLLOW;
 263         else
 264                 flags |= AT_SYMLINK_NOFOLLOW;
 265         if (fstatat(fd, filename, &a, flags) < 0)
 266                 return -errno;
 267
 268         if (fstatat(fd, "", &b, AT_EMPTY_PATH) < 0)
 269                 return -errno;
 270
 271         /* A directory with same device and inode as its parent? Must
 272          * be the root directory */
 273         if (a.st_dev == b.st_dev &&
 274             a.st_ino == b.st_ino)
 275                 return 1;
 276
 277         return check_st_dev && (a.st_dev != b.st_dev);
 278 }
 279
 280 /* flags can be AT_SYMLINK_FOLLOW or 0 */
 281 int path_is_mount_point(const char *t, const char *root, int flags) {
 282         _cleanup_free_ char *canonical = NULL, *parent = NULL;
 283         _cleanup_close_ int fd = -1;
 284         int r;
 285
 286         assert(t);
 287         assert((flags & ~AT_SYMLINK_FOLLOW) == 0);
 288
 289         if (path_equal(t, "/"))
 290                 return 1;
 291
 292         /* we need to resolve symlinks manually, we can't just rely on
 293          * fd_is_mount_point() to do that for us; if we have a structure like
 294          * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
 295          * look at needs to be /usr, not /. */
 296         if (flags & AT_SYMLINK_FOLLOW) {
 297                 r = chase_symlinks(t, root, 0, &canonical);
 298                 if (r < 0)
 299                         return r;
 300
 301                 t = canonical;
 302         }
 303
 304         parent = dirname_malloc(t);
 305         if (!parent)
 306                 return -ENOMEM;
 307
 308         fd = openat(AT_FDCWD, parent, O_DIRECTORY|O_CLOEXEC|O_PATH);
 309         if (fd < 0)
 310                 return -errno;
 311
 312         return fd_is_mount_point(fd, last_path_component(t), flags);
 313 }
 314
 315 int path_get_mnt_id(const char *path, int *ret) {
 316         int r;
 317
 318         r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
 319         if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
 320                 return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
 321
 322         return r;
 323 }
 324
 325 #if 0 /// UNNEEDED by elogind
 326 int umount_recursive(const char *prefix, int flags) {
 327         bool again;
 328         int n = 0, r;
 329
 330         /* Try to umount everything recursively below a
 331          * directory. Also, take care of stacked mounts, and keep
 332          * unmounting them until they are gone. */
 333
 334         do {
 335                 _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 336
 337                 again = false;
 338                 r = 0;
 339
 340                 proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 341                 if (!proc_self_mountinfo)
 342                         return -errno;
 343
 344                 (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 345
 346                 for (;;) {
 347                         _cleanup_free_ char *path = NULL, *p = NULL;
 348                         int k;
 349
 350                         k = fscanf(proc_self_mountinfo,
 351                                    "%*s "       /* (1) mount id */
 352                                    "%*s "       /* (2) parent id */
 353                                    "%*s "       /* (3) major:minor */
 354                                    "%*s "       /* (4) root */
 355                                    "%ms "       /* (5) mount point */
 356                                    "%*s"        /* (6) mount options */
 357                                    "%*[^-]"     /* (7) optional fields */
 358                                    "- "         /* (8) separator */
 359                                    "%*s "       /* (9) file system type */
 360                                    "%*s"        /* (10) mount source */
 361                                    "%*s"        /* (11) mount options 2 */
 362                                    "%*[^\n]",   /* some rubbish at the end */
 363                                    &path);
 364                         if (k != 1) {
 365                                 if (k == EOF)
 366                                         break;
 367
 368                                 continue;
 369                         }
 370
 371                         r = cunescape(path, UNESCAPE_RELAX, &p);
 372                         if (r < 0)
 373                                 return r;
 374
 375                         if (!path_startswith(p, prefix))
 376                                 continue;
 377
 378                         if (umount2(p, flags) < 0) {
 379                                 r = log_debug_errno(errno, "Failed to umount %s: %m", p);
 380                                 continue;
 381                         }
 382
 383                         log_debug("Successfully unmounted %s", p);
 384
 385                         again = true;
 386                         n++;
 387
 388                         break;
 389                 }
 390
 391         } while (again);
 392
 393         return r ? r : n;
 394 }
 395
 396 static int get_mount_flags(const char *path, unsigned long *flags) {
 397         struct statvfs buf;
 398
 399         if (statvfs(path, &buf) < 0)
 400                 return -errno;
 401         *flags = buf.f_flag;
 402         return 0;
 403 }
 404
 405 /* Use this function only if do you have direct access to /proc/self/mountinfo
 406  * and need the caller to open it for you. This is the case when /proc is
 407  * masked or not mounted. Otherwise, use bind_remount_recursive. */
 408 int bind_remount_recursive_with_mountinfo(const char *prefix, bool ro, char **blacklist, FILE *proc_self_mountinfo) {
 409         _cleanup_set_free_free_ Set *done = NULL;
 410         _cleanup_free_ char *cleaned = NULL;
 411         int r;
 412
 413         assert(proc_self_mountinfo);
 414
 415         /* Recursively remount a directory (and all its submounts) read-only or read-write. If the directory is already
 416          * mounted, we reuse the mount and simply mark it MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
 417          * operation). If it isn't we first make it one. Afterwards we apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to
 418          * all submounts we can access, too. When mounts are stacked on the same mount point we only care for each
 419          * individual "top-level" mount on each point, as we cannot influence/access the underlying mounts anyway. We
 420          * do not have any effect on future submounts that might get propagated, they migt be writable. This includes
 421          * future submounts that have been triggered via autofs.
 422          *
 423          * If the "blacklist" parameter is specified it may contain a list of subtrees to exclude from the
 424          * remount operation. Note that we'll ignore the blacklist for the top-level path. */
 425
 426         cleaned = strdup(prefix);
 427         if (!cleaned)
 428                 return -ENOMEM;
 429
 430         path_kill_slashes(cleaned);
 431
 432         done = set_new(&path_hash_ops);
 433         if (!done)
 434                 return -ENOMEM;
 435
 436         for (;;) {
 437                 _cleanup_set_free_free_ Set *todo = NULL;
 438                 bool top_autofs = false;
 439                 char *x;
 440                 unsigned long orig_flags;
 441
 442                 todo = set_new(&path_hash_ops);
 443                 if (!todo)
 444                         return -ENOMEM;
 445
 446                 rewind(proc_self_mountinfo);
 447
 448                 for (;;) {
 449                         _cleanup_free_ char *path = NULL, *p = NULL, *type = NULL;
 450                         int k;
 451
 452                         k = fscanf(proc_self_mountinfo,
 453                                    "%*s "       /* (1) mount id */
 454                                    "%*s "       /* (2) parent id */
 455                                    "%*s "       /* (3) major:minor */
 456                                    "%*s "       /* (4) root */
 457                                    "%ms "       /* (5) mount point */
 458                                    "%*s"        /* (6) mount options (superblock) */
 459                                    "%*[^-]"     /* (7) optional fields */
 460                                    "- "         /* (8) separator */
 461                                    "%ms "       /* (9) file system type */
 462                                    "%*s"        /* (10) mount source */
 463                                    "%*s"        /* (11) mount options (bind mount) */
 464                                    "%*[^\n]",   /* some rubbish at the end */
 465                                    &path,
 466                                    &type);
 467                         if (k != 2) {
 468                                 if (k == EOF)
 469                                         break;
 470
 471                                 continue;
 472                         }
 473
 474                         r = cunescape(path, UNESCAPE_RELAX, &p);
 475                         if (r < 0)
 476                                 return r;
 477
 478                         if (!path_startswith(p, cleaned))
 479                                 continue;
 480
 481                         /* Ignore this mount if it is blacklisted, but only if it isn't the top-level mount we shall
 482                          * operate on. */
 483                         if (!path_equal(cleaned, p)) {
 484                                 bool blacklisted = false;
 485                                 char **i;
 486
 487                                 STRV_FOREACH(i, blacklist) {
 488
 489                                         if (path_equal(*i, cleaned))
 490                                                 continue;
 491
 492                                         if (!path_startswith(*i, cleaned))
 493                                                 continue;
 494
 495                                         if (path_startswith(p, *i)) {
 496                                                 blacklisted = true;
 497                                                 log_debug("Not remounting %s, because blacklisted by %s, called for %s", p, *i, cleaned);
 498                                                 break;
 499                                         }
 500                                 }
 501                                 if (blacklisted)
 502                                         continue;
 503                         }
 504
 505                         /* Let's ignore autofs mounts.  If they aren't
 506                          * triggered yet, we want to avoid triggering
 507                          * them, as we don't make any guarantees for
 508                          * future submounts anyway.  If they are
 509                          * already triggered, then we will find
 510                          * another entry for this. */
 511                         if (streq(type, "autofs")) {
 512                                 top_autofs = top_autofs || path_equal(cleaned, p);
 513                                 continue;
 514                         }
 515
 516                         if (!set_contains(done, p)) {
 517                                 r = set_consume(todo, p);
 518                                 p = NULL;
 519                                 if (r == -EEXIST)
 520                                         continue;
 521                                 if (r < 0)
 522                                         return r;
 523                         }
 524                 }
 525
 526                 /* If we have no submounts to process anymore and if
 527                  * the root is either already done, or an autofs, we
 528                  * are done */
 529                 if (set_isempty(todo) &&
 530                     (top_autofs || set_contains(done, cleaned)))
 531                         return 0;
 532
 533                 if (!set_contains(done, cleaned) &&
 534                     !set_contains(todo, cleaned)) {
 535                         /* The prefix directory itself is not yet a mount, make it one. */
 536                         if (mount(cleaned, cleaned, NULL, MS_BIND|MS_REC, NULL) < 0)
 537                                 return -errno;
 538
 539                         orig_flags = 0;
 540                         (void) get_mount_flags(cleaned, &orig_flags);
 541                         orig_flags &= ~MS_RDONLY;
 542
 543                         if (mount(NULL, prefix, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 544                                 return -errno;
 545
 546                         log_debug("Made top-level directory %s a mount point.", prefix);
 547
 548                         x = strdup(cleaned);
 549                         if (!x)
 550                                 return -ENOMEM;
 551
 552                         r = set_consume(done, x);
 553                         if (r < 0)
 554                                 return r;
 555                 }
 556
 557                 while ((x = set_steal_first(todo))) {
 558
 559                         r = set_consume(done, x);
 560                         if (IN_SET(r, 0, -EEXIST))
 561                                 continue;
 562                         if (r < 0)
 563                                 return r;
 564
 565                         /* Deal with mount points that are obstructed by a later mount */
 566                         r = path_is_mount_point(x, NULL, 0);
 567                         if (IN_SET(r, 0, -ENOENT))
 568                                 continue;
 569                         if (r < 0)
 570                                 return r;
 571
 572                         /* Try to reuse the original flag set */
 573                         orig_flags = 0;
 574                         (void) get_mount_flags(x, &orig_flags);
 575                         orig_flags &= ~MS_RDONLY;
 576
 577                         if (mount(NULL, x, NULL, orig_flags|MS_BIND|MS_REMOUNT|(ro ? MS_RDONLY : 0), NULL) < 0)
 578                                 return -errno;
 579
 580                         log_debug("Remounted %s read-only.", x);
 581                 }
 582         }
 583 }
 584
 585 int bind_remount_recursive(const char *prefix, bool ro, char **blacklist) {
 586         _cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
 587
 588         proc_self_mountinfo = fopen("/proc/self/mountinfo", "re");
 589         if (!proc_self_mountinfo)
 590                 return -errno;
 591
 592         (void) __fsetlocking(proc_self_mountinfo, FSETLOCKING_BYCALLER);
 593
 594         return bind_remount_recursive_with_mountinfo(prefix, ro, blacklist, proc_self_mountinfo);
 595 }
 596
 597 int mount_move_root(const char *path) {
 598         assert(path);
 599
 600         if (chdir(path) < 0)
 601                 return -errno;
 602
 603         if (mount(path, "/", NULL, MS_MOVE, NULL) < 0)
 604                 return -errno;
 605
 606         if (chroot(".") < 0)
 607                 return -errno;
 608
 609         if (chdir("/") < 0)
 610                 return -errno;
 611
 612         return 0;
 613 }
 614
 615 bool fstype_is_network(const char *fstype) {
 616         const char *x;
 617
 618         x = startswith(fstype, "fuse.");
 619         if (x)
 620                 fstype = x;
 621
 622         return STR_IN_SET(fstype,
 623                           "afs",
 624                           "cifs",
 625                           "smbfs",
 626                           "sshfs",
 627                           "ncpfs",
 628                           "ncp",
 629                           "nfs",
 630                           "nfs4",
 631                           "gfs",
 632                           "gfs2",
 633                           "glusterfs",
 634                           "pvfs2", /* OrangeFS */
 635                           "ocfs2",
 636                           "lustre");
 637 }
 638
 639 bool fstype_is_api_vfs(const char *fstype) {
 640         return STR_IN_SET(fstype,
 641                           "autofs",
 642                           "bpf",
 643                           "cgroup",
 644                           "cgroup2",
 645                           "configfs",
 646                           "cpuset",
 647                           "debugfs",
 648                           "devpts",
 649                           "devtmpfs",
 650                           "efivarfs",
 651                           "fusectl",
 652                           "hugetlbfs",
 653                           "mqueue",
 654                           "proc",
 655                           "pstore",
 656                           "ramfs",
 657                           "securityfs",
 658                           "sysfs",
 659                           "tmpfs",
 660                           "tracefs");
 661 }
 662
 663 bool fstype_is_ro(const char *fstype) {
 664         /* All Linux file systems that are necessarily read-only */
 665         return STR_IN_SET(fstype,
 666                           "DM_verity_hash",
 667                           "iso9660",
 668                           "squashfs");
 669 }
 670
 671 bool fstype_can_discard(const char *fstype) {
 672         return STR_IN_SET(fstype,
 673                           "btrfs",
 674                           "ext4",
 675                           "vfat",
 676                           "xfs");
 677 }
 678
 679 bool fstype_can_uid_gid(const char *fstype) {
 680
 681         /* All file systems that have a uid=/gid= mount option that fixates the owners of all files and directories,
 682          * current and future. */
 683
 684         return STR_IN_SET(fstype,
 685                           "adfs",
 686                           "fat",
 687                           "hfs",
 688                           "hpfs",
 689                           "iso9660",
 690                           "msdos",
 691                           "ntfs",
 692                           "vfat");
 693 }
 694
 695 int repeat_unmount(const char *path, int flags) {
 696         bool done = false;
 697
 698         assert(path);
 699
 700         /* If there are multiple mounts on a mount point, this
 701          * removes them all */
 702
 703         for (;;) {
 704                 if (umount2(path, flags) < 0) {
 705
 706                         if (errno == EINVAL)
 707                                 return done;
 708
 709                         return -errno;
 710                 }
 711
 712                 done = true;
 713         }
 714 }
 715 #endif // 0
 716
 /* Maps the file type bits of 'mode' to the path of the matching "inaccessible" node below
  * /run/systemd/inaccessible/.
  *
  * Inaccessible character and block device nodes may be missing (they may not be created, because
  * major=0 and minor=0); when the respective node does not exist the socket node is returned in its
  * place. Returns NULL for any file type that is not handled here. */
 const char* mode_to_inaccessible_node(mode_t mode) {
         const mode_t node_type = mode & S_IFMT;

         if (node_type == S_IFREG)
                 return "/run/systemd/inaccessible/reg";

         if (node_type == S_IFDIR)
                 return "/run/systemd/inaccessible/dir";

         /* Device nodes: use the dedicated node if it is there, otherwise fall back to the socket. */
         if (node_type == S_IFCHR)
                 return access("/run/systemd/inaccessible/chr", F_OK) == 0
                         ? "/run/systemd/inaccessible/chr"
                         : "/run/systemd/inaccessible/sock";

         if (node_type == S_IFBLK)
                 return access("/run/systemd/inaccessible/blk", F_OK) == 0
                         ? "/run/systemd/inaccessible/blk"
                         : "/run/systemd/inaccessible/sock";

         if (node_type == S_IFIFO)
                 return "/run/systemd/inaccessible/fifo";

         if (node_type == S_IFSOCK)
                 return "/run/systemd/inaccessible/sock";

         return NULL;
 }
 741
 742 #if 0 /// UNNEEDED by elogind
 743 #define FLAG(name) (flags & name ? STRINGIFY(name) "|" : "")
 744 static char* mount_flags_to_string(long unsigned flags) {
 745         char *x;
 746         _cleanup_free_ char *y = NULL;
 747         long unsigned overflow;
 748
 749         overflow = flags & ~(MS_RDONLY |
 750                              MS_NOSUID |
 751                              MS_NODEV |
 752                              MS_NOEXEC |
 753                              MS_SYNCHRONOUS |
 754                              MS_REMOUNT |
 755                              MS_MANDLOCK |
 756                              MS_DIRSYNC |
 757                              MS_NOATIME |
 758                              MS_NODIRATIME |
 759                              MS_BIND |
 760                              MS_MOVE |
 761                              MS_REC |
 762                              MS_SILENT |
 763                              MS_POSIXACL |
 764                              MS_UNBINDABLE |
 765                              MS_PRIVATE |
 766                              MS_SLAVE |
 767                              MS_SHARED |
 768                              MS_RELATIME |
 769                              MS_KERNMOUNT |
 770                              MS_I_VERSION |
 771                              MS_STRICTATIME |
 772                              MS_LAZYTIME);
 773
 774         if (flags == 0 || overflow != 0)
 775                 if (asprintf(&y, "%lx", overflow) < 0)
 776                         return NULL;
 777
 778         x = strjoin(FLAG(MS_RDONLY),
 779                     FLAG(MS_NOSUID),
 780                     FLAG(MS_NODEV),
 781                     FLAG(MS_NOEXEC),
 782                     FLAG(MS_SYNCHRONOUS),
 783                     FLAG(MS_REMOUNT),
 784                     FLAG(MS_MANDLOCK),
 785                     FLAG(MS_DIRSYNC),
 786                     FLAG(MS_NOATIME),
 787                     FLAG(MS_NODIRATIME),
 788                     FLAG(MS_BIND),
 789                     FLAG(MS_MOVE),
 790                     FLAG(MS_REC),
 791                     FLAG(MS_SILENT),
 792                     FLAG(MS_POSIXACL),
 793                     FLAG(MS_UNBINDABLE),
 794                     FLAG(MS_PRIVATE),
 795                     FLAG(MS_SLAVE),
 796                     FLAG(MS_SHARED),
 797                     FLAG(MS_RELATIME),
 798                     FLAG(MS_KERNMOUNT),
 799                     FLAG(MS_I_VERSION),
 800                     FLAG(MS_STRICTATIME),
 801                     FLAG(MS_LAZYTIME),
 802                     y);
 803         if (!x)
 804                 return NULL;
 805         if (!y)
 806                 x[strlen(x) - 1] = '\0'; /* truncate the last | */
 807         return x;
 808 }
 809
 810 int mount_verbose(
 811                 int error_log_level,
 812                 const char *what,
 813                 const char *where,
 814                 const char *type,
 815                 unsigned long flags,
 816                 const char *options) {
 817
 818         _cleanup_free_ char *fl = NULL, *o = NULL;
 819         unsigned long f;
 820         int r;
 821
 822         r = mount_option_mangle(options, flags, &f, &o);
 823         if (r < 0)
 824                 return log_full_errno(error_log_level, r,
 825                                       "Failed to mangle mount options %s: %m",
 826                                       strempty(options));
 827
 828         fl = mount_flags_to_string(f);
 829
 830         if ((f & MS_REMOUNT) && !what && !type)
 831                 log_debug("Remounting %s (%s \"%s\")...",
 832                           where, strnull(fl), strempty(o));
 833         else if (!what && !type)
 834                 log_debug("Mounting %s (%s \"%s\")...",
 835                           where, strnull(fl), strempty(o));
 836         else if ((f & MS_BIND) && !type)
 837                 log_debug("Bind-mounting %s on %s (%s \"%s\")...",
 838                           what, where, strnull(fl), strempty(o));
 839         else if (f & MS_MOVE)
 840                 log_debug("Moving mount %s → %s (%s \"%s\")...",
 841                           what, where, strnull(fl), strempty(o));
 842         else
 843                 log_debug("Mounting %s on %s (%s \"%s\")...",
 844                           strna(type), where, strnull(fl), strempty(o));
 845         if (mount(what, where, type, f, o) < 0)
 846                 return log_full_errno(error_log_level, errno,
 847                                       "Failed to mount %s on %s (%s \"%s\"): %m",
 848                                       strna(type), where, strnull(fl), strempty(o));
 849         return 0;
 850 }
 851
 /* Wrapper around umount() that announces the operation at debug level and logs an error if it
  * fails. Returns 0 on success, otherwise the value produced by log_error_errno() for the errno left
  * by umount(). */
 int umount_verbose(const char *what) {
         log_debug("Umounting %s...", what);

         if (umount(what) >= 0)
                 return 0;

         return log_error_errno(errno, "Failed to unmount %s: %m", what);
 }
 858 #endif // 0
 859
 /* Translates the propagation bits (MS_SHARED, MS_SLAVE, MS_PRIVATE) contained in 'flags' into their
  * textual name; all other bits are ignored.
  *
  * Returns "" if none of the three bits is set, "shared", "slave" or "private" if exactly that bit is
  * set, and NULL if more than one of them is set. */
 const char *mount_propagation_flags_to_string(unsigned long flags) {
         const unsigned long propagation = flags & (MS_SHARED|MS_SLAVE|MS_PRIVATE);

         if (propagation == 0)
                 return "";

         if (propagation == MS_SHARED)
                 return "shared";

         if (propagation == MS_SLAVE)
                 return "slave";

         if (propagation == MS_PRIVATE)
                 return "private";

         /* A combination of several propagation bits has no name */
         return NULL;
 }
 875
 876
 /* Parses a propagation mode name into the matching mount flag and stores it in '*ret'.
  *
  * A name for which isempty() is true yields 0; "shared", "slave" and "private" yield MS_SHARED,
  * MS_SLAVE and MS_PRIVATE respectively. Returns 0 on success. Any other name results in -EINVAL, in
  * which case '*ret' is left untouched. */
 int mount_propagation_flags_from_string(const char *name, unsigned long *ret) {
         static const struct {
                 const char *name;
                 unsigned long flag;
         } propagation_modes[] = {
                 { "shared",  MS_SHARED  },
                 { "slave",   MS_SLAVE   },
                 { "private", MS_PRIVATE },
         };
         size_t i;

         if (isempty(name)) {
                 *ret = 0;
                 return 0;
         }

         for (i = 0; i < sizeof(propagation_modes) / sizeof(propagation_modes[0]); i++)
                 if (streq(name, propagation_modes[i].name)) {
                         *ret = propagation_modes[i].flag;
                         return 0;
                 }

         return -EINVAL;
 }
 891
 892 #if 0 /// UNNEEDED by elogind
 893 int mount_option_mangle(
 894                 const char *options,
 895                 unsigned long mount_flags,
 896                 unsigned long *ret_mount_flags,
 897                 char **ret_remaining_options) {
 898
 899         const struct libmnt_optmap *map;
 900         _cleanup_free_ char *ret = NULL;
 901         const char *p;
 902         int r;
 903
 904         /* This extracts mount flags from the mount options, and store
 905          * non-mount-flag options to '*ret_remaining_options'.
 906          * E.g.,
 907          * "rw,nosuid,nodev,relatime,size=1630748k,mode=700,uid=1000,gid=1000"
 908          * is split to MS_NOSUID|MS_NODEV|MS_RELATIME and
 909          * "size=1630748k,mode=700,uid=1000,gid=1000".
 910          * See more examples in test-mount-utils.c.
 911          *
 912          * Note that if 'options' does not contain any non-mount-flag options,
 913          * then '*ret_remaining_options' is set to NULL instread of empty string.
 914          * Note that this does not check validity of options stored in
 915          * '*ret_remaining_options'.
 916          * Note that if 'options' is NULL, then this just copies 'mount_flags'
 917          * to '*ret_mount_flags'. */
 918
 919         assert(ret_mount_flags);
 920         assert(ret_remaining_options);
 921
 922         map = mnt_get_builtin_optmap(MNT_LINUX_MAP);
 923         if (!map)
 924                 return -EINVAL;
 925
 926         p = options;
 927         for (;;) {
 928                 _cleanup_free_ char *word = NULL;
 929                 const struct libmnt_optmap *ent;
 930
 931                 r = extract_first_word(&p, &word, ",", EXTRACT_QUOTES);
 932                 if (r < 0)
 933                         return r;
 934                 if (r == 0)
 935                         break;
 936
 937                 for (ent = map; ent->name; ent++) {
 938                         /* All entries in MNT_LINUX_MAP do not take any argument.
 939                          * Thus, ent->name does not contain "=" or "[=]". */
 940                         if (!streq(word, ent->name))
 941                                 continue;
 942
 943                         if (!(ent->mask & MNT_INVERT))
 944                                 mount_flags |= ent->id;
 945                         else if (mount_flags & ent->id)
 946                                 mount_flags ^= ent->id;
 947
 948                         break;
 949                 }
 950
 951                 /* If 'word' is not a mount flag, then store it in '*ret_remaining_options'. */
 952                 if (!ent->name && !strextend_with_separator(&ret, ",", word, NULL))
 953                         return -ENOMEM;
 954         }
 955
 956         *ret_mount_flags = mount_flags;
 957         *ret_remaining_options = TAKE_PTR(ret);
 958
 959         return 0;
 960 }
 961 #endif // 0