src/basic/fd-util.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2010 Lennart Poettering
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <unistd.h>

#include "dirent-util.h"
#include "fd-util.h"
#include "fileio.h"
#include "fs-util.h"
#include "macro.h"
#include "memfd-util.h"
#include "missing.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stdio-util.h"
#include "util.h"
  41
  42 int close_nointr(int fd) {
  43         assert(fd >= 0);
  44
  45         if (close(fd) >= 0)
  46                 return 0;
  47
  48         /*
  49          * Just ignore EINTR; a retry loop is the wrong thing to do on
  50          * Linux.
  51          *
  52          * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
  53          * https://bugzilla.gnome.org/show_bug.cgi?id=682819
  54          * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
  55          * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
  56          */
  57         if (errno == EINTR)
  58                 return 0;
  59
  60         return -errno;
  61 }
  62
  63 int safe_close(int fd) {
  64
  65         /*
  66          * Like close_nointr() but cannot fail. Guarantees errno is
  67          * unchanged. Is a NOP with negative fds passed, and returns
  68          * -1, so that it can be used in this syntax:
  69          *
  70          * fd = safe_close(fd);
  71          */
  72
  73         if (fd >= 0) {
  74                 PROTECT_ERRNO;
  75
  76                 /* The kernel might return pretty much any error code
  77                  * via close(), but the fd will be closed anyway. The
  78                  * only condition we want to check for here is whether
  79                  * the fd was invalid at all... */
  80
  81                 assert_se(close_nointr(fd) != -EBADF);
  82         }
  83
  84         return -1;
  85 }
  86
  87 void safe_close_pair(int p[]) {
  88         assert(p);
  89
  90         if (p[0] == p[1]) {
  91                 /* Special case pairs which use the same fd in both
  92                  * directions... */
  93                 p[0] = p[1] = safe_close(p[0]);
  94                 return;
  95         }
  96
  97         p[0] = safe_close(p[0]);
  98         p[1] = safe_close(p[1]);
  99 }
 100
 101 void close_many(const int fds[], unsigned n_fd) {
 102         unsigned i;
 103
 104         assert(fds || n_fd <= 0);
 105
 106         for (i = 0; i < n_fd; i++)
 107                 safe_close(fds[i]);
 108 }
 109
 110 int fclose_nointr(FILE *f) {
 111         assert(f);
 112
 113         /* Same as close_nointr(), but for fclose() */
 114
 115         if (fclose(f) == 0)
 116                 return 0;
 117
 118         if (errno == EINTR)
 119                 return 0;
 120
 121         return -errno;
 122 }
 123
 124 FILE* safe_fclose(FILE *f) {
 125
 126         /* Same as safe_close(), but for fclose() */
 127
 128         if (f) {
 129                 PROTECT_ERRNO;
 130
 131                 assert_se(fclose_nointr(f) != EBADF);
 132         }
 133
 134         return NULL;
 135 }
 136
 137 #if 0 /// UNNEEDED by elogind
 138 DIR* safe_closedir(DIR *d) {
 139
 140         if (d) {
 141                 PROTECT_ERRNO;
 142
 143                 assert_se(closedir(d) >= 0 || errno != EBADF);
 144         }
 145
 146         return NULL;
 147 }
 148 #endif // 0
 149
 150 int fd_nonblock(int fd, bool nonblock) {
 151         int flags, nflags;
 152
 153         assert(fd >= 0);
 154
 155         flags = fcntl(fd, F_GETFL, 0);
 156         if (flags < 0)
 157                 return -errno;
 158
 159         if (nonblock)
 160                 nflags = flags | O_NONBLOCK;
 161         else
 162                 nflags = flags & ~O_NONBLOCK;
 163
 164         if (nflags == flags)
 165                 return 0;
 166
 167         if (fcntl(fd, F_SETFL, nflags) < 0)
 168                 return -errno;
 169
 170         return 0;
 171 }
 172
 173 int fd_cloexec(int fd, bool cloexec) {
 174         int flags, nflags;
 175
 176         assert(fd >= 0);
 177
 178         flags = fcntl(fd, F_GETFD, 0);
 179         if (flags < 0)
 180                 return -errno;
 181
 182         if (cloexec)
 183                 nflags = flags | FD_CLOEXEC;
 184         else
 185                 nflags = flags & ~FD_CLOEXEC;
 186
 187         if (nflags == flags)
 188                 return 0;
 189
 190         if (fcntl(fd, F_SETFD, nflags) < 0)
 191                 return -errno;
 192
 193         return 0;
 194 }
 195
 196 void stdio_unset_cloexec(void) {
 197         (void) fd_cloexec(STDIN_FILENO, false);
 198         (void) fd_cloexec(STDOUT_FILENO, false);
 199         (void) fd_cloexec(STDERR_FILENO, false);
 200 }
 201
 202 _pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
 203         unsigned i;
 204
 205         assert(n_fdset == 0 || fdset);
 206
 207         for (i = 0; i < n_fdset; i++)
 208                 if (fdset[i] == fd)
 209                         return true;
 210
 211         return false;
 212 }
 213
 214 int close_all_fds(const int except[], unsigned n_except) {
 215         _cleanup_closedir_ DIR *d = NULL;
 216         struct dirent *de;
 217         int r = 0;
 218
 219         assert(n_except == 0 || except);
 220
 221         d = opendir("/proc/self/fd");
 222         if (!d) {
 223                 int fd;
 224                 struct rlimit rl;
 225
 226                 /* When /proc isn't available (for example in chroots)
 227                  * the fallback is brute forcing through the fd
 228                  * table */
 229
 230                 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
 231                 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
 232
 233                         if (fd_in_set(fd, except, n_except))
 234                                 continue;
 235
 236                         if (close_nointr(fd) < 0)
 237                                 if (errno != EBADF && r == 0)
 238                                         r = -errno;
 239                 }
 240
 241                 return r;
 242         }
 243
 244         FOREACH_DIRENT(de, d, return -errno) {
 245                 int fd = -1;
 246
 247                 if (safe_atoi(de->d_name, &fd) < 0)
 248                         /* Let's better ignore this, just in case */
 249                         continue;
 250
 251                 if (fd < 3)
 252                         continue;
 253
 254                 if (fd == dirfd(d))
 255                         continue;
 256
 257                 if (fd_in_set(fd, except, n_except))
 258                         continue;
 259
 260                 if (close_nointr(fd) < 0) {
 261                         /* Valgrind has its own FD and doesn't want to have it closed */
 262                         if (errno != EBADF && r == 0)
 263                                 r = -errno;
 264                 }
 265         }
 266
 267         return r;
 268 }
 269
 270 #if 0 /// UNNEEDED by elogind
 271 int same_fd(int a, int b) {
 272         struct stat sta, stb;
 273         pid_t pid;
 274         int r, fa, fb;
 275
 276         assert(a >= 0);
 277         assert(b >= 0);
 278
 279         /* Compares two file descriptors. Note that semantics are
 280          * quite different depending on whether we have kcmp() or we
 281          * don't. If we have kcmp() this will only return true for
 282          * dup()ed file descriptors, but not otherwise. If we don't
 283          * have kcmp() this will also return true for two fds of the same
 284          * file, created by separate open() calls. Since we use this
 285          * call mostly for filtering out duplicates in the fd store
 286          * this difference hopefully doesn't matter too much. */
 287
 288         if (a == b)
 289                 return true;
 290
 291         /* Try to use kcmp() if we have it. */
 292         pid = getpid_cached();
 293         r = kcmp(pid, pid, KCMP_FILE, a, b);
 294         if (r == 0)
 295                 return true;
 296         if (r > 0)
 297                 return false;
 298         if (errno != ENOSYS)
 299                 return -errno;
 300
 301         /* We don't have kcmp(), use fstat() instead. */
 302         if (fstat(a, &sta) < 0)
 303                 return -errno;
 304
 305         if (fstat(b, &stb) < 0)
 306                 return -errno;
 307
 308         if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
 309                 return false;
 310
 311         /* We consider all device fds different, since two device fds
 312          * might refer to quite different device contexts even though
 313          * they share the same inode and backing dev_t. */
 314
 315         if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
 316                 return false;
 317
 318         if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
 319                 return false;
 320
 321         /* The fds refer to the same inode on disk, let's also check
 322          * if they have the same fd flags. This is useful to
 323          * distinguish the read and write side of a pipe created with
 324          * pipe(). */
 325         fa = fcntl(a, F_GETFL);
 326         if (fa < 0)
 327                 return -errno;
 328
 329         fb = fcntl(b, F_GETFL);
 330         if (fb < 0)
 331                 return -errno;
 332
 333         return fa == fb;
 334 }
 335
 336 void cmsg_close_all(struct msghdr *mh) {
 337         struct cmsghdr *cmsg;
 338
 339         assert(mh);
 340
 341         CMSG_FOREACH(cmsg, mh)
 342                 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
 343                         close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
 344 }
 345
 346 bool fdname_is_valid(const char *s) {
 347         const char *p;
 348
 349         /* Validates a name for $LISTEN_FDNAMES. We basically allow
 350          * everything ASCII that's not a control character. Also, as
 351          * special exception the ":" character is not allowed, as we
 352          * use that as field separator in $LISTEN_FDNAMES.
 353          *
 354          * Note that the empty string is explicitly allowed
 355          * here. However, we limit the length of the names to 255
 356          * characters. */
 357
 358         if (!s)
 359                 return false;
 360
 361         for (p = s; *p; p++) {
 362                 if (*p < ' ')
 363                         return false;
 364                 if (*p >= 127)
 365                         return false;
 366                 if (*p == ':')
 367                         return false;
 368         }
 369
 370         return p - s < 256;
 371 }
 372
 373 int fd_get_path(int fd, char **ret) {
 374         char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
 375         int r;
 376
 377         xsprintf(procfs_path, "/proc/self/fd/%i", fd);
 378
 379         r = readlink_malloc(procfs_path, ret);
 380
 381         if (r == -ENOENT) /* If the file doesn't exist the fd is invalid */
 382                 return -EBADF;
 383
 384         return r;
 385 }
 386 #endif // 0
 387
 388 int move_fd(int from, int to, int cloexec) {
 389         int r;
 390
 391         /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
 392          * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
 393          * off, if it is > 0 it is turned on. */
 394
 395         if (from < 0)
 396                 return -EBADF;
 397         if (to < 0)
 398                 return -EBADF;
 399
 400         if (from == to) {
 401
 402                 if (cloexec >= 0) {
 403                         r = fd_cloexec(to, cloexec);
 404                         if (r < 0)
 405                                 return r;
 406                 }
 407
 408                 return to;
 409         }
 410
 411         if (cloexec < 0) {
 412                 int fl;
 413
 414                 fl = fcntl(from, F_GETFD, 0);
 415                 if (fl < 0)
 416                         return -errno;
 417
 418                 cloexec = !!(fl & FD_CLOEXEC);
 419         }
 420
 421         r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
 422         if (r < 0)
 423                 return -errno;
 424
 425         assert(r == to);
 426
 427         safe_close(from);
 428
 429         return to;
 430 }
 431
/* Returns a new fd which, when read from, yields the 'size' bytes at 'data', or a negative errno-style code
 * on failure. 'flags' is a mask of ACQUIRE_NO_* bits, each of which disables one of the strategies tried
 * below. If every applicable strategy is disabled, -EOPNOTSUPP is returned. 'data' may be NULL only if 'size'
 * is zero. */
int acquire_data_fd(const void *data, size_t size, unsigned flags) {

        char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
        /* NOTE(review): pipefds and fd carry cleanup attributes; presumably whatever they still hold is closed
         * automatically when the function returns — confirm against the macro definitions. */
        _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
        char pattern[] = "/dev/shm/data-fd-XXXXXX";
        _cleanup_close_ int fd = -1;
        int isz = 0, r;
        ssize_t n;
        off_t f;

        assert(data || size == 0);

        /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
         * complex than I wish it was. But here's why:
         *
         * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
         *    read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
         *
         * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
         *    a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
         *    clients can only bump their size to a system-wide limit, which might be quite low.
         *
         * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
         *    earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
         *    /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
         *
         * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
         *
         * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
         * figure. */

        if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
                /* As a special case, return /dev/null if we have been called for an empty data block */
                r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
                if (r < 0)
                        return -errno;

                return r;
        }

        /* Strategy a): memfd. Only a failure to create the memfd falls through to the next strategy; any
         * later error is returned to the caller. */
        if ((flags & ACQUIRE_NO_MEMFD) == 0) {
                fd = memfd_new("data-fd");
                if (fd < 0)
                        goto try_pipe;

                n = write(fd, data, size);
                if (n < 0)
                        return -errno;
                /* A short write is treated as an error, it is not retried. */
                if ((size_t) n != size)
                        return -EIO;

                /* Rewind, so that the caller reads from the beginning.
                 * NOTE(review): if lseek() ever returned a positive offset here, errno would not have been
                 * set by it — confirm whether that case needs separate handling. */
                f = lseek(fd, 0, SEEK_SET);
                if (f != 0)
                        return -errno;

                r = memfd_set_sealed(fd);
                if (r < 0)
                        return r;

                /* Hand the fd over to the caller, and invalidate our own copy of it. */
                r = fd;
                fd = -1;

                return r;
        }

try_pipe:
        /* Strategy b): a pipe whose buffer is large enough to take all data in a single write. */
        if ((flags & ACQUIRE_NO_PIPE) == 0) {
                if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
                        return -errno;

                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                if (isz < 0)
                        return -errno;

                if ((size_t) isz < size) {
                        /* The size is passed to fcntl() as int, refuse sizes that don't survive the
                         * conversion. */
                        isz = (int) size;
                        if (isz < 0 || (size_t) isz != size)
                                return -E2BIG;

                        /* Try to bump the pipe size */
                        (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);

                        /* See if that worked */
                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
                        if (isz < 0)
                                return -errno;

                        /* Still too small: give up on pipes and try the next strategy. */
                        if ((size_t) isz < size)
                                goto try_dev_shm;
                }

                n = write(pipefds[1], data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* The pipe was created with O_NONBLOCK, turn that off again for the reading side we are about
                 * to return (best effort). */
                (void) fd_nonblock(pipefds[0], false);

                /* Hand the reading side over to the caller, and invalidate our own copy of it. */
                r = pipefds[0];
                pipefds[0] = -1;

                return r;
        }

try_dev_shm:
        /* Strategy c): an O_TMPFILE file in /dev/shm. Only a failing open() falls through to the next
         * strategy. */
        if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
                fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
                if (fd < 0)
                        goto try_dev_shm_without_o_tmpfile;

                n = write(fd, data, size);
                if (n < 0)
                        return -errno;
                if ((size_t) n != size)
                        return -EIO;

                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
                xsprintf(procfs_path, "/proc/self/fd/%i", fd);
                r = open(procfs_path, O_RDONLY|O_CLOEXEC);
                if (r < 0)
                        return -errno;

                return r;
        }

try_dev_shm_without_o_tmpfile:
        /* Strategy d): a regular temporary file in /dev/shm, which is unlinked again on all paths once it
         * has been created. */
        if ((flags & ACQUIRE_NO_REGULAR) == 0) {
                fd = mkostemp_safe(pattern);
                if (fd < 0)
                        return fd;

                n = write(fd, data, size);
                if (n < 0) {
                        r = -errno;
                        goto unlink_and_return;
                }
                if ((size_t) n != size) {
                        r = -EIO;
                        goto unlink_and_return;
                }

                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
                r = open(pattern, O_RDONLY|O_CLOEXEC);
                if (r < 0)
                        r = -errno;

        unlink_and_return:
                /* r is either the read-only fd or the error to report; either way the name is removed. */
                (void) unlink(pattern);
                return r;
        }

        return -EOPNOTSUPP;
}