1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/resource.h>
24 #include <sys/socket.h>
28 #include "dirent-util.h"
33 #include "memfd-util.h"
35 #include "parse-util.h"
36 #include "path-util.h"
37 #include "process-util.h"
38 #include "socket-util.h"
39 #include "stdio-util.h"
/* Closes fd exactly once, never retrying.
 *
 * fd must be non-negative. Returns 0 on success and also when close() was interrupted (EINTR);
 * any other failure is returned as a negative errno-style code. */
int close_nointr(int fd) {
        assert(fd >= 0);

        if (close(fd) >= 0)
                return 0;

        /*
         * Just ignore EINTR; a retry loop is the wrong thing to do on
         * Linux.
         *
         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
         */
        if (errno == EINTR)
                return 0;

        return -errno;
}
/* Closes fd if it is non-negative and always returns -1; errno is left as the caller had it. */
int safe_close(int fd) {

        /*
         * Like close_nointr() but cannot fail. Guarantees errno is
         * unchanged. Is a NOP with negative fds passed, and returns
         * -1, so that it can be used in this syntax:
         *
         * fd = safe_close(fd);
         */

        if (fd >= 0) {
                int saved_errno = errno;

                /* The kernel might return pretty much any error code
                 * via close(), but the fd will be closed anyway. The
                 * only condition we want to check for here is whether
                 * the fd was invalid at all... */

                assert_se(close_nointr(fd) != -EBADF);

                errno = saved_errno;
        }

        return -1;
}
/* Closes both descriptors stored in p[0] and p[1] via safe_close() and stores its return value back
 * into both slots. p must not be NULL. */
void safe_close_pair(int p[]) {
        assert(p);

        if (p[0] == p[1]) {
                /* Special case pairs which use the same fd in both
                 * directions: close it only once. */
                p[0] = p[1] = safe_close(p[0]);
                return;
        }

        p[0] = safe_close(p[0]);
        p[1] = safe_close(p[1]);
}
/* Closes the n_fd descriptors stored in fds[] with safe_close(). fds may be NULL only if n_fd is 0.
 * The array itself is not modified. */
void close_many(const int fds[], unsigned n_fd) {
        unsigned i;

        assert(fds || n_fd == 0);

        for (i = 0; i < n_fd; i++)
                safe_close(fds[i]);
}
/* Closes the stdio stream f exactly once, never retrying.
 *
 * f must not be NULL. Returns 0 on success and also on EINTR; any other failure is returned as a
 * negative errno-style code. */
int fclose_nointr(FILE *f) {
        assert(f);

        /* Same as close_nointr(), but for fclose() */

        if (fclose(f) == 0)
                return 0;

        if (errno == EINTR)
                return 0;

        return -errno;
}
/* Closes the stdio stream f if it is non-NULL and always returns NULL, so that it can be used as
 * "f = safe_fclose(f);". errno is left as the caller had it. */
FILE* safe_fclose(FILE *f) {

        /* Same as safe_close(), but for fclose() */

        if (f) {
                int saved_errno = errno;

                /* As with close_nointr(), failures are reported as negative errno values, hence the
                 * invalid-fd condition to catch is -EBADF (a positive EBADF can never be returned). */
                assert_se(fclose_nointr(f) != -EBADF);

                errno = saved_errno;
        }

        return NULL;
}
137 #if 0 /// UNNEEDED by elogind
138 DIR* safe_closedir(DIR *d) {
143 assert_se(closedir(d) >= 0 || errno != EBADF);
/* Turns the O_NONBLOCK file status flag of fd on (nonblock == true) or off (nonblock == false).
 *
 * Returns 0 on success, or a negative errno-style code if reading or writing the flags fails. */
int fd_nonblock(int fd, bool nonblock) {
        int flags, nflags;

        assert(fd >= 0);

        flags = fcntl(fd, F_GETFL, 0);
        if (flags < 0)
                return -errno;

        if (nonblock)
                nflags = flags | O_NONBLOCK;
        else
                nflags = flags & ~O_NONBLOCK;

        /* Already in the requested state, no need to write the flags back */
        if (nflags == flags)
                return 0;

        if (fcntl(fd, F_SETFL, nflags) < 0)
                return -errno;

        return 0;
}
/* Turns the FD_CLOEXEC descriptor flag of fd on (cloexec == true) or off (cloexec == false).
 *
 * Returns 0 on success, or a negative errno-style code if reading or writing the flags fails. */
int fd_cloexec(int fd, bool cloexec) {
        int flags, nflags;

        assert(fd >= 0);

        flags = fcntl(fd, F_GETFD, 0);
        if (flags < 0)
                return -errno;

        if (cloexec)
                nflags = flags | FD_CLOEXEC;
        else
                nflags = flags & ~FD_CLOEXEC;

        /* Already in the requested state, no need to write the flags back */
        if (nflags == flags)
                return 0;

        if (fcntl(fd, F_SETFD, nflags) < 0)
                return -errno;

        return 0;
}
/* Clears FD_CLOEXEC on stdin, stdout and stderr. Best effort: failures of the individual
 * fd_cloexec() calls are deliberately ignored. */
void stdio_unset_cloexec(void) {
        (void) fd_cloexec(STDIN_FILENO, false);
        (void) fd_cloexec(STDOUT_FILENO, false);
        (void) fd_cloexec(STDERR_FILENO, false);
}
202 _pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
205 assert(n_fdset == 0 || fdset);
207 for (i = 0; i < n_fdset; i++)
/* Closes open file descriptors except those listed in except[] (n_except entries; except may be
 * NULL only if n_except is 0).
 *
 * The visible code enumerates the entries of /proc/self/fd and closes each parsed fd that is not in
 * the exception set. When /proc is not available it falls back to walking the fd numbers from 3 up
 * to the RLIMIT_NOFILE hard limit. In both paths a close error other than -EBADF is singled out;
 * presumably the first such error is what gets returned (the assignment is not visible here).
 *
 * NOTE(review): "(int) rl.rlim_max" goes negative or truncates if the hard limit is RLIM_INFINITY or
 * exceeds INT_MAX, in which case the fallback loop would close nothing. Confirm whether the limit
 * needs clamping.
 * NOTE(review): several statements of this function are missing from this excerpt. */
214 int close_all_fds(const int except[], unsigned n_except) {
215 _cleanup_closedir_ DIR *d = NULL;
219 assert(n_except == 0 || except);
221 d = opendir("/proc/self/fd");
226 /* When /proc isn't available (for example in chroots)
227 * the fallback is brute forcing through the fd
230 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
231 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
234 if (fd_in_set(fd, except, n_except))
237 q = close_nointr(fd);
238 if (q < 0 && q != -EBADF && r >= 0)
245 FOREACH_DIRENT(de, d, return -errno) {
248 if (safe_atoi(de->d_name, &fd) < 0)
249 /* Let's better ignore this, just in case */
258 if (fd_in_set(fd, except, n_except))
261 q = close_nointr(fd);
262 if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
269 #if 0 /// UNNEEDED by elogind
/* Checks whether the file descriptors a and b refer to the same thing.
 *
 * First asks the kernel via kcmp(KCMP_FILE) on the calling process. The fallback path compares
 * fstat() results: file type, then st_dev/st_ino, with character and block devices given special
 * treatment, and finally the F_GETFL flags of both fds.
 *
 * NOTE(review): the return statements are missing from this excerpt, so the exact return values
 * cannot be confirmed here. */
270 int same_fd(int a, int b) {
271 struct stat sta, stb;
278 /* Compares two file descriptors. Note that semantics are
279 * quite different depending on whether we have kcmp() or we
280 * don't. If we have kcmp() this will only return true for
281 * dup()ed file descriptors, but not otherwise. If we don't
282 * have kcmp() this will also return true for two fds of the same
283 * file, created by separate open() calls. Since we use this
284 * call mostly for filtering out duplicates in the fd store
285 * this difference hopefully doesn't matter too much. */
290 /* Try to use kcmp() if we have it. */
291 pid = getpid_cached();
292 r = kcmp(pid, pid, KCMP_FILE, a, b);
300 /* We don't have kcmp(), use fstat() instead. */
301 if (fstat(a, &sta) < 0)
304 if (fstat(b, &stb) < 0)
307 if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
310 /* We consider all device fds different, since two device fds
311 * might refer to quite different device contexts even though
312 * they share the same inode and backing dev_t. */
314 if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
317 if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
320 /* The fds refer to the same inode on disk, let's also check
321 * if they have the same fd flags. This is useful to
322 * distinguish the read and write side of a pipe created with
324 fa = fcntl(a, F_GETFL);
328 fb = fcntl(b, F_GETFL);
335 void cmsg_close_all(struct msghdr *mh) {
336 struct cmsghdr *cmsg;
340 CMSG_FOREACH(cmsg, mh)
341 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
342 close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
/* Returns true if s is an acceptable fd name for $LISTEN_FDNAMES, false otherwise (including for
 * a NULL s). */
bool fdname_is_valid(const char *s) {
        const char *p;

        /* Validates a name for $LISTEN_FDNAMES. We basically allow
         * everything ASCII that's not a control character. Also, as
         * special exception the ":" character is not allowed, as we
         * use that as field separator in $LISTEN_FDNAMES.
         *
         * Note that the empty string is explicitly allowed
         * here. However, we limit the length of the names to 255
         * characters. */

        if (!s)
                return false;

        for (p = s; *p; p++) {
                /* Control characters; with a signed char this also rejects bytes >= 0x80 */
                if (*p < ' ')
                        return false;
                /* DEL and, with an unsigned char, everything that is not ASCII */
                if (*p >= 127)
                        return false;
                if (*p == ':')
                        return false;
        }

        return p - s <= 255;
}
372 int fd_get_path(int fd, char **ret) {
373 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
376 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
378 r = readlink_malloc(procfs_path, ret);
380 if (r == -ENOENT) /* If the file doesn't exist the fd is invalid */
/* Moves the file descriptor 'from' onto the descriptor number 'to' using dup3(); 'cloexec' selects
 * the FD_CLOEXEC state of the result as described in the comment below. When 'cloexec' is to be
 * inherited, the current state is read from 'from' with F_GETFD.
 *
 * NOTE(review): the fd_cloexec(to, cloexec) call presumably belongs to a from == to special case.
 * The branch conditions and return paths are missing from this excerpt. */
387 int move_fd(int from, int to, int cloexec) {
390 /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
391 * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
392 * off, if it is > 0 it is turned on. */
402 r = fd_cloexec(to, cloexec);
413 fl = fcntl(from, F_GETFD, 0);
417 cloexec = !!(fl & FD_CLOEXEC);
420 r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
/* Produces a read-only file descriptor from which the 'size' bytes at 'data' can be read back
 * ('data' may be NULL only if 'size' is 0).
 *
 * The strategies are tried in the order they appear below. Each one is skipped when the matching
 * bit is set in 'flags': ACQUIRE_NO_DEV_NULL (empty input served from /dev/null), ACQUIRE_NO_MEMFD,
 * ACQUIRE_NO_PIPE, ACQUIRE_NO_TMPFILE and ACQUIRE_NO_REGULAR.
 *
 * NOTE(review): the error checks and return statements between the visible lines are missing from
 * this excerpt, so the exact return values cannot be confirmed here. */
431 int acquire_data_fd(const void *data, size_t size, unsigned flags) {
433 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
434 _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
435 char pattern[] = "/dev/shm/data-fd-XXXXXX";
436 _cleanup_close_ int fd = -1;
441 assert(data || size == 0);
443 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
444 * complex than I wish it was. But here's why:
446 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
447 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
449 * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
450 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
451 * clients can only bump their size to a system-wide limit, which might be quite low.
453 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
454 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
455 * /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
457 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
459 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
462 if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
463 /* As a special case, return /dev/null if we have been called for an empty data block */
464 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
471 if ((flags & ACQUIRE_NO_MEMFD) == 0) {
472 fd = memfd_new("data-fd");
476 n = write(fd, data, size);
479 if ((size_t) n != size)
482 f = lseek(fd, 0, SEEK_SET);
486 r = memfd_set_sealed(fd);
497 if ((flags & ACQUIRE_NO_PIPE) == 0) {
498 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
501 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
505 if ((size_t) isz < size) {
507 if (isz < 0 || (size_t) isz != size)
510 /* Try to bump the pipe size */
511 (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
513 /* See if that worked */
514 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
518 if ((size_t) isz < size)
522 n = write(pipefds[1], data, size);
525 if ((size_t) n != size)
528 (void) fd_nonblock(pipefds[0], false);
537 if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
538 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
540 goto try_dev_shm_without_o_tmpfile;
542 n = write(fd, data, size);
545 if ((size_t) n != size)
548 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
549 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
550 r = open(procfs_path, O_RDONLY|O_CLOEXEC);
557 try_dev_shm_without_o_tmpfile:
558 if ((flags & ACQUIRE_NO_REGULAR) == 0) {
559 fd = mkostemp_safe(pattern);
563 n = write(fd, data, size);
566 goto unlink_and_return;
568 if ((size_t) n != size) {
570 goto unlink_and_return;
573 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
574 r = open(pattern, O_RDONLY|O_CLOEXEC);
579 (void) unlink(pattern);
/* Best-effort helper for descriptors that ended up in the stdio range: fds that are negative or
 * above 2 take the early-exit check, all others are duplicated to the lowest free number >= 3 with
 * F_DUPFD_CLOEXEC or F_DUPFD, depending on whether FD_CLOEXEC is set on the original. The full
 * contract is in the comment below.
 *
 * NOTE(review): the return statements and the handling of the original fd after duplication are
 * missing from this excerpt. */
586 int fd_move_above_stdio(int fd) {
590 /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
591 * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
592 * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
593 * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
594 * stdin/stdout/stderr of unrelated code.
596 * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
597 * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
598 * been closed before.
600 * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
601 * error we simply return the original file descriptor, and we do not touch errno. */
603 if (fd < 0 || fd > 2)
606 flags = fcntl(fd, F_GETFD, 0);
610 if (flags & FD_CLOEXEC)
611 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
613 copy = fcntl(fd, F_DUPFD, 3);
/* Installs the three given descriptors as stdin, stdout and stderr.
 *
 * Outline of the visible steps: open /dev/null once if any argument is negative and keep it out of
 * the 0..2 range; build fd[] from the arguments, duplicating any that sit in 0..2 at the wrong
 * position to a number >= 3; dup2() each entry onto its target number, or just clear the
 * close-on-exec flag when it is already in place; finally close the originals above 2, the
 * temporary copies and the /dev/null fd.
 *
 * NOTE(review): the error paths and the end of the function are missing from this excerpt. */
623 int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {
625 int fd[3] = { /* Put together an array of fds we work on */
632 null_fd = -1, /* if we open /dev/null, we store the fd to it here */
633 copy_fd[3] = { -1, -1, -1 }; /* This contains all fds we duplicate here temporarily, and hence need to close at the end */
634 bool null_readable, null_writable;
636 /* Sets up stdin, stdout, stderr with the three file descriptors passed in. If any of the descriptors is
637 * specified as -1 it will be connected with /dev/null instead. If any of the file descriptors is passed as
638 * itself (e.g. stdin as STDIN_FILENO) it is left unmodified, but the O_CLOEXEC bit is turned off should it be
641 * Note that if any of the passed file descriptors are > 2 they will be closed — both on success and on
642 * failure! Thus, callers should assume that when this function returns the input fds are invalidated.
644 * Note that when this function fails stdin/stdout/stderr might remain half set up!
646 * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
647 * stdin/stdout/stderr). */
649 null_readable = original_input_fd < 0;
650 null_writable = original_output_fd < 0 || original_error_fd < 0;
652 /* First step, open /dev/null once, if we need it */
653 if (null_readable || null_writable) {
655 /* Let's open this with O_CLOEXEC first, and convert it to non-O_CLOEXEC when we move the fd to the final position. */
656 null_fd = open("/dev/null", (null_readable && null_writable ? O_RDWR :
657 null_readable ? O_RDONLY : O_WRONLY) | O_CLOEXEC);
663 /* If this fd is in the 0…2 range, let's move it out of it */
667 copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
678 /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
679 for (i = 0; i < 3; i++) {
682 fd[i] = null_fd; /* A negative parameter means: connect this one to /dev/null */
683 else if (fd[i] != i && fd[i] < 3) {
684 /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so that we can work there. */
685 copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
686 if (copy_fd[i] < 0) {
695 /* At this point we now have the fds to use in fd[], and they are all above the stdio range, so that we
696 * have freedom to move them around. If the fds already were at the right places then the specific fds are
697 * -1. Let's now move them to the right places. This is the point of no return. */
698 for (i = 0; i < 3; i++) {
702 /* fd is already in place, but let's make sure O_CLOEXEC is off */
703 r = fd_cloexec(i, false);
710 if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
720 /* Close the original fds, but only if they were outside of the stdio range. Also, properly check for the same
721 * fd passed in multiple times. */
722 safe_close_above_stdio(original_input_fd);
723 if (original_output_fd != original_input_fd)
724 safe_close_above_stdio(original_output_fd);
725 if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
726 safe_close_above_stdio(original_error_fd);
728 /* Close the copies we moved > 2 */
729 for (i = 0; i < 3; i++)
730 safe_close(copy_fd[i]);
732 /* Close our null fd, if it's > 2 */
733 safe_close_above_stdio(null_fd);