1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
#include <limits.h>
#include <sys/resource.h>
#include <sys/socket.h>

#include "dirent-util.h"
#include "memfd-util.h"
#include "parse-util.h"
#include "path-util.h"
#include "process-util.h"
#include "socket-util.h"
#include "stdio-util.h"
int close_nointr(int fd) {
        assert(fd >= 0);

        /*
         * Close fd and report the outcome as 0 or a negative errno value.
         *
         * EINTR is deliberately treated as success: a retry loop is the
         * wrong thing to do on Linux. See:
         *
         * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
         * https://bugzilla.gnome.org/show_bug.cgi?id=682819
         * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
         * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
         */
        if (close(fd) < 0 && errno != EINTR)
                return -errno;

        return 0;
}
63 int safe_close(int fd) {
66 * Like close_nointr() but cannot fail. Guarantees errno is
67 * unchanged. Is a NOP with negative fds passed, and returns
68 * -1, so that it can be used in this syntax:
70 * fd = safe_close(fd);
76 /* The kernel might return pretty much any error code
77 * via close(), but the fd will be closed anyway. The
78 * only condition we want to check for here is whether
79 * the fd was invalid at all... */
81 assert_se(close_nointr(fd) != -EBADF);
void safe_close_pair(int p[]) {
        assert(p);

        /* Closes both fds of a pair and resets the two slots to the value
         * returned by safe_close(). */

        if (p[0] != p[1]) {
                p[0] = safe_close(p[0]);
                p[1] = safe_close(p[1]);
                return;
        }

        /* Special case: both slots hold the very same fd, hence close
         * it only once and reset both entries with the result. */
        p[0] = p[1] = safe_close(p[0]);
}
void close_many(const int fds[], unsigned n_fd) {
        unsigned idx = 0;

        /* Passes each of the n_fd entries of fds[] to safe_close(), in
         * array order. A NULL array is only acceptable when it is empty. */
        assert(fds || n_fd <= 0);

        while (idx < n_fd)
                safe_close(fds[idx++]);
}
int fclose_nointr(FILE *f) {
        assert(f);

        /* Counterpart of close_nointr() for stdio streams: an fclose()
         * that fails with EINTR is reported as success, every other
         * failure is returned as negative errno value. */

        if (fclose(f) != 0 && errno != EINTR)
                return -errno;

        return 0;
}
124 FILE* safe_fclose(FILE *f) {
126 /* Same as safe_close(), but for fclose() */
131 assert_se(fclose_nointr(f) != EBADF);
#if 0 /// UNNEEDED by elogind
DIR* safe_closedir(DIR *d) {

        /* Directory stream flavour of safe_close(): a NULL stream is
         * ignored, errno is preserved, and NULL is always returned. */

        if (!d)
                return NULL;

        PROTECT_ERRNO;

        /* Only an invalid underlying fd is considered a programming error */
        assert_se(closedir(d) >= 0 || errno != EBADF);

        return NULL;
}
#endif // 0
int fd_nonblock(int fd, bool nonblock) {
        int current, wanted;

        /* Turns O_NONBLOCK on or off for the specified fd. Returns 0 on
         * success (including when nothing had to be changed) and a
         * negative errno value if fcntl() fails. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFL, 0);
        if (current < 0)
                return -errno;

        wanted = nonblock ? (current | O_NONBLOCK) : (current & ~O_NONBLOCK);

        /* Skip the second syscall if the flag already has the right value */
        if (wanted == current)
                return 0;

        if (fcntl(fd, F_SETFL, wanted) < 0)
                return -errno;

        return 0;
}
int fd_cloexec(int fd, bool cloexec) {
        int current, wanted;

        /* Turns FD_CLOEXEC on or off for the specified fd. Returns 0 on
         * success (including when nothing had to be changed) and a
         * negative errno value if fcntl() fails. */

        assert(fd >= 0);

        current = fcntl(fd, F_GETFD, 0);
        if (current < 0)
                return -errno;

        wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);

        /* Skip the second syscall if the flag already has the right value */
        if (wanted == current)
                return 0;

        if (fcntl(fd, F_SETFD, wanted) < 0)
                return -errno;

        return 0;
}
196 _pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
199 assert(n_fdset == 0 || fdset);
201 for (i = 0; i < n_fdset; i++)
208 int close_all_fds(const int except[], unsigned n_except) {
209 _cleanup_closedir_ DIR *d = NULL;
213 assert(n_except == 0 || except);
215 d = opendir("/proc/self/fd");
220 /* When /proc isn't available (for example in chroots)
221 * the fallback is brute forcing through the fd
224 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
225 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
228 if (fd_in_set(fd, except, n_except))
231 q = close_nointr(fd);
232 if (q < 0 && q != -EBADF && r >= 0)
239 FOREACH_DIRENT(de, d, return -errno) {
242 if (safe_atoi(de->d_name, &fd) < 0)
243 /* Let's better ignore this, just in case */
252 if (fd_in_set(fd, except, n_except))
255 q = close_nointr(fd);
256 if (q < 0 && q != -EBADF && r >= 0) /* Valgrind has its own FD and doesn't want to have it closed */
#if 0 /// UNNEEDED by elogind
int same_fd(int a, int b) {
        struct stat sta, stb;
        int flags_a, flags_b, k;
        pid_t self;

        assert(a >= 0);
        assert(b >= 0);

        /* Compares two file descriptors. Returns > 0 if they are considered
         * the same, 0 if not, and a negative errno value on failure.
         *
         * The semantics differ depending on whether kcmp() is available:
         * with kcmp() only dup()ed file descriptors compare equal. Without
         * it, two fds of the same file created by separate open() calls
         * compare equal, too. Since this call is mostly used for filtering
         * out duplicates in the fd store this difference hopefully doesn't
         * matter too much. */

        if (a == b)
                return true;

        /* Preferred path: ask the kernel via kcmp(), if it is supported */
        self = getpid_cached();
        k = kcmp(self, self, KCMP_FILE, a, b);
        if (k == 0)
                return true;
        if (k > 0)
                return false;
        if (errno != ENOSYS)
                return -errno;

        /* No kcmp() available, fall back to comparing fstat() data */
        if (fstat(a, &sta) < 0 || fstat(b, &stb) < 0)
                return -errno;

        if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
                return false;

        /* All device fds are treated as different, since two device fds
         * might refer to quite different device contexts even though they
         * share the same inode and backing dev_t. */
        if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
                return false;

        if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
                return false;

        /* Same inode on disk. Additionally compare the file status flags,
         * which is useful to tell apart the read and the write side of a
         * pipe. */
        flags_a = fcntl(a, F_GETFL);
        if (flags_a < 0)
                return -errno;

        flags_b = fcntl(b, F_GETFL);
        if (flags_b < 0)
                return -errno;

        return flags_a == flags_b;
}
#endif // 0
329 void cmsg_close_all(struct msghdr *mh) {
330 struct cmsghdr *cmsg;
334 CMSG_FOREACH(cmsg, mh)
335 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
336 close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
bool fdname_is_valid(const char *s) {
        size_t len = 0;

        /* Checks whether s is acceptable as a name in $LISTEN_FDNAMES.
         * Any ASCII character that is not a control character is fine,
         * with the exception of ":", which serves as the field separator
         * in $LISTEN_FDNAMES.
         *
         * The empty string is explicitly permitted. Names are limited to
         * 255 characters. NULL is not a valid name. */

        if (!s)
                return false;

        while (s[len] != '\0') {
                char c = s[len];

                if (c < ' ' || c >= 127 || c == ':')
                        return false;

                len++;
        }

        return len < 256;
}
367 int fd_get_path(int fd, char **ret) {
368 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
371 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
373 r = readlink_malloc(procfs_path, ret);
375 if (r == -ENOENT) /* If the file doesn't exist the fd is invalid */
int move_fd(int from, int to, int cloexec) {
        int r;

        /* Moves fd 'from' to 'to' and releases the old fd. The 'cloexec'
         * argument controls FD_CLOEXEC on the result: -1 inherits the
         * setting of the original fd, 0 turns it off, > 0 turns it on.
         * Returns 'to' on success, a negative errno value on failure. */

        if (from < 0 || to < 0)
                return -EBADF;

        if (from == to) {
                /* Nothing to move, at most the flag needs adjusting */
                if (cloexec < 0)
                        return to;

                r = fd_cloexec(to, cloexec);
                return r < 0 ? r : to;
        }

        if (cloexec < 0) {
                int fdflags;

                /* Inherit the current setting of the source fd */
                fdflags = fcntl(from, F_GETFD, 0);
                if (fdflags < 0)
                        return -errno;

                cloexec = !!(fdflags & FD_CLOEXEC);
        }

        r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
        if (r < 0)
                return -errno;

        assert(r == to);

        safe_close(from);

        return to;
}
425 int acquire_data_fd(const void *data, size_t size, unsigned flags) {
427 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
428 _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
429 char pattern[] = "/dev/shm/data-fd-XXXXXX";
430 _cleanup_close_ int fd = -1;
435 assert(data || size == 0);
437 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
438 * complex than I wish it was. But here's why:
440 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
441 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
443 * b) Then, we try classic pipes. They are the second best options, as we can close the writing side, retaining
444 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
445 * clients can only bump their size to a system-wide limit, which might be quite low.
447 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
448 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
449 * /proc/self/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
451 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
453 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
456 if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
457 /* As a special case, return /dev/null if we have been called for an empty data block */
458 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
465 if ((flags & ACQUIRE_NO_MEMFD) == 0) {
466 fd = memfd_new("data-fd");
470 n = write(fd, data, size);
473 if ((size_t) n != size)
476 f = lseek(fd, 0, SEEK_SET);
480 r = memfd_set_sealed(fd);
491 if ((flags & ACQUIRE_NO_PIPE) == 0) {
492 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
495 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
499 if ((size_t) isz < size) {
501 if (isz < 0 || (size_t) isz != size)
504 /* Try to bump the pipe size */
505 (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
507 /* See if that worked */
508 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
512 if ((size_t) isz < size)
516 n = write(pipefds[1], data, size);
519 if ((size_t) n != size)
522 (void) fd_nonblock(pipefds[0], false);
531 if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
532 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
534 goto try_dev_shm_without_o_tmpfile;
536 n = write(fd, data, size);
539 if ((size_t) n != size)
542 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
543 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
544 r = open(procfs_path, O_RDONLY|O_CLOEXEC);
551 try_dev_shm_without_o_tmpfile:
552 if ((flags & ACQUIRE_NO_REGULAR) == 0) {
553 fd = mkostemp_safe(pattern);
557 n = write(fd, data, size);
560 goto unlink_and_return;
562 if ((size_t) n != size) {
564 goto unlink_and_return;
567 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
568 r = open(pattern, O_RDONLY|O_CLOEXEC);
573 (void) unlink(pattern);
580 int fd_move_above_stdio(int fd) {
584 /* Moves the specified file descriptor if possible out of the range [0…2], i.e. the range of
585 * stdin/stdout/stderr. If it can't be moved outside of this range the original file descriptor is
586 * returned. This call is supposed to be used for long-lasting file descriptors we allocate in our code that
587 * might get loaded into foreign code, and where we want ensure our fds are unlikely used accidentally as
588 * stdin/stdout/stderr of unrelated code.
590 * Note that this doesn't fix any real bugs, it just makes it less likely that our code will be affected by
591 * buggy code from others that mindlessly invokes 'fprintf(stderr, …' or similar in places where stderr has
592 * been closed before.
594 * This function is written in a "best-effort" and "least-impact" style. This means whenever we encounter an
595 * error we simply return the original file descriptor, and we do not touch errno. */
597 if (fd < 0 || fd > 2)
600 flags = fcntl(fd, F_GETFD, 0);
604 if (flags & FD_CLOEXEC)
605 copy = fcntl(fd, F_DUPFD_CLOEXEC, 3);
607 copy = fcntl(fd, F_DUPFD, 3);
int rearrange_stdio(int original_input_fd, int original_output_fd, int original_error_fd) {

        int fd[3] = { /* The fds we work on, indexed by their target position */
                original_input_fd,
                original_output_fd,
                original_error_fd
        };

        int r, i,
                null_fd = -1,                /* the fd to /dev/null, in case we had to open it */
                copy_fd[3] = { -1, -1, -1 }; /* fds we duplicated temporarily, to be closed at the end */
        bool null_readable, null_writable;

        /* Installs the three specified file descriptors as stdin, stdout, stderr. A descriptor specified as
         * -1 is connected to /dev/null instead. A descriptor passed as itself (e.g. stdin as STDIN_FILENO)
         * is left in place, but its O_CLOEXEC bit is turned off.
         *
         * Any passed file descriptor > 2 is closed, both on success and on failure! Callers should hence
         * assume the input fds are invalidated when this function returns.
         *
         * When this function fails stdin/stdout/stderr might remain half set up!
         *
         * O_CLOEXEC is turned off for all three file descriptors (which is how it should be for
         * stdin/stdout/stderr). */

        null_readable = original_input_fd < 0;
        null_writable = original_output_fd < 0 || original_error_fd < 0;

        /* First step, open /dev/null once, if we need it */
        if (null_readable || null_writable) {
                int access_mode;

                if (null_readable && null_writable)
                        access_mode = O_RDWR;
                else if (null_readable)
                        access_mode = O_RDONLY;
                else
                        access_mode = O_WRONLY;

                /* Open with O_CLOEXEC first, it is dropped when the fd is moved to its final position. */
                null_fd = open("/dev/null", access_mode | O_CLOEXEC);
                if (null_fd < 0) {
                        r = -errno;
                        goto finish;
                }

                /* If this fd is in the 0…2 range, let's move it out of it */
                if (null_fd < 3) {
                        int copy;

                        copy = fcntl(null_fd, F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
                        if (copy < 0) {
                                r = -errno;
                                goto finish;
                        }

                        safe_close(null_fd);
                        null_fd = copy;
                }
        }

        /* Let's assemble fd[] with the fds to install in place of stdin/stdout/stderr */
        for (i = 0; i < 3; i++) {

                if (fd[i] < 0) {
                        /* A negative parameter means: connect this one to /dev/null */
                        fd[i] = null_fd;
                        continue;
                }

                if (fd[i] == i || fd[i] >= 3)
                        continue;

                /* This fd is in the 0…2 territory, but not at its intended place, move it out of there, so
                 * that we can work there. */
                copy_fd[i] = fcntl(fd[i], F_DUPFD_CLOEXEC, 3); /* Duplicate this with O_CLOEXEC set */
                if (copy_fd[i] < 0) {
                        r = -errno;
                        goto finish;
                }

                fd[i] = copy_fd[i];
        }

        /* All fds in fd[] are now either already at their target position or above the stdio range, so
         * that we have freedom to move them around. Let's now move them to the right places. This is the
         * point of no return. */
        for (i = 0; i < 3; i++) {

                if (fd[i] == i) {

                        /* fd is already in place, but let's make sure O_CLOEXEC is off */
                        r = fd_cloexec(i, false);
                        if (r < 0)
                                goto finish;

                } else {
                        assert(fd[i] > 2);

                        if (dup2(fd[i], i) < 0) { /* Turns off O_CLOEXEC on the new fd. */
                                r = -errno;
                                goto finish;
                        }
                }
        }

        r = 0;

finish:
        /* Close the original fds, but only if they were outside of the stdio range. Take care not to close
         * the same fd twice if it was passed in multiple times. */
        safe_close_above_stdio(original_input_fd);
        if (original_output_fd != original_input_fd)
                safe_close_above_stdio(original_output_fd);
        if (original_error_fd != original_input_fd && original_error_fd != original_output_fd)
                safe_close_above_stdio(original_error_fd);

        /* Close the copies we moved > 2 */
        for (i = 0; i < 3; i++)
                safe_close(copy_fd[i]);

        /* Close our null fd, if it's > 2 */
        safe_close_above_stdio(null_fd);

        return r;
}