1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2010 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/resource.h>
24 #include <sys/socket.h>
28 #include "dirent-util.h"
33 #include "memfd-util.h"
35 #include "parse-util.h"
36 #include "path-util.h"
37 #include "process-util.h"
38 #include "socket-util.h"
39 #include "stdio-util.h"
/* close_nointr(): closes fd. As the notes below explain, an EINTR result is ignored rather than retried.
 * NOTE(review): the statements of this function are not visible in this listing; callers in this file
 * compare its result against negative errno values (see the -EBADF check in safe_close()). */
42 int close_nointr(int fd) {
49 * Just ignore EINTR; a retry loop is the wrong thing to do on
52 * http://lkml.indiana.edu/hypermail/linux/kernel/0509.1/0877.html
53 * https://bugzilla.gnome.org/show_bug.cgi?id=682819
54 * http://utcc.utoronto.ca/~cks/space/blog/unix/CloseEINTR
55 * https://sites.google.com/site/michaelsafyan/software-engineering/checkforeintrwheninvokingclosethinkagain
/* safe_close(): wrapper around close_nointr() for callers that cannot handle a close failure. The only
 * result it asserts against is -EBADF, i.e. an fd that was never valid; every other error is tolerated
 * because the fd is gone anyway. The contract (errno preserved, negative fds ignored, -1 returned) is
 * spelled out in the notes below. */
63 int safe_close(int fd) {
66 * Like close_nointr() but cannot fail. Guarantees errno is
67 * unchanged. Is a NOP with negative fds passed, and returns
68 * -1, so that it can be used in this syntax:
70 * fd = safe_close(fd);
76 /* The kernel might return pretty much any error code
77 * via close(), but the fd will be closed anyway. The
78 * only condition we want to check for here is whether
79 * the fd was invalid at all... */
81 assert_se(close_nointr(fd) != -EBADF);
/* safe_close_pair(): closes both fds stored in p[] with safe_close() and writes the value safe_close()
 * returns back into p[0] and p[1]. A pair that holds the same fd in both slots is closed only once.
 * NOTE(review): the condition selecting between the two paths is not visible in this listing. */
87 void safe_close_pair(int p[]) {
91 /* Special case pairs which use the same fd in both
93 p[0] = p[1] = safe_close(p[0]);
97 p[0] = safe_close(p[0]);
98 p[1] = safe_close(p[1]);
/* close_many(): iterates over the n_fd entries of fds[]. A NULL array is only accepted when n_fd is zero.
 * NOTE(review): the loop body is not visible in this listing; presumably each entry is closed - confirm. */
101 void close_many(const int fds[], unsigned n_fd) {
104 assert(fds || n_fd <= 0);
106 for (i = 0; i < n_fd; i++)
/* fclose_nointr(): the stdio stream counterpart of close_nointr(), as the note below states.
 * NOTE(review): the statements are not visible in this listing; presumably the return convention matches
 * close_nointr() - confirm before relying on it. */
110 int fclose_nointr(FILE *f) {
113 /* Same as close_nointr(), but for fclose() */
/* safe_fclose(): same as safe_close(), but for stdio streams. A NULL stream is accepted and ignored,
 * errno is left untouched for the caller, and NULL is always returned so that the call can be used as
 *
 *         f = safe_fclose(f);
 *
 * fclose_nointr() mirrors close_nointr(), whose result safe_close() compares against the negative value
 * -EBADF. The check here therefore has to use -EBADF as well: comparing against the positive EBADF can
 * never match and would let an invalid stream slip through unnoticed. Any other error is ignored, as the
 * stream is gone either way. */
FILE* safe_fclose(FILE *f) {
        int saved_errno;

        if (!f)
                return NULL;

        /* Keep errno stable across the close, like safe_close() promises for plain fds */
        saved_errno = errno;
        assert_se(fclose_nointr(f) != -EBADF);
        errno = saved_errno;

        return NULL;
}
137 #if 0 /// UNNEEDED by elogind
/* safe_closedir(): closes the directory stream d with closedir() and asserts that the call either
 * succeeded or failed with something other than EBADF. This definition sits inside an #if 0 section that
 * is marked as unneeded by elogind. NOTE(review): the NULL handling and the return value are not visible
 * in this listing. */
138 DIR* safe_closedir(DIR *d) {
143 assert_se(closedir(d) >= 0 || errno != EBADF);
/* fd_nonblock(): reads the file status flags of fd with F_GETFL, computes a copy with O_NONBLOCK either
 * set or cleared, and writes it back with F_SETFL. NOTE(review): the branch condition and the return
 * statements are not visible in this listing; presumably the nonblock argument selects between setting
 * and clearing the flag - confirm. */
150 int fd_nonblock(int fd, bool nonblock) {
155 flags = fcntl(fd, F_GETFL, 0);
160 nflags = flags | O_NONBLOCK;
162 nflags = flags & ~O_NONBLOCK;
167 if (fcntl(fd, F_SETFL, nflags) < 0)
/* fd_cloexec(): reads the fd flags of fd with F_GETFD, computes a copy with FD_CLOEXEC either set or
 * cleared, and writes it back with F_SETFD. NOTE(review): the branch condition and the return statements
 * are not visible in this listing; presumably the cloexec argument selects between setting and clearing
 * the flag - confirm. */
173 int fd_cloexec(int fd, bool cloexec) {
178 flags = fcntl(fd, F_GETFD, 0);
183 nflags = flags | FD_CLOEXEC;
185 nflags = flags & ~FD_CLOEXEC;
190 if (fcntl(fd, F_SETFD, nflags) < 0)
/* stdio_unset_cloexec(): calls fd_cloexec() with cloexec set to false on stdin, stdout and stderr, in
 * that order. Failures of the individual calls are deliberately ignored. */
void stdio_unset_cloexec(void) {
        static const int stdio_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
        size_t i;

        for (i = 0; i < sizeof(stdio_fds) / sizeof(stdio_fds[0]); i++)
                (void) fd_cloexec(stdio_fds[i], false);
}
/* fd_in_set(): file-local helper that scans the n_fdset entries of fdset[]. A NULL array is only accepted
 * when n_fdset is zero. NOTE(review): the loop body is not visible in this listing; presumably it reports
 * whether fd occurs in fdset - confirm. */
202 _pure_ static bool fd_in_set(int fd, const int fdset[], unsigned n_fdset) {
205 assert(n_fdset == 0 || fdset);
207 for (i = 0; i < n_fdset; i++)
/* close_all_fds(): closes open fds with close_nointr(), skipping every fd listed in except[] (checked with
 * fd_in_set()). The fds are enumerated by reading /proc/self/fd; entries whose name does not parse as an
 * integer are skipped. When that directory cannot be opened the code falls back to probing every number
 * from 3 up to the RLIMIT_NOFILE hard limit. In both paths a close failure other than EBADF is recorded
 * in r only if r is still 0, so the first such error wins.
 * NOTE(review): the (int) cast of rl.rlim_max in the fallback loop looks fragile when the limit is
 * RLIM_INFINITY or larger than INT_MAX - confirm how that case is meant to behave.
 * NOTE(review): several statements, including the returns, are not visible in this listing. */
214 int close_all_fds(const int except[], unsigned n_except) {
215 _cleanup_closedir_ DIR *d = NULL;
219 assert(n_except == 0 || except);
221 d = opendir("/proc/self/fd");
226 /* When /proc isn't available (for example in chroots)
227 * the fallback is brute forcing through the fd
230 assert_se(getrlimit(RLIMIT_NOFILE, &rl) >= 0);
231 for (fd = 3; fd < (int) rl.rlim_max; fd ++) {
233 if (fd_in_set(fd, except, n_except))
236 if (close_nointr(fd) < 0)
237 if (errno != EBADF && r == 0)
244 FOREACH_DIRENT(de, d, return -errno) {
247 if (safe_atoi(de->d_name, &fd) < 0)
248 /* Let's better ignore this, just in case */
257 if (fd_in_set(fd, except, n_except))
260 if (close_nointr(fd) < 0) {
261 /* Valgrind has its own FD and doesn't want to have it closed */
262 if (errno != EBADF && r == 0)
270 #if 0 /// UNNEEDED by elogind
/* same_fd(): decides whether the fds a and b are the same. kcmp() with KCMP_FILE on the own PID is tried
 * first. The fstat() based path compares file type, then device and inode numbers, and finally fetches
 * the F_GETFL flags of both fds; character and block device fds get special treatment there, as the note
 * further down explains. This definition follows an #if 0 line marked as unneeded by elogind.
 * NOTE(review): the return statements and the conditions for leaving the kcmp() path are not visible in
 * this listing. */
271 int same_fd(int a, int b) {
272 struct stat sta, stb;
279 /* Compares two file descriptors. Note that semantics are
280 * quite different depending on whether we have kcmp() or we
281 * don't. If we have kcmp() this will only return true for
282 * dup()ed file descriptors, but not otherwise. If we don't
283 * have kcmp() this will also return true for two fds of the same
284 * file, created by separate open() calls. Since we use this
285 * call mostly for filtering out duplicates in the fd store
286 * this difference hopefully doesn't matter too much. */
291 /* Try to use kcmp() if we have it. */
292 pid = getpid_cached();
293 r = kcmp(pid, pid, KCMP_FILE, a, b);
301 /* We don't have kcmp(), use fstat() instead. */
302 if (fstat(a, &sta) < 0)
305 if (fstat(b, &stb) < 0)
308 if ((sta.st_mode & S_IFMT) != (stb.st_mode & S_IFMT))
311 /* We consider all device fds different, since two device fds
312 * might refer to quite different device contexts even though
313 * they share the same inode and backing dev_t. */
315 if (S_ISCHR(sta.st_mode) || S_ISBLK(sta.st_mode))
318 if (sta.st_dev != stb.st_dev || sta.st_ino != stb.st_ino)
321 /* The fds refer to the same inode on disk, let's also check
322 * if they have the same fd flags. This is useful to
323 * distinguish the read and write side of a pipe created with
325 fa = fcntl(a, F_GETFL);
329 fb = fcntl(b, F_GETFL);
/* cmsg_close_all(): walks every control message of mh and, for each one with level SOL_SOCKET and type
 * SCM_RIGHTS, hands the int array in its payload to close_many(). The number of fds is the payload size
 * (cmsg_len minus CMSG_LEN(0)) divided by sizeof(int). */
336 void cmsg_close_all(struct msghdr *mh) {
337 struct cmsghdr *cmsg;
341 CMSG_FOREACH(cmsg, mh)
342 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS)
343 close_many((int*) CMSG_DATA(cmsg), (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int));
/* fdname_is_valid(): checks whether s is acceptable as a name in $LISTEN_FDNAMES by walking the string
 * character by character. The rules are spelled out in the note below. NOTE(review): the per-character
 * checks, the length check and the return statements are not visible in this listing. */
346 bool fdname_is_valid(const char *s) {
349 /* Validates a name for $LISTEN_FDNAMES. We basically allow
350 * everything ASCII that's not a control character. Also, as
351 * special exception the ":" character is not allowed, as we
352 * use that as field separator in $LISTEN_FDNAMES.
354 * Note that the empty string is explicitly allowed
355 * here. However, we limit the length of the names to 255
361 for (p = s; *p; p++) {
/* fd_get_path(): looks up what fd refers to by reading the symlink /proc/self/fd/<fd> with
 * readlink_malloc(), which stores its result through ret. A result of -ENOENT is special-cased because a
 * missing link means the fd itself is invalid. NOTE(review): the value returned in that case and the
 * final return statement are not visible in this listing. */
373 int fd_get_path(int fd, char **ret) {
374 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
377 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
379 r = readlink_malloc(procfs_path, ret);
381 if (r == -ENOENT) /* If the file doesn't exist the fd is invalid */
/* move_fd(): moves the fd from onto the number to using dup3(), passing O_CLOEXEC or 0 according to the
 * cloexec setting. When FD_CLOEXEC is to be inherited, its current state is read from the source fd with
 * F_GETFD first. The meaning of the three cloexec values is described in the note below.
 * NOTE(review): the branch guarding the fd_cloexec(to, cloexec) call is not visible in this listing;
 * presumably it covers the case where from and to are already equal - confirm. */
388 int move_fd(int from, int to, int cloexec) {
391 /* Move fd 'from' to 'to', make sure FD_CLOEXEC remains equal if requested, and release the old fd. If
392 * 'cloexec' is passed as -1, the original FD_CLOEXEC is inherited for the new fd. If it is 0, it is turned
393 * off, if it is > 0 it is turned on. */
403 r = fd_cloexec(to, cloexec);
414 fl = fcntl(from, F_GETFD, 0);
418 cloexec = !!(fl & FD_CLOEXEC);
421 r = dup3(from, to, cloexec ? O_CLOEXEC : 0);
/* acquire_data_fd(): returns a read-only fd that yields the size bytes at data when read from. The
 * strategies tried, in order, are /dev/null for empty data, a sealed memfd, a pipe, an O_TMPFILE file in
 * /dev/shm and finally a regular temporary file in /dev/shm that is unlinked again. Each strategy is
 * skipped when the matching ACQUIRE_NO_* bit is set in flags. NOTE(review): the fallback conditions and
 * the return statements between the strategies are not visible in this listing. */
432 int acquire_data_fd(const void *data, size_t size, unsigned flags) {
434 char procfs_path[STRLEN("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
435 _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
436 char pattern[] = "/dev/shm/data-fd-XXXXXX";
437 _cleanup_close_ int fd = -1;
442 assert(data || size == 0);
444 /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
445 * complex than I wish it was. But here's why:
447 * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
448 * read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
450 * b) Then, we try classic pipes. They are the second best option, as we can close the writing side, retaining
451 * a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
452 * clients can only bump their size to a system-wide limit, which might be quite low.
454 * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
455 * earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
456 * /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
458 * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
460 * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
463 if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
464 /* As a special case, return /dev/null if we have been called for an empty data block */
465 r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
472 if ((flags & ACQUIRE_NO_MEMFD) == 0) {
473 fd = memfd_new("data-fd");
477 n = write(fd, data, size);
480 if ((size_t) n != size)
483 f = lseek(fd, 0, SEEK_SET);
487 r = memfd_set_sealed(fd);
498 if ((flags & ACQUIRE_NO_PIPE) == 0) {
499 if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
502 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
506 if ((size_t) isz < size) {
508 if (isz < 0 || (size_t) isz != size)
511 /* Try to bump the pipe size */
512 (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
514 /* See if that worked */
515 isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
519 if ((size_t) isz < size)
523 n = write(pipefds[1], data, size);
526 if ((size_t) n != size)
529 (void) fd_nonblock(pipefds[0], false);
538 if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
539 fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
541 goto try_dev_shm_without_o_tmpfile;
543 n = write(fd, data, size);
546 if ((size_t) n != size)
549 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
550 xsprintf(procfs_path, "/proc/self/fd/%i", fd);
551 r = open(procfs_path, O_RDONLY|O_CLOEXEC);
558 try_dev_shm_without_o_tmpfile:
559 if ((flags & ACQUIRE_NO_REGULAR) == 0) {
560 fd = mkostemp_safe(pattern);
564 n = write(fd, data, size);
567 goto unlink_and_return;
569 if ((size_t) n != size) {
571 goto unlink_and_return;
574 /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
575 r = open(pattern, O_RDONLY|O_CLOEXEC);
580 (void) unlink(pattern);