1 /* SPDX-License-Identifier: LGPL-2.1+ */
10 #include <sys/sendfile.h>
11 //#include <sys/stat.h>
12 #include <sys/xattr.h>
16 //#include "alloc-util.h"
17 //#include "btrfs-util.h"
18 //#include "chattr-util.h"
20 //#include "dirent-util.h"
21 //#include "fd-util.h"
23 //#include "fs-util.h"
27 //#include "mount-util.h"
28 //#include "string-util.h"
30 #include "time-util.h"
31 //#include "umask-util.h"
32 #include "user-util.h"
33 //#include "xattr-util.h"
35 #define COPY_BUFFER_SIZE (16U*1024U)
37 /* A safety net for descending recursively into file system trees to copy. On Linux PATH_MAX is 4096, which means the
38 * deepest valid path one can build is around 2048, which we hence use as a safety net here, to not spin endlessly in
39 * case of bind mount cycles and suchlike. */
40 #define COPY_DEPTH_MAX 2048U
42 static ssize_t try_copy_file_range(
43 int fd_in, loff_t *off_in,
44 int fd_out, loff_t *off_out,
54 r = copy_file_range(fd_in, off_in, fd_out, off_out, len, flags);
56 have = r >= 0 || errno != ENOSYS;
66 FD_IS_NONBLOCKING_PIPE,
69 static int fd_is_nonblock_pipe(int fd) {
73 /* Checks whether the specified file descriptor refers to a pipe, and if so, whether O_NONBLOCK is set. */
75 if (fstat(fd, &st) < 0)
78 if (!S_ISFIFO(st.st_mode))
81 flags = fcntl(fd, F_GETFL);
85 return FLAGS_SET(flags, O_NONBLOCK) ? FD_IS_NONBLOCKING_PIPE : FD_IS_BLOCKING_PIPE;
93 size_t *ret_remains_size) {
95 bool try_cfr = true, try_sendfile = true, try_splice = true;
96 int r, nonblock_pipe = -1;
97 size_t m = SSIZE_MAX; /* that is the maximum that sendfile and c_f_r accept */
102 /* Tries to copy bytes from the file descriptor 'fdf' to 'fdt' in the smartest possible way. Copies a maximum
103 * of 'max_bytes', which may be specified as UINT64_MAX, in which case no maximum is applied. Returns negative on
104 * error, zero if EOF is hit before the bytes limit is hit and positive otherwise. If the copy fails for some
105 * reason but we read but didn't yet write some data and ret_remains/ret_remains_size is not NULL, then it will
106 * be initialized with an allocated buffer containing this "remaining" data. Note that these two parameters are
107 * initialized with a valid buffer only on failure and only if there's actually data already read. Otherwise
108 * these parameters if non-NULL are set to NULL. */
112 if (ret_remains_size)
113 *ret_remains_size = 0;
115 #if 0 /// UNNEEDED by elogind
116 /* Try btrfs reflinks first. This only works on regular, seekable files, hence let's check the file offsets of
117 * source and destination first. */
118 if ((copy_flags & COPY_REFLINK)) {
121 foffset = lseek(fdf, 0, SEEK_CUR);
125 toffset = lseek(fdt, 0, SEEK_CUR);
128 if (foffset == 0 && toffset == 0 && max_bytes == UINT64_MAX)
129 r = btrfs_reflink(fdf, fdt); /* full file reflink */
131 r = btrfs_clone_range(fdf, foffset, fdt, toffset, max_bytes == UINT64_MAX ? 0 : max_bytes); /* partial reflink */
135 /* This worked, yay! Now — to be fully correct — let's adjust the file pointers */
136 if (max_bytes == UINT64_MAX) {
138 /* We cloned to the end of the source file, let's position the read
139 * pointer there, and query it at the same time. */
140 t = lseek(fdf, 0, SEEK_END);
146 /* Let's adjust the destination file write pointer by the same number
148 t = lseek(fdt, toffset + (t - foffset), SEEK_SET);
152 return 0; /* we copied the whole thing, hence hit EOF, return 0 */
154 t = lseek(fdf, foffset + max_bytes, SEEK_SET);
158 t = lseek(fdt, toffset + max_bytes, SEEK_SET);
162 return 1; /* we copied only some number of bytes, which worked, but this means we didn't hit EOF, return 1 */
166 log_debug_errno(r, "Reflinking didn't work, falling back to non-reflink copying: %m");
176 return 1; /* return > 0 if we hit the max_bytes limit */
178 if (max_bytes != UINT64_MAX && m > max_bytes)
181 /* First try copy_file_range(), unless we already tried */
183 n = try_copy_file_range(fdf, NULL, fdt, NULL, m, 0u);
185 if (!IN_SET(n, -EINVAL, -ENOSYS, -EXDEV, -EBADF))
189 /* use fallback below */
190 } else if (n == 0) /* EOF */
197 /* Then try sendfile(), unless we already tried */
199 n = sendfile(fdt, fdf, NULL, m);
201 if (!IN_SET(errno, EINVAL, ENOSYS))
204 try_sendfile = false;
205 /* use fallback below */
206 } else if (n == 0) /* EOF */
213 /* Then try splice, unless we already tried. */
216 /* splice()'s asynchronous I/O support is a bit weird. When it encounters a pipe file
217 * descriptor, then it will ignore its O_NONBLOCK flag and instead only honour the
218 * SPLICE_F_NONBLOCK flag specified in its flag parameter. Let's hide this behaviour here, and
219 * check if either of the specified fds are a pipe, and if so, let's pass the flag
220 * automatically, depending on O_NONBLOCK being set.
222 * Here's a twist though: when we use it to move data between two pipes of which one has
223 * O_NONBLOCK set and the other has not, then we have no individual control over O_NONBLOCK
224 * behaviour. Hence in that case we can't use splice() and still guarantee systematic
225 * O_NONBLOCK behaviour, hence don't. */
227 if (nonblock_pipe < 0) {
230 /* Check if either of these fds is a pipe, and if so non-blocking or not */
231 a = fd_is_nonblock_pipe(fdf);
235 b = fd_is_nonblock_pipe(fdt);
239 if ((a == FD_IS_NO_PIPE && b == FD_IS_NO_PIPE) ||
240 (a == FD_IS_BLOCKING_PIPE && b == FD_IS_NONBLOCKING_PIPE) ||
241 (a == FD_IS_NONBLOCKING_PIPE && b == FD_IS_BLOCKING_PIPE))
243 /* splice() only works if one of the fds is a pipe. If neither is, let's skip
244 * this step right-away. As mentioned above, if one of the two fds refers to a
245 * blocking pipe and the other to a non-blocking pipe, we can't use splice()
246 * either, hence don't try either. This hence means we can only use splice() if
247 * either only one of the two fds is a pipe, or if both are pipes with the same
248 * nonblocking flag setting. */
252 nonblock_pipe = a == FD_IS_NONBLOCKING_PIPE || b == FD_IS_NONBLOCKING_PIPE;
257 n = splice(fdf, NULL, fdt, NULL, m, nonblock_pipe ? SPLICE_F_NONBLOCK : 0);
259 if (!IN_SET(errno, EINVAL, ENOSYS))
263 /* use fallback below */
264 } else if (n == 0) /* EOF */
271 /* As a fallback just copy bits by hand */
273 uint8_t buf[MIN(m, COPY_BUFFER_SIZE)], *p = buf;
276 n = read(fdf, buf, sizeof buf);
279 if (n == 0) /* EOF */
286 k = write(fdt, p, z);
300 if (ret_remains_size)
301 *ret_remains_size = z;
313 if (max_bytes != (uint64_t) -1) {
314 assert(max_bytes >= (uint64_t) n);
317 /* sendfile accepts at most SSIZE_MAX-offset bytes to copy,
318 * so reduce our maximum by the amount we already copied,
319 * but don't go below our copy buffer size, unless we are
320 * close to the limit of bytes we are allowed to copy. */
321 m = MAX(MIN(COPY_BUFFER_SIZE, max_bytes), m - n);
324 return 0; /* return 0 if we hit EOF earlier than the size limit */
327 #if 0 /// UNNEEDED by elogind
328 static int fd_copy_symlink(
331 const struct stat *st,
336 CopyFlags copy_flags) {
338 _cleanup_free_ char *target = NULL;
345 r = readlinkat_malloc(df, from, &target);
349 if (symlinkat(target, dt, to) < 0)
353 uid_is_valid(override_uid) ? override_uid : st->st_uid,
354 gid_is_valid(override_gid) ? override_gid : st->st_gid,
355 AT_SYMLINK_NOFOLLOW) < 0)
361 static int fd_copy_regular(
364 const struct stat *st,
369 CopyFlags copy_flags) {
371 _cleanup_close_ int fdf = -1, fdt = -1;
372 struct timespec ts[2];
379 fdf = openat(df, from, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
383 fdt = openat(dt, to, O_WRONLY|O_CREAT|O_EXCL|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW, st->st_mode & 07777);
387 r = copy_bytes(fdf, fdt, (uint64_t) -1, copy_flags);
389 (void) unlinkat(dt, to, 0);
394 uid_is_valid(override_uid) ? override_uid : st->st_uid,
395 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
398 if (fchmod(fdt, st->st_mode & 07777) < 0)
403 (void) futimens(fdt, ts);
404 (void) copy_xattr(fdf, fdt);
411 (void) unlinkat(dt, to, 0);
417 static int fd_copy_fifo(
420 const struct stat *st,
425 CopyFlags copy_flags) {
432 r = mkfifoat(dt, to, st->st_mode & 07777);
437 uid_is_valid(override_uid) ? override_uid : st->st_uid,
438 gid_is_valid(override_gid) ? override_gid : st->st_gid,
439 AT_SYMLINK_NOFOLLOW) < 0)
442 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
448 static int fd_copy_node(
451 const struct stat *st,
456 CopyFlags copy_flags) {
463 r = mknodat(dt, to, st->st_mode, st->st_rdev);
468 uid_is_valid(override_uid) ? override_uid : st->st_uid,
469 gid_is_valid(override_gid) ? override_gid : st->st_gid,
470 AT_SYMLINK_NOFOLLOW) < 0)
473 if (fchmodat(dt, to, st->st_mode & 07777, 0) < 0)
479 static int fd_copy_directory(
482 const struct stat *st,
485 dev_t original_device,
489 CopyFlags copy_flags) {
491 _cleanup_close_ int fdf = -1, fdt = -1;
492 _cleanup_closedir_ DIR *d = NULL;
501 return -ENAMETOOLONG;
504 fdf = openat(df, from, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
506 fdf = fcntl(df, F_DUPFD_CLOEXEC, 3);
515 r = mkdirat(dt, to, st->st_mode & 07777);
518 else if (errno == EEXIST && (copy_flags & COPY_MERGE))
523 fdt = openat(dt, to, O_RDONLY|O_DIRECTORY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
529 FOREACH_DIRENT_ALL(de, d, return -errno) {
533 if (dot_or_dot_dot(de->d_name))
536 if (fstatat(dirfd(d), de->d_name, &buf, AT_SYMLINK_NOFOLLOW) < 0) {
541 if (S_ISDIR(buf.st_mode)) {
543 * Don't descend into directories on other file systems, if this is requested. We do a simple
544 * .st_dev check here, which basically comes for free. Note that we do this check only on
545 * directories, not other kinds of file system objects, for two reasons:
547 * • The kernel's overlayfs pseudo file system that overlays multiple real file systems
548 * propagates the .st_dev field of the file system a file originates from all the way up
549 * through the stack to stat(). It doesn't do that for directories however. This means that
550 * comparing .st_dev on non-directories suggests that they all are mount points. To avoid
551 * confusion we hence avoid relying on this check for regular files.
553 * • The main reason we do this check at all is to protect ourselves from bind mount cycles,
554 * where we really want to avoid descending down in all eternity. However the .st_dev check
555 * is usually not sufficient for this protection anyway, as bind mount cycles from the same
556 * file system onto itself can't be detected that way. (Note we also do a recursion depth
557 * check, which is probably the better protection in this regard, which is why
558 * COPY_SAME_MOUNT is optional).
561 if (FLAGS_SET(copy_flags, COPY_SAME_MOUNT)) {
562 if (buf.st_dev != original_device)
565 r = fd_is_mount_point(dirfd(d), de->d_name, 0);
572 q = fd_copy_directory(dirfd(d), de->d_name, &buf, fdt, de->d_name, original_device, depth_left-1, override_uid, override_gid, copy_flags);
573 } else if (S_ISREG(buf.st_mode))
574 q = fd_copy_regular(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
575 else if (S_ISLNK(buf.st_mode))
576 q = fd_copy_symlink(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
577 else if (S_ISFIFO(buf.st_mode))
578 q = fd_copy_fifo(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
579 else if (S_ISBLK(buf.st_mode) || S_ISCHR(buf.st_mode) || S_ISSOCK(buf.st_mode))
580 q = fd_copy_node(dirfd(d), de->d_name, &buf, fdt, de->d_name, override_uid, override_gid, copy_flags);
584 if (q == -EEXIST && (copy_flags & COPY_MERGE))
592 struct timespec ut[2] = {
598 uid_is_valid(override_uid) ? override_uid : st->st_uid,
599 gid_is_valid(override_gid) ? override_gid : st->st_gid) < 0)
602 if (fchmod(fdt, st->st_mode & 07777) < 0)
605 (void) copy_xattr(dirfd(d), fdt);
606 (void) futimens(fdt, ut);
612 int copy_tree_at(int fdf, const char *from, int fdt, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
618 if (fstatat(fdf, from, &st, AT_SYMLINK_NOFOLLOW) < 0)
621 if (S_ISREG(st.st_mode))
622 return fd_copy_regular(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
623 else if (S_ISDIR(st.st_mode))
624 return fd_copy_directory(fdf, from, &st, fdt, to, st.st_dev, COPY_DEPTH_MAX, override_uid, override_gid, copy_flags);
625 else if (S_ISLNK(st.st_mode))
626 return fd_copy_symlink(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
627 else if (S_ISFIFO(st.st_mode))
628 return fd_copy_fifo(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
629 else if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode) || S_ISSOCK(st.st_mode))
630 return fd_copy_node(fdf, from, &st, fdt, to, override_uid, override_gid, copy_flags);
635 int copy_tree(const char *from, const char *to, uid_t override_uid, gid_t override_gid, CopyFlags copy_flags) {
636 return copy_tree_at(AT_FDCWD, from, AT_FDCWD, to, override_uid, override_gid, copy_flags);
639 int copy_directory_fd(int dirfd, const char *to, CopyFlags copy_flags) {
645 if (fstat(dirfd, &st) < 0)
648 if (!S_ISDIR(st.st_mode))
651 return fd_copy_directory(dirfd, NULL, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags);
654 int copy_directory(const char *from, const char *to, CopyFlags copy_flags) {
660 if (lstat(from, &st) < 0)
663 if (!S_ISDIR(st.st_mode))
666 return fd_copy_directory(AT_FDCWD, from, &st, AT_FDCWD, to, st.st_dev, COPY_DEPTH_MAX, UID_INVALID, GID_INVALID, copy_flags);
669 int copy_file_fd(const char *from, int fdt, CopyFlags copy_flags) {
670 _cleanup_close_ int fdf = -1;
676 fdf = open(from, O_RDONLY|O_CLOEXEC|O_NOCTTY);
680 r = copy_bytes(fdf, fdt, (uint64_t) -1, copy_flags);
682 (void) copy_times(fdf, fdt);
683 (void) copy_xattr(fdf, fdt);
688 int copy_file(const char *from, const char *to, int flags, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
694 RUN_WITH_UMASK(0000) {
695 fdt = open(to, flags|O_WRONLY|O_CREAT|O_CLOEXEC|O_NOCTTY, mode);
700 if (chattr_flags != 0)
701 (void) chattr_fd(fdt, chattr_flags, (unsigned) -1);
703 r = copy_file_fd(from, fdt, copy_flags);
710 if (close(fdt) < 0) {
718 int copy_file_atomic(const char *from, const char *to, mode_t mode, unsigned chattr_flags, CopyFlags copy_flags) {
719 _cleanup_(unlink_and_freep) char *t = NULL;
720 _cleanup_close_ int fdt = -1;
726 /* We try to use O_TMPFILE here to create the file if we can. Note that that only works if COPY_REPLACE is not
727 * set though as we need to use linkat() for linking the O_TMPFILE file into the file system but that system
728 * call can't replace existing files. Hence, if COPY_REPLACE is set we create a temporary name in the file
729 * system right-away and unconditionally which we then can renameat() to the right name after we completed
732 if (copy_flags & COPY_REPLACE) {
733 r = tempfn_random(to, NULL, &t);
737 fdt = open(t, O_CREAT|O_EXCL|O_NOFOLLOW|O_NOCTTY|O_WRONLY|O_CLOEXEC, 0600);
743 fdt = open_tmpfile_linkable(to, O_WRONLY|O_CLOEXEC, &t);
748 if (chattr_flags != 0)
749 (void) chattr_fd(fdt, chattr_flags, (unsigned) -1);
751 r = copy_file_fd(from, fdt, copy_flags);
755 if (fchmod(fdt, mode) < 0)
758 if (copy_flags & COPY_REPLACE) {
759 if (renameat(AT_FDCWD, t, AT_FDCWD, to) < 0)
762 r = link_tmpfile(fdt, t, to);
771 int copy_times(int fdf, int fdt) {
772 struct timespec ut[2];
779 if (fstat(fdf, &st) < 0)
785 if (futimens(fdt, ut) < 0)
788 if (fd_getcrtime(fdf, &crtime) >= 0)
789 (void) fd_setcrtime(fdt, crtime);
794 int copy_xattr(int fdf, int fdt) {
795 _cleanup_free_ char *bufa = NULL, *bufb = NULL;
796 size_t sza = 100, szb = 100;
806 n = flistxattr(fdf, bufa, sza);
824 assert(l < (size_t) n);
826 if (startswith(p, "user.")) {
835 m = fgetxattr(fdf, p, bufb, szb);
837 if (errno == ERANGE) {
846 if (fsetxattr(fdt, p, bufb, m, 0) < 0)