From: Lennart Poettering Date: Thu, 30 Oct 2014 16:36:02 +0000 (+0100) Subject: journal: when sending huge log messages prefer memfds over temporary files in /dev/shm X-Git-Tag: v218~657 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~ianmdlvl/git?p=elogind.git;a=commitdiff_plain;h=c79e98eadd3056a36a662699fa650db5b1bca0c3 journal: when sending huge log messages prefer memfds over temporary files in /dev/shm Previously when a log message grew beyond the maximum AF_UNIX/SOCK_DGRAM datagram limit we'd send an fd to a deleted file in /dev/shm instead. Because the sender could still modify the file after delivery we had to immediately copy the data on the receiving side. With memfds we can optimize this logic, and also remove the dependency on /dev/shm: simply send a sealed memfd around, and if we detect the seal memory map the fd and use it directly. --- diff --git a/src/journal/journal-send.c b/src/journal/journal-send.c index bb1ef66dc..bfc404e37 100644 --- a/src/journal/journal-send.c +++ b/src/journal/journal-send.c @@ -198,7 +198,7 @@ finish: _public_ int sd_journal_sendv(const struct iovec *iov, int n) { PROTECT_ERRNO; - int fd; + int fd, r; _cleanup_close_ int buffer_fd = -1; struct iovec *w; uint64_t *l; @@ -218,6 +218,7 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) { } control; struct cmsghdr *cmsg; bool have_syslog_identifier = false; + bool seal = true; assert_return(iov, -EINVAL); assert_return(n > 0, -EINVAL); @@ -304,21 +305,36 @@ _public_ int sd_journal_sendv(const struct iovec *iov, int n) { if (errno != EMSGSIZE && errno != ENOBUFS) return -errno; - /* Message doesn't fit... Let's dump the data in a temporary - * file and just pass a file descriptor of it to the other - * side. + /* Message doesn't fit... Let's dump the data in a memfd or + * temporary file and just pass a file descriptor of it to the + * other side. * - * We use /dev/shm instead of /tmp here, since we want this to - * be a tmpfs, and one that is available from early boot on - * and where unprivileged users can create files. */ - buffer_fd = open_tmpfile("/dev/shm", O_RDWR | O_CLOEXEC); - if (buffer_fd < 0) - return buffer_fd; + * For the temporary files we use /dev/shm instead of /tmp + * here, since we want this to be a tmpfs, and one that is + * available from early boot on and where unprivileged users + * can create files. */ + buffer_fd = memfd_create("journal-message", MFD_ALLOW_SEALING | MFD_CLOEXEC); + if (buffer_fd < 0) { + if (errno == ENOSYS) { + buffer_fd = open_tmpfile("/dev/shm", O_RDWR | O_CLOEXEC); + if (buffer_fd < 0) + return buffer_fd; + + seal = false; + } else + return -errno; + } n = writev(buffer_fd, w, j); if (n < 0) return -errno; + if (seal) { + r = fcntl(buffer_fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL); + if (r < 0) + return -errno; + } + mh.msg_iov = NULL; mh.msg_iovlen = 0; diff --git a/src/journal/journald-native.c b/src/journal/journald-native.c index a6352022d..6a9ae8a6c 100644 --- a/src/journal/journald-native.c +++ b/src/journal/journald-native.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "socket-util.h" #include "path-util.h" @@ -306,17 +307,29 @@ void server_process_native_file( const char *label, size_t label_len) { struct stat st; - _cleanup_free_ void *p = NULL; - ssize_t n; + bool sealed; int r; + /* Data is in the passed fd, since it didn't fit in a + * datagram. */ + assert(s); assert(fd >= 0); - if (!ucred || ucred->uid != 0) { + /* If it's a memfd, check if it is sealed. If so, we can just + * use map it and use it, and do not need to copy the data + * out. */ + r = fcntl(fd, F_GET_SEALS); + sealed = r >= 0 && + (r & (F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL)) == (F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL); + + if (!sealed && (!ucred || ucred->uid != 0)) { _cleanup_free_ char *sl = NULL, *k = NULL; const char *e; + /* If this is not a sealed memfd, and the peer is unknown or + * unprivileged, then verify the path. */ + if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) { log_oom(); return; @@ -344,11 +357,6 @@ void server_process_native_file( } } - /* Data is in the passed file, since it didn't fit in a - * datagram. We can't map the file here, since clients might - * then truncate it and trigger a SIGBUS for us. So let's - * stupidly read it */ - if (fstat(fd, &st) < 0) { log_error("Failed to stat passed file, ignoring: %m"); return; @@ -367,17 +375,41 @@ void server_process_native_file( return; } - p = malloc(st.st_size); - if (!p) { - log_oom(); - return; - } + if (sealed) { + void *p; + size_t ps; + + /* The file is sealed, we can just map it and use it. */ - n = pread(fd, p, st.st_size, 0); - if (n < 0) - log_error("Failed to read file, ignoring: %s", strerror(-n)); - else if (n > 0) - server_process_native_message(s, p, n, ucred, tv, label, label_len); + ps = PAGE_ALIGN(st.st_size); + p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0); + if (p == MAP_FAILED) { + log_error("Failed to map memfd, ignoring: %m"); + return; + } + + server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len); + assert_se(munmap(p, ps) >= 0); + } else { + _cleanup_free_ void *p = NULL; + ssize_t n; + + /* The file is not sealed, we can't map the file here, since + * clients might then truncate it and trigger a SIGBUS for + * us. So let's stupidly read it */ + + p = malloc(st.st_size); + if (!p) { + log_oom(); + return; + } + + n = pread(fd, p, st.st_size, 0); + if (n < 0) + log_error("Failed to read file, ignoring: %s", strerror(-n)); + else if (n > 0) + server_process_native_message(s, p, n, ucred, tv, label, label_len); + } } int server_open_native_socket(Server*s) {