chiark / gitweb /
journal: when sending huge log messages prefer memfds over temporary files in /dev/shm
[elogind.git] / src / journal / journald-native.c
index c54f6475d330dc70c18eefc02cd635e70a0b1696..6a9ae8a6ceabbce87cbf548c4844bedb0e60802f 100644 (file)
@@ -22,6 +22,7 @@
 #include <unistd.h>
 #include <stddef.h>
 #include <sys/epoll.h>
+#include <sys/mman.h>
 
 #include "socket-util.h"
 #include "path-util.h"
@@ -82,7 +83,7 @@ void server_process_native_message(
         struct iovec *iovec = NULL;
         unsigned n = 0, j, tn = (unsigned) -1;
         const char *p;
-        size_t remaining, m = 0;
+        size_t remaining, m = 0, entry_size = 0;
         int priority = LOG_INFO;
         char *identifier = NULL, *message = NULL;
         pid_t object_pid = 0;
@@ -106,9 +107,16 @@ void server_process_native_message(
 
                 if (e == p) {
                         /* Entry separator */
+
+                        if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+                                log_debug("Entry is too big with %u properties and %zu bytes, ignoring.", n, entry_size);
+                                continue;
+                        }
+
                         server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority, object_pid);
                         n = 0;
                         priority = LOG_INFO;
+                        entry_size = 0;
 
                         p++;
                         remaining--;
@@ -126,8 +134,7 @@ void server_process_native_message(
                 /* A property follows */
 
                 /* n received properties, +1 for _TRANSPORT */
-                if (!GREEDY_REALLOC(iovec, m, n + 1 + N_IOVEC_META_FIELDS +
-                                              !!object_pid * N_IOVEC_OBJECT_FIELDS)) {
+                if (!GREEDY_REALLOC(iovec, m, n + 1 + N_IOVEC_META_FIELDS + !!object_pid * N_IOVEC_OBJECT_FIELDS)) {
                         log_oom();
                         break;
                 }
@@ -145,6 +152,7 @@ void server_process_native_message(
                                  * field */
                                 iovec[n].iov_base = (char*) p;
                                 iovec[n].iov_len = l;
+                                entry_size += iovec[n].iov_len;
                                 n++;
 
                                 /* We need to determine the priority
@@ -214,7 +222,7 @@ void server_process_native_message(
                         l = le64toh(l_le);
 
                         if (l > DATA_SIZE_MAX) {
-                                log_debug("Received binary data block too large, ignoring.");
+                                log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring.", l);
                                 break;
                         }
 
@@ -237,6 +245,7 @@ void server_process_native_message(
                         if (valid_user_field(p, e - p, false)) {
                                 iovec[n].iov_base = k;
                                 iovec[n].iov_len = (e - p) + 1 + l;
+                                entry_size += iovec[n].iov_len;
                                 n++;
                         } else
                                 free(k);
@@ -251,6 +260,13 @@ void server_process_native_message(
 
         tn = n++;
         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
+        entry_size += strlen("_TRANSPORT=journal");
+
+        if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+                log_debug("Entry is too big with %u properties and %zu bytes, ignoring.",
+                          n, entry_size);
+                goto finish;
+        }
 
         if (message) {
                 if (s->forward_to_syslog)
@@ -291,17 +307,29 @@ void server_process_native_file(
                 const char *label, size_t label_len) {
 
         struct stat st;
-        _cleanup_free_ void *p = NULL;
-        ssize_t n;
+        bool sealed;
         int r;
 
+        /* Data is in the passed fd, since it didn't fit in a
+         * datagram. */
+
         assert(s);
         assert(fd >= 0);
 
-        if (!ucred || ucred->uid != 0) {
+        /* If it's a memfd, check if it is sealed. If so, we can just
+         * use map it and use it, and do not need to copy the data
+         * out. */
+        r = fcntl(fd, F_GET_SEALS);
+        sealed = r >= 0 &&
+                (r & (F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL)) == (F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL);
+
+        if (!sealed && (!ucred || ucred->uid != 0)) {
                 _cleanup_free_ char *sl = NULL, *k = NULL;
                 const char *e;
 
+                /* If this is not a sealed memfd, and the peer is unknown or
+                 * unprivileged, then verify the path. */
+
                 if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
                         log_oom();
                         return;
@@ -329,11 +357,6 @@ void server_process_native_file(
                 }
         }
 
-        /* Data is in the passed file, since it didn't fit in a
-         * datagram. We can't map the file here, since clients might
-         * then truncate it and trigger a SIGBUS for us. So let's
-         * stupidly read it */
-
         if (fstat(fd, &st) < 0) {
                 log_error("Failed to stat passed file, ignoring: %m");
                 return;
@@ -352,26 +375,53 @@ void server_process_native_file(
                 return;
         }
 
-        p = malloc(st.st_size);
-        if (!p) {
-                log_oom();
-                return;
-        }
+        if (sealed) {
+                void *p;
+                size_t ps;
 
-        n = pread(fd, p, st.st_size, 0);
-        if (n < 0)
-                log_error("Failed to read file, ignoring: %s", strerror(-n));
-        else if (n > 0)
-                server_process_native_message(s, p, n, ucred, tv, label, label_len);
+                /* The file is sealed, we can just map it and use it. */
+
+                ps = PAGE_ALIGN(st.st_size);
+                p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
+                if (p == MAP_FAILED) {
+                        log_error("Failed to map memfd, ignoring: %m");
+                        return;
+                }
+
+                server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
+                assert_se(munmap(p, ps) >= 0);
+        } else {
+                _cleanup_free_ void *p = NULL;
+                ssize_t n;
+
+                /* The file is not sealed, we can't map the file here, since
+                 * clients might then truncate it and trigger a SIGBUS for
+                 * us. So let's stupidly read it */
+
+                p = malloc(st.st_size);
+                if (!p) {
+                        log_oom();
+                        return;
+                }
+
+                n = pread(fd, p, st.st_size, 0);
+                if (n < 0)
+                        log_error("Failed to read file, ignoring: %s", strerror(-n));
+                else if (n > 0)
+                        server_process_native_message(s, p, n, ucred, tv, label, label_len);
+        }
 }
 
 int server_open_native_socket(Server*s) {
-        union sockaddr_union sa;
         int one, r;
 
         assert(s);
 
         if (s->native_fd < 0) {
+                union sockaddr_union sa = {
+                        .un.sun_family = AF_UNIX,
+                        .un.sun_path = "/run/systemd/journal/socket",
+                };
 
                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
                 if (s->native_fd < 0) {
@@ -379,15 +429,11 @@ int server_open_native_socket(Server*s) {
                         return -errno;
                 }
 
-                zero(sa);
-                sa.un.sun_family = AF_UNIX;
-                strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
-
                 unlink(sa.un.sun_path);
 
                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
                 if (r < 0) {
-                        log_error("bind() failed: %m");
+                        log_error("bind(%s) failed: %m", sa.un.sun_path);
                         return -errno;
                 }
 
@@ -403,7 +449,7 @@ int server_open_native_socket(Server*s) {
         }
 
 #ifdef HAVE_SELINUX
-        if (use_selinux()) {
+        if (mac_selinux_use()) {
                 one = 1;
                 r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
                 if (r < 0)