chiark / gitweb /
memfd: always use our internal utility functions where we have them
[elogind.git] / src / journal / journald-native.c
index 7aa99a399071defba07eff3ca004ae9ad193bfa3..d3735ec73c598706516355aae48239c81afc7bb4 100644 (file)
 ***/
 
 #include <unistd.h>
+#include <stddef.h>
 #include <sys/epoll.h>
+#include <sys/mman.h>
 
 #include "socket-util.h"
-#include "journald.h"
+#include "path-util.h"
+#include "selinux-util.h"
+#include "journald-server.h"
 #include "journald-native.h"
 #include "journald-kmsg.h"
 #include "journald-console.h"
 #include "journald-syslog.h"
+#include "journald-wall.h"
+#include "memfd.h"
 
-#define ENTRY_SIZE_MAX (1024*1024*32)
-
-static bool valid_user_field(const char *p, size_t l) {
+bool valid_user_field(const char *p, size_t l, bool allow_protected) {
         const char *a;
 
         /* We kinda enforce POSIX syntax recommendations for
@@ -49,7 +53,7 @@ static bool valid_user_field(const char *p, size_t l) {
                 return false;
 
         /* Variables starting with an underscore are protected */
-        if (p[0] == '_')
+        if (!allow_protected && p[0] == '_')
                 return false;
 
         /* Don't allow digits as first character */
@@ -58,14 +62,18 @@ static bool valid_user_field(const char *p, size_t l) {
 
         /* Only allow A-Z0-9 and '_' */
         for (a = p; a < p + l; a++)
-                if (!((*a >= 'A' && *a <= 'Z') ||
-                      (*a >= '0' && *a <= '9') ||
-                      *a == '_'))
+                if ((*a < 'A' || *a > 'Z') &&
+                    (*a < '0' || *a > '9') &&
+                    *a != '_')
                         return false;
 
         return true;
 }
 
+static bool allow_object_pid(struct ucred *ucred) { /* true only if peer credentials are present and the sender's uid is 0 */
+        return ucred && ucred->uid == 0;
+}
+
 void server_process_native_message(
                 Server *s,
                 const void *buffer, size_t buffer_size,
@@ -74,11 +82,12 @@ void server_process_native_message(
                 const char *label, size_t label_len) {
 
         struct iovec *iovec = NULL;
-        unsigned n = 0, m = 0, j, tn = (unsigned) -1;
+        unsigned n = 0, j, tn = (unsigned) -1;
         const char *p;
-        size_t remaining;
+        size_t remaining, m = 0, entry_size = 0;
         int priority = LOG_INFO;
         char *identifier = NULL, *message = NULL;
+        pid_t object_pid = 0;
 
         assert(s);
         assert(buffer || buffer_size == 0);
@@ -99,9 +108,16 @@ void server_process_native_message(
 
                 if (e == p) {
                         /* Entry separator */
-                        server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
+
+                        if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+                                log_debug("Entry is too big with %u properties and %zu bytes, ignoring.", n, entry_size);
+                                continue;
+                        }
+
+                        server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority, object_pid);
                         n = 0;
                         priority = LOG_INFO;
+                        entry_size = 0;
 
                         p++;
                         remaining--;
@@ -118,24 +134,15 @@ void server_process_native_message(
 
                 /* A property follows */
 
-                if (n+N_IOVEC_META_FIELDS >= m) {
-                        struct iovec *c;
-                        unsigned u;
-
-                        u = MAX((n+N_IOVEC_META_FIELDS+1) * 2U, 4U);
-                        c = realloc(iovec, u * sizeof(struct iovec));
-                        if (!c) {
-                                log_oom();
-                                break;
-                        }
-
-                        iovec = c;
-                        m = u;
+                /* n received properties, +1 for _TRANSPORT */
+                if (!GREEDY_REALLOC(iovec, m, n + 1 + N_IOVEC_META_FIELDS + !!object_pid * N_IOVEC_OBJECT_FIELDS)) {
+                        log_oom();
+                        break;
                 }
 
                 q = memchr(p, '=', e - p);
                 if (q) {
-                        if (valid_user_field(p, q - p)) {
+                        if (valid_user_field(p, q - p, false)) {
                                 size_t l;
 
                                 l = e - p;
@@ -146,29 +153,30 @@ void server_process_native_message(
                                  * field */
                                 iovec[n].iov_base = (char*) p;
                                 iovec[n].iov_len = l;
+                                entry_size += iovec[n].iov_len;
                                 n++;
 
                                 /* We need to determine the priority
                                  * of this entry for the rate limiting
                                  * logic */
                                 if (l == 10 &&
-                                    memcmp(p, "PRIORITY=", 9) == 0 &&
+                                    startswith(p, "PRIORITY=") &&
                                     p[9] >= '0' && p[9] <= '9')
                                         priority = (priority & LOG_FACMASK) | (p[9] - '0');
 
                                 else if (l == 17 &&
-                                         memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
+                                         startswith(p, "SYSLOG_FACILITY=") &&
                                          p[16] >= '0' && p[16] <= '9')
                                         priority = (priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
 
                                 else if (l == 18 &&
-                                         memcmp(p, "SYSLOG_FACILITY=", 16) == 0 &&
+                                         startswith(p, "SYSLOG_FACILITY=") &&
                                          p[16] >= '0' && p[16] <= '9' &&
                                          p[17] >= '0' && p[17] <= '9')
                                         priority = (priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
 
                                 else if (l >= 19 &&
-                                         memcmp(p, "SYSLOG_IDENTIFIER=", 18) == 0) {
+                                         startswith(p, "SYSLOG_IDENTIFIER=")) {
                                         char *t;
 
                                         t = strndup(p + 18, l - 18);
@@ -177,7 +185,7 @@ void server_process_native_message(
                                                 identifier = t;
                                         }
                                 } else if (l >= 8 &&
-                                           memcmp(p, "MESSAGE=", 8) == 0) {
+                                           startswith(p, "MESSAGE=")) {
                                         char *t;
 
                                         t = strndup(p + 8, l - 8);
@@ -185,6 +193,16 @@ void server_process_native_message(
                                                 free(message);
                                                 message = t;
                                         }
+                                } else if (l > strlen("OBJECT_PID=") &&
+                                           l < strlen("OBJECT_PID=")  + DECIMAL_STR_MAX(pid_t) &&
+                                           startswith(p, "OBJECT_PID=") &&
+                                           allow_object_pid(ucred)) {
+                                        char buf[DECIMAL_STR_MAX(pid_t)];
+                                        memcpy(buf, p + strlen("OBJECT_PID="), l - strlen("OBJECT_PID="));
+                                        char_array_0(buf);
+
+                                        /* ignore error */
+                                        parse_pid(buf, &object_pid);
                                 }
                         }
 
@@ -204,7 +222,12 @@ void server_process_native_message(
                         memcpy(&l_le, e + 1, sizeof(uint64_t));
                         l = le64toh(l_le);
 
-                        if (remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
+                        if (l > DATA_SIZE_MAX) {
+                                log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring.", l);
+                                break;
+                        }
+
+                        if ((uint64_t) remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
                             e[1+sizeof(uint64_t)+l] != '\n') {
                                 log_debug("Failed to parse message, ignoring.");
                                 break;
@@ -220,9 +243,10 @@ void server_process_native_message(
                         k[e - p] = '=';
                         memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);
 
-                        if (valid_user_field(p, e - p)) {
+                        if (valid_user_field(p, e - p, false)) {
                                 iovec[n].iov_base = k;
                                 iovec[n].iov_len = (e - p) + 1 + l;
+                                entry_size += iovec[n].iov_len;
                                 n++;
                         } else
                                 free(k);
@@ -237,6 +261,13 @@ void server_process_native_message(
 
         tn = n++;
         IOVEC_SET_STRING(iovec[tn], "_TRANSPORT=journal");
+        entry_size += strlen("_TRANSPORT=journal");
+
+        if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
+                log_debug("Entry is too big with %u properties and %zu bytes, ignoring.",
+                          n, entry_size);
+                goto finish;
+        }
 
         if (message) {
                 if (s->forward_to_syslog)
@@ -247,9 +278,12 @@ void server_process_native_message(
 
                 if (s->forward_to_console)
                         server_forward_console(s, priority, identifier, message, ucred);
+
+                if (s->forward_to_wall)
+                        server_forward_wall(s, priority, identifier, message, ucred);
         }
 
-        server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority);
+        server_dispatch_message(s, iovec, n, m, ucred, tv, label, label_len, NULL, priority, object_pid);
 
 finish:
         for (j = 0; j < n; j++)  {
@@ -274,16 +308,53 @@ void server_process_native_file(
                 const char *label, size_t label_len) {
 
         struct stat st;
-        void *p;
-        ssize_t n;
+        bool sealed;
+        int r;
+
+        /* Data is in the passed fd, since it didn't fit in a
+         * datagram. */
 
         assert(s);
         assert(fd >= 0);
 
-        /* Data is in the passed file, since it didn't fit in a
-         * datagram. We can't map the file here, since clients might
-         * then truncate it and trigger a SIGBUS for us. So let's
-         * stupidly read it */
+        /* If it's a memfd, check if it is sealed. If so, we can just
+         * map it and use it, and do not need to copy the data
+         * out. */
+        sealed = memfd_get_sealed(fd) > 0;
+
+        if (!sealed && (!ucred || ucred->uid != 0)) {
+                _cleanup_free_ char *sl = NULL, *k = NULL;
+                const char *e;
+
+                /* If this is not a sealed memfd, and the peer is unknown or
+                 * unprivileged, then verify the path. */
+
+                if (asprintf(&sl, "/proc/self/fd/%i", fd) < 0) {
+                        log_oom();
+                        return;
+                }
+
+                r = readlink_malloc(sl, &k);
+                if (r < 0) {
+                        log_error("readlink(%s) failed: %m", sl);
+                        return;
+                }
+
+                e = path_startswith(k, "/dev/shm/");
+                if (!e)
+                        e = path_startswith(k, "/tmp/");
+                if (!e)
+                        e = path_startswith(k, "/var/tmp/");
+                if (!e) {
+                        log_error("Received file outside of allowed directories. Refusing.");
+                        return;
+                }
+
+                if (!filename_is_safe(e)) {
+                        log_error("Received file in subdirectory of allowed directories. Refusing.");
+                        return;
+                }
+        }
 
         if (fstat(fd, &st) < 0) {
                 log_error("Failed to stat passed file, ignoring: %m");
@@ -303,29 +374,53 @@ void server_process_native_file(
                 return;
         }
 
-        p = malloc(st.st_size);
-        if (!p) {
-                log_oom();
-                return;
-        }
+        if (sealed) {
+                void *p;
+                size_t ps;
 
-        n = pread(fd, p, st.st_size, 0);
-        if (n < 0)
-                log_error("Failed to read file, ignoring: %s", strerror(-n));
-        else if (n > 0)
-                server_process_native_message(s, p, n, ucred, tv, label, label_len);
+                /* The file is sealed, we can just map it and use it. */
 
-        free(p);
+                ps = PAGE_ALIGN(st.st_size);
+                p = mmap(NULL, ps, PROT_READ, MAP_PRIVATE, fd, 0);
+                if (p == MAP_FAILED) {
+                        log_error("Failed to map memfd, ignoring: %m");
+                        return;
+                }
+
+                server_process_native_message(s, p, st.st_size, ucred, tv, label, label_len);
+                assert_se(munmap(p, ps) >= 0);
+        } else {
+                _cleanup_free_ void *p = NULL;
+                ssize_t n;
+
+                /* The file is not sealed, we can't map the file here, since
+                 * clients might then truncate it and trigger a SIGBUS for
+                 * us. So let's stupidly read it */
+
+                p = malloc(st.st_size);
+                if (!p) {
+                        log_oom();
+                        return;
+                }
+
+                n = pread(fd, p, st.st_size, 0);
+                if (n < 0)
+                        log_error("Failed to read file, ignoring: %s", strerror(-n));
+                else if (n > 0)
+                        server_process_native_message(s, p, n, ucred, tv, label, label_len);
+        }
 }
 
 int server_open_native_socket(Server*s) {
-        union sockaddr_union sa;
         int one, r;
-        struct epoll_event ev;
 
         assert(s);
 
         if (s->native_fd < 0) {
+                union sockaddr_union sa = {
+                        .un.sun_family = AF_UNIX,
+                        .un.sun_path = "/run/systemd/journal/socket",
+                };
 
                 s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
                 if (s->native_fd < 0) {
@@ -333,15 +428,11 @@ int server_open_native_socket(Server*s) {
                         return -errno;
                 }
 
-                zero(sa);
-                sa.un.sun_family = AF_UNIX;
-                strncpy(sa.un.sun_path, "/run/systemd/journal/socket", sizeof(sa.un.sun_path));
-
                 unlink(sa.un.sun_path);
 
                 r = bind(s->native_fd, &sa.sa, offsetof(union sockaddr_union, un.sun_path) + strlen(sa.un.sun_path));
                 if (r < 0) {
-                        log_error("bind() failed: %m");
+                        log_error("bind(%s) failed: %m", sa.un.sun_path);
                         return -errno;
                 }
 
@@ -357,10 +448,12 @@ int server_open_native_socket(Server*s) {
         }
 
 #ifdef HAVE_SELINUX
-        one = 1;
-        r = setsockopt(s->syslog_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
-        if (r < 0)
-                log_warning("SO_PASSSEC failed: %m");
+        if (mac_selinux_use()) {
+                one = 1;
+                r = setsockopt(s->native_fd, SOL_SOCKET, SO_PASSSEC, &one, sizeof(one));
+                if (r < 0)
+                        log_warning("SO_PASSSEC failed: %m");
+        }
 #endif
 
         one = 1;
@@ -370,12 +463,10 @@ int server_open_native_socket(Server*s) {
                 return -errno;
         }
 
-        zero(ev);
-        ev.events = EPOLLIN;
-        ev.data.fd = s->native_fd;
-        if (epoll_ctl(s->epoll_fd, EPOLL_CTL_ADD, s->native_fd, &ev) < 0) {
-                log_error("Failed to add native server fd to epoll object: %m");
-                return -errno;
+        r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, process_datagram, s);
+        if (r < 0) {
+                log_error("Failed to add native server fd to event loop: %s", strerror(-r));
+                return r;
         }
 
         return 0;