chiark / gitweb /
journal: by default do not decompress data objects larger than 64K
authorLennart Poettering <lennart@poettering.net>
Tue, 20 Nov 2012 23:28:00 +0000 (00:28 +0100)
committerLennart Poettering <lennart@poettering.net>
Tue, 20 Nov 2012 23:28:00 +0000 (00:28 +0100)
This introduces a new data threshold setting for sd_journal objects
which controls the maximum size of objects to decompress. This
relieves the library from having to decompress full data objects even
if a client program is only interested in the initial part of them.

This speeds up "systemd-coredumpctl" drastically when invoked without
parameters.

13 files changed:
man/sd_journal_get_data.xml
man/sd_journal_query_unique.xml
src/journal/compress.c
src/journal/compress.h
src/journal/coredumpctl.c
src/journal/journal-file.c
src/journal/journal-internal.h
src/journal/journal-verify.c
src/journal/journald-server.c
src/journal/libsystemd-journal.sym
src/journal/sd-journal.c
src/shared/logs-show.c
src/systemd/sd-journal.h

index 6470f19..1259b0c 100644 (file)
@@ -47,6 +47,8 @@
                 <refname>sd_journal_enumerate_data</refname>
                 <refname>sd_journal_restart_data</refname>
                 <refname>SD_JOURNAL_FOREACH_DATA</refname>
+                <refname>sd_journal_set_data_threshold</refname>
+                <refname>sd_journal_get_data_threshold</refname>
                 <refpurpose>Read data fields from the current journal entry</refpurpose>
         </refnamediv>
 
                                 <paramdef>size_t <parameter>length</parameter></paramdef>
                         </funcprototype>
 
+                        <funcprototype>
+                                <funcdef>int <function>sd_journal_set_data_threshold</function></funcdef>
+                                <paramdef>sd_journal* <parameter>j</parameter></paramdef>
+                                <paramdef>size_t <parameter>sz</parameter></paramdef>
+                        </funcprototype>
+
+                        <funcprototype>
+                                <funcdef>int <function>sd_journal_get_data_threshold</function></funcdef>
+                                <paramdef>sd_journal* <parameter>j</parameter></paramdef>
+                                <paramdef>size_t* <parameter>sz</parameter></paramdef>
+                        </funcprototype>
                 </funcsynopsis>
         </refsynopsisdiv>
 
                 <function>sd_journal_enumerate_data()</function>, or
                 the read pointer is altered. Note that the data
                 returned will be prefixed with the field name and
-                '='.</para>
+                '='. Also note that by default data fields larger than
+                64K might get truncated to 64K. This threshold may be
+                changed and turned off with
+                <function>sd_journal_set_data_threshold()</function> (see
+                below).</para>
 
                 <para><function>sd_journal_enumerate_data()</function>
                 may be used to iterate through all fields of the
                 <citerefentry><refentrytitle>sd_journal_next</refentrytitle><manvolnum>3</manvolnum></citerefentry>
                 (or related call) has been called at least
                 once, in order to position the read pointer at a valid entry.</para>
+
+                <para><function>sd_journal_set_data_threshold()</function>
+                may be used to change the data field size threshold
+                for data returned by
+                <function>sd_journal_get_data()</function>,
+                <function>sd_journal_enumerate_data()</function> and
+                <function>sd_journal_enumerate_unique()</function>. This
+                threshold is a hint only: it indicates that the client
+                program is interested only in the initial parts of the
+                data fields, up to the threshold in size -- but the
+                library might still return larger data objects. That
+                means applications should not rely exclusively on this
+                setting to limit the size of the data fields returned,
+                but need to apply an explicit size limit on the
+                returned data as well. This threshold defaults to
+                64K. To retrieve the complete data fields this
+                threshold should be turned off by setting it to 0, so
+                that the library always returns the complete data
+                objects. It is recommended to set this threshold as
+                low as possible since this relieves the library from
+                having to decompress large compressed data objects in
+                full.</para>
+
+                <para><function>sd_journal_get_data_threshold()</function>
+                returns the currently configured data field size
+                threshold.</para>
         </refsect1>
 
         <refsect1>
                 read, 0 when no more fields are known, or a negative
                 errno-style error
                 code. <function>sd_journal_restart_data()</function>
-                returns nothing.</para>
+                returns
+                nothing. <function>sd_journal_set_data_threshold()</function>
+                and <function>sd_journal_get_data_threshold()</function>
+                return 0 on success or a negative errno-style error
+                code.</para>
         </refsect1>
 
         <refsect1>
                 <title>Notes</title>
 
                 <para>The <function>sd_journal_get_data()</function>,
-                <function>sd_journal_enumerate_data()</function> and
-                <function>sd_journal_restart_data()</function>
+                <function>sd_journal_enumerate_data()</function>,
+                <function>sd_journal_restart_data()</function>,
+                <function>sd_journal_set_data_threshold()</function>
+                and
+                <function>sd_journal_get_data_threshold()</function>
                 interfaces are available as shared library, which can
                 be compiled and linked to with the
                 <literal>libsystemd-journal</literal>
index f2f8af0..502a7e0 100644 (file)
                 invocation of
                 <function>sd_journal_enumerate_unique()</function>. Note
                 that the data returned will be prefixed with the field
-                name and '='.</para>
+                name and '='. Note that this call is subject to the
+                data field size threshold as controlled by
+                <function>sd_journal_set_data_threshold()</function>.</para>
 
                 <para><function>sd_journal_restart_unique()</function>
                 resets the data enumeration index to the beginning of
index 75e70c5..a4427be 100644 (file)
@@ -24,6 +24,7 @@
 #include <string.h>
 #include <lzma.h>
 
+#include "macro.h"
 #include "compress.h"
 
 bool compress_blob(const void *src, uint64_t src_size, void *dst, uint64_t *dst_size) {
@@ -66,10 +67,11 @@ fail:
 }
 
 bool uncompress_blob(const void *src, uint64_t src_size,
-                     void **dst, uint64_t *dst_alloc_size, uint64_t* dst_size) {
+                     void **dst, uint64_t *dst_alloc_size, uint64_t* dst_size, uint64_t dst_max) {
 
         lzma_stream s = LZMA_STREAM_INIT;
         lzma_ret ret;
+        uint64_t space;
         bool b = false;
 
         assert(src);
@@ -98,7 +100,8 @@ bool uncompress_blob(const void *src, uint64_t src_size,
         s.avail_in = src_size;
 
         s.next_out = *dst;
-        s.avail_out = *dst_alloc_size;
+        space = dst_max > 0 ? MIN(*dst_alloc_size, dst_max) : *dst_alloc_size;
+        s.avail_out = space;
 
         for (;;) {
                 void *p;
@@ -111,18 +114,23 @@ bool uncompress_blob(const void *src, uint64_t src_size,
                 if (ret != LZMA_OK)
                         goto fail;
 
-                p = realloc(*dst, *dst_alloc_size*2);
+                if (dst_max > 0 && (space - s.avail_out) >= dst_max)
+                        break;
+
+                p = realloc(*dst, space*2);
                 if (!p)
                         goto fail;
 
                 s.next_out = (uint8_t*) p + ((uint8_t*) s.next_out - (uint8_t*) *dst);
-                s.avail_out += *dst_alloc_size;
+                s.avail_out += space;
+
+                space *= 2;
 
                 *dst = p;
-                *dst_alloc_size *= 2;
+                *dst_alloc_size = space;
         }
 
-        *dst_size = *dst_alloc_size - s.avail_out;
+        *dst_size = space - s.avail_out;
         b = true;
 
 fail:
index b6f1aa0..2b87e73 100644 (file)
@@ -27,7 +27,7 @@
 bool compress_blob(const void *src, uint64_t src_size, void *dst, uint64_t *dst_size);
 
 bool uncompress_blob(const void *src, uint64_t src_size,
-                     void **dst, uint64_t *dst_alloc_size, uint64_t* dst_size);
+                     void **dst, uint64_t *dst_alloc_size, uint64_t* dst_size, uint64_t dst_max);
 
 bool uncompress_startswith(const void *src, uint64_t src_size,
                            void **buffer, uint64_t *buffer_size,
index 4adc923..f9cebb7 100644 (file)
@@ -342,6 +342,11 @@ static int dump_list(sd_journal *j) {
 
         assert(j);
 
+        /* The coredumps are likely to be compressed, and for just
+         * listing them we don't need to decompress them, so let's
+         * pick a fairly low data threshold here */
+        sd_journal_set_data_threshold(j, 4096);
+
         SD_JOURNAL_FOREACH(j) {
                 if (field)
                         print_field(stdout, j);
@@ -381,6 +386,9 @@ static int dump_core(sd_journal* j) {
 
         assert(j);
 
+        /* We want full data, nothing truncated. */
+        sd_journal_set_data_threshold(j, 0);
+
         r = focus(j);
         if (r < 0)
                 return r;
@@ -428,6 +436,8 @@ static int run_gdb(sd_journal *j) {
 
         assert(j);
 
+        sd_journal_set_data_threshold(j, 0);
+
         r = focus(j);
         if (r < 0)
                 return r;
index 3df099d..13fc8ed 100644 (file)
@@ -780,7 +780,7 @@ int journal_file_find_data_object_with_hash(
 
                         l -= offsetof(Object, data.payload);
 
-                        if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
+                        if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0))
                                 return -EBADMSG;
 
                         if (rsize == size &&
@@ -2591,7 +2591,6 @@ int journal_file_open_reliably(
                                  metrics, mmap_cache, template, ret);
 }
 
-
 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
         uint64_t i, n;
         uint64_t q, xor_hash = 0;
@@ -2645,7 +2644,7 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
 #ifdef HAVE_XZ
                         uint64_t rsize;
 
-                        if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
+                        if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0))
                                 return -EBADMSG;
 
                         data = from->compress_buffer;
index 75a4129..97de0e7 100644 (file)
@@ -121,6 +121,8 @@ struct sd_journal {
         uint64_t unique_offset;
 
         bool on_network;
+
+        size_t data_threshold;
 };
 
 char *journal_make_match_string(sd_journal *j);
index 1a67d5a..ed28b45 100644 (file)
@@ -69,7 +69,7 @@ static int journal_file_object_verify(JournalFile *f, Object *o) {
 
                         if (!uncompress_blob(o->data.payload,
                                              le64toh(o->object.size) - offsetof(Object, data.payload),
-                                             &b, &alloc, &b_size))
+                                             &b, &alloc, &b_size, 0))
                                 return -EBADMSG;
 
                         h2 = hash64(b, b_size);
index 3eaf0d3..43ffe75 100644 (file)
@@ -934,6 +934,8 @@ int server_flush_to_var(Server *s) {
                 return r;
         }
 
+        sd_journal_set_data_threshold(j, 0);
+
         SD_JOURNAL_FOREACH(j) {
                 Object *o = NULL;
                 JournalFile *f;
index 17b5bf8..7b602f5 100644 (file)
@@ -86,4 +86,6 @@ global:
         sd_journal_fd_reliable;
         sd_journal_get_catalog;
         sd_journal_get_catalog_for_message_id;
+        sd_journal_set_data_threshold;
+        sd_journal_get_data_threshold;
 } LIBSYSTEMD_JOURNAL_195;
index fe0478f..095fbb2 100644 (file)
@@ -47,6 +47,8 @@
 
 #define REPLACE_VAR_MAX 256
 
+#define DEFAULT_DATA_THRESHOLD (64*1024)
+
 static void detach_location(sd_journal *j) {
         Iterator i;
         JournalFile *f;
@@ -1560,6 +1562,7 @@ static sd_journal *journal_new(int flags, const char *path) {
 
         j->inotify_fd = -1;
         j->flags = flags;
+        j->data_threshold = DEFAULT_DATA_THRESHOLD;
 
         if (path) {
                 j->path = strdup(path);
@@ -1838,7 +1841,8 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **
                                 uint64_t rsize;
 
                                 if (!uncompress_blob(o->data.payload, l,
-                                                     &f->compress_buffer, &f->compress_buffer_size, &rsize))
+                                                     &f->compress_buffer, &f->compress_buffer_size, &rsize,
+                                                     j->data_threshold))
                                         return -EBADMSG;
 
                                 *data = f->compress_buffer;
@@ -1862,7 +1866,7 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **
                         *data = o->data.payload;
                         *size = t;
 
-                        return 0;
+                        return 1;
                 }
 
                 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
@@ -1873,7 +1877,7 @@ _public_ int sd_journal_get_data(sd_journal *j, const char *field, const void **
         return -ENOENT;
 }
 
-static int return_data(JournalFile *f, Object *o, const void **data, size_t *size) {
+static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **data, size_t *size) {
         size_t t;
         uint64_t l;
 
@@ -1888,7 +1892,7 @@ static int return_data(JournalFile *f, Object *o, const void **data, size_t *siz
 #ifdef HAVE_XZ
                 uint64_t rsize;
 
-                if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
+                if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, j->data_threshold))
                         return -EBADMSG;
 
                 *data = f->compress_buffer;
@@ -1942,7 +1946,7 @@ _public_ int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t
         if (le_hash != o->data.hash)
                 return -EBADMSG;
 
-        r = return_data(f, o, data, size);
+        r = return_data(j, f, o, data, size);
         if (r < 0)
                 return r;
 
@@ -2339,7 +2343,7 @@ _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_
                 if (o->object.type != OBJECT_DATA)
                         return -EBADMSG;
 
-                r = return_data(j->unique_file, o, &odata, &ol);
+                r = return_data(j, j->unique_file, o, &odata, &ol);
                 if (r < 0)
                         return r;
 
@@ -2371,7 +2375,7 @@ _public_ int sd_journal_enumerate_unique(sd_journal *j, const void **data, size_
                 if (found)
                         continue;
 
-                r = return_data(j->unique_file, o, data, l);
+                r = return_data(j, j->unique_file, o, data, l);
                 if (r < 0)
                         return r;
 
@@ -2456,3 +2460,21 @@ _public_ int sd_journal_get_catalog_for_message_id(sd_id128_t id, char **ret) {
 
         return catalog_get(id, ret);
 }
+
+_public_ int sd_journal_set_data_threshold(sd_journal *j, size_t sz) {
+        if (!j)
+                return -EINVAL;
+
+        j->data_threshold = sz;
+        return 0;
+}
+
+_public_ int sd_journal_get_data_threshold(sd_journal *j, size_t *sz) {
+        if (!j)
+                return -EINVAL;
+        if (!sz)
+                return -EINVAL;
+
+        *sz = j->data_threshold;
+        return 0;
+}
index cb93761..ca5ad43 100644 (file)
@@ -91,7 +91,7 @@ static bool shall_print(const char *p, size_t l, OutputFlags flags) {
         if (flags & OUTPUT_SHOW_ALL)
                 return true;
 
-        if (l > PRINT_THRESHOLD)
+        if (l >= PRINT_THRESHOLD)
                 return false;
 
         if (!utf8_is_printable_n(p, l))
@@ -119,6 +119,8 @@ static int output_short(
         assert(f);
         assert(j);
 
+        sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : PRINT_THRESHOLD);
+
         SD_JOURNAL_FOREACH_DATA(j, data, length) {
 
                 r = parse_field(data, length, "PRIORITY=", &priority, &priority_len);
@@ -308,6 +310,8 @@ static int output_verbose(
         assert(f);
         assert(j);
 
+        sd_journal_set_data_threshold(j, 0);
+
         r = sd_journal_get_realtime_usec(j, &realtime);
         if (r < 0) {
                 log_error("Failed to get realtime timestamp: %s", strerror(-r));
@@ -368,6 +372,8 @@ static int output_export(
 
         assert(j);
 
+        sd_journal_set_data_threshold(j, 0);
+
         r = sd_journal_get_realtime_usec(j, &realtime);
         if (r < 0) {
                 log_error("Failed to get realtime timestamp: %s", strerror(-r));
@@ -441,7 +447,7 @@ void json_escape(
         assert(f);
         assert(p);
 
-        if (!(flags & OUTPUT_SHOW_ALL) && l > JSON_THRESHOLD)
+        if (!(flags & OUTPUT_SHOW_ALL) && l >= JSON_THRESHOLD)
 
                 fputs("null", f);
 
@@ -502,6 +508,8 @@ static int output_json(
 
         assert(j);
 
+        sd_journal_set_data_threshold(j, flags & OUTPUT_SHOW_ALL ? 0 : JSON_THRESHOLD);
+
         r = sd_journal_get_realtime_usec(j, &realtime);
         if (r < 0) {
                 log_error("Failed to get realtime timestamp: %s", strerror(-r));
@@ -714,6 +722,8 @@ static int output_cat(
         assert(j);
         assert(f);
 
+        sd_journal_set_data_threshold(j, 0);
+
         r = sd_journal_get_data(j, "MESSAGE", &data, &l);
         if (r < 0) {
                 /* An entry without MESSAGE=? */
index fd9c0f5..2e8d2d8 100644 (file)
@@ -97,6 +97,9 @@ int sd_journal_next_skip(sd_journal *j, uint64_t skip);
 int sd_journal_get_realtime_usec(sd_journal *j, uint64_t *ret);
 int sd_journal_get_monotonic_usec(sd_journal *j, uint64_t *ret, sd_id128_t *ret_boot_id);
 
+int sd_journal_set_data_threshold(sd_journal *j, size_t sz);
+int sd_journal_get_data_threshold(sd_journal *j, size_t *sz);
+
 int sd_journal_get_data(sd_journal *j, const char *field, const void **data, size_t *l);
 int sd_journal_enumerate_data(sd_journal *j, const void **data, size_t *l);
 void sd_journal_restart_data(sd_journal *j);