chiark / gitweb /
journal: automatically rotate journal files if the data hash table is full > 75%
authorLennart Poettering <lennart@poettering.net>
Mon, 16 Jul 2012 20:24:02 +0000 (22:24 +0200)
committerLennart Poettering <lennart@poettering.net>
Mon, 16 Jul 2012 22:59:03 +0000 (00:59 +0200)
Previously, when the main data hash table grew too full, performance
simply started to decrease drastically. Instead, now simply rotate to a
new journal file as the hash table gets too full, so that we can start
with a fresh, empty hash table.

man/journalctl.xml
src/journal/journal-def.h
src/journal/journal-file.c
src/journal/journal-file.h
src/journal/journal-internal.h
src/journal/journalctl.c
src/journal/journald.c
src/journal/sd-journal.c

index 05c8703..f9abbfb 100644 (file)
                                 similar.</para></listitem>
                         </varlistentry>
 
+                        <varlistentry>
+                                <term><option>--header</option></term>
+
+                                <listitem><para>Instead of showing
+                                journal contents show internal header
+                                information of the journal files
+                                accessed.</para></listitem>
+                        </varlistentry>
+
                 </variablelist>
         </refsect1>
 
index b30ae79..ac89e61 100644 (file)
@@ -172,12 +172,15 @@ _packed_ struct Header {
         le64_t tail_object_offset;
         le64_t n_objects;
         le64_t n_entries;
-        le64_t seqnum;
-        le64_t first_seqnum;
+        le64_t tail_seqnum;
+        le64_t head_seqnum;
         le64_t entry_array_offset;
         le64_t head_entry_realtime;
         le64_t tail_entry_realtime;
         le64_t tail_entry_monotonic;
+        /* Added in 187 */
+        le64_t n_data;
+        le64_t n_fields;
 };
 
 #endif
index a110a00..9128f0d 100644 (file)
  * size */
 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MB */
 
-static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
+/* n_data was the first entry we added after the initial file format design */
+#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
 
 #define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
 
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+
+static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
+
 void journal_file_close(JournalFile *f) {
         int t;
 
@@ -107,7 +113,7 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         if (template) {
                 h.seqnum_id = template->header->seqnum_id;
-                h.seqnum = template->header->seqnum;
+                h.tail_seqnum = template->header->tail_seqnum;
         } else
                 h.seqnum_id = h.file_id;
 
@@ -161,7 +167,8 @@ static int journal_file_verify_header(JournalFile *f) {
                 return -EPROTONOSUPPORT;
 #endif
 
-        if (f->header->header_size != htole64(ALIGN64(sizeof(*(f->header)))))
+        /* The first addition was n_data, so check that we are at least this large */
+        if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
                 return -EBADMSG;
 
         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
@@ -427,7 +434,7 @@ static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
 
         assert(f);
 
-        r = le64toh(f->header->seqnum) + 1;
+        r = le64toh(f->header->tail_seqnum) + 1;
 
         if (seqnum) {
                 /* If an external seqnum counter was passed, we update
@@ -440,10 +447,10 @@ static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
                 *seqnum = r;
         }
 
-        f->header->seqnum = htole64(r);
+        f->header->tail_seqnum = htole64(r);
 
-        if (f->header->first_seqnum == 0)
-                f->header->first_seqnum = htole64(r);
+        if (f->header->head_seqnum == 0)
+                f->header->head_seqnum = htole64(r);
 
         return r;
 }
@@ -614,6 +621,9 @@ static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, ui
 
         f->data_hash_table[h].tail_hash_offset = htole64(offset);
 
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+                f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
+
         return 0;
 }
 
@@ -1809,27 +1819,13 @@ int journal_file_move_to_entry_by_realtime_for_data(
 }
 
 void journal_file_dump(JournalFile *f) {
-        char a[33], b[33], c[33];
         Object *o;
         int r;
         uint64_t p;
 
         assert(f);
 
-        printf("File Path: %s\n"
-               "File ID: %s\n"
-               "Machine ID: %s\n"
-               "Boot ID: %s\n"
-               "Arena size: %llu\n"
-               "Objects: %lu\n"
-               "Entries: %lu\n",
-               f->path,
-               sd_id128_to_string(f->header->file_id, a),
-               sd_id128_to_string(f->header->machine_id, b),
-               sd_id128_to_string(f->header->boot_id, c),
-               (unsigned long long) le64toh(f->header->arena_size),
-               (unsigned long) le64toh(f->header->n_objects),
-               (unsigned long) le64toh(f->header->n_entries));
+        journal_file_print_header(f);
 
         p = le64toh(f->header->header_size);
         while (p != 0) {
@@ -1885,6 +1881,58 @@ fail:
         log_error("File corrupt");
 }
 
+/* Prints a human-readable summary of the header of journal file f to
+ * stdout: path, IDs, header/arena sizes, hash table dimensions,
+ * object counts, sequence numbers and realtime timestamps. The
+ * data/field object counts and hash table fill levels are printed
+ * only if the on-disk header is large enough to contain the
+ * n_data/n_fields fields. */
+void journal_file_print_header(JournalFile *f) {
+        /* One buffer per ID: all four strings are formatted before
+         * printf() consumes them, so they must not share storage. */
+        char a[33], b[33], c[33], d[33];
+        char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
+
+        assert(f);
+
+        printf("File Path: %s\n"
+               "File ID: %s\n"
+               "Machine ID: %s\n"
+               "Boot ID: %s\n"
+               "Sequential Number ID: %s\n"
+               "Header size: %llu\n"
+               "Arena size: %llu\n"
+               "Data Hash Table Size: %llu\n"
+               "Field Hash Table Size: %llu\n"
+               "Objects: %llu\n"
+               "Entry Objects: %llu\n"
+               "Rotate Suggested: %s\n"
+               "Head Sequential Number: %llu\n"
+               "Tail Sequential Number: %llu\n"
+               "Head Realtime Timestamp: %s\n"
+               "Tail Realtime Timestamp: %s\n",
+               f->path,
+               sd_id128_to_string(f->header->file_id, a),
+               sd_id128_to_string(f->header->machine_id, b),
+               sd_id128_to_string(f->header->boot_id, c),
+               sd_id128_to_string(f->header->seqnum_id, d),
+               (unsigned long long) le64toh(f->header->header_size),
+               (unsigned long long) le64toh(f->header->arena_size),
+               (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+               (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+               (unsigned long long) le64toh(f->header->n_objects),
+               (unsigned long long) le64toh(f->header->n_entries),
+               yes_no(journal_file_rotate_suggested(f)),
+               (unsigned long long) le64toh(f->header->head_seqnum),
+               (unsigned long long) le64toh(f->header->tail_seqnum),
+               format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
+               format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
+
+        /* n_data and n_fields were added to the header later on, hence
+         * only access them if the file's header actually covers them */
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
+                printf("Data Objects: %llu\n"
+                       "Data Hash Table Fill: %.1f%%\n",
+                       (unsigned long long) le64toh(f->header->n_data),
+                       100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+                printf("Field Objects: %llu\n"
+                       "Field Hash Table Fill: %.1f%%\n",
+                       (unsigned long long) le64toh(f->header->n_fields),
+                       100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
+}
+
+
 int journal_file_open(
                 const char *fname,
                 int flags,
@@ -1950,7 +1998,7 @@ int journal_file_open(
                 }
         }
 
-        if (f->last_stat.st_size < (off_t) sizeof(Header)) {
+        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
                 r = -EIO;
                 goto fail;
         }
@@ -2032,7 +2080,7 @@ int journal_file_rotate(JournalFile **f) {
         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                  "-%016llx-%016llx.journal",
-                 (unsigned long long) le64toh((*f)->header->seqnum),
+                 (unsigned long long) le64toh((*f)->header->tail_seqnum),
                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
 
         r = rename(old_file->path, p);
@@ -2510,3 +2558,28 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, u
 
         return 1;
 }
+
+/* Returns true if it is advisable to rotate journal file f, either
+ * because its header predates the current header layout or because
+ * one of its hash tables is more than 75% full. */
+bool journal_file_rotate_suggested(JournalFile *f) {
+        uint64_t buckets;
+
+        assert(f);
+
+        /* A header shorter than the one we know lacks the newer
+         * fields and hence the newer features: suggest a rotation so
+         * that a file with an up-to-date header gets created. */
+        if (le64toh(f->header->header_size) < sizeof(Header))
+                return true;
+
+        /* Suggest a rotation once a hash table is filled beyond 75%
+         * (the threshold is borrowed from Java's hash table
+         * implementation). The fill level is derived from the
+         * n_data/n_fields counters, which only newer headers carry. */
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) {
+                buckets = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+
+                if (le64toh(f->header->n_data) * 4ULL > buckets * 3ULL)
+                        return true;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) {
+                buckets = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+
+                if (le64toh(f->header->n_fields) * 4ULL > buckets * 3ULL)
+                        return true;
+        }
+
+        return false;
+}
index 5c42ecd..6219742 100644 (file)
@@ -120,6 +120,7 @@ int journal_file_move_to_entry_by_monotonic_for_data(JournalFile *f, uint64_t da
 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset);
 
 void journal_file_dump(JournalFile *f);
+void journal_file_print_header(JournalFile *f);
 
 int journal_file_rotate(JournalFile **f);
 
@@ -132,4 +133,6 @@ void journal_default_metrics(JournalMetrics *m, int fd);
 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to);
 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *from, usec_t *to);
 
+bool journal_file_rotate_suggested(JournalFile *f);
+
 #endif
index 64d05f0..b767901 100644 (file)
@@ -108,5 +108,6 @@ struct sd_journal {
 };
 
 char *journal_make_match_string(sd_journal *j);
+void journal_print_header(sd_journal *j);
 
 #endif
index 65b3bd5..0d37107 100644 (file)
@@ -40,6 +40,7 @@
 #include "pager.h"
 #include "logs-show.h"
 #include "strv.h"
+#include "journal-internal.h"
 
 static OutputMode arg_output = OUTPUT_SHORT;
 static bool arg_follow = false;
@@ -48,6 +49,7 @@ static bool arg_no_pager = false;
 static int arg_lines = -1;
 static bool arg_no_tail = false;
 static bool arg_new_id128 = false;
+static bool arg_print_header = false;
 static bool arg_quiet = false;
 static bool arg_local = false;
 static bool arg_this_boot = false;
@@ -70,6 +72,7 @@ static int help(void) {
                "  -l --local          Only local entries\n"
                "  -b --this-boot      Show data only from current boot\n"
                "  -D --directory=PATH Show journal files from directory\n"
+               "     --header         Show journal header information\n"
                "     --new-id128      Generate a new 128 Bit id\n",
                program_invocation_short_name);
 
@@ -82,7 +85,8 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_VERSION = 0x100,
                 ARG_NO_PAGER,
                 ARG_NO_TAIL,
-                ARG_NEW_ID128
+                ARG_NEW_ID128,
+                ARG_HEADER
         };
 
         static const struct option options[] = {
@@ -99,6 +103,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "local",     no_argument,       NULL, 'l'           },
                 { "this-boot", no_argument,       NULL, 'b'           },
                 { "directory", required_argument, NULL, 'D'           },
+                { "header",    no_argument,       NULL, ARG_HEADER    },
                 { NULL,        0,                 NULL, 0             }
         };
 
@@ -174,6 +179,10 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_directory = optarg;
                         break;
 
+                case ARG_HEADER:
+                        arg_print_header = true;
+                        break;
+
                 case '?':
                         return -EINVAL;
 
@@ -331,6 +340,12 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        if (arg_print_header) {
+                journal_print_header(j);
+                r = 0;
+                goto finish;
+        }
+
         r = add_this_boot(j);
         if (r < 0)
                 goto finish;
index fd292f0..e66bb07 100644 (file)
@@ -460,6 +460,59 @@ static char *shortened_cgroup_path(pid_t pid) {
         return path;
 }
 
+/* Appends one entry, described by the n elements of iovec, to the
+ * journal file that find_journal() selects for uid. If that file
+ * suggests rotation, or the append fails with an error a rotation can
+ * cure, the journals are rotated and vacuumed and the write goes to
+ * the file found afterwards. At most one rotation is done per call.
+ * Failures are logged and otherwise ignored. */
+static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n) {
+        JournalFile *f;
+        bool vacuumed = false;
+        int r;
+
+        assert(s);
+        assert(iovec);
+        assert(n > 0);
+
+        f = find_journal(s, uid);
+        if (!f)
+                return;
+
+        if (journal_file_rotate_suggested(f)) {
+                log_info("Journal header limits reached or header out-of-date, rotating.");
+                server_rotate(s);
+                server_vacuum(s);
+                vacuumed = true;
+
+                /* Rotation may replace the JournalFile object, hence
+                 * look it up again instead of reusing the old pointer */
+                f = find_journal(s, uid);
+                if (!f)
+                        return;
+        }
+
+        for (;;) {
+                r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
+                if (r >= 0)
+                        return;
+
+                /* Give up if we already rotated once, or if the error
+                 * is not one that rotating could fix */
+                if (vacuumed ||
+                    (r != -E2BIG && /* hit limit */
+                     r != -EFBIG && /* hit fs limit */
+                     r != -EDQUOT && /* quota hit */
+                     r != -ENOSPC && /* disk full */
+                     r != -EBADMSG && /* corrupted */
+                     r != -ENODATA && /* truncated */
+                     r != -EHOSTDOWN && /* other machine */
+                     r != -EPROTONOSUPPORT /* unsupported feature */)) {
+                        log_error("Failed to write entry, ignoring: %s", strerror(-r));
+                        return;
+                }
+
+                /* r carries a negative errno value, hence compare
+                 * against the negated constants throughout */
+                if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
+                        log_info("Allocation limit reached, rotating.");
+                else if (r == -EHOSTDOWN)
+                        log_info("Journal file from other machine, rotating.");
+                else
+                        log_warning("Journal file corrupted, rotating.");
+
+                server_rotate(s);
+                server_vacuum(s);
+                vacuumed = true;
+
+                /* As above: pick up the file that is current after
+                 * the rotation before retrying */
+                f = find_journal(s, uid);
+                if (!f)
+                        return;
+
+                log_info("Retrying write.");
+        }
+}
+
 static void dispatch_message_real(
                 Server *s,
                 struct iovec *iovec, unsigned n, unsigned m,
@@ -480,8 +533,6 @@ static void dispatch_message_real(
         int r;
         char *t;
         uid_t loginuid = 0, realuid = 0;
-        JournalFile *f;
-        bool vacuumed = false;
 
         assert(s);
         assert(iovec);
@@ -626,37 +677,7 @@ static void dispatch_message_real(
 
         assert(n <= m);
 
-retry:
-        f = find_journal(s, realuid == 0 ? 0 : loginuid);
-        if (f) {
-                r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
-
-                if ((r == -E2BIG || /* hit limit */
-                     r == -EFBIG || /* hit fs limit */
-                     r == -EDQUOT || /* quota hit */
-                     r == -ENOSPC || /* disk full */
-                     r == -EBADMSG || /* corrupted */
-                     r == -ENODATA || /* truncated */
-                     r == -EHOSTDOWN || /* other machine */
-                     r == -EPROTONOSUPPORT) && /* unsupported feature */
-                    !vacuumed) {
-
-                        if (r == -E2BIG)
-                                log_info("Allocation limit reached, rotating.");
-                        else
-                                log_warning("Journal file corrupted, rotating.");
-
-                        server_rotate(s);
-                        server_vacuum(s);
-                        vacuumed = true;
-
-                        log_info("Retrying write.");
-                        goto retry;
-                }
-
-                if (r < 0)
-                        log_error("Failed to write entry, ignoring: %s", strerror(-r));
-        }
+        write_to_journal(s, realuid == 0 ? 0 : loginuid, iovec, n);
 
         free(pid);
         free(uid);
index 6befd8b..77469e9 100644 (file)
@@ -2009,6 +2009,22 @@ _public_ int sd_journal_get_cutoff_monotonic_usec(sd_journal *j, sd_id128_t boot
         return first ? 0 : 1;
 }
 
+/* Prints the header of every journal file in j->files via
+ * journal_file_print_header(), with an empty line between
+ * consecutive files. */
+void journal_print_header(sd_journal *j) {
+        Iterator it;
+        JournalFile *file;
+        bool first = true;
+
+        assert(j);
+
+        HASHMAP_FOREACH(file, j->files, it) {
+                /* Emit the separator before every header but the first */
+                if (!first)
+                        putchar('\n');
+                first = false;
+
+                journal_file_print_header(file);
+        }
+}
 
 /* _public_ int sd_journal_query_unique(sd_journal *j, const char *field) { */
 /*         if (!j) */