chiark / gitweb /
journal: implement generic sharable mmap caching logic
[elogind.git] / src / journal / journal-file.c
index 180b43a9c53c4f068c83e5fba43d9876ce646f74..1f5e04d791c2e8db334035034175ff74d2e170de 100644 (file)
 #include "journal-file.h"
 #include "lookup3.h"
 #include "compress.h"
+#include "fsprg.h"
 
 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
 
-#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
-
 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
 
 /* This is the minimum journal file size */
 #define JOURNAL_HEADER_CONTAINS(h, field) \
         (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
 
-static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
+static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
+static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
 
 void journal_file_close(JournalFile *f) {
-        int t;
-
         assert(f);
 
+        /* Write the final tag (returns early unless an HMAC run is open); its result is ignored here */
+        if (f->authenticate)
+                journal_file_append_tag(f);
+
+        /* Sync everything to disk, before we mark the file offline */
+        if (f->mmap && f->fd >= 0)
+                mmap_cache_close_fd(f->mmap, f->fd);
+
+        if (f->writable && f->fd >= 0)
+                fdatasync(f->fd);
+
         if (f->header) {
-                if (f->writable)
+                /* Mark the file offline. Don't override the archived state if it already is set */
+                if (f->writable && f->header->state == STATE_ONLINE)
                         f->header->state = STATE_OFFLINE;
 
                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
         }
 
-        for (t = 0; t < _WINDOW_MAX; t++)
-                if (f->windows[t].ptr)
-                        munmap(f->windows[t].ptr, f->windows[t].size);
-
         if (f->fd >= 0)
                 close_nointr_nofail(f->fd);
 
         free(f->path);
 
+        if (f->mmap) /* only drop our reference: journal_file_open() may have been handed a cache shared with other files */
+                mmap_cache_unref(f->mmap);
+
 #ifdef HAVE_XZ
         free(f->compress_buffer);
 #endif
 
+#ifdef HAVE_GCRYPT
+        if (f->fsprg_header) /* non-NULL only after journal_file_load_fsprg() mapped the state file */
+                munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
+
+        if (f->hmac)
+                gcry_md_close(f->hmac);
+#endif
+
         free(f);
 }
 
@@ -104,9 +121,15 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
         assert(f);
 
         zero(h);
-        memcpy(h.signature, signature, 8);
+        memcpy(h.signature, HEADER_SIGNATURE, 8);
         h.header_size = htole64(ALIGN64(sizeof(h)));
 
+        h.incompatible_flags =
+                htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
+
+        h.compatible_flags =
+                htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
+
         r = sd_id128_randomize(&h.file_id);
         if (r < 0)
                 return r;
@@ -148,7 +171,9 @@ static int journal_file_refresh_header(JournalFile *f) {
 
         f->header->state = STATE_ONLINE;
 
-        __sync_synchronize();
+        /* Sync the online state to disk */
+        msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
+        fdatasync(f->fd);
 
         return 0;
 }
@@ -156,17 +181,31 @@ static int journal_file_refresh_header(JournalFile *f) {
 static int journal_file_verify_header(JournalFile *f) {
         assert(f);
 
-        if (memcmp(f->header, signature, 8))
+        if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
                 return -EBADMSG;
 
+        /* In both read and write mode we refuse to open files with
+         * incompatible flags we don't know */
 #ifdef HAVE_XZ
-        if ((le64toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
+        if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
                 return -EPROTONOSUPPORT;
 #else
         if (f->header->incompatible_flags != 0)
                 return -EPROTONOSUPPORT;
 #endif
 
+        /* When open for writing we refuse to open files with
+         * unknown compatible flags, too */
+        if (f->writable) {
+#ifdef HAVE_GCRYPT
+                if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
+                        return -EPROTONOSUPPORT;
+#else
+                if (f->header->compatible_flags != 0)
+                        return -EPROTONOSUPPORT;
+#endif
+        }
+
         /* The first addition was n_data, so check that we are at least this large */
         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
                 return -EBADMSG;
@@ -199,6 +238,9 @@ static int journal_file_verify_header(JournalFile *f) {
                 }
         }
 
+        f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
+        f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
+
         return 0;
 }
 
@@ -261,59 +303,11 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
         return 0;
 }
 
-static int journal_file_map(
-                JournalFile *f,
-                uint64_t offset,
-                uint64_t size,
-                void **_window,
-                uint64_t *_woffset,
-                uint64_t *_wsize,
-                void **ret) {
-
-        uint64_t woffset, wsize;
-        void *window;
-
+static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
         assert(f);
-        assert(size > 0);
         assert(ret);
 
-        woffset = offset & ~((uint64_t) page_size() - 1ULL);
-        wsize = size + (offset - woffset);
-        wsize = PAGE_ALIGN(wsize);
-
         /* Avoid SIGBUS on invalid accesses */
-        if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
-                return -EADDRNOTAVAIL;
-
-        window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
-        if (window == MAP_FAILED)
-                return -errno;
-
-        if (_window)
-                *_window = window;
-
-        if (_woffset)
-                *_woffset = woffset;
-
-        if (_wsize)
-                *_wsize = wsize;
-
-        *ret = (uint8_t*) window + (offset - woffset);
-
-        return 0;
-}
-
-static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
-        void *p = NULL;
-        uint64_t delta;
-        int r;
-        Window *w;
-
-        assert(f);
-        assert(ret);
-        assert(wt >= 0);
-        assert(wt < _WINDOW_MAX);
-
         if (offset + size > (uint64_t) f->last_stat.st_size) {
                 /* Hmm, out of range? Let's refresh the fstat() data
                  * first, before we trust that check. */
@@ -323,57 +317,7 @@ static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_
                         return -EADDRNOTAVAIL;
         }
 
-        w = f->windows + wt;
-
-        if (_likely_(w->ptr &&
-                     w->offset <= offset &&
-                     w->offset + w->size >= offset + size)) {
-
-                *ret = (uint8_t*) w->ptr + (offset - w->offset);
-                return 0;
-        }
-
-        if (w->ptr) {
-                if (munmap(w->ptr, w->size) < 0)
-                        return -errno;
-
-                w->ptr = NULL;
-                w->size = w->offset = 0;
-        }
-
-        if (size < DEFAULT_WINDOW_SIZE) {
-                /* If the default window size is larger then what was
-                 * asked for extend the mapping a bit in the hope to
-                 * minimize needed remappings later on. We add half
-                 * the window space before and half behind the
-                 * requested mapping */
-
-                delta = (DEFAULT_WINDOW_SIZE - size) / 2;
-
-                if (delta > offset)
-                        delta = offset;
-
-                offset -= delta;
-                size = DEFAULT_WINDOW_SIZE;
-        } else
-                delta = 0;
-
-        if (offset + size > (uint64_t) f->last_stat.st_size)
-                size = (uint64_t) f->last_stat.st_size - offset;
-
-        if (size <= 0)
-                return -EADDRNOTAVAIL;
-
-        r = journal_file_map(f,
-                             offset, size,
-                             &w->ptr, &w->offset, &w->size,
-                             &p);
-
-        if (r < 0)
-                return r;
-
-        *ret = (uint8_t*) p + delta;
-        return 0;
+        return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
 }
 
 static bool verify_hash(Object *o) {
@@ -393,17 +337,38 @@ static bool verify_hash(Object *o) {
         return h1 == h2;
 }
 
+static uint64_t minimum_header_size(Object *o) { /* smallest size an object of o's type may claim */
+
+        static const uint64_t table[] = { /* never written to, hence const; types not listed here stay 0 */
+                [OBJECT_DATA] = sizeof(DataObject),
+                [OBJECT_FIELD] = sizeof(FieldObject),
+                [OBJECT_ENTRY] = sizeof(EntryObject),
+                [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
+                [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
+                [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
+                [OBJECT_TAG] = sizeof(TagObject),
+        };
+
+        if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] == 0) /* type outside the table or unlisted: only the generic header is required */
+                return sizeof(ObjectHeader);
+
+        return table[o->object.type];
+}
+
 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
         int r;
         void *t;
         Object *o;
         uint64_t s;
+        unsigned context;
 
         assert(f);
         assert(ret);
-        assert(type < _OBJECT_TYPE_MAX);
 
-        r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
+        /* One context for each type, plus one catch-all for the rest */
+        context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
+
+        r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
         if (r < 0)
                 return r;
 
@@ -413,6 +378,12 @@ int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Objec
         if (s < sizeof(ObjectHeader))
                 return -EBADMSG;
 
+        if (o->object.type <= OBJECT_UNUSED)
+                return -EBADMSG;
+
+        if (s < minimum_header_size(o))
+                return -EBADMSG;
+
         if (type >= 0 && o->object.type != type)
                 return -EBADMSG;
 
@@ -431,7 +402,7 @@ int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Objec
         return 0;
 }
 
-static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
+static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
         uint64_t r;
 
         assert(f);
@@ -464,6 +435,7 @@ static int journal_file_append_object(JournalFile *f, int type, uint64_t size, O
         void *t;
 
         assert(f);
+        assert(type > 0 && type < _OBJECT_TYPE_MAX);
         assert(size >= sizeof(ObjectHeader));
         assert(offset);
         assert(ret);
@@ -509,16 +481,16 @@ static int journal_file_setup_data_hash_table(JournalFile *f) {
 
         assert(f);
 
-        /* We estimate that we need 1 hash table entry per 2K of
+        /* We estimate that we need 1 hash table entry per 768 bytes of
            journal file and we want to make sure we never get beyond
            75% fill level. Calculate the hash table size for the
            maximum file size based on these metrics. */
 
-        s = (f->metrics.max_size * 4 / 2048 / 3) * sizeof(HashItem);
+        s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
 
-        log_info("Reserving %llu entries in hash table.", (unsigned long long) s);
+        log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
 
         r = journal_file_append_object(f,
                                        OBJECT_DATA_HASH_TABLE,
@@ -569,7 +541,7 @@ static int journal_file_map_data_hash_table(JournalFile *f) {
         s = le64toh(f->header->data_hash_table_size);
 
         r = journal_file_move_to(f,
-                                 WINDOW_DATA_HASH_TABLE,
+                                 OBJECT_DATA_HASH_TABLE,
                                  p, s,
                                  &t);
         if (r < 0)
@@ -590,7 +562,7 @@ static int journal_file_map_field_hash_table(JournalFile *f) {
         s = le64toh(f->header->field_hash_table_size);
 
         r = journal_file_move_to(f,
-                                 WINDOW_FIELD_HASH_TABLE,
+                                 OBJECT_FIELD_HASH_TABLE,
                                  p, s,
                                  &t);
         if (r < 0)
@@ -780,20 +752,22 @@ static int journal_file_append_data(
                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
                         o->object.flags |= OBJECT_COMPRESSED;
 
-                        f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
-
                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
                 }
         }
 #endif
 
-        if (!compressed)
+        if (!compressed && size > 0)
                 memcpy(o->data.payload, data, size);
 
         r = journal_file_link_data(f, o, p, hash);
         if (r < 0)
                 return r;
 
+        r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
+        if (r < 0)
+                return r;
+
         /* The linking might have altered the window, so let's
          * refresh our pointer */
         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
@@ -870,6 +844,10 @@ static int link_entry_into_array(JournalFile *f,
         if (r < 0)
                 return r;
 
+        r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
+        if (r < 0)
+                return r;
+
         o->entry_array.items[i] = htole64(p);
 
         if (ap == 0)
@@ -1000,13 +978,17 @@ static int journal_file_append_entry_internal(
         if (r < 0)
                 return r;
 
-        o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
+        o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
         o->entry.realtime = htole64(ts->realtime);
         o->entry.monotonic = htole64(ts->monotonic);
         o->entry.xor_hash = htole64(xor_hash);
         o->entry.boot_id = f->header->boot_id;
 
+        r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
+        if (r < 0)
+                return r;
+
         r = journal_file_link_entry(f, o, np);
         if (r < 0)
                 return r;
@@ -1056,7 +1038,12 @@ int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const st
             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
                 return -EINVAL;
 
-        items = alloca(sizeof(EntryItem) * n_iovec);
+        r = journal_file_maybe_append_tag(f, ts->realtime);
+        if (r < 0)
+                return r;
+
+        /* alloca() can't take 0, hence let's allocate at least one */
+        items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
 
         for (i = 0; i < n_iovec; i++) {
                 uint64_t p;
@@ -1830,6 +1817,412 @@ int journal_file_move_to_entry_by_realtime_for_data(
                                              ret, offset, NULL);
 }
 
+static void *fsprg_state(JournalFile *f) {
+        uint64_t hdr_len, state_len; /* both taken from the mapped FSPRG header */
+        assert(f);
+
+        if (!f->authenticate) /* no FSPRG state is used without authentication */
+                return NULL;
+
+        hdr_len = le64toh(f->fsprg_header->header_size);
+        state_len = le64toh(f->fsprg_header->state_size);
+
+        if (hdr_len + state_len > f->fsprg_size) /* header plus state would exceed the recorded size */
+                return NULL;
+
+        return (uint8_t*) f->fsprg_header + hdr_len; /* the state follows directly after the header */
+}
+
+static uint64_t journal_file_tag_seqnum(JournalFile *f) {
+        uint64_t next; /* the sequence number handed back to the caller */
+
+        assert(f);
+
+        next = 1 + le64toh(f->header->n_tags); /* the header keeps the counter little-endian */
+        f->header->n_tags = htole64(next);
+
+        return next;
+}
+
+int journal_file_append_tag(JournalFile *f) { /* appends an OBJECT_TAG carrying the current HMAC value; returns 0 or a negative error */
+        Object *o;
+        uint64_t p;
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        if (!f->hmac_running) /* no HMAC run is open, so there is nothing to seal */
+                return 0;
+
+        log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
+
+        assert(f->hmac);
+
+        r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
+        if (r < 0)
+                return r;
+
+        o->tag.seqnum = htole64(journal_file_tag_seqnum(f)); /* also bumps n_tags in the file header */
+
+        /* Add the tag object itself, so that we can protect its
+         * header. This will exclude the actual hash value in it */
+        r = journal_file_hmac_put_object(f, OBJECT_TAG, p); /* NOTE(review): this looks the object up again at p; confirm o stays valid afterwards */
+        if (r < 0)
+                return r;
+
+        /* Get the HMAC tag and store it in the object */
+        memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
+        f->hmac_running = false; /* lets the next journal_file_hmac_start() reset and re-key the HMAC */
+
+        return 0;
+}
+
+static int journal_file_hmac_start(JournalFile *f) {
+        uint8_t fsprg_key[256 / 8]; /* 256 bits of FSPRG output are handed to the HMAC as its key */
+
+        assert(f);
+
+        /* Nothing to do when authentication is off,
+         * or while a run is already open */
+        if (!f->authenticate || f->hmac_running)
+                return 0;
+
+        /* Prepare the HMAC for the next cycle: reset it, then key it
+         * from the current FSPRG state */
+        gcry_md_reset(f->hmac);
+        FSPRG_GetKey(fsprg_state(f), fsprg_key, sizeof(fsprg_key), 0);
+        gcry_md_setkey(f->hmac, fsprg_key, sizeof(fsprg_key));
+
+        f->hmac_running = true;
+
+        return 0;
+}
+
+static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
+        uint64_t start, interval;
+
+        assert(f);
+        assert(epoch);
+        assert(f->authenticate);
+
+        start = le64toh(f->fsprg_header->fsprg_start_usec);
+        interval = le64toh(f->fsprg_header->fsprg_interval_usec);
+
+        if (start == 0 || interval == 0) /* no usable schedule in the FSPRG header */
+                return -ENOTSUP;
+
+        if (realtime < start) /* timestamp lies before the first epoch */
+                return -ESTALE;
+
+        /* Number of whole intervals elapsed since the start time */
+        *epoch = (realtime - start) / interval;
+        return 0;
+}
+
+static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
+        uint64_t target, current;
+        int r;
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_get_epoch(f, realtime, &target);
+        if (r < 0)
+                return r;
+
+        current = FSPRG_GetEpoch(fsprg_state(f));
+        if (current > target) /* the stored key is already past the epoch realtime maps to */
+                return -ESTALE;
+
+        return current != target; /* 1 when the key still has to be evolved, 0 when it matches */
+}
+
+static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
+        uint64_t target, current;
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_get_epoch(f, realtime, &target);
+        if (r < 0)
+                return r;
+
+        current = FSPRG_GetEpoch(fsprg_state(f));
+        if (current < target)
+                log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) current, (unsigned long long) target);
+
+        while (current != target) { /* keep evolving until the key's epoch matches the target */
+                if (current > target) /* the key is ahead of the target epoch */
+                        return -ESTALE;
+
+                FSPRG_Evolve(fsprg_state(f));
+                current = FSPRG_GetEpoch(fsprg_state(f));
+        }
+
+        return 0;
+}
+
+static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) { /* if realtime maps to a later epoch: write a tag, evolve the key, open a new HMAC run */
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_need_evolve(f, realtime);
+        if (r <= 0) /* NOTE(review): negative errors from journal_file_need_evolve() become success here; confirm this is intended */
+                return 0;
+
+        r = journal_file_append_tag(f); /* seal what was hashed under the current key first */
+        if (r < 0)
+                return r;
+
+        r = journal_file_evolve(f, realtime);
+        if (r < 0)
+                return r;
+
+        r = journal_file_hmac_start(f); /* re-keys the HMAC from the evolved state */
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) { /* feeds the object at offset p into the running HMAC; returns 0 or a negative error */
+        int r;
+        Object *o;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_hmac_start(f); /* returns early if a run is already open */
+        if (r < 0)
+                return r;
+
+        r = journal_file_move_to_object(f, type, p, &o);
+        if (r < 0)
+                return r;
+
+        gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload)); /* the generic object header, up to its payload */
+
+        switch (o->object.type) {
+
+        case OBJECT_DATA:
+                /* Everything except hash and payload is mutable, so only those two are hashed */
+                gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
+                gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
+                break;
+
+        case OBJECT_ENTRY:
+                /* All: from seqnum through the end of the object */
+                gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
+                break;
+
+        case OBJECT_FIELD_HASH_TABLE:
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_ENTRY_ARRAY:
+                /* Nothing: everything is mutable */
+                break;
+
+        case OBJECT_TAG:
+                /* All but the tag itself */
+                gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
+                break;
+        default: /* any other object type is refused */
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+static int journal_file_hmac_put_header(JournalFile *f) { /* feeds selected ranges of the file header into the running HMAC; returns 0 or a negative error */
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_hmac_start(f); /* returns early if a run is already open */
+        if (r < 0)
+                return r;
+
+        /* All but state+reserved, boot_id, arena_size,
+         * tail_object_offset, n_objects, n_entries, tail_seqnum,
+         * head_entry_realtime, tail_entry_realtime,
+         * tail_entry_monotonic, n_data, n_fields, header_tag */
+
+        gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature)); /* signature up to, not including, state */
+        gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id)); /* file_id up to boot_id */
+        gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id)); /* seqnum_id up to arena_size */
+        gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset)); /* data_hash_table_offset up to tail_object_offset */
+        gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum)); /* head_seqnum up to head_entry_realtime */
+
+        return 0;
+}
+
+static int journal_file_load_fsprg(JournalFile *f) { /* validates and maps the per-machine FSPRG state file into f->fsprg_header; returns 0 or a negative errno */
+        int r, fd = -1;
+        char *p = NULL;
+        struct stat st;
+        FSPRGHeader *m = NULL;
+        sd_id128_t machine;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = sd_id128_get_machine(&machine);
+        if (r < 0)
+                return r;
+
+        if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
+                     SD_ID128_FORMAT_VAL(machine)) < 0)
+                return -ENOMEM;
+
+        fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+        if (fd < 0) {
+                r = -errno; /* capture errno first: the logging call below may overwrite it */
+                log_error("Failed to open %s: %m", p);
+                goto finish;
+        }
+
+        if (fstat(fd, &st) < 0) {
+                r = -errno;
+                goto finish;
+        }
+
+        if (st.st_size < (off_t) sizeof(FSPRGHeader)) { /* too short to hold even the fixed header */
+                r = -ENODATA;
+                goto finish;
+        }
+
+        m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0); /* temporary read-only view, used for validation only */
+        if (m == MAP_FAILED) {
+                m = NULL;
+                r = -errno;
+                goto finish;
+        }
+
+        if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        if (m->incompatible_flags != 0) {
+                r = -EPROTONOSUPPORT;
+                goto finish;
+        }
+
+        if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) { /* NOTE(review): secpar is passed without byte-order conversion; confirm its width and endianness */
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
+        if (f->fsprg_size < le64toh(m->header_size) || (uint64_t) st.st_size < f->fsprg_size) { /* the 64-bit sum wrapped around, or the file is too short */
+                r = -ENODATA;
+                goto finish;
+        }
+
+        if (!sd_id128_equal(machine, m->machine_id)) { /* state file was written for a different machine ID */
+                r = -EHOSTDOWN;
+                goto finish;
+        }
+
+        if (le64toh(m->fsprg_start_usec) <= 0 ||
+            le64toh(m->fsprg_interval_usec) <= 0) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); /* the long-lived, writable mapping; this function does not unmap it */
+        if (f->fsprg_header == MAP_FAILED) {
+                f->fsprg_header = NULL;
+                r = -errno;
+                goto finish;
+        }
+
+        r = 0;
+
+finish:
+        if (m) /* the validation view is dropped on success and failure alike */
+                munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
+
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        free(p);
+        return r;
+}
+
+static int journal_file_setup_hmac(JournalFile *f) {
+        gcry_error_t err;
+
+        /* No HMAC handle is opened when authentication is off */
+        if (!f->authenticate)
+                return 0;
+
+        /* Any failure to open the handle is reported as -ENOTSUP */
+        err = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+
+        return err != 0 ? -ENOTSUP : 0;
+}
+
+static int journal_file_append_first_tag(JournalFile *f) { /* hashes the file header and both hash table objects, then appends a tag; returns 0 or a negative error */
+        int r;
+        uint64_t p;
+
+        if (!f->authenticate)
+                return 0;
+
+        log_debug("Calculating first tag...");
+
+        r = journal_file_hmac_put_header(f);
+        if (r < 0)
+                return r;
+
+        p = le64toh(f->header->field_hash_table_offset);
+        if (p < offsetof(Object, hash_table.items)) /* guards the subtraction below against underflow */
+                return -EINVAL;
+        p -= offsetof(Object, hash_table.items); /* step back from the items array to the start of the object */
+
+        r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
+        if (r < 0)
+                return r;
+
+        p = le64toh(f->header->data_hash_table_offset);
+        if (p < offsetof(Object, hash_table.items)) /* same underflow guard for the data hash table */
+                return -EINVAL;
+        p -= offsetof(Object, hash_table.items);
+
+        r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
+        if (r < 0)
+                return r;
+
+        r = journal_file_append_tag(f);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
 void journal_file_dump(JournalFile *f) {
         Object *o;
         int r;
@@ -1874,8 +2267,9 @@ void journal_file_dump(JournalFile *f) {
                         printf("Type: OBJECT_ENTRY_ARRAY\n");
                         break;
 
-                case OBJECT_SIGNATURE:
-                        printf("Type: OBJECT_SIGNATURE\n");
+                case OBJECT_TAG:
+                        printf("Type: OBJECT_TAG %llu\n",
+                               (unsigned long long) le64toh(o->tag.seqnum));
                         break;
                 }
 
@@ -1926,8 +2320,8 @@ void journal_file_print_header(JournalFile *f) {
                f->header->state == STATE_OFFLINE ? "offline" :
                f->header->state == STATE_ONLINE ? "online" :
                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
-               (f->header->compatible_flags & HEADER_COMPATIBLE_SIGNED) ? " SIGNED" : "",
-               (f->header->compatible_flags & ~HEADER_COMPATIBLE_SIGNED) ? " ???" : "",
+               (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
+               (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
                (unsigned long long) le64toh(f->header->header_size),
@@ -1959,7 +2353,10 @@ int journal_file_open(
                 const char *fname,
                 int flags,
                 mode_t mode,
+                bool compress,
+                bool authenticate,
                 JournalMetrics *metrics,
+                MMapCache *mmap_cache,
                 JournalFile *template,
                 JournalFile **ret) {
 
@@ -1981,13 +2378,26 @@ int journal_file_open(
                 return -ENOMEM;
 
         f->fd = -1;
-        f->flags = flags;
         f->mode = mode;
-        f->writable = (flags & O_ACCMODE) != O_RDONLY;
+
+        f->flags = flags;
         f->prot = prot_from_flags(flags);
+        f->writable = (flags & O_ACCMODE) != O_RDONLY;
+        f->compress = compress;
+        f->authenticate = authenticate;
 
-        if (template)
-                f->compress = template->compress;
+        if (mmap_cache)
+                f->mmap = mmap_cache_ref(mmap_cache);
+        else {
+                /* One context for each type, plus the zeroth catchall
+                 * context. One fd for the file plus one for each type
+                 * (which we need during verification) */
+                f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
+                if (!f->mmap) {
+                        r = -ENOMEM;
+                        goto fail;
+                }
+        }
 
         f->path = strdup(fname);
         if (!f->path) {
@@ -2009,6 +2419,12 @@ int journal_file_open(
         if (f->last_stat.st_size == 0 && f->writable) {
                 newly_created = true;
 
+                /* Try to load the FSPRG state, and if we can't, then
+                 * just don't do authentication */
+                r = journal_file_load_fsprg(f);
+                if (r < 0)
+                        f->authenticate = false;
+
                 r = journal_file_init_header(f, template);
                 if (r < 0)
                         goto fail;
@@ -2037,6 +2453,12 @@ int journal_file_open(
                         goto fail;
         }
 
+        if (!newly_created && f->writable) {
+                r = journal_file_load_fsprg(f);
+                if (r < 0)
+                        goto fail;
+        }
+
         if (f->writable) {
                 if (metrics) {
                         journal_default_metrics(metrics, f->fd);
@@ -2047,10 +2469,13 @@ int journal_file_open(
                 r = journal_file_refresh_header(f);
                 if (r < 0)
                         goto fail;
+
+                r = journal_file_setup_hmac(f);
+                if (r < 0)
+                        goto fail;
         }
 
         if (newly_created) {
-
                 r = journal_file_setup_field_hash_table(f);
                 if (r < 0)
                         goto fail;
@@ -2058,6 +2483,10 @@ int journal_file_open(
                 r = journal_file_setup_data_hash_table(f);
                 if (r < 0)
                         goto fail;
+
+                r = journal_file_append_first_tag(f);
+                if (r < 0)
+                        goto fail;
         }
 
         r = journal_file_map_field_hash_table(f);
@@ -2079,7 +2508,7 @@ fail:
         return r;
 }
 
-int journal_file_rotate(JournalFile **f) {
+int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
         char *p;
         size_t l;
         JournalFile *old_file, *new_file = NULL;
@@ -2118,7 +2547,7 @@ int journal_file_rotate(JournalFile **f) {
 
         old_file->header->state = STATE_ARCHIVED;
 
-        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, NULL, old_file, &new_file);
+        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
         journal_file_close(old_file);
 
         *f = new_file;
@@ -2129,7 +2558,10 @@ int journal_file_open_reliably(
                 const char *fname,
                 int flags,
                 mode_t mode,
+                bool compress,
+                bool authenticate,
                 JournalMetrics *metrics,
+                MMapCache *mmap,
                 JournalFile *template,
                 JournalFile **ret) {
 
@@ -2137,11 +2569,13 @@ int journal_file_open_reliably(
         size_t l;
         char *p;
 
-        r = journal_file_open(fname, flags, mode, metrics, template, ret);
+        r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
         if (r != -EBADMSG && /* corrupted */
             r != -ENODATA && /* truncated */
             r != -EHOSTDOWN && /* other machine */
-            r != -EPROTONOSUPPORT) /* incompatible feature */
+            r != -EPROTONOSUPPORT && /* incompatible feature */
+            r != -EBUSY && /* unclean shutdown */
+            r != -ESHUTDOWN /* already archived */)
                 return r;
 
         if ((flags & O_ACCMODE) == O_RDONLY)
@@ -2150,6 +2584,9 @@ int journal_file_open_reliably(
         if (!(flags & O_CREAT))
                 return r;
 
+        if (!endswith(fname, ".journal"))
+                return r;
+
         /* The file is corrupted. Rotate it away and try it again (but only once) */
 
         l = strlen(fname);
@@ -2164,9 +2601,9 @@ int journal_file_open_reliably(
         if (r < 0)
                 return -errno;
 
-        log_warning("File %s corrupted, renaming and replacing.", fname);
+        log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
 
-        return journal_file_open(fname, flags, mode, metrics, template, ret);
+        return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
 }
 
 struct vacuum_info {
@@ -2333,7 +2770,8 @@ int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t m
                 n_list ++;
         }
 
-        qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
+        if (n_list > 0)
+                qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
 
         for(i = 0; i < n_list; i++) {
                 struct statvfs ss;
@@ -2592,8 +3030,10 @@ bool journal_file_rotate_suggested(JournalFile *f) {
 
         /* If we gained new header fields we gained new features,
          * hence suggest a rotation */
-        if (le64toh(f->header->header_size) < sizeof(Header))
+        if (le64toh(f->header->header_size) < sizeof(Header)) {
+                log_debug("%s uses an outdated header, suggesting rotation.", f->path);
                 return true;
+        }
 
         /* Let's check if the hash tables grew over a certain fill
          * level (75%, borrowing this value from Java's hash table
@@ -2602,12 +3042,26 @@ bool journal_file_rotate_suggested(JournalFile *f) {
          * in newer versions. */
 
         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
-                if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL)
+                if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+                        log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
+                                  f->path,
+                                  100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
+                                  (unsigned long long) le64toh(f->header->n_data),
+                                  (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
+                                  (unsigned long long) (f->last_stat.st_size),
+                                  (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
                         return true;
+                }
 
         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
-                if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL)
+                if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
+                        log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
+                                  f->path,
+                                  100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
+                                  (unsigned long long) le64toh(f->header->n_fields),
+                                  (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
                         return true;
+                }
 
         return false;
 }