#include <fcntl.h>
#include <stddef.h>
+#ifdef HAVE_XATTR
+#include <attr/xattr.h>
+#endif
+
#include "journal-def.h"
#include "journal-file.h"
+#include "journal-authenticate.h"
#include "lookup3.h"
#include "compress.h"
#include "fsprg.h"
#define COMPRESSION_SIZE_THRESHOLD (512ULL)
/* This is the minimum journal file size */
-#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
+#define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL) /* 4 MiB */
/* These are the lower and upper bounds if we deduce the max_use value
* from the file system size */
/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
-#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
+/* How many entries to keep in the entry array chain cache at max */
+#define CHAIN_CACHE_MAX 20
+
+/* How much to increase the journal file size at once each time we allocate something new. */
+#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL) /* 8MB */
+
+/* Marks a writable journal file as online.
+ *
+ * Returns 0 if the file already is online or was switched from offline to
+ * online (in which case the descriptor is fsync()ed), -EPERM if the file is
+ * not writable, and -EINVAL if there is no descriptor or header, or if the
+ * header is in any state other than online/offline. */
+static int journal_file_set_online(JournalFile *f) {
+        assert(f);
+
+        if (!f->writable)
+                return -EPERM;
+
+        if (f->fd < 0 || !f->header)
+                return -EINVAL;
+
+        /* Already online, nothing to change */
+        if (f->header->state == STATE_ONLINE)
+                return 0;
+
+        /* Offline is the only other state we are willing to leave */
+        if (f->header->state != STATE_OFFLINE)
+                return -EINVAL;
+
+        f->header->state = STATE_ONLINE;
+        fsync(f->fd);
+
+        return 0;
+}
+
+int journal_file_set_offline(JournalFile *f) {
+ assert(f);
+ /* Switch an online file to STATE_OFFLINE, calling fsync() on the
+  * descriptor both before and after the state change. Returns 0 on
+  * success or when the file is not online, -EPERM for a file that is
+  * not writable and -EINVAL when the fd or the header is missing. */
+ if (!f->writable)
+ return -EPERM;
+ /* Both a valid descriptor and a header pointer are required. */
+ if (!(f->fd >= 0 && f->header))
+ return -EINVAL;
+ /* Any state other than online is left untouched. */
+ if (f->header->state != STATE_ONLINE)
+ return 0;
+ /* Sync everything written so far before the header is marked offline. */
+ fsync(f->fd);
-#define JOURNAL_HEADER_CONTAINS(h, field) \
- (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
+ f->header->state = STATE_OFFLINE;
-static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
-static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
+ fsync(f->fd);
+
+ return 0;
+}
void journal_file_close(JournalFile *f) {
assert(f);
+#ifdef HAVE_GCRYPT
/* Write the final tag */
- if (f->authenticate)
+ if (f->seal && f->writable)
journal_file_append_tag(f);
+#endif
/* Sync everything to disk, before we mark the file offline */
if (f->mmap && f->fd >= 0)
mmap_cache_close_fd(f->mmap, f->fd);
- if (f->writable && f->fd >= 0)
- fdatasync(f->fd);
-
- if (f->header) {
- /* Mark the file offline. Don't override the archived state if it already is set */
- if (f->writable && f->header->state == STATE_ONLINE)
- f->header->state = STATE_OFFLINE;
+ journal_file_set_offline(f);
+ if (f->header)
munmap(f->header, PAGE_ALIGN(sizeof(Header)));
- }
if (f->fd >= 0)
close_nointr_nofail(f->fd);
if (f->mmap)
mmap_cache_unref(f->mmap);
+ hashmap_free_free(f->chain_cache);
+
#ifdef HAVE_XZ
free(f->compress_buffer);
#endif
#ifdef HAVE_GCRYPT
- if (f->fsprg_header)
- munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
+ if (f->fss_file)
+ munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
+ else if (f->fsprg_state)
+ free(f->fsprg_state);
+
+ free(f->fsprg_seed);
if (f->hmac)
gcry_md_close(f->hmac);
htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
h.compatible_flags =
- htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
+ htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
r = sd_id128_randomize(&h.file_id);
if (r < 0)
f->header->boot_id = boot_id;
- f->header->state = STATE_ONLINE;
+ journal_file_set_online(f);
/* Sync the online state to disk */
- msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
- fdatasync(f->fd);
+ fsync(f->fd);
return 0;
}
* compatible flags, too */
if (f->writable) {
#ifdef HAVE_GCRYPT
- if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
+ if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
return -EPROTONOSUPPORT;
#else
if (f->header->compatible_flags != 0)
#endif
}
+ if (f->header->state >= _STATE_MAX)
+ return -EBADMSG;
+
/* The first addition was n_data, so check that we are at least this large */
if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
return -EBADMSG;
- if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
- !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
+ if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
return -EBADMSG;
- if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
+ if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
+ return -ENODATA;
+
+ if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
+ return -ENODATA;
+
+ if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
+ !VALID64(le64toh(f->header->field_hash_table_offset)) ||
+ !VALID64(le64toh(f->header->tail_object_offset)) ||
+ !VALID64(le64toh(f->header->entry_array_offset)))
+ return -ENODATA;
+
+ if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
+ le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
+ le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
+ le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
return -ENODATA;
if (f->writable) {
}
}
- f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
- f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
+ f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
+
+ f->seal = JOURNAL_HEADER_SEALED(f->header);
return 0;
}
if (new_size <= old_size)
return 0;
- if (f->metrics.max_size > 0 &&
- new_size > f->metrics.max_size)
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
return -E2BIG;
- if (new_size > f->metrics.min_size &&
- f->metrics.keep_free > 0) {
+ if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
struct statvfs svfs;
if (fstatvfs(f->fd, &svfs) >= 0) {
}
}
+ /* Increase by larger blocks at once */
+ new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
+ if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
+ new_size = f->metrics.max_size;
+
/* Note that the glibc fallocate() fallback is very
inefficient, hence we try to minimize the allocation area
as we can. */
if (r != 0)
return -r;
- mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
-
if (fstat(f->fd, &f->last_stat) < 0)
return -errno;
return 0;
}
-static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
+static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
assert(f);
assert(ret);
+ if (size <= 0)
+ return -EINVAL;
+
/* Avoid SIGBUS on invalid accesses */
if (offset + size > (uint64_t) f->last_stat.st_size) {
/* Hmm, out of range? Let's refresh the fstat() data
return -EADDRNOTAVAIL;
}
- return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
-}
-
-static bool verify_hash(Object *o) {
- uint64_t h1, h2;
-
- assert(o);
-
- if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
- h1 = le64toh(o->data.hash);
- h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
- } else if (o->object.type == OBJECT_FIELD) {
- h1 = le64toh(o->field.hash);
- h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
- } else
- return true;
-
- return h1 == h2;
+ return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
}
static uint64_t minimum_header_size(Object *o) {
- static uint64_t table[] = {
+ static const uint64_t table[] = {
[OBJECT_DATA] = sizeof(DataObject),
[OBJECT_FIELD] = sizeof(FieldObject),
[OBJECT_ENTRY] = sizeof(EntryObject),
assert(f);
assert(ret);
+ /* Objects may only be located at offsets that are a multiple of 64 bits */
+ if (!VALID64(offset))
+ return -EFAULT;
+
/* One context for each type, plus one catch-all for the rest */
context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
- r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
+ r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
if (r < 0)
return r;
if (s < minimum_header_size(o))
return -EBADMSG;
- if (type >= 0 && o->object.type != type)
+ if (type > 0 && o->object.type != type)
return -EBADMSG;
if (s > sizeof(ObjectHeader)) {
- r = journal_file_move_to(f, o->object.type, offset, s, &t);
+ r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
if (r < 0)
return r;
o = (Object*) t;
}
- if (!verify_hash(o))
- return -EBADMSG;
-
*ret = o;
return 0;
}
return r;
}
-static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
int r;
uint64_t p;
Object *tail, *o;
assert(offset);
assert(ret);
+ r = journal_file_set_online(f);
+ if (r < 0)
+ return r;
+
p = le64toh(f->header->tail_object_offset);
if (p == 0)
p = le64toh(f->header->header_size);
if (r < 0)
return r;
- r = journal_file_move_to(f, type, p, size, &t);
+ r = journal_file_move_to(f, type, false, p, size, &t);
if (r < 0)
return r;
if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
s = DEFAULT_DATA_HASH_TABLE_SIZE;
- log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
+ log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
r = journal_file_append_object(f,
OBJECT_DATA_HASH_TABLE,
assert(f);
+ /* We use a fixed size hash table for the fields as this
+ * number should grow very slowly only */
+
s = DEFAULT_FIELD_HASH_TABLE_SIZE;
r = journal_file_append_object(f,
OBJECT_FIELD_HASH_TABLE,
r = journal_file_move_to(f,
OBJECT_DATA_HASH_TABLE,
+ true,
p, s,
&t);
if (r < 0)
r = journal_file_move_to(f,
OBJECT_FIELD_HASH_TABLE,
+ true,
p, s,
&t);
if (r < 0)
return 0;
}
-static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
+/* Links the field object 'o', located at 'offset', into the bucket of the
+ * field hash table selected by 'hash', and bumps the n_fields counter if the
+ * header has one.
+ *
+ * Returns 0 on success, -EINVAL if 'o' is not a field object, -EBADMSG if
+ * the header describes a field hash table without a single bucket, or the
+ * error returned by journal_file_move_to_object(). */
+static int journal_file_link_field(
+                JournalFile *f,
+                Object *o,
+                uint64_t offset,
+                uint64_t hash) {
+
+        uint64_t p, h, m;
+        int r;
+
+        assert(f);
+        assert(o);
+        assert(offset > 0);
+
+        if (o->object.type != OBJECT_FIELD)
+                return -EINVAL;
+
+        /* The table size is read from the file header. If it is smaller
+         * than one HashItem the bucket count is zero and the modulo below
+         * would divide by zero, hence refuse such a header. */
+        m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+        if (m <= 0)
+                return -EBADMSG;
+
+        /* Reset the chain pointers now, while 'o' still refers to the new
+         * object: the lookup of the previous tail below re-points 'o'. */
+        o->field.next_hash_offset = o->field.head_data_offset = 0;
+
+        h = hash % m;
+        p = le64toh(f->field_hash_table[h].tail_hash_offset);
+        if (p == 0)
+                /* First object in this bucket */
+                f->field_hash_table[h].head_hash_offset = htole64(offset);
+        else {
+                /* Patch the forward pointer of the previous tail */
+                r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+                if (r < 0)
+                        return r;
+
+                o->field.next_hash_offset = htole64(offset);
+        }
+
+        f->field_hash_table[h].tail_hash_offset = htole64(offset);
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
+                f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
+
+        return 0;
+}
+
+static int journal_file_link_data(
+ JournalFile *f,
+ Object *o,
+ uint64_t offset,
+ uint64_t hash) {
+
uint64_t p, h;
int r;
assert(f);
assert(o);
assert(offset > 0);
- assert(o->object.type == OBJECT_DATA);
+
+ if (o->object.type != OBJECT_DATA)
+ return -EINVAL;
/* This might alter the window we are looking at */
h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
p = le64toh(f->data_hash_table[h].tail_hash_offset);
- if (p == 0) {
+ if (p == 0)
/* Only entry in the hash table is easy */
f->data_hash_table[h].head_hash_offset = htole64(offset);
- } else {
+ else {
/* Move back to the previous data object, to patch in
* pointer */
return 0;
}
+/* Looks up the field object whose payload equals the 'size' bytes at 'field',
+ * walking the hash chain of the bucket selected by the precomputed 'hash'.
+ *
+ * Returns 1 if a match was found (then *ret and *offset are filled in when
+ * the respective pointer is non-NULL), 0 if there is no such object,
+ * -EBADMSG if the header describes a field hash table without a single
+ * bucket, or the error returned by journal_file_move_to_object(). */
+int journal_file_find_field_object_with_hash(
+                JournalFile *f,
+                const void *field, uint64_t size, uint64_t hash,
+                Object **ret, uint64_t *offset) {
+
+        uint64_t p, osize, h, m;
+        int r;
+
+        assert(f);
+        assert(field && size > 0);
+
+        osize = offsetof(Object, field.payload) + size;
+
+        /* Check the bucket count rather than the raw byte size: a size
+         * between 1 and sizeof(HashItem)-1 is non-zero but still yields zero
+         * buckets, which would make the modulo below divide by zero. */
+        m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
+        if (m <= 0)
+                return -EBADMSG;
+
+        h = hash % m;
+        p = le64toh(f->field_hash_table[h].head_hash_offset);
+
+        while (p > 0) {
+                Object *o;
+
+                r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
+                if (r < 0)
+                        return r;
+
+                /* Cheap comparisons first, the payload comparison last */
+                if (le64toh(o->field.hash) == hash &&
+                    le64toh(o->object.size) == osize &&
+                    memcmp(o->field.payload, field, size) == 0) {
+
+                        if (ret)
+                                *ret = o;
+                        if (offset)
+                                *offset = p;
+
+                        return 1;
+                }
+
+                p = le64toh(o->field.next_hash_offset);
+        }
+
+        return 0;
+}
+
+/* Convenience wrapper around journal_file_find_field_object_with_hash():
+ * hashes the field name with hash64() and passes the result through, and
+ * returns whatever that call returns. */
+int journal_file_find_field_object(
+                JournalFile *f,
+                const void *field, uint64_t size,
+                Object **ret, uint64_t *offset) {
+
+        assert(f);
+        assert(field && size > 0);
+
+        return journal_file_find_field_object_with_hash(
+                        f,
+                        field, size, hash64(field, size),
+                        ret, offset);
+}
+
int journal_file_find_data_object_with_hash(
JournalFile *f,
const void *data, uint64_t size, uint64_t hash,
l -= offsetof(Object, data.payload);
- if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
+ if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0))
return -EBADMSG;
if (rsize == size &&
ret, offset);
}
+/* Returns the field object for the 'size' bytes at 'field', appending and
+ * linking a new one if none exists yet.
+ *
+ * On success returns 0 and stores the object in *ret and its offset in
+ * *offset (each only if the pointer is non-NULL); on failure returns the
+ * negative error of the call that failed. */
+static int journal_file_append_field(
+                JournalFile *f,
+                const void *field, uint64_t size,
+                Object **ret, uint64_t *offset) {
+
+        uint64_t field_hash, pos;
+        Object *fo;
+        int r;
+
+        assert(f);
+        assert(field && size > 0);
+
+        field_hash = hash64(field, size);
+
+        r = journal_file_find_field_object_with_hash(f, field, size, field_hash, &fo, &pos);
+        if (r < 0)
+                return r;
+
+        if (r == 0) {
+                /* No such field yet, hence create it */
+                r = journal_file_append_object(f, OBJECT_FIELD, offsetof(Object, field.payload) + size, &fo, &pos);
+                if (r < 0)
+                        return r;
+
+                fo->field.hash = htole64(field_hash);
+                memcpy(fo->field.payload, field, size);
+
+                r = journal_file_link_field(f, fo, pos, field_hash);
+                if (r < 0)
+                        return r;
+
+                /* Linking may have moved the window, so look the object
+                 * up again before touching it */
+                r = journal_file_move_to_object(f, OBJECT_FIELD, pos, &fo);
+                if (r < 0)
+                        return r;
+
+#ifdef HAVE_GCRYPT
+                r = journal_file_hmac_put_object(f, OBJECT_FIELD, fo, pos);
+                if (r < 0)
+                        return r;
+#endif
+        }
+
+        if (ret)
+                *ret = fo;
+
+        if (offset)
+                *offset = pos;
+
+        return 0;
+}
+
static int journal_file_append_data(
JournalFile *f,
const void *data, uint64_t size,
Object *o;
int r;
bool compressed = false;
+ const void *eq;
assert(f);
assert(data || size == 0);
o->object.size = htole64(offsetof(Object, data.payload) + rsize);
o->object.flags |= OBJECT_COMPRESSED;
- log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
+ log_debug("Compressed data object %"PRIu64" -> %"PRIu64, size, rsize);
}
}
#endif
if (r < 0)
return r;
- r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
- if (r < 0)
- return r;
-
/* The linking might have altered the window, so let's
* refresh our pointer */
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
+ eq = memchr(data, '=', size);
+ if (eq && eq > data) {
+ uint64_t fp;
+ Object *fo;
+
+ /* Create field object ... */
+ r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
+ if (r < 0)
+ return r;
+
+ /* ... and link it in. */
+ o->data.next_field_offset = fo->field.head_data_offset;
+ fo->field.head_data_offset = le64toh(p);
+ }
+
+#ifdef HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
+ if (r < 0)
+ return r;
+#endif
+
if (ret)
*ret = o;
uint64_t journal_file_entry_n_items(Object *o) {
assert(o);
- assert(o->object.type == OBJECT_ENTRY);
+ /* Returns how many EntryItem slots fit into the part of an entry
+  * object that follows its fixed header; objects of any other type
+  * yield 0. */
+ if (o->object.type != OBJECT_ENTRY)
+ return 0;
return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
}
-static uint64_t journal_file_entry_array_n_items(Object *o) {
+uint64_t journal_file_entry_array_n_items(Object *o) {
assert(o);
- assert(o->object.type == OBJECT_ENTRY_ARRAY);
+ /* Returns how many 64-bit item slots fit into the part of an entry
+  * array object that follows its fixed header; objects of any other
+  * type yield 0. */
+ if (o->object.type != OBJECT_ENTRY_ARRAY)
+ return 0;
return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
}
+/* Returns how many HashItem slots fit into the part of a data or field hash
+ * table object that follows its fixed header; objects of any other type
+ * yield 0. */
+uint64_t journal_file_hash_table_n_items(Object *o) {
+        assert(o);
+
+        switch (o->object.type) {
+
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_FIELD_HASH_TABLE:
+                return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
+
+        default:
+                return 0;
+        }
+}
+
static int link_entry_into_array(JournalFile *f,
le64_t *first,
le64_t *idx,
if (r < 0)
return r;
- r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
+#ifdef HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
if (r < 0)
return r;
+#endif
o->entry_array.items[i] = htole64(p);
o->entry_array.next_entry_array_offset = htole64(q);
}
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
+
*idx = htole64(hidx + 1);
return 0;
assert(f);
assert(o);
assert(offset > 0);
- assert(o->object.type == OBJECT_ENTRY);
+
+ if (o->object.type != OBJECT_ENTRY)
+ return -EINVAL;
__sync_synchronize();
if (r < 0)
return r;
- /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
+ /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
if (f->header->head_entry_realtime == 0)
f->header->head_entry_realtime = o->entry.realtime;
o->entry.xor_hash = htole64(xor_hash);
o->entry.boot_id = f->header->boot_id;
- r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
+#ifdef HAVE_GCRYPT
+ r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
if (r < 0)
return r;
+#endif
r = journal_file_link_entry(f, o, np);
if (r < 0)
__sync_synchronize();
if (ftruncate(f->fd, f->last_stat.st_size) < 0)
- log_error("Failed to to truncate file to its own size: %m");
+ log_error("Failed to truncate file to its own size: %m");
+}
+
+/* qsort()-style comparator ordering EntryItems by ascending object offset
+ * (compared in host byte order). Returns -1, 0 or 1. */
+static int entry_item_cmp(const void *_a, const void *_b) {
+        const EntryItem *lhs = _a, *rhs = _b;
+        uint64_t x = le64toh(lhs->object_offset);
+        uint64_t y = le64toh(rhs->object_offset);
+
+        return (x > y) - (x < y);
}
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
assert(f);
assert(iovec || n_iovec == 0);
- if (!f->writable)
- return -EPERM;
-
if (!ts) {
dual_timestamp_get(&_ts);
ts = &_ts;
ts->monotonic < le64toh(f->header->tail_entry_monotonic))
return -EINVAL;
+#ifdef HAVE_GCRYPT
r = journal_file_maybe_append_tag(f, ts->realtime);
if (r < 0)
return r;
+#endif
/* alloca() can't take 0, hence let's allocate at least one */
- items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
+ items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
for (i = 0; i < n_iovec; i++) {
uint64_t p;
items[i].hash = o->data.hash;
}
+ /* Order by the position on disk, in order to improve seek
+ * times for rotating media. */
+ qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
+
r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
journal_file_post_change(f);
return r;
}
-static int generic_array_get(JournalFile *f,
- uint64_t first,
- uint64_t i,
- Object **ret, uint64_t *offset) {
+typedef struct ChainCacheItem { /* one cached position inside an entry array chain, keyed by 'first' */
+ uint64_t first; /* offset of the first array of the chain; used as the hashmap key */
+ uint64_t array; /* offset of the cached array, where the next lookup may resume */
+ uint64_t begin; /* the first item of the cached array, i.e. items[0] as passed in by the caller */
+ uint64_t total; /* the total number of items in all arrays before the cached one in the chain */
+ uint64_t last_index; /* the last index we looked at in the cached array, to optimize locality when bisecting */
+} ChainCacheItem;
+
+/* Records (array, begin, total, last_index) as the cached position for the
+ * chain starting at 'first'.
+ *
+ * If 'ci' is non-NULL it is the existing cache item of this chain and is
+ * updated in place. Otherwise a new item is inserted into 'h'; once the map
+ * holds CHAIN_CACHE_MAX items the one returned by hashmap_steal_first() is
+ * recycled instead of allocating. Allocation or insertion failures are
+ * silently ignored, the position simply is not cached then. */
+static void chain_cache_put(
+                Hashmap *h,
+                ChainCacheItem *ci,
+                uint64_t first,
+                uint64_t array,
+                uint64_t begin,
+                uint64_t total,
+                uint64_t last_index) {
+
+        if (ci) {
+                assert(ci->first == first);
+        } else {
+                /* A position in the very first array of the chain is
+                 * not worth an entry */
+                if (array == first)
+                        return;
+
+                if (hashmap_size(h) < CHAIN_CACHE_MAX) {
+                        ci = new(ChainCacheItem, 1);
+                        if (!ci)
+                                return;
+                } else
+                        ci = hashmap_steal_first(h);
+
+                ci->first = first;
+
+                if (hashmap_put(h, &ci->first, ci) < 0) {
+                        free(ci);
+                        return;
+                }
+        }
+
+        ci->last_index = last_index;
+        ci->total = total;
+        ci->begin = begin;
+        ci->array = array;
+}
+
+static int generic_array_get(
+ JournalFile *f,
+ uint64_t first,
+ uint64_t i,
+ Object **ret, uint64_t *offset) {
Object *o;
- uint64_t p = 0, a;
+ uint64_t p = 0, a, t = 0;
int r;
+ ChainCacheItem *ci;
assert(f);
a = first;
+
+ /* Try the chain cache first */
+ ci = hashmap_get(f->chain_cache, &first);
+ if (ci && i > ci->total) {
+ a = ci->array;
+ i -= ci->total;
+ t = ci->total;
+ }
+
while (a > 0) {
- uint64_t n;
+ uint64_t k;
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
if (r < 0)
return r;
- n = journal_file_entry_array_n_items(o);
- if (i < n) {
+ k = journal_file_entry_array_n_items(o);
+ if (i < k) {
p = le64toh(o->entry_array.items[i]);
- break;
+ goto found;
}
- i -= n;
+ i -= k;
+ t += k;
a = le64toh(o->entry_array.next_entry_array_offset);
}
- if (a <= 0 || p <= 0)
- return 0;
+ return 0;
+
+found:
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, o->entry_array.items[0], t, i);
r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
if (r < 0)
return 1;
}
-static int generic_array_get_plus_one(JournalFile *f,
- uint64_t extra,
- uint64_t first,
- uint64_t i,
- Object **ret, uint64_t *offset) {
+static int generic_array_get_plus_one(
+ JournalFile *f,
+ uint64_t extra,
+ uint64_t first,
+ uint64_t i,
+ Object **ret, uint64_t *offset) {
Object *o;
TEST_RIGHT
};
-static int generic_array_bisect(JournalFile *f,
- uint64_t first,
- uint64_t n,
- uint64_t needle,
- int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
- direction_t direction,
- Object **ret,
- uint64_t *offset,
- uint64_t *idx) {
-
- uint64_t a, p, t = 0, i = 0, last_p = 0;
+static int generic_array_bisect(
+ JournalFile *f,
+ uint64_t first,
+ uint64_t n,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset,
+ uint64_t *idx) {
+
+ uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
bool subtract_one = false;
Object *o, *array = NULL;
int r;
+ ChainCacheItem *ci;
assert(f);
assert(test_object);
+ /* Start with the first array in the chain */
a = first;
+
+ ci = hashmap_get(f->chain_cache, &first);
+ if (ci && n > ci->total) {
+ /* Ah, we have iterated this bisection array chain
+ * previously! Let's see if we can skip ahead in the
+ * chain, as far as the last time. But we can't jump
+ * backwards in the chain, so let's check that
+ * first. */
+
+ r = test_object(f, ci->begin, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_LEFT) {
+ /* OK, what we are looking for is right of the
+ * begin of this EntryArray, so let's jump
+ * straight to previously cached array in the
+ * chain */
+
+ a = ci->array;
+ n -= ci->total;
+ t = ci->total;
+ last_index = ci->last_index;
+ }
+ }
+
while (a > 0) {
uint64_t left, right, k, lp;
if (r == TEST_RIGHT) {
left = 0;
right -= 1;
+
+ if (last_index != (uint64_t) -1) {
+ assert(last_index <= right);
+
+ /* If we cached the last index we
+ * looked at, let's try not to jump
+ * around too wildly, and see if we can
+ * narrow the range early on to
+ * the immediate neighbors of the last
+ * index we looked at. */
+
+ if (last_index > 0) {
+ uint64_t x = last_index - 1;
+
+ p = le64toh(array->entry_array.items[x]);
+ if (p <= 0)
+ return -EBADMSG;
+
+ r = test_object(f, p, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = x;
+ else
+ left = x + 1;
+ }
+
+ if (last_index < right) {
+ uint64_t y = last_index + 1;
+
+ p = le64toh(array->entry_array.items[y]);
+ if (p <= 0)
+ return -EBADMSG;
+
+ r = test_object(f, p, needle);
+ if (r < 0)
+ return r;
+
+ if (r == TEST_FOUND)
+ r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
+
+ if (r == TEST_RIGHT)
+ right = y;
+ else
+ left = y + 1;
+ }
+
+ last_index = (uint64_t) -1;
+ }
+
for (;;) {
if (left == right) {
if (direction == DIRECTION_UP)
}
assert(left < right);
-
i = (left + right) / 2;
+
p = le64toh(array->entry_array.items[i]);
if (p <= 0)
return -EBADMSG;
n -= k;
t += k;
+ last_index = (uint64_t) -1;
a = le64toh(array->entry_array.next_entry_array_offset);
}
if (subtract_one && t == 0 && i == 0)
return 0;
+ /* Let's cache this item for the next invocation */
+ chain_cache_put(f->chain_cache, ci, first, a, array->entry_array.items[0], t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
+
if (subtract_one && i == 0)
p = last_p;
else if (subtract_one)
return 1;
}
-static int generic_array_bisect_plus_one(JournalFile *f,
- uint64_t extra,
- uint64_t first,
- uint64_t n,
- uint64_t needle,
- int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
- direction_t direction,
- Object **ret,
- uint64_t *offset,
- uint64_t *idx) {
+
+static int generic_array_bisect_plus_one(
+ JournalFile *f,
+ uint64_t extra,
+ uint64_t first,
+ uint64_t n,
+ uint64_t needle,
+ int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
+ direction_t direction,
+ Object **ret,
+ uint64_t *offset,
+ uint64_t *idx) {
int r;
bool step_back = false;
return 1;
}
-static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
+_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
assert(f);
assert(p > 0);
return TEST_RIGHT;
}
+/* Formats "_BOOT_ID=" followed by the string form of 'boot_id' and looks the
+ * result up with journal_file_find_data_object(), returning whatever that
+ * call returns. 'o' and 'b' are passed through unchanged. */
+static inline int find_data_object_by_boot_id(
+                JournalFile *f,
+                sd_id128_t boot_id,
+                Object **o,
+                uint64_t *b) {
+
+        /* Prefix, 32 characters for the ID, and the trailing NUL */
+        char match[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
+
+        sd_id128_to_string(boot_id, match + (sizeof("_BOOT_ID=")-1));
+
+        return journal_file_find_data_object(f, match, sizeof(match) - 1, o, b);
+}
+
int journal_file_move_to_entry_by_monotonic(
JournalFile *f,
sd_id128_t boot_id,
Object **ret,
uint64_t *offset) {
- char t[9+32+1] = "_BOOT_ID=";
Object *o;
int r;
assert(f);
- sd_id128_to_string(boot_id, t + 9);
- r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
+ r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
if (r < 0)
return r;
if (r == 0)
direction_t direction,
Object **ret, uint64_t *offset) {
- char t[9+32+1] = "_BOOT_ID=";
Object *o, *d;
int r;
uint64_t b, z;
assert(f);
/* First, seek by time */
- sd_id128_to_string(boot_id, t + 9);
- r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
+ r = find_data_object_by_boot_id(f, boot_id, &o, &b);
if (r < 0)
return r;
if (r == 0)
ret, offset, NULL);
}
-static void *fsprg_state(JournalFile *f) {
- uint64_t a, b;
- assert(f);
-
- if (!f->authenticate)
- return NULL;
+void journal_file_dump(JournalFile *f) { /* Print the file header via journal_file_print_header(),
+ * then one "Type: ..." line for every object in the file (plus a
+ * "Flags: COMPRESSED" line for objects carrying that flag). Objects are
+ * visited in file order, from the offset stored in the header's
+ * header_size field up to and including tail_object_offset. If an
+ * object cannot be accessed, "File corrupt" is logged and the dump
+ * stops. Nothing is returned to the caller. */
+ Object *o;
+ int r;
+ uint64_t p; /* file offset of the object being printed; 0 terminates the loop */
- a = le64toh(f->fsprg_header->header_size);
- b = le64toh(f->fsprg_header->state_size);
+ assert(f);
- if (a + b > f->fsprg_size)
- return NULL;
+ journal_file_print_header(f);
- return (uint8_t*) f->fsprg_header + a;
-}
+ p = le64toh(f->header->header_size); /* the walk starts at the offset given by header_size */
+ while (p != 0) {
+ r = journal_file_move_to_object(f, -1, p, &o); /* NOTE(review): -1 as type presumably means "any object type" -- confirm */
+ if (r < 0)
+ goto fail;
-static uint64_t journal_file_tag_seqnum(JournalFile *f) {
- uint64_t r;
+ switch (o->object.type) { /* map the object type to a printable name */
- assert(f);
+ case OBJECT_UNUSED:
+ printf("Type: OBJECT_UNUSED\n");
+ break;
- r = le64toh(f->header->n_tags) + 1;
- f->header->n_tags = htole64(r);
+ case OBJECT_DATA:
+ printf("Type: OBJECT_DATA\n");
+ break;
- return r;
-}
+ case OBJECT_FIELD:
+ printf("Type: OBJECT_FIELD\n");
+ break;
-int journal_file_append_tag(JournalFile *f) {
- Object *o;
- uint64_t p;
- int r;
+ case OBJECT_ENTRY: /* entries additionally show seqnum and both timestamps, converted with le64toh() */
+ printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
+ le64toh(o->entry.seqnum),
+ le64toh(o->entry.monotonic),
+ le64toh(o->entry.realtime));
+ break;
- assert(f);
+ case OBJECT_FIELD_HASH_TABLE:
+ printf("Type: OBJECT_FIELD_HASH_TABLE\n");
+ break;
- if (!f->authenticate)
- return 0;
+ case OBJECT_DATA_HASH_TABLE:
+ printf("Type: OBJECT_DATA_HASH_TABLE\n");
+ break;
- if (!f->hmac_running)
- return 0;
+ case OBJECT_ENTRY_ARRAY:
+ printf("Type: OBJECT_ENTRY_ARRAY\n");
+ break;
- log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
+ case OBJECT_TAG: /* tags additionally show their seqnum and epoch */
+ printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
+ le64toh(o->tag.seqnum),
+ le64toh(o->tag.epoch));
+ break;
- assert(f->hmac);
+ default: /* any other type value is printed numerically; the walk continues */
+ printf("Type: unknown (%u)\n", o->object.type);
+ break;
+ }
- r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
- if (r < 0)
- return r;
+ if (o->object.flags & OBJECT_COMPRESSED)
+ printf("Flags: COMPRESSED\n");
- o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+ if (p == le64toh(f->header->tail_object_offset)) /* the object at tail_object_offset is the last one */
+ p = 0;
+ else
+ p = p + ALIGN64(le64toh(o->object.size)); /* advance by the object's size as rounded by ALIGN64() */
+ }
- /* Add the tag object itself, so that we can protect its
- * header. This will exclude the actual hash value in it */
- r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
- if (r < 0)
- return r;
+ return;
+fail: /* reached only when journal_file_move_to_object() returned an error */
+ log_error("File corrupt");
+}
- /* Get the HMAC tag and store it in the object */
- memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
- f->hmac_running = false;
+static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) { /* Like format_timestamp(buf, l, t), but never returns NULL:
+ * when format_timestamp() yields NULL the fixed string " --- " is
+ * returned instead, so the result can be handed to a printf "%s"
+ * directly. Otherwise the pointer returned by format_timestamp() is
+ * passed through. */
+ const char *x;
- return 0;
+ x = format_timestamp(buf, l, t);
+ if (x)
+ return x;
+ return " --- "; /* placeholder used when no timestamp string was produced */
}
-static int journal_file_hmac_start(JournalFile *f) {
- uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
+void journal_file_print_header(JournalFile *f) { /* Print a human-readable summary of f's header to stdout:
+ * path, the four 128-bit IDs, state, flag bits, sizes, sequence numbers,
+ * timestamps and object counters. Counters that
+ * JOURNAL_HEADER_CONTAINS() does not report as present in this file's
+ * header (n_data, n_fields, n_tags, n_entry_arrays) are left out.
+ * Finally the disk usage derived from fstat() is printed, if fstat()
+ * succeeds. Multi-byte header fields are converted with le32toh() /
+ * le64toh() before printing. */
+ char a[33], b[33], c[33], d[33]; /* string buffers for file_id, machine_id, boot_id and seqnum_id */
+ char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX]; /* x/y: head/tail realtime timestamps; z: tail monotonic timespan.
+ * NOTE(review): z is sized with FORMAT_TIMESTAMP_MAX but filled by
+ * format_timespan() -- confirm that size is adequate. */
+ struct stat st;
+ char bytes[FORMAT_BYTES_MAX];
assert(f);
- if (!f->authenticate)
- return 0;
+ printf("File Path: %s\n"
+ "File ID: %s\n"
+ "Machine ID: %s\n"
+ "Boot ID: %s\n"
+ "Sequential Number ID: %s\n"
+ "State: %s\n"
+ "Compatible Flags:%s%s\n"
+ "Incompatible Flags:%s%s\n"
+ "Header size: %"PRIu64"\n"
+ "Arena size: %"PRIu64"\n"
+ "Data Hash Table Size: %"PRIu64"\n"
+ "Field Hash Table Size: %"PRIu64"\n"
+ "Rotate Suggested: %s\n"
+ "Head Sequential Number: %"PRIu64"\n"
+ "Tail Sequential Number: %"PRIu64"\n"
+ "Head Realtime Timestamp: %s\n"
+ "Tail Realtime Timestamp: %s\n"
+ "Tail Monotonic Timestamp: %s\n"
+ "Objects: %"PRIu64"\n"
+ "Entry Objects: %"PRIu64"\n",
+ f->path,
+ sd_id128_to_string(f->header->file_id, a),
+ sd_id128_to_string(f->header->machine_id, b),
+ sd_id128_to_string(f->header->boot_id, c),
+ sd_id128_to_string(f->header->seqnum_id, d),
+ f->header->state == STATE_OFFLINE ? "OFFLINE" :
+ f->header->state == STATE_ONLINE ? "ONLINE" :
+ f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN", /* any other state value prints as UNKNOWN */
+ JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
+ (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "", /* " ???" marks compatible flag bits other than SEALED */
+ JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
+ (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "", /* " ???" marks incompatible flag bits other than COMPRESSED */
+ le64toh(f->header->header_size),
+ le64toh(f->header->arena_size),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem), /* table sizes are shown as a number of HashItem slots, not bytes */
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
+ yes_no(journal_file_rotate_suggested(f, 0)),
+ le64toh(f->header->head_entry_seqnum),
+ le64toh(f->header->tail_entry_seqnum),
+ format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
+ format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
+ format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), /* NOTE(review): unlike the two timestamps above, this result reaches "%s" without a NULL fallback -- confirm format_timespan() cannot return NULL here */
+ le64toh(f->header->n_objects),
+ le64toh(f->header->n_entries));
- if (f->hmac_running)
- return 0;
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) /* fill = n_data as a percentage of the number of data hash table slots */
+ printf("Data Objects: %"PRIu64"\n"
+ "Data Hash Table Fill: %.1f%%\n",
+ le64toh(f->header->n_data),
+ 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
- /* Prepare HMAC for next cycle */
- gcry_md_reset(f->hmac);
- FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
- gcry_md_setkey(f->hmac, key, sizeof(key));
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) /* same computation for the field hash table */
+ printf("Field Objects: %"PRIu64"\n"
+ "Field Hash Table Fill: %.1f%%\n",
+ le64toh(f->header->n_fields),
+ 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
- f->hmac_running = true;
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
+ printf("Tag Objects: %"PRIu64"\n",
+ le64toh(f->header->n_tags));
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
+ printf("Entry Array Objects: %"PRIu64"\n",
+ le64toh(f->header->n_entry_arrays));
- return 0;
+ if (fstat(f->fd, &st) >= 0) /* the disk usage line is simply omitted when fstat() fails */
+ printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL)); /* st_blocks is multiplied by 512 to obtain bytes */
}
-static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
- uint64_t t;
+int journal_file_open(
+ const char *fname,
+ int flags,
+ mode_t mode,
+ bool compress,
+ bool seal,
+ JournalMetrics *metrics,
+ MMapCache *mmap_cache,
+ JournalFile *template,
+ JournalFile **ret) {
- assert(f);
- assert(epoch);
- assert(f->authenticate);
+ JournalFile *f;
+ int r;
+ bool newly_created = false;
- if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
- le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
- return -ENOTSUP;
+ assert(fname);
+ assert(ret);
- if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
- return -ESTALE;
+ if ((flags & O_ACCMODE) != O_RDONLY &&
+ (flags & O_ACCMODE) != O_RDWR)
+ return -EINVAL;
- t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
- t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
+ if (!endswith(fname, ".journal") &&
+ !endswith(fname, ".journal~"))
+ return -EINVAL;
- *epoch = t;
- return 0;
-}
+ f = new0(JournalFile, 1);
+ if (!f)
+ return -ENOMEM;
-static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
- uint64_t goal, epoch;
- int r;
- assert(f);
+ f->fd = -1;
+ f->mode = mode;
- if (!f->authenticate)
- return 0;
+ f->flags = flags;
+ f->prot = prot_from_flags(flags);
+ f->writable = (flags & O_ACCMODE) != O_RDONLY;
+#ifdef HAVE_XZ
+ f->compress = compress;
+#endif
+#ifdef HAVE_GCRYPT
+ f->seal = seal;
+#endif
- r = journal_file_get_epoch(f, realtime, &goal);
- if (r < 0)
- return r;
+ if (mmap_cache)
+ f->mmap = mmap_cache_ref(mmap_cache);
+ else {
+ f->mmap = mmap_cache_new();
+ if (!f->mmap) {
+ r = -ENOMEM;
+ goto fail;
+ }
+ }
- epoch = FSPRG_GetEpoch(fsprg_state(f));
- if (epoch > goal)
- return -ESTALE;
+ f->path = strdup(fname);
+ if (!f->path) {
+ r = -ENOMEM;
+ goto fail;
+ }
- return epoch != goal;
-}
+ f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
+ if (!f->chain_cache) {
+ r = -ENOMEM;
+ goto fail;
+ }
-static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
- uint64_t goal, epoch;
- int r;
+ f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
+ if (f->fd < 0) {
+ r = -errno;
+ goto fail;
+ }
- assert(f);
+ if (fstat(f->fd, &f->last_stat) < 0) {
+ r = -errno;
+ goto fail;
+ }
- if (!f->authenticate)
- return 0;
+ if (f->last_stat.st_size == 0 && f->writable) {
+#ifdef HAVE_XATTR
+ uint64_t crtime;
+
+ /* Let's attach the creation time to the journal file,
+ * so that the vacuuming code knows the age of this
+ * file even if the file might end up corrupted one
+ * day... Ideally we'd just use the creation time many
+ * file systems maintain for each file, but there is
+ * currently no usable API to query this, hence let's
+ * emulate this via extended attributes. If extended
+ * attributes are not supported we'll just skip this,
+ * and rely solely on mtime/atime/ctime of the file.*/
+
+ crtime = htole64((uint64_t) now(CLOCK_REALTIME));
+ fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
+#endif
- r = journal_file_get_epoch(f, realtime, &goal);
- if (r < 0)
- return r;
+#ifdef HAVE_GCRYPT
+ /* Try to load the FSPRG state, and if we can't, then
+ * just don't do sealing */
+ if (f->seal) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ f->seal = false;
+ }
+#endif
- epoch = FSPRG_GetEpoch(fsprg_state(f));
- if (epoch < goal)
- log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
+ r = journal_file_init_header(f, template);
+ if (r < 0)
+ goto fail;
- for (;;) {
- if (epoch > goal)
- return -ESTALE;
- if (epoch == goal)
- return 0;
+ if (fstat(f->fd, &f->last_stat) < 0) {
+ r = -errno;
+ goto fail;
+ }
- FSPRG_Evolve(fsprg_state(f));
- epoch = FSPRG_GetEpoch(fsprg_state(f));
+ newly_created = true;
}
-}
-
-static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
- int r;
- assert(f);
+ if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
+ r = -EIO;
+ goto fail;
+ }
- if (!f->authenticate)
- return 0;
+ f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
+ if (f->header == MAP_FAILED) {
+ f->header = NULL;
+ r = -errno;
+ goto fail;
+ }
- r = journal_file_need_evolve(f, realtime);
- if (r <= 0)
- return 0;
+ if (!newly_created) {
+ r = journal_file_verify_header(f);
+ if (r < 0)
+ goto fail;
+ }
- r = journal_file_append_tag(f);
- if (r < 0)
- return r;
-
- r = journal_file_evolve(f, realtime);
- if (r < 0)
- return r;
-
- r = journal_file_hmac_start(f);
- if (r < 0)
- return r;
-
- return 0;
-}
-
-static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
- int r;
- Object *o;
-
- assert(f);
-
- if (!f->authenticate)
- return 0;
-
- r = journal_file_hmac_start(f);
- if (r < 0)
- return r;
-
- r = journal_file_move_to_object(f, type, p, &o);
- if (r < 0)
- return r;
-
- gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
-
- switch (o->object.type) {
-
- case OBJECT_DATA:
- /* All but: hash and payload are mutable */
- gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
- gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
- break;
-
- case OBJECT_ENTRY:
- /* All */
- gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
- break;
-
- case OBJECT_FIELD_HASH_TABLE:
- case OBJECT_DATA_HASH_TABLE:
- case OBJECT_ENTRY_ARRAY:
- /* Nothing: everything is mutable */
- break;
-
- case OBJECT_TAG:
- /* All but the tag itself */
- gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int journal_file_hmac_put_header(JournalFile *f) {
- int r;
-
- assert(f);
-
- if (!f->authenticate)
- return 0;
-
- r = journal_file_hmac_start(f);
- if (r < 0)
- return r;
-
- /* All but state+reserved, boot_id, arena_size,
- * tail_object_offset, n_objects, n_entries, tail_seqnum,
- * head_entry_realtime, tail_entry_realtime,
- * tail_entry_monotonic, n_data, n_fields, header_tag */
-
- gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
- gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
- gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
- gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
- gcry_md_write(f->hmac, &f->header->head_entry_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
-
- return 0;
-}
-
-static int journal_file_load_fsprg(JournalFile *f) {
- int r, fd = -1;
- char *p = NULL;
- struct stat st;
- FSPRGHeader *m = NULL;
- sd_id128_t machine;
-
- assert(f);
-
- if (!f->authenticate)
- return 0;
-
- r = sd_id128_get_machine(&machine);
- if (r < 0)
- return r;
-
- if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
- SD_ID128_FORMAT_VAL(machine)) < 0)
- return -ENOMEM;
-
- fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
- if (fd < 0) {
- log_error("Failed to open %s: %m", p);
- r = -errno;
- goto finish;
- }
-
- if (fstat(fd, &st) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
- r = -ENODATA;
- goto finish;
- }
-
- m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
- if (m == MAP_FAILED) {
- m = NULL;
- r = -errno;
- goto finish;
- }
-
- if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
- r = -EBADMSG;
- goto finish;
- }
-
- if (m->incompatible_flags != 0) {
- r = -EPROTONOSUPPORT;
- goto finish;
- }
-
- if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
- r = -EBADMSG;
- goto finish;
- }
-
- if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
- r = -EBADMSG;
- goto finish;
- }
-
- f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
- if ((uint64_t) st.st_size < f->fsprg_size) {
- r = -ENODATA;
- goto finish;
- }
-
- if (!sd_id128_equal(machine, m->machine_id)) {
- r = -EHOSTDOWN;
- goto finish;
- }
-
- if (le64toh(m->fsprg_start_usec) <= 0 ||
- le64toh(m->fsprg_interval_usec) <= 0) {
- r = -EBADMSG;
- goto finish;
- }
-
- f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
- if (f->fsprg_header == MAP_FAILED) {
- f->fsprg_header = NULL;
- r = -errno;
- goto finish;
- }
-
- r = 0;
-
-finish:
- if (m)
- munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
-
- if (fd >= 0)
- close_nointr_nofail(fd);
-
- free(p);
- return r;
-}
-
-static int journal_file_setup_hmac(JournalFile *f) {
- gcry_error_t e;
-
- if (!f->authenticate)
- return 0;
-
- e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
- if (e != 0)
- return -ENOTSUP;
-
- return 0;
-}
-
-static int journal_file_append_first_tag(JournalFile *f) {
- int r;
- uint64_t p;
-
- if (!f->authenticate)
- return 0;
-
- log_debug("Calculating first tag...");
-
- r = journal_file_hmac_put_header(f);
- if (r < 0)
- return r;
-
- p = le64toh(f->header->field_hash_table_offset);
- if (p < offsetof(Object, hash_table.items))
- return -EINVAL;
- p -= offsetof(Object, hash_table.items);
-
- r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
- if (r < 0)
- return r;
-
- p = le64toh(f->header->data_hash_table_offset);
- if (p < offsetof(Object, hash_table.items))
- return -EINVAL;
- p -= offsetof(Object, hash_table.items);
-
- r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
- if (r < 0)
- return r;
-
- r = journal_file_append_tag(f);
- if (r < 0)
- return r;
-
- return 0;
-}
-
-static int journal_file_object_verify(JournalFile *f, Object *o) {
- assert(f);
- assert(o);
-
- /* This does various superficial tests about the length an
- * possible field values. It does not follow any references to
- * other objects. */
-
- switch (o->object.type) {
- case OBJECT_DATA:
- if (le64toh(o->data.entry_offset) <= 0 ||
- le64toh(o->data.n_entries) <= 0)
- return -EBADMSG;
-
- if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
- return -EBADMSG;
- break;
-
- case OBJECT_FIELD:
- if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
- return -EBADMSG;
- break;
-
- case OBJECT_ENTRY:
- if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
- return -EBADMSG;
-
- if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
- return -EBADMSG;
-
- if (le64toh(o->entry.seqnum) <= 0 ||
- le64toh(o->entry.realtime) <= 0)
- return -EBADMSG;
-
- break;
-
- case OBJECT_DATA_HASH_TABLE:
- case OBJECT_FIELD_HASH_TABLE:
- if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
- return -EBADMSG;
-
- break;
-
- case OBJECT_ENTRY_ARRAY:
- if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
- return -EBADMSG;
-
- break;
-
- case OBJECT_TAG:
- if (le64toh(o->object.size) != sizeof(TagObject))
- return -EBADMSG;
- break;
- }
-
- return 0;
-}
-
-static void draw_progress(uint64_t p, usec_t *last_usec) {
- unsigned n, i, j, k;
- usec_t z, x;
-
- if (!isatty(STDOUT_FILENO))
- return;
-
- z = now(CLOCK_MONOTONIC);
- x = *last_usec;
-
- if (x != 0 && x + 40 * USEC_PER_MSEC > z)
- return;
-
- *last_usec = z;
-
- n = (3 * columns()) / 4;
- j = (n * (unsigned) p) / 65535ULL;
- k = n - j;
-
- fputs("\r\x1B[?25l", stdout);
-
- for (i = 0; i < j; i++)
- fputs("\xe2\x96\x88", stdout);
-
- for (i = 0; i < k; i++)
- fputs("\xe2\x96\x91", stdout);
-
- printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
-
- fputs("\r\x1B[?25h", stdout);
- fflush(stdout);
-}
-
-static void flush_progress(void) {
- unsigned n, i;
-
- if (!isatty(STDOUT_FILENO))
- return;
-
- n = (3 * columns()) / 4;
-
- putchar('\r');
-
- for (i = 0; i < n + 5; i++)
- putchar(' ');
-
- putchar('\r');
- fflush(stdout);
-}
-
-static int write_uint64(int fd, uint64_t p) {
- ssize_t k;
-
- k = write(fd, &p, sizeof(p));
- if (k < 0)
- return -errno;
- if (k != sizeof(p))
- return -EIO;
-
- return 0;
-}
-
-static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
- uint64_t a, b;
- int r;
-
- assert(m);
- assert(fd >= 0);
-
- /* Bisection ... */
-
- a = 0; b = n;
- while (a < b) {
- uint64_t c, *z;
-
- c = (a + b) / 2;
-
- r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
- if (r < 0)
- return r;
-
- if (*z == p)
- return 1;
-
- if (p < *z)
- b = c;
- else
- a = c;
- }
-
- return 0;
-}
-
-int journal_file_verify(JournalFile *f, const char *key) {
- int r;
- Object *o;
- uint64_t p = 0;
- uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
- sd_id128_t entry_boot_id;
- bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
- uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
- usec_t last_usec = 0;
- int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
- char data_path[] = "/var/tmp/journal-data-XXXXXX",
- entry_path[] = "/var/tmp/journal-entry-XXXXXX",
- entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
-
- assert(f);
-
- data_fd = mkostemp(data_path, O_CLOEXEC);
- if (data_fd < 0) {
- log_error("Failed to create data file: %m");
- goto fail;
- }
- unlink(data_path);
-
- entry_fd = mkostemp(entry_path, O_CLOEXEC);
- if (entry_fd < 0) {
- log_error("Failed to create entry file: %m");
- goto fail;
- }
- unlink(entry_path);
-
- entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
- if (entry_array_fd < 0) {
- log_error("Failed to create entry array file: %m");
- goto fail;
- }
- unlink(entry_array_path);
-
- /* First iteration: we go through all objects, verify the
- * superficial structure, headers, hashes. */
-
- r = journal_file_hmac_put_header(f);
- if (r < 0) {
- log_error("Failed to calculate HMAC of header.");
- goto fail;
- }
-
- p = le64toh(f->header->header_size);
- while (p != 0) {
- draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
- r = journal_file_move_to_object(f, -1, p, &o);
- if (r < 0) {
- log_error("Invalid object at %llu", (unsigned long long) p);
- goto fail;
- }
-
- if (le64toh(f->header->tail_object_offset) < p) {
- log_error("Invalid tail object pointer.");
- r = -EBADMSG;
- goto fail;
- }
-
- n_objects ++;
-
- r = journal_file_object_verify(f, o);
- if (r < 0) {
- log_error("Invalid object contents at %llu", (unsigned long long) p);
- goto fail;
- }
-
- r = journal_file_hmac_put_object(f, -1, p);
- if (r < 0) {
- log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
- goto fail;
- }
-
- if (o->object.flags & OBJECT_COMPRESSED &&
- !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
- log_error("Compressed object without compression at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- if (o->object.flags & OBJECT_COMPRESSED &&
- o->object.type != OBJECT_DATA) {
- log_error("Compressed non-data object at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- if (o->object.type == OBJECT_TAG) {
-
- if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
- log_error("Tag object without authentication at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- if (le64toh(o->tag.seqnum) != tag_seqnum) {
- log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- } else if (o->object.type == OBJECT_ENTRY) {
-
- r = write_uint64(entry_fd, p);
- if (r < 0)
- goto fail;
-
- if (!entry_seqnum_set &&
- le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
- log_error("Head entry sequence number incorrect");
- r = -EBADMSG;
- goto fail;
- }
-
- if (entry_seqnum_set &&
- entry_seqnum >= le64toh(o->entry.seqnum)) {
- log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- entry_seqnum = le64toh(o->entry.seqnum);
- entry_seqnum_set = true;
-
- if (entry_monotonic_set &&
- sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
- entry_monotonic > le64toh(o->entry.monotonic)) {
- log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- entry_monotonic = le64toh(o->entry.monotonic);
- entry_boot_id = o->entry.boot_id;
- entry_monotonic_set = true;
-
- if (!entry_realtime_set &&
- le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
- log_error("Head entry realtime timestamp incorrect");
- r = -EBADMSG;
- goto fail;
- }
-
- entry_realtime = le64toh(o->entry.realtime);
- entry_realtime_set = true;
-
- n_entries ++;
- } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
-
- r = write_uint64(entry_array_fd, p);
- if (r < 0)
- goto fail;
-
- if (p == le64toh(f->header->entry_array_offset)) {
- if (found_main_entry_array) {
- log_error("More than one main entry array at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- found_main_entry_array = true;
- }
-
- n_entry_arrays++;
-
- } else if (o->object.type == OBJECT_DATA) {
-
- r = write_uint64(data_fd, p);
- if (r < 0)
- goto fail;
-
- n_data++;
-
- } else if (o->object.type == OBJECT_FIELD)
- n_fields++;
- else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
- n_data_hash_tables++;
-
- if (n_data_hash_tables > 1) {
- log_error("More than one data hash table at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
- le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
- log_error("Header fields for data hash table invalid.");
- r = -EBADMSG;
- goto fail;
- }
- } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
- n_field_hash_tables++;
-
- if (n_field_hash_tables > 1) {
- log_error("More than one field hash table at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
- le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
- log_error("Header fields for field hash table invalid.");
- r = -EBADMSG;
- goto fail;
- }
- } else if (o->object.type >= _OBJECT_TYPE_MAX)
- n_weird ++;
-
- if (p == le64toh(f->header->tail_object_offset))
- p = 0;
- else
- p = p + ALIGN64(le64toh(o->object.size));
- }
-
- if (n_objects != le64toh(f->header->n_objects)) {
- log_error("Object number mismatch");
- r = -EBADMSG;
- goto fail;
- }
-
- if (n_entries != le64toh(f->header->n_entries)) {
- log_error("Entry number mismatch");
- r = -EBADMSG;
- goto fail;
- }
-
- if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
- n_data != le64toh(f->header->n_data)) {
- log_error("Data number mismatch");
- r = -EBADMSG;
- goto fail;
- }
-
- if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
- n_fields != le64toh(f->header->n_fields)) {
- log_error("Field number mismatch");
- r = -EBADMSG;
- goto fail;
- }
-
- if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
- tag_seqnum != le64toh(f->header->n_tags)) {
- log_error("Tag number mismatch");
- r = -EBADMSG;
- goto fail;
- }
-
- if (n_data_hash_tables != 1) {
- log_error("Missing data hash table");
- r = -EBADMSG;
- goto fail;
- }
-
- if (n_field_hash_tables != 1) {
- log_error("Missing field hash table");
- r = -EBADMSG;
- goto fail;
- }
-
- if (!found_main_entry_array) {
- log_error("Missing entry array");
- r = -EBADMSG;
- goto fail;
- }
-
- if (entry_seqnum_set &&
- entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
- log_error("Invalid tail seqnum");
- r = -EBADMSG;
- goto fail;
- }
-
- if (entry_monotonic_set &&
- (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
- entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
- log_error("Invalid tail monotonic timestamp");
- r = -EBADMSG;
- goto fail;
- }
-
- if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
- log_error("Invalid tail realtime timestamp");
- r = -EBADMSG;
- goto fail;
- }
-
- /* Second iteration: we go through all objects again, this
- * time verify all pointers. */
-
- p = le64toh(f->header->header_size);
- while (p != 0) {
- draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
- r = journal_file_move_to_object(f, -1, p, &o);
- if (r < 0) {
- log_error("Invalid object at %llu", (unsigned long long) p);
- goto fail;
- }
-
- if (o->object.type == OBJECT_ENTRY_ARRAY) {
- uint64_t i = 0, n;
-
- if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
- !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
- log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
-
- n = journal_file_entry_array_n_items(o);
- for (i = 0; i < n; i++) {
- if (le64toh(o->entry_array.items[i]) != 0 &&
- !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
-
- log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
- r = -EBADMSG;
- goto fail;
- }
- }
-
- }
-
- r = journal_file_move_to_object(f, -1, p, &o);
- if (r < 0) {
- log_error("Invalid object at %llu", (unsigned long long) p);
- goto fail;
- }
-
- if (p == le64toh(f->header->tail_object_offset))
- p = 0;
- else
- p = p + ALIGN64(le64toh(o->object.size));
- }
-
- flush_progress();
-
- mmap_cache_close_fd(f->mmap, data_fd);
- mmap_cache_close_fd(f->mmap, entry_fd);
- mmap_cache_close_fd(f->mmap, entry_array_fd);
-
- close_nointr_nofail(data_fd);
- close_nointr_nofail(entry_fd);
- close_nointr_nofail(entry_array_fd);
-
- return 0;
-
-fail:
- flush_progress();
-
- log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
- f->path,
- (unsigned long long) p,
- (unsigned long long) f->last_stat.st_size,
- (unsigned long long) (100 * p / f->last_stat.st_size));
-
- if (data_fd >= 0) {
- mmap_cache_close_fd(f->mmap, data_fd);
- close_nointr_nofail(data_fd);
- }
-
- if (entry_fd >= 0) {
- mmap_cache_close_fd(f->mmap, entry_fd);
- close_nointr_nofail(entry_fd);
- }
-
- if (entry_array_fd >= 0) {
- mmap_cache_close_fd(f->mmap, entry_array_fd);
- close_nointr_nofail(entry_array_fd);
- }
-
- return r;
-}
-
-void journal_file_dump(JournalFile *f) {
- Object *o;
- int r;
- uint64_t p;
-
- assert(f);
-
- journal_file_print_header(f);
-
- p = le64toh(f->header->header_size);
- while (p != 0) {
- r = journal_file_move_to_object(f, -1, p, &o);
- if (r < 0)
- goto fail;
-
- switch (o->object.type) {
-
- case OBJECT_UNUSED:
- printf("Type: OBJECT_UNUSED\n");
- break;
-
- case OBJECT_DATA:
- printf("Type: OBJECT_DATA\n");
- break;
-
- case OBJECT_ENTRY:
- printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
- (unsigned long long) le64toh(o->entry.seqnum),
- (unsigned long long) le64toh(o->entry.monotonic),
- (unsigned long long) le64toh(o->entry.realtime));
- break;
-
- case OBJECT_FIELD_HASH_TABLE:
- printf("Type: OBJECT_FIELD_HASH_TABLE\n");
- break;
-
- case OBJECT_DATA_HASH_TABLE:
- printf("Type: OBJECT_DATA_HASH_TABLE\n");
- break;
-
- case OBJECT_ENTRY_ARRAY:
- printf("Type: OBJECT_ENTRY_ARRAY\n");
- break;
-
- case OBJECT_TAG:
- printf("Type: OBJECT_TAG %llu\n",
- (unsigned long long) le64toh(o->tag.seqnum));
- break;
- }
-
- if (o->object.flags & OBJECT_COMPRESSED)
- printf("Flags: COMPRESSED\n");
-
- if (p == le64toh(f->header->tail_object_offset))
- p = 0;
- else
- p = p + ALIGN64(le64toh(o->object.size));
- }
-
- return;
-fail:
- log_error("File corrupt");
-}
-
-void journal_file_print_header(JournalFile *f) {
- char a[33], b[33], c[33];
- char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
-
- assert(f);
-
- printf("File Path: %s\n"
- "File ID: %s\n"
- "Machine ID: %s\n"
- "Boot ID: %s\n"
- "Sequential Number ID: %s\n"
- "State: %s\n"
- "Compatible Flags:%s%s\n"
- "Incompatible Flags:%s%s\n"
- "Header size: %llu\n"
- "Arena size: %llu\n"
- "Data Hash Table Size: %llu\n"
- "Field Hash Table Size: %llu\n"
- "Objects: %llu\n"
- "Entry Objects: %llu\n"
- "Rotate Suggested: %s\n"
- "Head Sequential Number: %llu\n"
- "Tail Sequential Number: %llu\n"
- "Head Realtime Timestamp: %s\n"
- "Tail Realtime Timestamp: %s\n",
- f->path,
- sd_id128_to_string(f->header->file_id, a),
- sd_id128_to_string(f->header->machine_id, b),
- sd_id128_to_string(f->header->boot_id, c),
- sd_id128_to_string(f->header->seqnum_id, c),
- f->header->state == STATE_OFFLINE ? "offline" :
- f->header->state == STATE_ONLINE ? "online" :
- f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
- (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
- (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
- (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
- (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
- (unsigned long long) le64toh(f->header->header_size),
- (unsigned long long) le64toh(f->header->arena_size),
- (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
- (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
- (unsigned long long) le64toh(f->header->n_objects),
- (unsigned long long) le64toh(f->header->n_entries),
- yes_no(journal_file_rotate_suggested(f)),
- (unsigned long long) le64toh(f->header->head_entry_seqnum),
- (unsigned long long) le64toh(f->header->tail_entry_seqnum),
- format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
- format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
-
- if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
- printf("Data Objects: %llu\n"
- "Data Hash Table Fill: %.1f%%\n",
- (unsigned long long) le64toh(f->header->n_data),
- 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
-
- if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
- printf("Field Objects: %llu\n"
- "Field Hash Table Fill: %.1f%%\n",
- (unsigned long long) le64toh(f->header->n_fields),
- 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
-}
-
-int journal_file_open(
- const char *fname,
- int flags,
- mode_t mode,
- bool compress,
- bool authenticate,
- JournalMetrics *metrics,
- MMapCache *mmap_cache,
- JournalFile *template,
- JournalFile **ret) {
-
- JournalFile *f;
- int r;
- bool newly_created = false;
-
- assert(fname);
-
- if ((flags & O_ACCMODE) != O_RDONLY &&
- (flags & O_ACCMODE) != O_RDWR)
- return -EINVAL;
-
- if (!endswith(fname, ".journal"))
- return -EINVAL;
-
- f = new0(JournalFile, 1);
- if (!f)
- return -ENOMEM;
-
- f->fd = -1;
- f->mode = mode;
-
- f->flags = flags;
- f->prot = prot_from_flags(flags);
- f->writable = (flags & O_ACCMODE) != O_RDONLY;
- f->compress = compress;
- f->authenticate = authenticate;
-
- if (mmap_cache)
- f->mmap = mmap_cache_ref(mmap_cache);
- else {
- /* One context for each type, plus the zeroth catchall
- * context. One fd for the file plus one for each type
- * (which we need during verification */
- f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
- if (!f->mmap) {
- r = -ENOMEM;
- goto fail;
- }
- }
-
- f->path = strdup(fname);
- if (!f->path) {
- r = -ENOMEM;
- goto fail;
- }
-
- f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
- if (f->fd < 0) {
- r = -errno;
- goto fail;
- }
-
- if (fstat(f->fd, &f->last_stat) < 0) {
- r = -errno;
- goto fail;
- }
-
- if (f->last_stat.st_size == 0 && f->writable) {
- newly_created = true;
-
- /* Try to load the FSPRG state, and if we can't, then
- * just don't do authentication */
- r = journal_file_load_fsprg(f);
- if (r < 0)
- f->authenticate = false;
-
- r = journal_file_init_header(f, template);
- if (r < 0)
- goto fail;
-
- if (fstat(f->fd, &f->last_stat) < 0) {
- r = -errno;
- goto fail;
- }
- }
-
- if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
- r = -EIO;
- goto fail;
- }
-
- f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
- if (f->header == MAP_FAILED) {
- f->header = NULL;
- r = -errno;
- goto fail;
- }
-
- if (!newly_created) {
- r = journal_file_verify_header(f);
- if (r < 0)
- goto fail;
- }
-
- if (!newly_created && f->writable) {
- r = journal_file_load_fsprg(f);
- if (r < 0)
- goto fail;
- }
+#ifdef HAVE_GCRYPT
+ if (!newly_created && f->writable) {
+ r = journal_file_fss_load(f);
+ if (r < 0)
+ goto fail;
+ }
+#endif
if (f->writable) {
if (metrics) {
r = journal_file_refresh_header(f);
if (r < 0)
goto fail;
-
- r = journal_file_setup_hmac(f);
- if (r < 0)
- goto fail;
}
+#ifdef HAVE_GCRYPT
+ r = journal_file_hmac_setup(f);
+ if (r < 0)
+ goto fail;
+#endif
+
if (newly_created) {
r = journal_file_setup_field_hash_table(f);
if (r < 0)
if (r < 0)
goto fail;
+#ifdef HAVE_GCRYPT
r = journal_file_append_first_tag(f);
if (r < 0)
goto fail;
+#endif
}
r = journal_file_map_field_hash_table(f);
if (r < 0)
goto fail;
- if (ret)
- *ret = f;
-
+ *ret = f;
return 0;
fail:
return r;
}
-int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
- char *p;
+int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
+ _cleanup_free_ char *p = NULL;
size_t l;
JournalFile *old_file, *new_file = NULL;
int r;
return -EINVAL;
l = strlen(old_file->path);
-
- p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
- if (!p)
+ r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
+ (int) l - 8, old_file->path,
+ SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
+ le64toh((*f)->header->head_entry_seqnum),
+ le64toh((*f)->header->head_entry_realtime));
+ if (r < 0)
return -ENOMEM;
- memcpy(p, old_file->path, l - 8);
- p[l-8] = '@';
- sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
- snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
- "-%016llx-%016llx.journal",
- (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
- (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
-
r = rename(old_file->path, p);
- free(p);
-
if (r < 0)
return -errno;
old_file->header->state = STATE_ARCHIVED;
- r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
+ r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
journal_file_close(old_file);
*f = new_file;
int flags,
mode_t mode,
bool compress,
- bool authenticate,
+ bool seal,
JournalMetrics *metrics,
- MMapCache *mmap,
+ MMapCache *mmap_cache,
JournalFile *template,
JournalFile **ret) {
int r;
size_t l;
- char *p;
+ _cleanup_free_ char *p = NULL;
- r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
+ r = journal_file_open(fname, flags, mode, compress, seal,
+ metrics, mmap_cache, template, ret);
if (r != -EBADMSG && /* corrupted */
r != -ENODATA && /* truncated */
r != -EHOSTDOWN && /* other machine */
l = strlen(fname);
if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
- (int) (l-8), fname,
+ (int) l - 8, fname,
(unsigned long long) now(CLOCK_REALTIME),
random_ull()) < 0)
return -ENOMEM;
r = rename(fname, p);
- free(p);
if (r < 0)
return -errno;
log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
- return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
-}
-
-struct vacuum_info {
- off_t usage;
- char *filename;
-
- uint64_t realtime;
- sd_id128_t seqnum_id;
- uint64_t seqnum;
-
- bool have_seqnum;
-};
-
-static int vacuum_compare(const void *_a, const void *_b) {
- const struct vacuum_info *a, *b;
-
- a = _a;
- b = _b;
-
- if (a->have_seqnum && b->have_seqnum &&
- sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
- if (a->seqnum < b->seqnum)
- return -1;
- else if (a->seqnum > b->seqnum)
- return 1;
- else
- return 0;
- }
-
- if (a->realtime < b->realtime)
- return -1;
- else if (a->realtime > b->realtime)
- return 1;
- else if (a->have_seqnum && b->have_seqnum)
- return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
- else
- return strcmp(a->filename, b->filename);
-}
-
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
- DIR *d;
- int r = 0;
- struct vacuum_info *list = NULL;
- unsigned n_list = 0, n_allocated = 0, i;
- uint64_t sum = 0;
-
- assert(directory);
-
- if (max_use <= 0)
- return 0;
-
- d = opendir(directory);
- if (!d)
- return -errno;
-
- for (;;) {
- int k;
- struct dirent buf, *de;
- size_t q;
- struct stat st;
- char *p;
- unsigned long long seqnum = 0, realtime;
- sd_id128_t seqnum_id;
- bool have_seqnum;
-
- k = readdir_r(d, &buf, &de);
- if (k != 0) {
- r = -k;
- goto finish;
- }
-
- if (!de)
- break;
-
- if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
- continue;
-
- if (!S_ISREG(st.st_mode))
- continue;
-
- q = strlen(de->d_name);
-
- if (endswith(de->d_name, ".journal")) {
-
- /* Vacuum archived files */
-
- if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
- continue;
-
- if (de->d_name[q-8-16-1] != '-' ||
- de->d_name[q-8-16-1-16-1] != '-' ||
- de->d_name[q-8-16-1-16-1-32-1] != '@')
- continue;
-
- p = strdup(de->d_name);
- if (!p) {
- r = -ENOMEM;
- goto finish;
- }
-
- de->d_name[q-8-16-1-16-1] = 0;
- if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
- free(p);
- continue;
- }
-
- if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
- free(p);
- continue;
- }
-
- have_seqnum = true;
-
- } else if (endswith(de->d_name, ".journal~")) {
- unsigned long long tmp;
-
- /* Vacuum corrupted files */
-
- if (q < 1 + 16 + 1 + 16 + 8 + 1)
- continue;
-
- if (de->d_name[q-1-8-16-1] != '-' ||
- de->d_name[q-1-8-16-1-16-1] != '@')
- continue;
-
- p = strdup(de->d_name);
- if (!p) {
- r = -ENOMEM;
- goto finish;
- }
-
- if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
- free(p);
- continue;
- }
-
- have_seqnum = false;
- } else
- continue;
-
- if (n_list >= n_allocated) {
- struct vacuum_info *j;
-
- n_allocated = MAX(n_allocated * 2U, 8U);
- j = realloc(list, n_allocated * sizeof(struct vacuum_info));
- if (!j) {
- free(p);
- r = -ENOMEM;
- goto finish;
- }
-
- list = j;
- }
-
- list[n_list].filename = p;
- list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
- list[n_list].seqnum = seqnum;
- list[n_list].realtime = realtime;
- list[n_list].seqnum_id = seqnum_id;
- list[n_list].have_seqnum = have_seqnum;
-
- sum += list[n_list].usage;
-
- n_list ++;
- }
-
- if (n_list > 0)
- qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
-
- for(i = 0; i < n_list; i++) {
- struct statvfs ss;
-
- if (fstatvfs(dirfd(d), &ss) < 0) {
- r = -errno;
- goto finish;
- }
-
- if (sum <= max_use &&
- (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
- break;
-
- if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
- log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
- sum -= list[i].usage;
- } else if (errno != ENOENT)
- log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
- }
-
-finish:
- for (i = 0; i < n_list; i++)
- free(list[i].filename);
-
- free(list);
-
- if (d)
- closedir(d);
-
- return r;
+ return journal_file_open(fname, flags, mode, compress, seal,
+ metrics, mmap_cache, template, ret);
}
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
ts.monotonic = le64toh(o->entry.monotonic);
ts.realtime = le64toh(o->entry.realtime);
- if (to->tail_entry_monotonic_valid &&
- ts.monotonic < le64toh(to->header->tail_entry_monotonic))
- return -EINVAL;
-
n = journal_file_entry_n_items(o);
items = alloca(sizeof(EntryItem) * n);
#ifdef HAVE_XZ
uint64_t rsize;
- if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
+ if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0))
return -EBADMSG;
data = from->compress_buffer;
if (m->keep_free == (uint64_t) -1) {
if (fs_size > 0) {
- m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
+ m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
m->keep_free = DEFAULT_KEEP_FREE_UPPER;
m->keep_free = DEFAULT_KEEP_FREE;
}
- log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
- format_bytes(a, sizeof(a), m->max_use),
- format_bytes(b, sizeof(b), m->max_size),
- format_bytes(c, sizeof(c), m->min_size),
- format_bytes(d, sizeof(d), m->keep_free));
+ log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
+ format_bytes(a, sizeof(a), m->max_use),
+ format_bytes(b, sizeof(b), m->max_size),
+ format_bytes(c, sizeof(c), m->min_size),
+ format_bytes(d, sizeof(d), m->keep_free));
}
int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
}
int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
- char t[9+32+1] = "_BOOT_ID=";
Object *o;
uint64_t p;
int r;
assert(f);
assert(from || to);
- sd_id128_to_string(boot_id, t + 9);
-
- r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
+ r = find_data_object_by_boot_id(f, boot_id, &o, &p);
if (r <= 0)
return r;
return 1;
}
-bool journal_file_rotate_suggested(JournalFile *f) {
+bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
assert(f);
/* If we gained new header fields we gained new features,
if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
- log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
+ log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
- (unsigned long long) le64toh(f->header->n_data),
- (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
- (unsigned long long) (f->last_stat.st_size),
- (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
+ le64toh(f->header->n_data),
+ le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
+ (unsigned long long) f->last_stat.st_size,
+ f->last_stat.st_size / le64toh(f->header->n_data));
return true;
}
if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
- log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
+ log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
f->path,
100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
- (unsigned long long) le64toh(f->header->n_fields),
- (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
+ le64toh(f->header->n_fields),
+ le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
return true;
}
+ /* Are the data objects properly indexed by field objects? */
+ if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+ JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+ le64toh(f->header->n_data) > 0 &&
+ le64toh(f->header->n_fields) == 0)
+ return true;
+
+ if (max_file_usec > 0) {
+ usec_t t, h;
+
+ h = le64toh(f->header->head_entry_realtime);
+ t = now(CLOCK_REALTIME);
+
+ if (h > 0 && t > h + max_file_usec)
+ return true;
+ }
+
return false;
}