1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
/* Default byte sizes of the data and field hash tables, each expressed as
 * a fixed number of HashItem slots times the size of one slot. */
36 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
37 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
/* Payloads of at least this many bytes are candidates for compression
 * when a data object is appended. */
39 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
41 /* This is the minimum journal file size */
42 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
44 /* These are the lower and upper bounds if we deduce the max_use value
45 * from the file system size */
46 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
47 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
49 /* This is the upper bound if we deduce max_size from max_use */
50 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
52 /* This is the upper bound if we deduce the keep_free value from the file system size */
54 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56 /* This is the keep_free value when we can't determine the file system size */
58 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MiB */
60 /* n_data was the first entry we added after the initial file format design */
61 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
/* Rounds x up to the next multiple of 8. */
63 #define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
/* True if the header_size recorded in header h is large enough to cover
 * the whole of the given Header field. */
65 #define JOURNAL_HEADER_CONTAINS(h, field) \
66 (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
/* Forward declarations of helpers that are used before their definition. */
68 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
69 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
/* Tears down a journal file handle. In the order visible here: a final
 * tag is appended, mmap_cache_close_fd() is called for the fd, a writable
 * file that is still ONLINE is marked OFFLINE, the header is unmapped,
 * the fd is closed, the mmap cache reference is dropped, the compression
 * buffer is freed, the FSPRG header is unmapped and the HMAC handle is
 * closed. */
71 void journal_file_close(JournalFile *f) {
74 /* Write the final tag */
76 journal_file_append_tag(f);
78 /* Sync everything to disk, before we mark the file offline */
79 if (f->mmap && f->fd >= 0)
80 mmap_cache_close_fd(f->mmap, f->fd);
82 if (f->writable && f->fd >= 0)
86 /* Mark the file offline. Don't override the archived state if it already is set */
87 if (f->writable && f->header->state == STATE_ONLINE)
88 f->header->state = STATE_OFFLINE;
/* The header mapping spans sizeof(Header) rounded up by PAGE_ALIGN() */
90 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
94 close_nointr_nofail(f->fd);
99 mmap_cache_unref(f->mmap);
102 free(f->compress_buffer);
/* The FSPRG mapping spans fsprg_size rounded up by PAGE_ALIGN() */
107 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
110 gcry_md_close(f->hmac);
/* Writes a fresh header to the start of the file. A local Header h is
 * filled in with the signature, the 8-byte aligned header size, the
 * COMPRESSED / AUTHENTICATED flags taken from f->compress and
 * f->authenticate, and a random file ID. seqnum_id and tail_entry_seqnum
 * are inherited from the template's header, or seqnum_id is set to the
 * new file ID (the selecting condition is not shown here; presumably it
 * is whether a template was passed - confirm). The struct is then
 * written with pwrite() at offset 0. */
116 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
124 memcpy(h.signature, HEADER_SIGNATURE, 8);
125 h.header_size = htole64(ALIGN64(sizeof(h)));
127 h.incompatible_flags =
128 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
131 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
133 r = sd_id128_randomize(&h.file_id);
/* Continue the template's sequence number series */
138 h.seqnum_id = template->header->seqnum_id;
139 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
/* Otherwise start a new series identified by this file's own ID */
141 h.seqnum_id = h.file_id;
143 k = pwrite(f->fd, &h, sizeof(h), 0);
/* Updates the mapped header for the current machine and boot: stores the
 * machine ID, fetches the boot ID (if it equals the one already in the
 * header, tail_entry_monotonic_valid is set), records the boot ID, sets
 * the state to ONLINE and synchronously msync()s the header mapping. */
153 static int journal_file_refresh_header(JournalFile *f) {
159 r = sd_id128_get_machine(&f->header->machine_id);
163 r = sd_id128_get_boot(&boot_id);
/* Same boot as the last writer: the stored tail monotonic timestamp is comparable */
167 if (sd_id128_equal(boot_id, f->header->boot_id))
168 f->tail_entry_monotonic_valid = true;
170 f->header->boot_id = boot_id;
172 f->header->state = STATE_ONLINE;
174 /* Sync the online state to disk */
175 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
/* Validates the mapped header of an opened file. The visible checks are:
 * the signature; unknown incompatible flags and unknown compatible flags
 * (both answered with -EPROTONOSUPPORT); header_size being at least
 * HEADER_SIZE_MIN; a header flagged AUTHENTICATED having to contain
 * n_tags; the file size having to cover header_size + arena_size; a
 * comparison of the local machine ID with the one in the header; and an
 * inspection of the state field. Finally f->compress and
 * f->authenticate are derived from the header flags.
 * NOTE(review): each flag check appears in two variants (one tolerating
 * the known flag, one requiring the field to be zero). What selects
 * between them is not visible here - presumably a build-time switch;
 * confirm. The outcome of several of the checks is likewise not shown. */
181 static int journal_file_verify_header(JournalFile *f) {
184 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
187 /* In both read and write mode we refuse to open files with
188 * incompatible flags we don't know */
190 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
191 return -EPROTONOSUPPORT;
193 if (f->header->incompatible_flags != 0)
194 return -EPROTONOSUPPORT;
197 /* When open for writing we refuse to open files with
198 * compatible flags, too */
201 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
202 return -EPROTONOSUPPORT;
204 if (f->header->compatible_flags != 0)
205 return -EPROTONOSUPPORT;
209 /* The first addition was n_data, so check that we are at least this large */
210 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
/* An authenticated file needs the n_tags counter in its header */
213 if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
214 !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
/* The file must be at least as long as header plus arena */
217 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222 sd_id128_t machine_id;
225 r = sd_id128_get_machine(&machine_id);
229 if (!sd_id128_equal(machine_id, f->header->machine_id))
232 state = f->header->state;
234 if (state == STATE_ONLINE) {
235 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
237 } else if (state == STATE_ARCHIVED)
239 else if (state != STATE_OFFLINE) {
240 log_debug("Journal file %s has unknown state %u.", f->path, state);
/* Mirror the on-disk feature flags into the in-memory handle */
245 f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
246 f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
/* Makes sure the file is large enough to hold 'size' bytes at 'offset',
 * growing it with posix_fallocate() when needed.
 *
 * The current size is taken as header_size + arena_size from the header.
 * The target size is offset + size rounded up by PAGE_ALIGN(), and never
 * less than header_size. A target that does not exceed the current size
 * is handled first (no growth needed). Growth is then checked against
 * metrics.max_size (when non-zero) and, for targets above
 * metrics.min_size with keep_free set, against the free space reported by
 * fstatvfs() minus keep_free. After allocating, the mmap cache is told
 * about the changed range, last_stat is refreshed with fstat() and
 * arena_size in the header is updated to the new size. */
251 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
252 uint64_t old_size, new_size;
257 /* We assume that this file is not sparse, and we know that
258 * for sure, since we always call posix_fallocate() ourselves */
262 le64toh(f->header->header_size) +
263 le64toh(f->header->arena_size);
265 new_size = PAGE_ALIGN(offset + size);
266 if (new_size < le64toh(f->header->header_size))
267 new_size = le64toh(f->header->header_size);
269 if (new_size <= old_size)
272 if (f->metrics.max_size > 0 &&
273 new_size > f->metrics.max_size)
276 if (new_size > f->metrics.min_size &&
277 f->metrics.keep_free > 0) {
280 if (fstatvfs(f->fd, &svfs) >= 0) {
/* Widen both factors before multiplying: f_bfree and f_bsize may be
 * 32-bit on some ABIs, and their product would then wrap before it is
 * compared with the 64-bit sizes below. (Assumes 'available' is a 64-bit
 * unsigned variable; its declaration is on a line not shown here.) */
283 available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
285 if (available >= f->metrics.keep_free)
286 available -= f->metrics.keep_free;
290 if (new_size - old_size > available)
295 /* Note that the glibc fallocate() fallback is very
296 inefficient, hence we try to minimize the allocation area */
298 r = posix_fallocate(f->fd, old_size, new_size - old_size);
302 mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
304 if (fstat(f->fd, &f->last_stat) < 0)
307 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
/* Maps the byte range [offset, offset + size) of the file via
 * mmap_cache_get() and returns that call's result; the mapped address is
 * passed back through 'ret'. Before mapping, the range is compared with
 * the cached file size; if it appears to lie beyond the end, the fstat()
 * data is refreshed once and -EADDRNOTAVAIL is returned if fstat() fails
 * or the range is still out of bounds.
 * NOTE(review): offset + size is not checked for 64-bit wrap-around
 * before the comparison. */
312 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
316 /* Avoid SIGBUS on invalid accesses */
317 if (offset + size > (uint64_t) f->last_stat.st_size) {
318 /* Hmm, out of range? Let's refresh the fstat() data
319 * first, before we trust that check. */
321 if (fstat(f->fd, &f->last_stat) < 0 ||
322 offset + size > (uint64_t) f->last_stat.st_size)
323 return -EADDRNOTAVAIL;
326 return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
/* Hash check for an object. For uncompressed DATA objects and for FIELD
 * objects the stored hash (h1) is loaded and a hash (h2) is recomputed
 * with hash64() over the payload, whose length is the object size minus
 * the payload's offset within Object. How other object types are treated
 * and how h1 and h2 are compared is not visible here - presumably the
 * function returns whether they match; confirm. */
329 static bool verify_hash(Object *o) {
334 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
335 h1 = le64toh(o->data.hash);
336 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
337 } else if (o->object.type == OBJECT_FIELD) {
338 h1 = le64toh(o->field.hash);
339 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
/* Returns the smallest valid size for object o according to its type:
 * the size of the type-specific struct looked up in the table below, or
 * sizeof(ObjectHeader) when the type lies outside the table or has no
 * entry in it. */
346 static uint64_t minimum_header_size(Object *o) {
/* Indexed by object type; types without an initializer are zero */
348 static uint64_t table[] = {
349 [OBJECT_DATA] = sizeof(DataObject),
350 [OBJECT_FIELD] = sizeof(FieldObject),
351 [OBJECT_ENTRY] = sizeof(EntryObject),
352 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
353 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
354 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
355 [OBJECT_TAG] = sizeof(TagObject),
358 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
359 return sizeof(ObjectHeader);
361 return table[o->object.type];
/* Maps the object stored at 'offset' and sanity-checks it. At first only
 * an ObjectHeader is mapped so that the size can be read. The object is
 * then checked for: size at least sizeof(ObjectHeader), a type above
 * OBJECT_UNUSED, size at least the per-type minimum, and - when 'type' is
 * non-negative - a type equal to the requested one. Objects larger than
 * the bare header are mapped a second time in full.
 * NOTE(review): the second mapping passes o->object.type as the context,
 * not the range-checked 'context' computed at the top; whether
 * mmap_cache_get() copes with values of _OBJECT_TYPE_MAX or above cannot
 * be confirmed from here. */
364 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
374 /* One context for each type, plus one catch-all for the rest */
375 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
377 r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
382 s = le64toh(o->object.size);
384 if (s < sizeof(ObjectHeader))
387 if (o->object.type <= OBJECT_UNUSED)
390 if (s < minimum_header_size(o))
/* A negative 'type' means the caller accepts any object type */
393 if (type >= 0 && o->object.type != type)
396 if (s > sizeof(ObjectHeader)) {
397 r = journal_file_move_to(f, o->object.type, offset, s, &t);
/* Picks the sequence number for the next entry: one more than the
 * header's tail_entry_seqnum, optionally reconciled with the caller's
 * counter as described in the comment below. The result is stored as the
 * new tail_entry_seqnum, and also as head_entry_seqnum if that field is
 * still 0. */
411 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
416 r = le64toh(f->header->tail_entry_seqnum) + 1;
419 /* If an external seqnum counter was passed, we update
420 * both the local and the external one, and set it to
421 * the maximum of both */
429 f->header->tail_entry_seqnum = htole64(r);
/* First entry ever written to this file */
431 if (f->header->head_entry_seqnum == 0)
432 f->header->head_entry_seqnum = htole64(r);
/* Appends a new object of the given type and size at the end of the
 * file. The position p starts out as the header's tail_object_offset; it
 * is either replaced by header_size or advanced past the current tail
 * object by that object's 8-byte aligned size (the condition choosing
 * between the two is not shown - presumably p == 0 means there is no
 * object yet; confirm). Space is then reserved with
 * journal_file_allocate(), the range is mapped, type and size are
 * written into the new object header, and the file header's
 * tail_object_offset and n_objects are updated. */
437 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
444 assert(type > 0 && type < _OBJECT_TYPE_MAX);
445 assert(size >= sizeof(ObjectHeader));
449 p = le64toh(f->header->tail_object_offset);
451 p = le64toh(f->header->header_size);
/* Any object type is accepted for the current tail */
453 r = journal_file_move_to_object(f, -1, p, &tail);
457 p += ALIGN64(le64toh(tail->object.size));
460 r = journal_file_allocate(f, p, size);
464 r = journal_file_move_to(f, type, p, size, &t);
471 o->object.type = type;
472 o->object.size = htole64(size);
474 f->header->tail_object_offset = htole64(p);
475 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
/* Creates the data hash table object. The table size in bytes is derived
 * from metrics.max_size as explained below, with
 * DEFAULT_DATA_HASH_TABLE_SIZE as the lower bound. The object is appended
 * as OBJECT_DATA_HASH_TABLE, its items are zeroed, and the header records
 * the offset of the first item and the table size in bytes. */
483 static int journal_file_setup_data_hash_table(JournalFile *f) {
490 /* We estimate that we need 1 hash table entry per 768 of
491 journal file and we want to make sure we never get beyond
492 75% fill level. Calculate the hash table size for the
493 maximum file size based on these metrics. */
495 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
496 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
497 s = DEFAULT_DATA_HASH_TABLE_SIZE;
499 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
501 r = journal_file_append_object(f,
502 OBJECT_DATA_HASH_TABLE,
503 offsetof(Object, hash_table.items) + s,
508 memset(o->hash_table.items, 0, s);
/* The header points at the items themselves, not at the object header */
510 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
511 f->header->data_hash_table_size = htole64(s);
/* Creates the field hash table object with the fixed size
 * DEFAULT_FIELD_HASH_TABLE_SIZE: the object is appended as
 * OBJECT_FIELD_HASH_TABLE, its items are zeroed, and the header records
 * the offset of the first item and the table size in bytes. */
516 static int journal_file_setup_field_hash_table(JournalFile *f) {
523 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
524 r = journal_file_append_object(f,
525 OBJECT_FIELD_HASH_TABLE,
526 offsetof(Object, hash_table.items) + s,
531 memset(o->hash_table.items, 0, s);
/* The header points at the items themselves, not at the object header */
533 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
534 f->header->field_hash_table_size = htole64(s);
/* Maps the data hash table: reads its offset and size from the header,
 * maps it through journal_file_move_to() using the
 * OBJECT_DATA_HASH_TABLE context, and stores the resulting pointer in
 * f->data_hash_table. */
539 static int journal_file_map_data_hash_table(JournalFile *f) {
546 p = le64toh(f->header->data_hash_table_offset);
547 s = le64toh(f->header->data_hash_table_size);
549 r = journal_file_move_to(f,
550 OBJECT_DATA_HASH_TABLE,
556 f->data_hash_table = t;
/* Maps the field hash table: reads its offset and size from the header,
 * maps it through journal_file_move_to() using the
 * OBJECT_FIELD_HASH_TABLE context, and stores the resulting pointer in
 * f->field_hash_table. */
560 static int journal_file_map_field_hash_table(JournalFile *f) {
567 p = le64toh(f->header->field_hash_table_offset);
568 s = le64toh(f->header->field_hash_table_size);
570 r = journal_file_move_to(f,
571 OBJECT_FIELD_HASH_TABLE,
577 f->field_hash_table = t;
/* Links the data object o, located at 'offset', into the data hash
 * table. The object's chain and entry bookkeeping fields are reset, the
 * bucket is chosen as hash modulo the number of HashItems in the table,
 * and the object becomes the bucket's tail: either it is also recorded
 * as the bucket's head, or the previous tail object gets its
 * next_hash_offset pointed at it. n_data is incremented when the header
 * is large enough to contain that field. */
581 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
588 assert(o->object.type == OBJECT_DATA);
590 /* This might alter the window we are looking at */
592 o->data.next_hash_offset = o->data.next_field_offset = 0;
593 o->data.entry_offset = o->data.entry_array_offset = 0;
594 o->data.n_entries = 0;
596 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
597 p = le64toh(f->data_hash_table[h].tail_hash_offset);
599 /* Only entry in the hash table is easy */
600 f->data_hash_table[h].head_hash_offset = htole64(offset);
602 /* Move back to the previous data object, to patch in
605 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
609 o->data.next_hash_offset = htole64(offset);
612 f->data_hash_table[h].tail_hash_offset = htole64(offset);
614 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
615 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
/* Looks up a data object with the given payload, using a hash the caller
 * has already computed. The bucket is hash modulo the number of
 * HashItems; the chain is walked from the bucket's head_hash_offset via
 * each object's next_hash_offset. Each candidate's stored hash is
 * compared with 'hash' first. Compressed candidates are uncompressed
 * into f->compress_buffer and compared there; uncompressed ones must
 * have exactly the expected object size and an identical payload.
 * -EPROTONOSUPPORT is returned on one path - presumably when a
 * compressed object is met without compression support; confirm. What
 * is returned on a match or a miss is not visible here. */
620 int journal_file_find_data_object_with_hash(
622 const void *data, uint64_t size, uint64_t hash,
623 Object **ret, uint64_t *offset) {
625 uint64_t p, osize, h;
629 assert(data || size == 0);
/* Size an uncompressed object holding this payload would have */
631 osize = offsetof(Object, data.payload) + size;
633 if (f->header->data_hash_table_size == 0)
636 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
637 p = le64toh(f->data_hash_table[h].head_hash_offset);
642 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
646 if (le64toh(o->data.hash) != hash)
649 if (o->object.flags & OBJECT_COMPRESSED) {
653 l = le64toh(o->object.size);
654 if (l <= offsetof(Object, data.payload))
657 l -= offsetof(Object, data.payload);
659 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
663 memcmp(f->compress_buffer, data, size) == 0) {
674 return -EPROTONOSUPPORT;
677 } else if (le64toh(o->object.size) == osize &&
678 memcmp(o->data.payload, data, size) == 0) {
/* Advance to the next object in the same bucket */
690 p = le64toh(o->data.next_hash_offset);
/* Convenience wrapper: computes hash64() over the payload and delegates
 * to journal_file_find_data_object_with_hash(). */
696 int journal_file_find_data_object(
698 const void *data, uint64_t size,
699 Object **ret, uint64_t *offset) {
704 assert(data || size == 0);
706 hash = hash64(data, size);
708 return journal_file_find_data_object_with_hash(f,
/* Appends a data object for the given payload. The payload is hashed and
 * looked up first (what happens when it already exists is not visible
 * here - presumably the existing object is returned; confirm).
 * Otherwise a new OBJECT_DATA object sized for the uncompressed payload
 * is appended and its hash stored. Payloads of at least
 * COMPRESSION_SIZE_THRESHOLD bytes may be compressed straight into the
 * object, in which case the object size is shrunk to the compressed
 * length and OBJECT_COMPRESSED is set; otherwise the payload is copied
 * verbatim. The object is then linked into the hash table, fed to the
 * HMAC, and re-mapped before being handed back. */
713 static int journal_file_append_data(
715 const void *data, uint64_t size,
716 Object **ret, uint64_t *offset) {
722 bool compressed = false;
725 assert(data || size == 0);
727 hash = hash64(data, size);
729 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
743 osize = offsetof(Object, data.payload) + size;
744 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
748 o->data.hash = htole64(hash);
752 size >= COMPRESSION_SIZE_THRESHOLD) {
755 compressed = compress_blob(data, size, o->data.payload, &rsize);
758 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
759 o->object.flags |= OBJECT_COMPRESSED;
761 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
/* memcpy() is skipped for empty payloads, where data may be NULL */
766 if (!compressed && size > 0)
767 memcpy(o->data.payload, data, size);
769 r = journal_file_link_data(f, o, p, hash);
773 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
777 /* The linking might have altered the window, so let's
778 * refresh our pointer */
779 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Returns the number of EntryItems in entry object o: the object size
 * minus the offset of the items array, divided by the item size. */
792 uint64_t journal_file_entry_n_items(Object *o) {
794 assert(o->object.type == OBJECT_ENTRY);
796 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
/* Returns the number of 64-bit slots in entry array object o: the object
 * size minus the offset of the items array, divided by sizeof(uint64_t). */
799 static uint64_t journal_file_entry_array_n_items(Object *o) {
801 assert(o->object.type == OBJECT_ENTRY_ARRAY);
803 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
/* Stores the entry offset p in the slot numbered *idx of a chain of
 * entry array objects, then increments *idx (kept in little-endian
 * form). The chain is walked via next_entry_array_offset; when a
 * suitable array is found the slot is written in place. Otherwise a new
 * OBJECT_ENTRY_ARRAY object is appended, fed to the HMAC, filled with p,
 * and hooked up behind the previous array 'ap' by setting that array's
 * next_entry_array_offset. How the index is adjusted while walking, how
 * the new array's length n is chosen, and how an empty chain is handled
 * are not visible here. */
806 static int link_entry_into_array(JournalFile *f,
811 uint64_t n = 0, ap = 0, q, i, a, hidx;
/* hidx keeps the original index; i is the working copy */
820 i = hidx = le64toh(*idx);
823 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
827 n = journal_file_entry_array_n_items(o);
829 o->entry_array.items[i] = htole64(p);
830 *idx = htole64(hidx + 1);
836 a = le64toh(o->entry_array.next_entry_array_offset);
847 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
848 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
853 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
857 o->entry_array.items[i] = htole64(p);
862 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
866 o->entry_array.next_entry_array_offset = htole64(q);
869 *idx = htole64(hidx + 1);
/* Variant of link_entry_into_array() for lists that use a counter one
 * larger than the array index: the visible path calls
 * link_entry_into_array() with index *idx - 1 and afterwards increments
 * *idx. Presumably the very first element is kept outside the array
 * chain in a separate slot; that part is not visible here - confirm. */
874 static int link_entry_into_array_plus_one(JournalFile *f,
893 i = htole64(le64toh(*idx) - 1);
894 r = link_entry_into_array(f, first, &i, p);
899 *idx = htole64(le64toh(*idx) + 1);
/* Links an entry into the per-data entry list of the data object
 * referenced by its i-th item: the data object's offset is read from
 * o->entry.items[i], the data object is mapped (replacing o), and the
 * entry is added through link_entry_into_array_plus_one() using the data
 * object's entry_offset and entry_array_offset fields. */
903 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
910 p = le64toh(o->entry.items[i].object_offset);
914 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
918 return link_entry_into_array_plus_one(f,
919 &o->data.entry_offset,
920 &o->data.entry_array_offset,
/* Links a completely written entry object into the file. After a full
 * memory barrier the entry is added to the global entry array (header
 * fields entry_array_offset and n_entries). The header's realtime and
 * monotonic timestamps are then updated from the entry
 * (head_entry_realtime only if still 0), tail_entry_monotonic_valid is
 * set, and finally every item of the entry is linked to its data object. */
925 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
932 assert(o->object.type == OBJECT_ENTRY);
934 __sync_synchronize();
936 /* Link up the entry itself */
937 r = link_entry_into_array(f,
938 &f->header->entry_array_offset,
939 &f->header->n_entries,
944 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
/* Both sides are stored little-endian, so the values are copied as is */
946 if (f->header->head_entry_realtime == 0)
947 f->header->head_entry_realtime = o->entry.realtime;
949 f->header->tail_entry_realtime = o->entry.realtime;
950 f->header->tail_entry_monotonic = o->entry.monotonic;
952 f->tail_entry_monotonic_valid = true;
954 /* Link up the items */
955 n = journal_file_entry_n_items(o);
956 for (i = 0; i < n; i++) {
957 r = journal_file_link_entry_item(f, o, offset, i);
/* Appends an entry object built from already prepared items. The object
 * is sized for n_items EntryItems and appended as OBJECT_ENTRY; then the
 * sequence number, the items, both timestamps from ts, the xor hash and
 * the header's boot ID are filled in. The object is fed to the HMAC and
 * linked into the file with journal_file_link_entry(). */
965 static int journal_file_append_entry_internal(
967 const dual_timestamp *ts,
969 const EntryItem items[], unsigned n_items,
971 Object **ret, uint64_t *offset) {
978 assert(items || n_items == 0);
981 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
983 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
987 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
988 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
989 o->entry.realtime = htole64(ts->realtime);
990 o->entry.monotonic = htole64(ts->monotonic);
991 o->entry.xor_hash = htole64(xor_hash);
/* boot_id is copied without conversion from the file header */
992 o->entry.boot_id = f->header->boot_id;
994 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
998 r = journal_file_link_entry(f, o, np);
/* Notifies inotify watchers that the file has changed. Writes made
 * through the memory mapping do not raise IN_MODIFY, so after a full
 * memory barrier the file is ftruncate()d to its current (cached) size,
 * which does. A failure is only logged; nothing is returned. */
1011 void journal_file_post_change(JournalFile *f) {
1014 /* inotify() does not receive IN_MODIFY events from file
1015 * accesses done via mmap(). After each access we hence
1016 * trigger IN_MODIFY by truncating the journal file to its
1017 * current size which triggers IN_MODIFY. */
1019 __sync_synchronize();
1021 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1022 log_error("Failed to truncate file to its own size: %m");
/* Appends one log entry made of n_iovec fields. A timestamp is obtained
 * with dual_timestamp_get() into a local (presumably only when the
 * caller passed none; the condition is not visible). The entry's
 * monotonic time is compared with the header's tail_entry_monotonic when
 * that value is valid. A tag may be appended first
 * (journal_file_maybe_append_tag()). Each iovec is then stored as a data
 * object, its hash XORed into xor_hash, and its offset and hash recorded
 * in a stack-allocated item array. Finally the entry object is written
 * via journal_file_append_entry_internal() and
 * journal_file_post_change() is called.
 * NOTE(review): the item array is sized by n_iovec and obtained with
 * alloca(); no upper bound on n_iovec is visible here. */
1025 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1029 uint64_t xor_hash = 0;
1030 struct dual_timestamp _ts;
1033 assert(iovec || n_iovec == 0);
1039 dual_timestamp_get(&_ts);
1043 if (f->tail_entry_monotonic_valid &&
1044 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1047 r = journal_file_maybe_append_tag(f, ts->realtime);
1051 /* alloca() can't take 0, hence let's allocate at least one */
1052 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1054 for (i = 0; i < n_iovec; i++) {
1058 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1062 xor_hash ^= le64toh(o->data.hash);
1063 items[i].object_offset = htole64(p);
/* Already little-endian in the data object, copied as is */
1064 items[i].hash = o->data.hash;
1067 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1069 journal_file_post_change(f);
/* Fetches an entry by index from a chain of entry arrays. The chain is
 * walked through next_entry_array_offset; in the array that holds the
 * wanted index the entry offset p is read from items[i]. If either the
 * array offset a or p ends up 0 the lookup does not proceed (the value
 * returned then is not visible here); otherwise the OBJECT_ENTRY at p is
 * mapped. */
1074 static int generic_array_get(JournalFile *f,
1077 Object **ret, uint64_t *offset) {
1089 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1093 n = journal_file_entry_array_n_items(o);
1095 p = le64toh(o->entry_array.items[i]);
1100 a = le64toh(o->entry_array.next_entry_array_offset);
1103 if (a <= 0 || p <= 0)
1106 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
/* Like generic_array_get(), for lists that have one 'extra' entry in
 * front of the array chain: the extra entry is mapped directly as an
 * OBJECT_ENTRY, any other index is looked up in the chain at i-1. The
 * condition separating the two cases is not visible here - presumably
 * i == 0 selects the extra entry; confirm. */
1119 static int generic_array_get_plus_one(JournalFile *f,
1123 Object **ret, uint64_t *offset) {
1132 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1145 return generic_array_get(f, first, i-1, ret, offset);
/* Searches a chain of entry arrays for 'needle'. Entry offsets are
 * classified by the test_object callback; a TEST_FOUND result is turned
 * into TEST_RIGHT for DIRECTION_DOWN and into TEST_LEFT otherwise.
 *
 * For every array in the chain one element is tested first (which
 * element is chosen is not visible here). If that test yields
 * TEST_RIGHT, the array is bisected between 'left' and 'right' with the
 * midpoint (left + right) / 2; when the interval collapses, subtract_one
 * is set for DIRECTION_UP. Otherwise the walk continues with the next
 * array. After the search the entry offset is taken from items[i] or,
 * with subtract_one, from items[i-1]; that entry is mapped as an
 * OBJECT_ENTRY, and the index written to *idx is t + i, minus one when
 * subtract_one is set. */
1154 static int generic_array_bisect(JournalFile *f,
1158 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1159 direction_t direction,
1164 uint64_t a, p, t = 0, i = 0, last_p = 0;
1165 bool subtract_one = false;
1166 Object *o, *array = NULL;
1170 assert(test_object);
1174 uint64_t left, right, k, lp;
1176 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
/* k: number of slots in the current array */
1180 k = journal_file_entry_array_n_items(array);
1186 lp = p = le64toh(array->entry_array.items[i]);
1190 r = test_object(f, p, needle);
1194 if (r == TEST_FOUND)
1195 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1197 if (r == TEST_RIGHT) {
1201 if (left == right) {
1202 if (direction == DIRECTION_UP)
1203 subtract_one = true;
1209 assert(left < right);
1211 i = (left + right) / 2;
1212 p = le64toh(array->entry_array.items[i]);
1216 r = test_object(f, p, needle);
1220 if (r == TEST_FOUND)
1221 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1223 if (r == TEST_RIGHT)
1231 if (direction == DIRECTION_UP) {
1233 subtract_one = true;
1244 a = le64toh(array->entry_array.next_entry_array_offset);
1250 if (subtract_one && t == 0 && i == 0)
1253 if (subtract_one && i == 0)
1255 else if (subtract_one)
1256 p = le64toh(array->entry_array.items[i-1]);
1258 p = le64toh(array->entry_array.items[i]);
1260 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1271 *idx = t + i + (subtract_one ? -1 : 0);
/* Bisection for lists that have one 'extra' entry in front of the array
 * chain. The extra entry is tested first with test_object (TEST_FOUND is
 * folded into TEST_RIGHT for DIRECTION_DOWN and TEST_LEFT otherwise).
 * step_back records that a DIRECTION_UP search may have to fall back to
 * the extra entry. The remaining n-1 entries are searched with
 * generic_array_bisect(); if that finds nothing (r == 0) and step_back
 * is set, the extra entry is used and mapped as an OBJECT_ENTRY. How the
 * resulting index is adjusted for the extra entry is not visible here. */
1276 static int generic_array_bisect_plus_one(JournalFile *f,
1281 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1282 direction_t direction,
1288 bool step_back = false;
1292 assert(test_object);
1297 /* This bisects the array in object 'first', but first checks
1299 r = test_object(f, extra, needle);
1303 if (r == TEST_FOUND)
1304 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1306 /* if we are looking with DIRECTION_UP then we need to first
1307 see if in the actual array there is a matching entry, and
1308 return the last one of that. But if there isn't any we need
1309 to return this one. Hence remember this, and return it
1312 step_back = direction == DIRECTION_UP;
1314 if (r == TEST_RIGHT) {
1315 if (direction == DIRECTION_DOWN)
1321 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1323 if (r == 0 && step_back)
1332 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
/* Bisection callback that orders entries by file offset: the entry
 * offset p is compared with needle directly, without mapping the object.
 * The values returned for each case are not visible here - presumably
 * the TEST_FOUND / TEST_LEFT / TEST_RIGHT constants; confirm. */
1348 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1354 else if (p < needle)
/* Seeks to an entry by file offset: bisects the global entry array
 * (header fields entry_array_offset and n_entries) with
 * generic_array_bisect() in the given direction. */
1360 int journal_file_move_to_entry_by_offset(
1363 direction_t direction,
1367 return generic_array_bisect(f,
1368 le64toh(f->header->entry_array_offset),
1369 le64toh(f->header->n_entries),
/* Bisection callback that orders entries by sequence number: the entry
 * at p is mapped and its seqnum compared with needle (equal / less /
 * otherwise). The values returned for each case are not visible here. */
1377 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1384 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1388 if (le64toh(o->entry.seqnum) == needle)
1390 else if (le64toh(o->entry.seqnum) < needle)
/* Seeks to an entry by sequence number: bisects the global entry array
 * (header fields entry_array_offset and n_entries) with
 * generic_array_bisect() in the given direction. */
1396 int journal_file_move_to_entry_by_seqnum(
1399 direction_t direction,
1403 return generic_array_bisect(f,
1404 le64toh(f->header->entry_array_offset),
1405 le64toh(f->header->n_entries),
/* Bisection callback that orders entries by realtime timestamp: the
 * entry at p is mapped and its realtime field compared with needle
 * (equal / less / otherwise). The values returned for each case are not
 * visible here. */
1412 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1419 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1423 if (le64toh(o->entry.realtime) == needle)
1425 else if (le64toh(o->entry.realtime) < needle)
/* Seeks to an entry by realtime timestamp: bisects the global entry
 * array (header fields entry_array_offset and n_entries) with
 * generic_array_bisect(), using test_object_realtime as the comparison
 * callback. */
1431 int journal_file_move_to_entry_by_realtime(
1434 direction_t direction,
1438 return generic_array_bisect(f,
1439 le64toh(f->header->entry_array_offset),
1440 le64toh(f->header->n_entries),
1442 test_object_realtime,
/* Bisection callback that orders entries by monotonic timestamp: the
 * entry at p is mapped and its monotonic field compared with needle
 * (equal / less / otherwise). The values returned for each case are not
 * visible here. */
1447 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1454 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1458 if (le64toh(o->entry.monotonic) == needle)
1460 else if (le64toh(o->entry.monotonic) < needle)
/* Seeks to an entry by monotonic timestamp within one boot. The string
 * "_BOOT_ID=" followed by the formatted boot ID is built in a local
 * buffer and looked up as a data object; the entry list of that data
 * object (entry_offset, entry_array_offset, n_entries) is then bisected
 * with test_object_monotonic. */
1466 int journal_file_move_to_entry_by_monotonic(
1470 direction_t direction,
/* 9 bytes for the "_BOOT_ID=" prefix, 32 for the ID text, 1 for the NUL */
1474 char t[9+32+1] = "_BOOT_ID=";
1480 sd_id128_to_string(boot_id, t + 9);
1481 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1487 return generic_array_bisect_plus_one(f,
1488 le64toh(o->data.entry_offset),
1489 le64toh(o->data.entry_array_offset),
1490 le64toh(o->data.n_entries),
1492 test_object_monotonic,
/* Moves to the entry following (DIRECTION_DOWN) or preceding the entry o
 * located at offset p in the global entry array. Two ways of choosing
 * the index are visible: starting at the first or last entry (0 or
 * n - 1, by direction), and locating o by bisection - it must be an
 * OBJECT_ENTRY - and then stepping one position by direction. Presumably
 * the former is used when no current entry is given; the selecting
 * condition and the bounds handling are not visible here. The entry at
 * the resulting index is fetched with generic_array_get(). */
1497 int journal_file_next_entry(
1499 Object *o, uint64_t p,
1500 direction_t direction,
1501 Object **ret, uint64_t *offset) {
1507 assert(p > 0 || !o);
1509 n = le64toh(f->header->n_entries);
1514 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1516 if (o->object.type != OBJECT_ENTRY)
1519 r = generic_array_bisect(f,
1520 le64toh(f->header->entry_array_offset),
1521 le64toh(f->header->n_entries),
1530 if (direction == DIRECTION_DOWN) {
1543 /* And jump to it */
1544 return generic_array_get(f,
1545 le64toh(f->header->entry_array_offset),
/* Moves a number of entries away from the entry o located at offset p.
 * o must be an OBJECT_ENTRY; its index in the global entry array is
 * found by bisection. The new index is then computed from 'skip': a
 * backward move subtracts the magnitude (with a separate branch when it
 * is at least as large as the current index), a forward move adds it.
 * The entry at the new index is fetched with generic_array_get(). The
 * clamping against n_entries is not visible here. */
1550 int journal_file_skip_entry(
1552 Object *o, uint64_t p,
1554 Object **ret, uint64_t *offset) {
1563 if (o->object.type != OBJECT_ENTRY)
1566 r = generic_array_bisect(f,
1567 le64toh(f->header->entry_array_offset),
1568 le64toh(f->header->n_entries),
1577 /* Calculate new index */
1579 if ((uint64_t) -skip >= i)
1582 i = i - (uint64_t) -skip;
1584 i += (uint64_t) skip;
1586 n = le64toh(f->header->n_entries);
1593 return generic_array_get(f,
1594 le64toh(f->header->entry_array_offset),
/* Like journal_file_next_entry(), but restricted to the entries that
 * reference the data object at data_offset. The data object is mapped
 * and its n_entries read. Two ways of choosing the index are visible:
 * starting at the first or last entry (0 or n - 1, by direction), and
 * locating o - which must be an OBJECT_ENTRY - by bisecting the data
 * object's entry list and stepping by direction. The resulting entry is
 * fetched with generic_array_get_plus_one(). The condition selecting
 * between the two and the bounds handling are not visible here. */
1599 int journal_file_next_entry_for_data(
1601 Object *o, uint64_t p,
1602 uint64_t data_offset,
1603 direction_t direction,
1604 Object **ret, uint64_t *offset) {
1611 assert(p > 0 || !o);
1613 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1617 n = le64toh(d->data.n_entries);
1622 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1624 if (o->object.type != OBJECT_ENTRY)
1627 r = generic_array_bisect_plus_one(f,
1628 le64toh(d->data.entry_offset),
1629 le64toh(d->data.entry_array_offset),
1630 le64toh(d->data.n_entries),
1640 if (direction == DIRECTION_DOWN) {
1654 return generic_array_get_plus_one(f,
1655 le64toh(d->data.entry_offset),
1656 le64toh(d->data.entry_array_offset),
/* Seeks by entry offset among the entries that reference the data object
 * at data_offset: the data object is mapped and its entry list
 * (entry_offset, entry_array_offset, n_entries) is bisected with
 * generic_array_bisect_plus_one(). */
1661 int journal_file_move_to_entry_by_offset_for_data(
1663 uint64_t data_offset,
1665 direction_t direction,
1666 Object **ret, uint64_t *offset) {
1673 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1677 return generic_array_bisect_plus_one(f,
1678 le64toh(d->data.entry_offset),
1679 le64toh(d->data.entry_array_offset),
1680 le64toh(d->data.n_entries),
/* Seeks by monotonic timestamp within one boot among the entries that
 * reference the data object at data_offset. First the "_BOOT_ID=..."
 * data object is looked up (its offset is kept in b) and its entry list
 * is bisected by monotonic time. Then the search alternates between the
 * entry list of the data object at data_offset and that of the boot ID
 * object, re-mapping each object before use, until an entry present in
 * both lists is found. The loop control and the needles passed to the
 * later bisections are not visible here. */
1687 int journal_file_move_to_entry_by_monotonic_for_data(
1689 uint64_t data_offset,
1692 direction_t direction,
1693 Object **ret, uint64_t *offset) {
/* 9 bytes for the "_BOOT_ID=" prefix, 32 for the ID text, 1 for the NUL */
1695 char t[9+32+1] = "_BOOT_ID=";
1702 /* First, seek by time */
1703 sd_id128_to_string(boot_id, t + 9);
1704 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1710 r = generic_array_bisect_plus_one(f,
1711 le64toh(o->data.entry_offset),
1712 le64toh(o->data.entry_array_offset),
1713 le64toh(o->data.n_entries),
1715 test_object_monotonic,
1721 /* And now, continue seeking until we find an entry that
1722 * exists in both bisection arrays */
1728 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1732 r = generic_array_bisect_plus_one(f,
1733 le64toh(d->data.entry_offset),
1734 le64toh(d->data.entry_array_offset),
1735 le64toh(d->data.n_entries),
1743 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1747 r = generic_array_bisect_plus_one(f,
1748 le64toh(o->data.entry_offset),
1749 le64toh(o->data.entry_array_offset),
1750 le64toh(o->data.n_entries),
/* Seeks by sequence number among the entries that reference the data
 * object at data_offset: the data object is mapped and its entry list
 * (entry_offset, entry_array_offset, n_entries) is bisected with
 * generic_array_bisect_plus_one(). */
1774 int journal_file_move_to_entry_by_seqnum_for_data(
1776 uint64_t data_offset,
1778 direction_t direction,
1779 Object **ret, uint64_t *offset) {
1786 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1790 return generic_array_bisect_plus_one(f,
1791 le64toh(d->data.entry_offset),
1792 le64toh(d->data.entry_array_offset),
1793 le64toh(d->data.n_entries),
/* Seeks by realtime timestamp among the entries that reference the data
 * object at data_offset: the data object is mapped and its entry list
 * (entry_offset, entry_array_offset, n_entries) is bisected with
 * generic_array_bisect_plus_one(), using test_object_realtime as the
 * comparison callback. */
1800 int journal_file_move_to_entry_by_realtime_for_data(
1802 uint64_t data_offset,
1804 direction_t direction,
1805 Object **ret, uint64_t *offset) {
1812 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1816 return generic_array_bisect_plus_one(f,
1817 le64toh(d->data.entry_offset),
1818 le64toh(d->data.entry_array_offset),
1819 le64toh(d->data.n_entries),
1821 test_object_realtime,
/* Returns a pointer to the FSPRG state, which starts header_size bytes
 * into the mapped FSPRG file. The authenticate flag is checked first, and
 * header_size + state_size is checked against the mapped size
 * fsprg_size; what is returned when either check fails is not visible
 * here - presumably NULL; confirm. */
1826 static void *fsprg_state(JournalFile *f) {
1830 if (!f->authenticate)
1833 a = le64toh(f->fsprg_header->header_size);
1834 b = le64toh(f->fsprg_header->state_size);
1836 if (a + b > f->fsprg_size)
1839 return (uint8_t*) f->fsprg_header + a;
/* Allocates the next tag sequence number: the header's n_tags plus one,
 * which is also written back to n_tags. */
1842 static uint64_t journal_file_tag_seqnum(JournalFile *f) {
1847 r = le64toh(f->header->n_tags) + 1;
1848 f->header->n_tags = htole64(r);
/* Appends a tag object that seals everything fed to the HMAC since the
 * last tag. The authenticate and hmac_running flags are checked first
 * (the outcome of those checks is not visible here - presumably the
 * function returns early when authentication is off or no HMAC is in
 * progress; confirm). An OBJECT_TAG object is appended and given the next
 * tag sequence number; the tag object itself is fed to the HMAC, then
 * the digest is read and copied into the object's tag field, and
 * hmac_running is cleared. */
1853 int journal_file_append_tag(JournalFile *f) {
1860 if (!f->authenticate)
1863 if (!f->hmac_running)
1866 log_debug("Writing tag for epoch %llu", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1870 r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1874 o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
1876 /* Add the tag object itself, so that we can protect its
1877 * header. This will exclude the actual hash value in it */
1878 r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
1882 /* Get the HMAC tag and store it in the object */
1883 memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1884 f->hmac_running = false;
/* Starts a new HMAC cycle unless one is already running: the digest
 * handle is reset, a 256-bit key is obtained from the FSPRG state and
 * installed with gcry_md_setkey(), and hmac_running is set. The
 * authenticate and hmac_running flags are checked first; the outcome of
 * those checks is not visible here.
 * NOTE(review): the key buffer on the stack is not wiped in the visible
 * lines. */
1889 static int journal_file_hmac_start(JournalFile *f) {
1890 uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1894 if (!f->authenticate)
1897 if (f->hmac_running)
1900 /* Prepare HMAC for next cycle */
1901 gcry_md_reset(f->hmac);
1902 FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1903 gcry_md_setkey(f->hmac, key, sizeof(key));
1905 f->hmac_running = true;
/* Computes the FSPRG epoch a realtime timestamp falls into: the time
 * elapsed since fsprg_start_usec divided by fsprg_interval_usec. A start
 * or interval of 0, and a timestamp before the start, are checked for
 * first; what is returned in those cases and how the result reaches
 * *epoch is not visible here. Requires f->authenticate. */
1910 static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1915 assert(f->authenticate);
/* The interval check also guards the division below */
1917 if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1918 le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1921 if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1924 t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1925 t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
/* Tells whether the FSPRG key must be evolved before writing at the
 * given realtime: the epoch that time belongs to (goal) is compared with
 * the epoch of the current FSPRG state, and the function returns
 * non-zero when they differ. The authenticate flag is checked first; the
 * outcome of that check is not visible here. */
1931 static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1932 uint64_t goal, epoch;
1936 if (!f->authenticate)
1939 r = journal_file_get_epoch(f, realtime, &goal);
1943 epoch = FSPRG_GetEpoch(fsprg_state(f));
1947 return epoch != goal;
/* Evolves the FSPRG key towards the epoch the given realtime belongs to.
 * The goal epoch and the current epoch are determined and logged; the
 * state is then evolved with FSPRG_Evolve() and the epoch re-read.
 * Presumably this repeats until the goal is reached - the loop control
 * is not visible here; confirm. The authenticate flag is checked first. */
1950 static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1951 uint64_t goal, epoch;
1956 if (!f->authenticate)
1959 r = journal_file_get_epoch(f, realtime, &goal);
1963 epoch = FSPRG_GetEpoch(fsprg_state(f));
1965 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
1973 FSPRG_Evolve(fsprg_state(f));
1974 epoch = FSPRG_GetEpoch(fsprg_state(f));
/* Called before an entry is written at the given realtime. In the order
 * visible here: checks the authenticate flag, asks
 * journal_file_need_evolve() whether the epoch has changed, appends a
 * tag, evolves the key, and starts a new HMAC cycle. How the result of
 * journal_file_need_evolve() gates the later steps is not visible here -
 * presumably nothing more is done when no evolution is needed; confirm. */
1978 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
1983 if (!f->authenticate)
1986 r = journal_file_need_evolve(f, realtime);
1990 r = journal_file_append_tag(f);
1994 r = journal_file_evolve(f, realtime);
1998 r = journal_file_hmac_start(f);
/* Feeds the immutable parts of the object at offset p into the running
 * HMAC. An HMAC cycle is started if necessary and the object is mapped
 * with the expected type. The object header up to its payload member is
 * always hashed; what follows depends on the object type: for one type
 * the hash and the payload, for another everything from the seqnum field
 * to the end of the object, nothing for hash tables and entry arrays,
 * and only the seqnum for the tag case. The case labels for the first,
 * second and last of these are not visible here - presumably DATA, ENTRY
 * and TAG respectively; confirm. */
2005 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2011 if (!f->authenticate)
2014 r = journal_file_hmac_start(f);
2018 r = journal_file_move_to_object(f, type, p, &o);
2022 gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2024 switch (o->object.type) {
2027 /* All but: hash and payload are mutable */
2028 gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
2029 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2034 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2037 case OBJECT_FIELD_HASH_TABLE:
2038 case OBJECT_DATA_HASH_TABLE:
2039 case OBJECT_ENTRY_ARRAY:
2040 /* Nothing: everything is mutable */
2044 /* All but the tag itself */
2045 gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
/* Feeds the file header into the running HMAC (f->hmac), leaving out
 * the fields listed in the comment below. The header is hashed as five
 * byte ranges, each running from one field up to (not including) the
 * next excluded field. Disabled authentication is checked first. */
2054 static int journal_file_hmac_put_header(JournalFile *f) {
2059 if (!f->authenticate)
2062 r = journal_file_hmac_start(f);
2066 /* All but state+reserved, boot_id, arena_size,
2067 * tail_object_offset, n_objects, n_entries, tail_seqnum,
2068 * head_entry_realtime, tail_entry_realtime,
2069 * tail_entry_monotonic, n_data, n_fields, header_tag */
/* [signature, state) */
2071 gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
/* [file_id, boot_id) */
2072 gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
/* [seqnum_id, arena_size) */
2073 gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
/* [data_hash_table_offset, tail_object_offset) */
2074 gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
/* [head_entry_seqnum, head_entry_realtime) */
2075 gcry_md_write(f->hmac, &f->header->head_entry_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
/* Opens the per-machine FSPRG state file, validates it and maps it
 * read-write into f->fsprg_header (its total size goes to f->fsprg_size).
 *
 * The file is first mapped read-only, header only, for validation:
 * signature, incompatible flags, header size, state size (must match
 * FSPRG_stateinbytes() for the stored security parameter), file size,
 * machine ID, and non-zero start/interval values. Only then is the whole
 * header+state mapped writable. The temporary mapping and the file
 * descriptor are released at the end.
 * Disabled authentication is checked before anything else. */
2080 static int journal_file_load_fsprg(JournalFile *f) {
2084 FSPRGHeader *m = NULL;
2089 if (!f->authenticate)
2092 r = sd_id128_get_machine(&machine);
/* The state file sits in this machine's journal directory */
2096 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2097 SD_ID128_FORMAT_VAL(machine)) < 0)
2100 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2102 log_error("Failed to open %s: %m", p);
2107 if (fstat(fd, &st) < 0) {
/* Too small to even hold the fixed header */
2112 if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
/* Read-only view of the header, used for the checks below */
2117 m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2118 if (m == MAP_FAILED) {
2124 if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
/* Any incompatible flag set means a format we do not understand */
2129 if (m->incompatible_flags != 0) {
2130 r = -EPROTONOSUPPORT;
2134 if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2139 if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
/* Header plus state must fit into the file */
2144 f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2145 if ((uint64_t) st.st_size < f->fsprg_size) {
/* The state file must belong to this machine */
2150 if (!sd_id128_equal(machine, m->machine_id)) {
/* le64toh() yields an unsigned value, so "<= 0" is effectively "== 0" */
2155 if (le64toh(m->fsprg_start_usec) <= 0 ||
2156 le64toh(m->fsprg_interval_usec) <= 0) {
/* Validation passed: map header and state writable */
2161 f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2162 if (f->fsprg_header == MAP_FAILED) {
/* Do not leave MAP_FAILED behind in the JournalFile */
2163 f->fsprg_header = NULL;
/* Drop the temporary read-only header mapping */
2172 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2175 close_nointr_nofail(fd);
/* Creates the libgcrypt message digest context used for authentication
 * and stores it in f->hmac: SHA-256 in HMAC mode. Disabled
 * authentication is checked first. */
2181 static int journal_file_setup_hmac(JournalFile *f) {
2184 if (!f->authenticate)
2187 e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
/* Computes and appends the first tag of a file: the header, the field
 * hash table object and the data hash table object are fed into the
 * HMAC, then journal_file_append_tag() is called.
 *
 * The header stores the offsets of the hash table *items*; to get the
 * offset of the enclosing object, offsetof(Object, hash_table.items) is
 * subtracted, after checking that the stored offset is large enough for
 * that subtraction. Disabled authentication is checked first. */
2194 static int journal_file_append_first_tag(JournalFile *f) {
2198 if (!f->authenticate)
2201 log_debug("Calculating first tag...");
2203 r = journal_file_hmac_put_header(f);
/* Field hash table: items offset -> object offset */
2207 p = le64toh(f->header->field_hash_table_offset);
2208 if (p < offsetof(Object, hash_table.items))
2210 p -= offsetof(Object, hash_table.items);
2212 r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
/* Data hash table: same conversion */
2216 p = le64toh(f->header->data_hash_table_offset);
2217 if (p < offsetof(Object, hash_table.items))
2219 p -= offsetof(Object, hash_table.items);
2221 r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2225 r = journal_file_append_tag(f);
/* Per-object sanity checks, selected by o->object.type. The visible
 * checks are:
 *  - data:  entry_offset and n_entries compared against zero; payload
 *           length (object size minus the payload offset) compared
 *           against zero
 *  - field: payload length compared against zero
 *  - entry: the item area must be a whole number of EntryItem and that
 *           number is compared against zero; seqnum and realtime are
 *           compared against zero
 *  - hash tables: the item area must be a whole number of HashItem
 *  - entry array: the item area must be a whole number of le64_t
 *  - tag:   the object size must be exactly sizeof(TagObject)
 *
 * NOTE(review): le64toh() yields an unsigned value, so every "<= 0"
 * here is effectively "== 0", and "size - offsetof(...)" wraps to a
 * huge value instead of going negative when size is smaller than the
 * offset -- such undersized objects are not caught by these checks. */
2232 static int journal_file_object_verify(JournalFile *f, Object *o) {
2236 /* This does various superficial tests about the length an
2237 * possible field values. It does not follow any references to
2240 switch (o->object.type) {
2242 if (le64toh(o->data.entry_offset) <= 0 ||
2243 le64toh(o->data.n_entries) <= 0)
2246 if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
2251 if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
2256 if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
2259 if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
2262 if (le64toh(o->entry.seqnum) <= 0 ||
2263 le64toh(o->entry.realtime) <= 0)
2268 case OBJECT_DATA_HASH_TABLE:
2269 case OBJECT_FIELD_HASH_TABLE:
2270 if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
2275 case OBJECT_ENTRY_ARRAY:
2276 if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
2282 if (le64toh(o->object.size) != sizeof(TagObject))
/* Draws a progress bar on stdout when stdout is a terminal.
 *   p         - progress on a 0..65535 scale (both the bar length and
 *               the percentage divide by 65535)
 *   last_usec - time stamp state kept by the caller between calls
 *
 * Redraws are rate-limited with a 40 ms window on CLOCK_MONOTONIC. The
 * bar is three quarters of the terminal width, drawn with U+2588 (full
 * block) for the done part and U+2591 (light shade) for the rest, and
 * followed by a percentage. */
2290 static void draw_progress(uint64_t p, usec_t *last_usec) {
2291 unsigned n, i, j, k;
2294 if (!isatty(STDOUT_FILENO))
2297 z = now(CLOCK_MONOTONIC);
/* Less than 40 ms after the recorded time stamp x (0 means none) */
2300 if (x != 0 && x + 40 * USEC_PER_MSEC > z)
/* n: total bar width in cells, j: number of filled cells */
2305 n = (3 * columns()) / 4;
2306 j = (n * (unsigned) p) / 65535ULL;
/* Return to column 0; ESC[?25l is the usual "hide cursor" sequence */
2309 fputs("\r\x1B[?25l", stdout);
2311 for (i = 0; i < j; i++)
2312 fputs("\xe2\x96\x88", stdout);
2314 for (i = 0; i < k; i++)
2315 fputs("\xe2\x96\x91", stdout);
2317 printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
/* Back to column 0; ESC[?25h is the usual "show cursor" sequence */
2319 fputs("\r\x1B[?25h", stdout);
/* Counterpart of draw_progress(): acts only when stdout is a terminal
 * and iterates over the width of the bar (three quarters of the
 * terminal width) plus 5 more cells, which matches the width of the
 * " %3lu%%" suffix draw_progress() prints.
 * NOTE(review): presumably each iteration overwrites one cell to erase
 * the bar -- the loop body is what to confirm. */
2323 static void flush_progress(void) {
2326 if (!isatty(STDOUT_FILENO))
2329 n = (3 * columns()) / 4;
2333 for (i = 0; i < n + 5; i++)
/* Writes the 64-bit value 'p' to 'fd' as raw bytes in host byte order
 * (sizeof(p) bytes straight from the variable); the result of write()
 * is kept in k for the caller-facing return value. */
2340 static int write_uint64(int fd, uint64_t p) {
2343 k = write(fd, &p, sizeof(p));
/* Looks for the value 'p' in a file of 'n' raw uint64_t values written
 * through 'fd'. Single elements are accessed through the mmap cache:
 * element number c is mapped read-only at byte offset
 * c * sizeof(uint64_t).
 * NOTE(review): presumably a binary search over values that were
 * written in ascending order -- confirm against the search loop. */
2352 static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
2367 r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
/* Verifies the internal consistency of a journal file in two passes
 * over all objects, from header_size up to tail_object_offset.
 *
 * Pass 1 checks every object on its own (structure, flags, ordering of
 * sequence numbers and time stamps), feeds it into the HMAC, counts
 * objects by type and records the offsets of all data, entry and entry
 * array objects in three temporary files. Afterwards the counters and
 * the last seen values are compared with the header.
 *
 * Pass 2 walks the objects again and checks that the offsets stored in
 * entry arrays refer to objects recorded in pass 1.
 *
 * Progress is reported through draw_progress(); pass 1 covers the lower
 * half of its range, pass 2 the upper half. The temporary files are
 * removed from the mmap cache and closed on both the regular and the
 * failure path.
 *
 * NOTE(review): both draw_progress() calls divide by
 * tail_object_offset and the failure message divides by
 * last_stat.st_size; neither divisor is visibly checked against zero.
 * NOTE(review): 'key' is not used in the visible part of the function. */
2383 int journal_file_verify(JournalFile *f, const char *key) {
2387 uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
2388 sd_id128_t entry_boot_id;
2389 bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
2390 uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
2391 usec_t last_usec = 0;
2392 int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
/* Templates for the temporary offset lists, filled in by mkostemp() */
2393 char data_path[] = "/var/tmp/journal-data-XXXXXX",
2394 entry_path[] = "/var/tmp/journal-entry-XXXXXX",
2395 entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
2399 data_fd = mkostemp(data_path, O_CLOEXEC);
2401 log_error("Failed to create data file: %m");
2406 entry_fd = mkostemp(entry_path, O_CLOEXEC);
2408 log_error("Failed to create entry file: %m");
2413 entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
2414 if (entry_array_fd < 0) {
2415 log_error("Failed to create entry array file: %m");
/* The file is only needed through its fd, so the name is removed at once */
2418 unlink(entry_array_path);
2420 /* First iteration: we go through all objects, verify the
2421 * superficial structure, headers, hashes. */
2423 r = journal_file_hmac_put_header(f);
2425 log_error("Failed to calculate HMAC of header.");
/* The first object follows directly after the header */
2429 p = le64toh(f->header->header_size);
/* Pass 1 maps to 0..0x7FFF of the progress range */
2431 draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
2433 r = journal_file_move_to_object(f, -1, p, &o);
2435 log_error("Invalid object at %llu", (unsigned long long) p);
/* We must never walk past the object the header names as the last one */
2439 if (le64toh(f->header->tail_object_offset) < p) {
2440 log_error("Invalid tail object pointer.");
2447 r = journal_file_object_verify(f, o);
2449 log_error("Invalid object contents at %llu", (unsigned long long) p);
2453 r = journal_file_hmac_put_object(f, -1, p);
2455 log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
/* Compressed objects require the matching header flag ... */
2459 if (o->object.flags & OBJECT_COMPRESSED &&
2460 !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
2461 log_error("Compressed object without compression at %llu", (unsigned long long) p);
/* ... and only data objects may be compressed */
2466 if (o->object.flags & OBJECT_COMPRESSED &&
2467 o->object.type != OBJECT_DATA) {
2468 log_error("Compressed non-data object at %llu", (unsigned long long) p);
2473 if (o->object.type == OBJECT_TAG) {
/* Tags are only valid in files flagged as authenticated */
2475 if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
2476 log_error("Tag object without authentication at %llu", (unsigned long long) p);
/* The tag's sequence number must equal the running tag counter */
2481 if (le64toh(o->tag.seqnum) != tag_seqnum) {
2482 log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
2487 } else if (o->object.type == OBJECT_ENTRY) {
/* Remember the entry's offset for pass 2 */
2489 r = write_uint64(entry_fd, p);
/* First entry seen: must match the head seqnum in the header */
2493 if (!entry_seqnum_set &&
2494 le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
2495 log_error("Head entry sequence number incorrect");
/* Later entries: sequence numbers must strictly increase */
2500 if (entry_seqnum_set &&
2501 entry_seqnum >= le64toh(o->entry.seqnum)) {
2502 log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
2507 entry_seqnum = le64toh(o->entry.seqnum);
2508 entry_seqnum_set = true;
/* Monotonic time may not go backwards within the same boot */
2510 if (entry_monotonic_set &&
2511 sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
2512 entry_monotonic > le64toh(o->entry.monotonic)) {
2513 log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
2518 entry_monotonic = le64toh(o->entry.monotonic);
2519 entry_boot_id = o->entry.boot_id;
2520 entry_monotonic_set = true;
/* First entry seen: must match the head realtime stamp in the header */
2522 if (!entry_realtime_set &&
2523 le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
2524 log_error("Head entry realtime timestamp incorrect");
2529 entry_realtime = le64toh(o->entry.realtime);
2530 entry_realtime_set = true;
2533 } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
/* Remember the array's offset for pass 2 */
2535 r = write_uint64(entry_array_fd, p);
/* The array the header points to is the main one; it must be unique */
2539 if (p == le64toh(f->header->entry_array_offset)) {
2540 if (found_main_entry_array) {
2541 log_error("More than one main entry array at %llu", (unsigned long long) p);
2546 found_main_entry_array = true;
2551 } else if (o->object.type == OBJECT_DATA) {
/* Remember the data object's offset */
2553 r = write_uint64(data_fd, p);
2559 } else if (o->object.type == OBJECT_FIELD)
2561 else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
2562 n_data_hash_tables++;
2564 if (n_data_hash_tables > 1) {
2565 log_error("More than one data hash table at %llu", (unsigned long long) p);
/* The header refers to the items area, not to the object start */
2570 if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
2571 le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
2572 log_error("Header fields for data hash table invalid.");
2576 } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
2577 n_field_hash_tables++;
2579 if (n_field_hash_tables > 1) {
2580 log_error("More than one field hash table at %llu", (unsigned long long) p);
/* Same relation between header fields and object as for the data hash table */
2585 if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
2586 le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
2587 log_error("Header fields for field hash table invalid.");
2591 } else if (o->object.type >= _OBJECT_TYPE_MAX)
/* Reaching the tail object ends the walk */
2594 if (p == le64toh(f->header->tail_object_offset))
/* Objects are laid out back to back at 8-byte aligned offsets */
2597 p = p + ALIGN64(le64toh(o->object.size));
/* Cross-check what pass 1 counted and saw last against the header */
2600 if (n_objects != le64toh(f->header->n_objects)) {
2601 log_error("Object number mismatch");
2606 if (n_entries != le64toh(f->header->n_entries)) {
2607 log_error("Entry number mismatch");
/* n_data, n_fields and n_tags exist only in headers large enough to hold them */
2612 if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2613 n_data != le64toh(f->header->n_data)) {
2614 log_error("Data number mismatch");
2619 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2620 n_fields != le64toh(f->header->n_fields)) {
2621 log_error("Field number mismatch");
2626 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
2627 tag_seqnum != le64toh(f->header->n_tags)) {
2628 log_error("Tag number mismatch");
2633 if (n_data_hash_tables != 1) {
2634 log_error("Missing data hash table");
2639 if (n_field_hash_tables != 1) {
2640 log_error("Missing field hash table");
2645 if (!found_main_entry_array) {
2646 log_error("Missing entry array");
/* The last entry seen must agree with the tail fields of the header */
2651 if (entry_seqnum_set &&
2652 entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
2653 log_error("Invalid tail seqnum");
2658 if (entry_monotonic_set &&
2659 (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
2660 entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
2661 log_error("Invalid tail monotonic timestamp");
2666 if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
2667 log_error("Invalid tail realtime timestamp");
2672 /* Second iteration: we go through all objects again, this
2673 * time verify all pointers. */
2675 p = le64toh(f->header->header_size);
/* Pass 2 maps to 0x8000..0xFFFF of the progress range */
2677 draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
2679 r = journal_file_move_to_object(f, -1, p, &o);
2681 log_error("Invalid object at %llu", (unsigned long long) p);
2685 if (o->object.type == OBJECT_ENTRY_ARRAY) {
/* A non-zero "next" link must point to an entry array recorded in pass 1 */
2688 if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
2689 !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
2690 log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
2695 n = journal_file_entry_array_n_items(o);
2696 for (i = 0; i < n; i++) {
/* Every non-zero item must point to an entry recorded in pass 1.
 * NOTE(review): the message below speaks of a "next array" although
 * the failed lookup is for an entry offset. */
2697 if (le64toh(o->entry_array.items[i]) != 0 &&
2698 !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
2700 log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
/* Look the current object up again before continuing with it */
2708 r = journal_file_move_to_object(f, -1, p, &o);
2710 log_error("Invalid object at %llu", (unsigned long long) p);
2714 if (p == le64toh(f->header->tail_object_offset))
2717 p = p + ALIGN64(le64toh(o->object.size));
/* Regular exit: detach the temporary files from the mmap cache, then close them */
2722 mmap_cache_close_fd(f->mmap, data_fd);
2723 mmap_cache_close_fd(f->mmap, entry_fd);
2724 mmap_cache_close_fd(f->mmap, entry_array_fd);
2726 close_nointr_nofail(data_fd);
2727 close_nointr_nofail(entry_fd);
2728 close_nointr_nofail(entry_array_fd);
/* Failure exit: report where the walk stopped, then release whatever was opened */
2735 log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
2737 (unsigned long long) p,
2738 (unsigned long long) f->last_stat.st_size,
2739 (unsigned long long) (100 * p / f->last_stat.st_size));
2742 mmap_cache_close_fd(f->mmap, data_fd);
2743 close_nointr_nofail(data_fd);
2746 if (entry_fd >= 0) {
2747 mmap_cache_close_fd(f->mmap, entry_fd);
2748 close_nointr_nofail(entry_fd);
2751 if (entry_array_fd >= 0) {
2752 mmap_cache_close_fd(f->mmap, entry_array_fd);
2753 close_nointr_nofail(entry_array_fd);
/* Debugging aid: prints the header summary and then one "Type: ..."
 * line per object to stdout, walking the objects from header_size up
 * to tail_object_offset. Entry objects additionally show seqnum,
 * monotonic and realtime stamps, tag objects their seqnum; compressed
 * objects get an extra "Flags: COMPRESSED" line. "File corrupt" is
 * logged on the failure path. */
2759 void journal_file_dump(JournalFile *f) {
2766 journal_file_print_header(f);
/* The first object follows directly after the header */
2768 p = le64toh(f->header->header_size);
2770 r = journal_file_move_to_object(f, -1, p, &o);
2774 switch (o->object.type) {
2777 printf("Type: OBJECT_UNUSED\n");
2781 printf("Type: OBJECT_DATA\n");
2785 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2786 (unsigned long long) le64toh(o->entry.seqnum),
2787 (unsigned long long) le64toh(o->entry.monotonic),
2788 (unsigned long long) le64toh(o->entry.realtime));
2791 case OBJECT_FIELD_HASH_TABLE:
2792 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2795 case OBJECT_DATA_HASH_TABLE:
2796 printf("Type: OBJECT_DATA_HASH_TABLE\n");
2799 case OBJECT_ENTRY_ARRAY:
2800 printf("Type: OBJECT_ENTRY_ARRAY\n");
2804 printf("Type: OBJECT_TAG %llu\n",
2805 (unsigned long long) le64toh(o->tag.seqnum));
2809 if (o->object.flags & OBJECT_COMPRESSED)
2810 printf("Flags: COMPRESSED\n");
/* The tail object is the last one */
2812 if (p == le64toh(f->header->tail_object_offset))
/* Objects are laid out back to back at 8-byte aligned offsets */
2815 p = p + ALIGN64(le64toh(o->object.size));
2820 log_error("File corrupt");
/* Prints a human-readable summary of the file header to stdout: path,
 * the 128-bit IDs, state, compatible/incompatible flags, sizes, object
 * counts, whether rotation is suggested, and head/tail sequence numbers
 * and realtime stamps. Data/field object counts and the hash table fill
 * levels are printed only if the header is large enough to contain
 * n_data / n_fields.
 *
 * Every ID is formatted into a buffer of its own: sd_id128_to_string()
 * returns the buffer it was handed, and all arguments are evaluated
 * before printf() runs, so two IDs sharing one buffer would both print
 * as whichever was formatted last.
 *
 * The flag fields are stored little-endian and are converted with
 * le32toh() before testing bits, like everywhere else in this file. */
2823 void journal_file_print_header(JournalFile *f) {
2824 char a[33], b[33], c[33], d[33];
2825 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2829 printf("File Path: %s\n"
2833 "Sequential Number ID: %s\n"
2835 "Compatible Flags:%s%s\n"
2836 "Incompatible Flags:%s%s\n"
2837 "Header size: %llu\n"
2838 "Arena size: %llu\n"
2839 "Data Hash Table Size: %llu\n"
2840 "Field Hash Table Size: %llu\n"
2842 "Entry Objects: %llu\n"
2843 "Rotate Suggested: %s\n"
2844 "Head Sequential Number: %llu\n"
2845 "Tail Sequential Number: %llu\n"
2846 "Head Realtime Timestamp: %s\n"
2847 "Tail Realtime Timestamp: %s\n",
2849 sd_id128_to_string(f->header->file_id, a),
2850 sd_id128_to_string(f->header->machine_id, b),
2851 sd_id128_to_string(f->header->boot_id, c),
2852 sd_id128_to_string(f->header->seqnum_id, d),
2853 f->header->state == STATE_OFFLINE ? "offline" :
2854 f->header->state == STATE_ONLINE ? "online" :
2855 f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
2856 (le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2857 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
2858 (le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2859 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
2860 (unsigned long long) le64toh(f->header->header_size),
2861 (unsigned long long) le64toh(f->header->arena_size),
2862 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2863 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2864 (unsigned long long) le64toh(f->header->n_objects),
2865 (unsigned long long) le64toh(f->header->n_entries),
2866 yes_no(journal_file_rotate_suggested(f)),
2867 (unsigned long long) le64toh(f->header->head_entry_seqnum),
2868 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
2869 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2870 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
/* Fill level = objects / hash table buckets, as a percentage */
2872 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2873 printf("Data Objects: %llu\n"
2874 "Data Hash Table Fill: %.1f%%\n",
2875 (unsigned long long) le64toh(f->header->n_data),
2876 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2878 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2879 printf("Field Objects: %llu\n"
2880 "Field Hash Table Fill: %.1f%%\n",
2881 (unsigned long long) le64toh(f->header->n_fields),
2882 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
/* Opens (and, for an empty writable file, initializes) a journal file
 * and hands the resulting JournalFile back through 'ret'.
 *
 * Visible constraints and steps:
 *  - the access mode in 'flags' must be O_RDONLY or O_RDWR, and the
 *    file name must end in ".journal"
 *  - a JournalFile is allocated and set up from the arguments; the
 *    mmap cache is either a reference to 'mmap_cache' or a new cache
 *  - a writable file of size zero is treated as newly created: the
 *    FSPRG state is loaded (authentication is switched off if that
 *    does not work) and the header is initialized, optionally based
 *    on 'template'
 *  - the header is mapped; for existing files it is verified, and for
 *    existing writable files the FSPRG state is loaded
 *  - metrics come from 'metrics' (completed by
 *    journal_default_metrics()) or else from 'template'
 *  - new files get their hash tables and the first tag; then both hash
 *    tables are mapped
 *  - on failure the partially set up file is closed again */
2885 int journal_file_open(
2891 JournalMetrics *metrics,
2892 MMapCache *mmap_cache,
2893 JournalFile *template,
2894 JournalFile **ret) {
2898 bool newly_created = false;
/* Write-only access is not supported */
2902 if ((flags & O_ACCMODE) != O_RDONLY &&
2903 (flags & O_ACCMODE) != O_RDWR)
2906 if (!endswith(fname, ".journal"))
2909 f = new0(JournalFile, 1);
2917 f->prot = prot_from_flags(flags);
2918 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2919 f->compress = compress;
2920 f->authenticate = authenticate;
/* Share the cache that was passed in ... (presumably when one was given) */
2923 f->mmap = mmap_cache_ref(mmap_cache);
/* ... or create a private one */
2925 /* One context for each type, plus the zeroth catchall
2926 * context. One fd for the file plus one for each type
2927 * (which we need during verification */
2928 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2935 f->path = strdup(fname);
2941 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2947 if (fstat(f->fd, &f->last_stat) < 0) {
/* An empty file opened for writing is one we have to set up ourselves */
2952 if (f->last_stat.st_size == 0 && f->writable) {
2953 newly_created = true;
2955 /* Try to load the FSPRG state, and if we can't, then
2956 * just don't do authentication */
2957 r = journal_file_load_fsprg(f);
2959 f->authenticate = false;
2961 r = journal_file_init_header(f, template);
/* Refresh the stat data after the header was written */
2965 if (fstat(f->fd, &f->last_stat) < 0) {
/* Anything shorter than the minimal header cannot be a journal file */
2971 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2976 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2977 if (f->header == MAP_FAILED) {
/* Only pre-existing files need their header checked */
2983 if (!newly_created) {
2984 r = journal_file_verify_header(f);
/* New files loaded the FSPRG state above already */
2989 if (!newly_created && f->writable) {
2990 r = journal_file_load_fsprg(f);
/* Fill in unset metrics, then take a private copy */
2997 journal_default_metrics(metrics, f->fd);
2998 f->metrics = *metrics;
2999 } else if (template)
3000 f->metrics = template->metrics;
3002 r = journal_file_refresh_header(f);
3006 r = journal_file_setup_hmac(f);
3011 if (newly_created) {
3012 r = journal_file_setup_field_hash_table(f);
3016 r = journal_file_setup_data_hash_table(f);
3020 r = journal_file_append_first_tag(f);
3025 r = journal_file_map_field_hash_table(f);
3029 r = journal_file_map_data_hash_table(f);
/* Failure path: release everything acquired so far */
3039 journal_file_close(f);
/* Rotates a writable journal file: the current file is renamed to an
 * archive name, marked STATE_ARCHIVED, and a fresh file is opened under
 * the original path, using the old file as template and sharing its
 * mmap cache. The old file is closed afterwards.
 *
 * The archive name is built from the old path without its ".journal"
 * suffix (8 characters), one separator character, the 32-character
 * seqnum ID, and "-<tail seqnum>-<tail realtime>.journal", both numbers
 * as 16 hex digits. The buffer is sized for exactly these pieces. */
3044 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
3047 JournalFile *old_file, *new_file = NULL;
3055 if (!old_file->writable)
3058 if (!endswith(old_file->path, ".journal"))
3061 l = strlen(old_file->path);
/* l - 8 (path without suffix) + 1 + 32 + 1 + 16 + 1 + 16 + 8 (".journal") + 1 (NUL) */
3063 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
/* Copy the path up to, but not including, ".journal" */
3067 memcpy(p, old_file->path, l - 8);
/* The ID goes one character after the copied prefix */
3069 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
3070 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
3071 "-%016llx-%016llx.journal",
3072 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
3073 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
3075 r = rename(old_file->path, p);
3081 old_file->header->state = STATE_ARCHIVED;
/* The new file takes over path, flags and mode of the old one */
3083 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
3084 journal_file_close(old_file);
/* Like journal_file_open(), but with one recovery attempt: if opening
 * fails with one of the error codes listed below, the request is for
 * write access with O_CREAT, and the name ends in ".journal", the
 * existing file is renamed to "<name>@<hex>-<hex>.journal~" (the first
 * number being the current realtime clock), a warning is logged, and
 * journal_file_open() is called once more with the same arguments. */
3090 int journal_file_open_reliably(
3096 JournalMetrics *metrics,
3098 JournalFile *template,
3099 JournalFile **ret) {
3105 r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
/* Only these results qualify for the rename-and-retry path */
3106 if (r != -EBADMSG && /* corrupted */
3107 r != -ENODATA && /* truncated */
3108 r != -EHOSTDOWN && /* other machine */
3109 r != -EPROTONOSUPPORT && /* incompatible feature */
3110 r != -EBUSY && /* unclean shutdown */
3111 r != -ESHUTDOWN /* already archived */)
/* Further preconditions for replacing the file */
3114 if ((flags & O_ACCMODE) == O_RDONLY)
3117 if (!(flags & O_CREAT))
3120 if (!endswith(fname, ".journal"))
3123 /* The file is corrupted. Rotate it away and try it again (but only once) */
3126 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
3128 (unsigned long long) now(CLOCK_REALTIME),
3132 r = rename(fname, p);
3137 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
3139 return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
/* One candidate file for journal_directory_vacuum(). Going by how that
 * function fills the records, each one carries the file name, its disk
 * usage, a sequence number and realtime stamp parsed from the name, the
 * seqnum ID below, and a flag telling whether the sequence number
 * information is valid. */
3142 struct vacuum_info {
/* ID the sequence number belongs to; vacuum_compare() only compares
 * sequence numbers of records whose IDs are equal */
3147 sd_id128_t seqnum_id;
/* qsort() comparison callback for struct vacuum_info records.
 * Ordering criteria, in this order:
 *  1. if both records have a sequence number from the same seqnum ID,
 *     the sequence number decides (when it differs)
 *  2. otherwise the realtime stamp decides (when it differs)
 *  3. on equal realtime, records that both have sequence numbers are
 *     ordered by the raw bytes of their seqnum IDs
 *  4. everything else is ordered by file name */
3153 static int vacuum_compare(const void *_a, const void *_b) {
3154 const struct vacuum_info *a, *b;
3159 if (a->have_seqnum && b->have_seqnum &&
3160 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
3161 if (a->seqnum < b->seqnum)
3163 else if (a->seqnum > b->seqnum)
3169 if (a->realtime < b->realtime)
3171 else if (a->realtime > b->realtime)
3173 else if (a->have_seqnum && b->have_seqnum)
3174 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
3176 return strcmp(a->filename, b->filename);
/* Deletes archived and corrupted journal files from 'directory' until
 * the summed disk usage of the candidates is at most 'max_use' and the
 * file system has at least 'min_free' bytes available.
 *
 * Candidates are regular files named either
 *   ...@<32 char seqnum id>-<16 hex seqnum>-<16 hex realtime>.journal
 * (archived) or
 *   ...@<16 hex realtime>-<16 hex>.journal~
 * (corrupted, no sequence number information). They are collected into
 * a growing array, sorted with vacuum_compare() and removed in that
 * order, with the free space re-read before every removal. */
3179 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
3182 struct vacuum_info *list = NULL;
3183 unsigned n_list = 0, n_allocated = 0, i;
3191 d = opendir(directory);
3197 struct dirent buf, *de;
3201 unsigned long long seqnum = 0, realtime;
3202 sd_id128_t seqnum_id;
3205 k = readdir_r(d, &buf, &de);
/* Symlinks are not followed; only regular files are considered */
3214 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
3217 if (!S_ISREG(st.st_mode))
3220 q = strlen(de->d_name);
3222 if (endswith(de->d_name, ".journal")) {
3224 /* Vacuum archived files */
/* Minimum length: '@' + 32 + '-' + 16 + '-' + 16 + ".journal" */
3226 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
/* The separators must sit at their fixed distances from the end */
3229 if (de->d_name[q-8-16-1] != '-' ||
3230 de->d_name[q-8-16-1-16-1] != '-' ||
3231 de->d_name[q-8-16-1-16-1-32-1] != '@')
/* Keep an unmodified copy of the name; d_name is cut up below */
3234 p = strdup(de->d_name);
/* Terminate the string right after the seqnum id so it can be parsed */
3240 de->d_name[q-8-16-1-16-1] = 0;
3241 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
3246 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
3253 } else if (endswith(de->d_name, ".journal~")) {
3254 unsigned long long tmp;
3256 /* Vacuum corrupted files */
/* Minimum length: '@' + 16 + '-' + 16 + ".journal" + '~' */
3258 if (q < 1 + 16 + 1 + 16 + 8 + 1)
3261 if (de->d_name[q-1-8-16-1] != '-' ||
3262 de->d_name[q-1-8-16-1-16-1] != '@')
3265 p = strdup(de->d_name);
/* Only the first number (realtime) is used; the second is discarded */
3271 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
3276 have_seqnum = false;
/* Grow the array geometrically, starting with 8 slots */
3280 if (n_list >= n_allocated) {
3281 struct vacuum_info *j;
3283 n_allocated = MAX(n_allocated * 2U, 8U);
3284 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
3294 list[n_list].filename = p;
/* Disk usage is derived from st_blocks, counted as 512-byte units */
3295 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
3296 list[n_list].seqnum = seqnum;
3297 list[n_list].realtime = realtime;
/* NOTE(review): for ".journal~" files seqnum_id was never parsed, yet
 * it is copied here; vacuum_compare() only reads it when have_seqnum
 * is set. */
3298 list[n_list].seqnum_id = seqnum_id;
3299 list[n_list].have_seqnum = have_seqnum;
3301 sum += list[n_list].usage;
3307 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
3309 for(i = 0; i < n_list; i++) {
/* Re-read the free space before every removal */
3312 if (fstatvfs(dirfd(d), &ss) < 0) {
/* Both limits satisfied */
3317 if (sum <= max_use &&
3318 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
3321 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
3322 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
3323 sum -= list[i].usage;
/* A file that is already gone is not worth a warning */
3324 } else if (errno != ENOENT)
3325 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
/* Release the copied file names */
3329 for (i = 0; i < n_list; i++)
3330 free(list[i].filename);
/* Copies the entry object 'o' (located at offset 'p' in 'from') into
 * the journal file 'to'.
 *
 * For every item of the entry, the referenced data object is looked up
 * in 'from', its hash is compared with the hash stored in the item, the
 * payload is decompressed if needed, and the data is appended to 'to'.
 * The resulting offsets and hashes form the item list of the new entry,
 * which is appended with the original time stamps and the XOR of all
 * data hashes. 'seqnum', 'ret' and 'offset' are passed through to
 * journal_file_append_entry_internal().
 *
 * An entry whose monotonic time stamp is older than the tail of 'to'
 * (when that tail value is valid) is handled separately up front.
 *
 * NOTE(review): the item array is allocated with alloca() from a count
 * that comes out of the file; no upper bound is visible here. */
3340 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
3342 uint64_t q, xor_hash = 0;
3355 ts.monotonic = le64toh(o->entry.monotonic);
3356 ts.realtime = le64toh(o->entry.realtime);
3358 if (to->tail_entry_monotonic_valid &&
3359 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
3362 n = journal_file_entry_n_items(o);
3363 items = alloca(sizeof(EntryItem) * n);
3365 for (i = 0; i < n; i++) {
/* Take what we need from the item now: 'o' is re-pointed below */
3372 q = le64toh(o->entry.items[i].object_offset);
3373 le_hash = o->entry.items[i].hash;
3375 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The item's hash must match the data object it points to */
3379 if (le_hash != o->data.hash)
/* Payload length = object size minus the offset of the payload */
3382 l = le64toh(o->object.size) - offsetof(Object, data.payload);
3385 /* We hit the limit on 32bit machines */
3386 if ((uint64_t) t != l)
3389 if (o->object.flags & OBJECT_COMPRESSED) {
/* Decompress into the per-file scratch buffer of 'from' */
3393 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
3396 data = from->compress_buffer;
3399 return -EPROTONOSUPPORT;
3402 data = o->data.payload;
3404 r = journal_file_append_data(to, data, l, &u, &h);
3408 xor_hash ^= le64toh(u->data.hash);
3409 items[i].object_offset = htole64(h);
3410 items[i].hash = u->data.hash;
/* Point 'o' back at the source entry for the next iteration */
3412 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
3417 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
/* Completes and normalizes the size limits in 'm'. Fields set to
 * (uint64_t) -1 count as unset and are derived; 'fd' is used to query
 * the size of the file system (fs_size stays 0 if fstatvfs() fails).
 *
 *   max_use   - unset: 10% of the file system, clamped to
 *               [DEFAULT_MAX_USE_LOWER, DEFAULT_MAX_USE_UPPER], with
 *               DEFAULT_MAX_USE_LOWER as the fallback value; set:
 *               page aligned and at least 2 * JOURNAL_FILE_SIZE_MIN
 *   max_size  - unset: an eighth of max_use, at most
 *               DEFAULT_MAX_SIZE_UPPER; set: page aligned; never below
 *               JOURNAL_FILE_SIZE_MIN, and max_use is raised so that
 *               it holds at least two files of max_size
 *   min_size  - unset: JOURNAL_FILE_SIZE_MIN; set: page aligned and at
 *               least JOURNAL_FILE_SIZE_MIN; max_size is raised to
 *               min_size if it is smaller
 *   keep_free - unset: 5% of the file system, at most
 *               DEFAULT_KEEP_FREE_UPPER, with DEFAULT_KEEP_FREE as the
 *               fallback value
 *
 * The resulting values are logged at info level. */
3420 void journal_default_metrics(JournalMetrics *m, int fd) {
3421 uint64_t fs_size = 0;
3423 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
3428 if (fstatvfs(fd, &ss) >= 0)
3429 fs_size = ss.f_frsize * ss.f_blocks;
3431 if (m->max_use == (uint64_t) -1) {
3434 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
3436 if (m->max_use > DEFAULT_MAX_USE_UPPER)
3437 m->max_use = DEFAULT_MAX_USE_UPPER;
3439 if (m->max_use < DEFAULT_MAX_USE_LOWER)
3440 m->max_use = DEFAULT_MAX_USE_LOWER;
/* Fallback (presumably when the file system size is unknown) */
3442 m->max_use = DEFAULT_MAX_USE_LOWER;
/* Explicitly configured value */
3444 m->max_use = PAGE_ALIGN(m->max_use);
3446 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
3447 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
3450 if (m->max_size == (uint64_t) -1) {
3451 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
3453 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
3454 m->max_size = DEFAULT_MAX_SIZE_UPPER;
/* Explicitly configured value */
3456 m->max_size = PAGE_ALIGN(m->max_size);
3458 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
3459 m->max_size = JOURNAL_FILE_SIZE_MIN;
/* Room for at least two files of maximum size */
3461 if (m->max_size*2 > m->max_use)
3462 m->max_use = m->max_size*2;
3464 if (m->min_size == (uint64_t) -1)
3465 m->min_size = JOURNAL_FILE_SIZE_MIN;
3467 m->min_size = PAGE_ALIGN(m->min_size);
3469 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
3470 m->min_size = JOURNAL_FILE_SIZE_MIN;
/* Keep min_size <= max_size by raising the maximum */
3472 if (m->min_size > m->max_size)
3473 m->max_size = m->min_size;
3476 if (m->keep_free == (uint64_t) -1) {
3479 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
3481 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
3482 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
/* Fallback (presumably when the file system size is unknown) */
3485 m->keep_free = DEFAULT_KEEP_FREE;
3488 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
3489 format_bytes(a, sizeof(a), m->max_use),
3490 format_bytes(b, sizeof(b), m->max_size),
3491 format_bytes(c, sizeof(c), m->min_size),
3492 format_bytes(d, sizeof(d), m->keep_free));
/* Reports the realtime range covered by the file: '*from' receives the
 * head entry's realtime stamp and '*to' the tail entry's, both taken
 * from the header and converted to host byte order. A stored value of
 * zero is special-cased before the respective output is written. */
3495 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
/* Zero is zero in any byte order, so no conversion is needed to test it */
3500 if (f->header->head_entry_realtime == 0)
3503 *from = le64toh(f->header->head_entry_realtime);
3507 if (f->header->tail_entry_realtime == 0)
3510 *to = le64toh(f->header->tail_entry_realtime);
/* Reports the monotonic time range the file covers for one boot.
 *
 * The boot is located through its "_BOOT_ID=<id>" data object. '*from'
 * is the monotonic stamp of the entry at the data object's
 * entry_offset; '*to' is the monotonic stamp of the entry with index
 * n_entries - 1 in the data object's entry list, fetched via
 * generic_array_get_plus_one(). A data object with no entries is
 * special-cased. */
3516 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* 9 characters of prefix + 32 characters of ID + NUL */
3517 char t[9+32+1] = "_BOOT_ID=";
3525 sd_id128_to_string(boot_id, t + 9);
3527 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
3531 if (le64toh(o->data.n_entries) <= 0)
/* 'o' is re-pointed from the data object to the entry at entry_offset */
3535 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3539 *from = le64toh(o->entry.monotonic);
/* Point 'o' back at the data object (at offset p) before reading it again */
3543 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3547 r = generic_array_get_plus_one(f,
3548 le64toh(o->data.entry_offset),
3549 le64toh(o->data.entry_array_offset),
3550 le64toh(o->data.n_entries)-1,
3555 *to = le64toh(o->entry.monotonic);
/* Heuristic telling whether the file should be rotated. Three
 * conditions are checked and each one logs a debug message:
 *  - the header is smaller than the current Header structure
 *  - the data hash table is more than 75% full
 *  - the field hash table is more than 75% full
 * The fill checks only run if the header is large enough to contain
 * the respective counter. */
3561 bool journal_file_rotate_suggested(JournalFile *f) {
3564 /* If we gained new header fields we gained new features,
3565 * hence suggest a rotation */
3566 if (le64toh(f->header->header_size) < sizeof(Header)) {
3567 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
3571 /* Let's check if the hash tables grew over a certain fill
3572 * level (75%, borrowing this value from Java's hash table
3573 * implementation), and if so suggest a rotation. To calculate
3574 * the fill level we need the n_data field, which only exists
3575 * in newer versions. */
/* n * 4 > buckets * 3 is the integer form of n / buckets > 75% */
3577 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
3578 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3579 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3581 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3582 (unsigned long long) le64toh(f->header->n_data),
3583 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3584 (unsigned long long) (f->last_stat.st_size),
3585 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
/* Same test for the field hash table */
3589 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
3590 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3591 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3593 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3594 (unsigned long long) le64toh(f->header->n_fields),
3595 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));