1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
45 /* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
53 /* This is the upper bound if we deduce the keep_free value from the file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57 /* This is the keep_free value when we can't determine the file system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
64 void journal_file_close(JournalFile *f) {
68 /* Write the final tag */
69 if (f->seal && f->writable)
70 journal_file_append_tag(f);
73 /* Sync everything to disk, before we mark the file offline */
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
77 if (f->writable && f->fd >= 0)
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
83 f->header->state = STATE_OFFLINE;
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
89 close_nointr_nofail(f->fd);
94 mmap_cache_unref(f->mmap);
97 free(f->compress_buffer);
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
109 gcry_md_close(f->hmac);
115 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
123 memcpy(h.signature, HEADER_SIGNATURE, 8);
124 h.header_size = htole64(ALIGN64(sizeof(h)));
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
132 r = sd_id128_randomize(&h.file_id);
137 h.seqnum_id = template->header->seqnum_id;
138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
140 h.seqnum_id = h.file_id;
142 k = pwrite(f->fd, &h, sizeof(h), 0);
152 static int journal_file_refresh_header(JournalFile *f) {
158 r = sd_id128_get_machine(&f->header->machine_id);
162 r = sd_id128_get_boot(&boot_id);
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
169 f->header->boot_id = boot_id;
171 f->header->state = STATE_ONLINE;
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
180 static int journal_file_verify_header(JournalFile *f) {
183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
190 return -EPROTONOSUPPORT;
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
201 return -EPROTONOSUPPORT;
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
208 if (f->header->state >= _STATE_MAX)
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
238 sd_id128_t machine_id;
241 r = sd_id128_get_machine(&machine_id);
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
248 state = f->header->state;
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
253 } else if (state == STATE_ARCHIVED)
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
264 f->seal = JOURNAL_HEADER_SEALED(f->header);
269 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
270 uint64_t old_size, new_size;
275 /* We assume that this file is not sparse, and we know that
276 * for sure, since we always call posix_fallocate()
280 le64toh(f->header->header_size) +
281 le64toh(f->header->arena_size);
283 new_size = PAGE_ALIGN(offset + size);
284 if (new_size < le64toh(f->header->header_size))
285 new_size = le64toh(f->header->header_size);
287 if (new_size <= old_size)
290 if (f->metrics.max_size > 0 &&
291 new_size > f->metrics.max_size)
294 if (new_size > f->metrics.min_size &&
295 f->metrics.keep_free > 0) {
298 if (fstatvfs(f->fd, &svfs) >= 0) {
301 available = svfs.f_bfree * svfs.f_bsize;
303 if (available >= f->metrics.keep_free)
304 available -= f->metrics.keep_free;
308 if (new_size - old_size > available)
313 /* Note that the glibc fallocate() fallback is very
314 inefficient, hence we try to minimize the allocation area
316 r = posix_fallocate(f->fd, old_size, new_size - old_size);
320 if (fstat(f->fd, &f->last_stat) < 0)
323 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
328 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
335 /* Avoid SIGBUS on invalid accesses */
336 if (offset + size > (uint64_t) f->last_stat.st_size) {
337 /* Hmm, out of range? Let's refresh the fstat() data
338 * first, before we trust that check. */
340 if (fstat(f->fd, &f->last_stat) < 0 ||
341 offset + size > (uint64_t) f->last_stat.st_size)
342 return -EADDRNOTAVAIL;
345 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
348 static uint64_t minimum_header_size(Object *o) {
350 static uint64_t table[] = {
351 [OBJECT_DATA] = sizeof(DataObject),
352 [OBJECT_FIELD] = sizeof(FieldObject),
353 [OBJECT_ENTRY] = sizeof(EntryObject),
354 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
356 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
357 [OBJECT_TAG] = sizeof(TagObject),
360 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
361 return sizeof(ObjectHeader);
363 return table[o->object.type];
366 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
376 /* Objects may only be located at multiple of 64 bit */
377 if (!VALID64(offset))
380 /* One context for each type, plus one catch-all for the rest */
381 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
383 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
388 s = le64toh(o->object.size);
390 if (s < sizeof(ObjectHeader))
393 if (o->object.type <= OBJECT_UNUSED)
396 if (s < minimum_header_size(o))
399 if (type >= 0 && o->object.type != type)
402 if (s > sizeof(ObjectHeader)) {
403 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
414 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
419 r = le64toh(f->header->tail_entry_seqnum) + 1;
422 /* If an external seqnum counter was passed, we update
423 * both the local and the external one, and set it to
424 * the maximum of both */
432 f->header->tail_entry_seqnum = htole64(r);
434 if (f->header->head_entry_seqnum == 0)
435 f->header->head_entry_seqnum = htole64(r);
440 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
447 assert(type > 0 && type < _OBJECT_TYPE_MAX);
448 assert(size >= sizeof(ObjectHeader));
452 p = le64toh(f->header->tail_object_offset);
454 p = le64toh(f->header->header_size);
456 r = journal_file_move_to_object(f, -1, p, &tail);
460 p += ALIGN64(le64toh(tail->object.size));
463 r = journal_file_allocate(f, p, size);
467 r = journal_file_move_to(f, type, false, p, size, &t);
474 o->object.type = type;
475 o->object.size = htole64(size);
477 f->header->tail_object_offset = htole64(p);
478 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
486 static int journal_file_setup_data_hash_table(JournalFile *f) {
493 /* We estimate that we need 1 hash table entry per 768 of
494 journal file and we want to make sure we never get beyond
495 75% fill level. Calculate the hash table size for the
496 maximum file size based on these metrics. */
498 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
499 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
500 s = DEFAULT_DATA_HASH_TABLE_SIZE;
502 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
511 memset(o->hash_table.items, 0, s);
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
519 static int journal_file_setup_field_hash_table(JournalFile *f) {
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
534 memset(o->hash_table.items, 0, s);
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
542 static int journal_file_map_data_hash_table(JournalFile *f) {
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
552 r = journal_file_move_to(f,
553 OBJECT_DATA_HASH_TABLE,
560 f->data_hash_table = t;
564 static int journal_file_map_field_hash_table(JournalFile *f) {
571 p = le64toh(f->header->field_hash_table_offset);
572 s = le64toh(f->header->field_hash_table_size);
574 r = journal_file_move_to(f,
575 OBJECT_FIELD_HASH_TABLE,
582 f->field_hash_table = t;
586 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
594 if (o->object.type != OBJECT_DATA)
597 /* This might alter the window we are looking at */
599 o->data.next_hash_offset = o->data.next_field_offset = 0;
600 o->data.entry_offset = o->data.entry_array_offset = 0;
601 o->data.n_entries = 0;
603 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
604 p = le64toh(f->data_hash_table[h].tail_hash_offset);
606 /* Only entry in the hash table is easy */
607 f->data_hash_table[h].head_hash_offset = htole64(offset);
609 /* Move back to the previous data object, to patch in
612 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
616 o->data.next_hash_offset = htole64(offset);
619 f->data_hash_table[h].tail_hash_offset = htole64(offset);
621 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
622 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
627 int journal_file_find_data_object_with_hash(
629 const void *data, uint64_t size, uint64_t hash,
630 Object **ret, uint64_t *offset) {
632 uint64_t p, osize, h;
636 assert(data || size == 0);
638 osize = offsetof(Object, data.payload) + size;
640 if (f->header->data_hash_table_size == 0)
643 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
644 p = le64toh(f->data_hash_table[h].head_hash_offset);
649 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
653 if (le64toh(o->data.hash) != hash)
656 if (o->object.flags & OBJECT_COMPRESSED) {
660 l = le64toh(o->object.size);
661 if (l <= offsetof(Object, data.payload))
664 l -= offsetof(Object, data.payload);
666 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
670 memcmp(f->compress_buffer, data, size) == 0) {
681 return -EPROTONOSUPPORT;
684 } else if (le64toh(o->object.size) == osize &&
685 memcmp(o->data.payload, data, size) == 0) {
697 p = le64toh(o->data.next_hash_offset);
703 int journal_file_find_data_object(
705 const void *data, uint64_t size,
706 Object **ret, uint64_t *offset) {
711 assert(data || size == 0);
713 hash = hash64(data, size);
715 return journal_file_find_data_object_with_hash(f,
720 static int journal_file_append_data(
722 const void *data, uint64_t size,
723 Object **ret, uint64_t *offset) {
729 bool compressed = false;
732 assert(data || size == 0);
734 hash = hash64(data, size);
736 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
750 osize = offsetof(Object, data.payload) + size;
751 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
755 o->data.hash = htole64(hash);
759 size >= COMPRESSION_SIZE_THRESHOLD) {
762 compressed = compress_blob(data, size, o->data.payload, &rsize);
765 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
766 o->object.flags |= OBJECT_COMPRESSED;
768 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
773 if (!compressed && size > 0)
774 memcpy(o->data.payload, data, size);
776 r = journal_file_link_data(f, o, p, hash);
780 /* The linking might have altered the window, so let's
781 * refresh our pointer */
782 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
787 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
801 uint64_t journal_file_entry_n_items(Object *o) {
804 if (o->object.type != OBJECT_ENTRY)
807 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
810 uint64_t journal_file_entry_array_n_items(Object *o) {
813 if (o->object.type != OBJECT_ENTRY_ARRAY)
816 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
819 uint64_t journal_file_hash_table_n_items(Object *o) {
822 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
823 o->object.type != OBJECT_FIELD_HASH_TABLE)
826 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
829 static int link_entry_into_array(JournalFile *f,
834 uint64_t n = 0, ap = 0, q, i, a, hidx;
843 i = hidx = le64toh(*idx);
846 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
850 n = journal_file_entry_array_n_items(o);
852 o->entry_array.items[i] = htole64(p);
853 *idx = htole64(hidx + 1);
859 a = le64toh(o->entry_array.next_entry_array_offset);
870 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
871 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
877 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
882 o->entry_array.items[i] = htole64(p);
887 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
891 o->entry_array.next_entry_array_offset = htole64(q);
894 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
895 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
897 *idx = htole64(hidx + 1);
902 static int link_entry_into_array_plus_one(JournalFile *f,
921 i = htole64(le64toh(*idx) - 1);
922 r = link_entry_into_array(f, first, &i, p);
927 *idx = htole64(le64toh(*idx) + 1);
931 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
938 p = le64toh(o->entry.items[i].object_offset);
942 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
946 return link_entry_into_array_plus_one(f,
947 &o->data.entry_offset,
948 &o->data.entry_array_offset,
953 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
961 if (o->object.type != OBJECT_ENTRY)
964 __sync_synchronize();
966 /* Link up the entry itself */
967 r = link_entry_into_array(f,
968 &f->header->entry_array_offset,
969 &f->header->n_entries,
974 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
976 if (f->header->head_entry_realtime == 0)
977 f->header->head_entry_realtime = o->entry.realtime;
979 f->header->tail_entry_realtime = o->entry.realtime;
980 f->header->tail_entry_monotonic = o->entry.monotonic;
982 f->tail_entry_monotonic_valid = true;
984 /* Link up the items */
985 n = journal_file_entry_n_items(o);
986 for (i = 0; i < n; i++) {
987 r = journal_file_link_entry_item(f, o, offset, i);
995 static int journal_file_append_entry_internal(
997 const dual_timestamp *ts,
999 const EntryItem items[], unsigned n_items,
1001 Object **ret, uint64_t *offset) {
1008 assert(items || n_items == 0);
1011 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1013 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1017 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1018 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1019 o->entry.realtime = htole64(ts->realtime);
1020 o->entry.monotonic = htole64(ts->monotonic);
1021 o->entry.xor_hash = htole64(xor_hash);
1022 o->entry.boot_id = f->header->boot_id;
1025 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1030 r = journal_file_link_entry(f, o, np);
1043 void journal_file_post_change(JournalFile *f) {
1046 /* inotify() does not receive IN_MODIFY events from file
1047 * accesses done via mmap(). After each access we hence
1048 * trigger IN_MODIFY by truncating the journal file to its
1049 * current size which triggers IN_MODIFY. */
1051 __sync_synchronize();
1053 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1054 log_error("Failed to truncate file to its own size: %m");
1057 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1061 uint64_t xor_hash = 0;
1062 struct dual_timestamp _ts;
1065 assert(iovec || n_iovec == 0);
1071 dual_timestamp_get(&_ts);
1075 if (f->tail_entry_monotonic_valid &&
1076 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1080 r = journal_file_maybe_append_tag(f, ts->realtime);
1085 /* alloca() can't take 0, hence let's allocate at least one */
1086 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1088 for (i = 0; i < n_iovec; i++) {
1092 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1096 xor_hash ^= le64toh(o->data.hash);
1097 items[i].object_offset = htole64(p);
1098 items[i].hash = o->data.hash;
1101 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1103 journal_file_post_change(f);
1108 static int generic_array_get(JournalFile *f,
1111 Object **ret, uint64_t *offset) {
1123 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1127 n = journal_file_entry_array_n_items(o);
1129 p = le64toh(o->entry_array.items[i]);
1134 a = le64toh(o->entry_array.next_entry_array_offset);
1137 if (a <= 0 || p <= 0)
1140 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1153 static int generic_array_get_plus_one(JournalFile *f,
1157 Object **ret, uint64_t *offset) {
1166 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1179 return generic_array_get(f, first, i-1, ret, offset);
1188 static int generic_array_bisect(JournalFile *f,
1192 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1193 direction_t direction,
1198 uint64_t a, p, t = 0, i = 0, last_p = 0;
1199 bool subtract_one = false;
1200 Object *o, *array = NULL;
1204 assert(test_object);
1208 uint64_t left, right, k, lp;
1210 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1214 k = journal_file_entry_array_n_items(array);
1220 lp = p = le64toh(array->entry_array.items[i]);
1224 r = test_object(f, p, needle);
1228 if (r == TEST_FOUND)
1229 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1231 if (r == TEST_RIGHT) {
1235 if (left == right) {
1236 if (direction == DIRECTION_UP)
1237 subtract_one = true;
1243 assert(left < right);
1245 i = (left + right) / 2;
1246 p = le64toh(array->entry_array.items[i]);
1250 r = test_object(f, p, needle);
1254 if (r == TEST_FOUND)
1255 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1257 if (r == TEST_RIGHT)
1265 if (direction == DIRECTION_UP) {
1267 subtract_one = true;
1278 a = le64toh(array->entry_array.next_entry_array_offset);
1284 if (subtract_one && t == 0 && i == 0)
1287 if (subtract_one && i == 0)
1289 else if (subtract_one)
1290 p = le64toh(array->entry_array.items[i-1]);
1292 p = le64toh(array->entry_array.items[i]);
1294 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1305 *idx = t + i + (subtract_one ? -1 : 0);
1310 static int generic_array_bisect_plus_one(JournalFile *f,
1315 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1316 direction_t direction,
1322 bool step_back = false;
1326 assert(test_object);
1331 /* This bisects the array in object 'first', but first checks
1333 r = test_object(f, extra, needle);
1337 if (r == TEST_FOUND)
1338 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1340 /* if we are looking with DIRECTION_UP then we need to first
1341 see if in the actual array there is a matching entry, and
1342 return the last one of that. But if there isn't any we need
1343 to return this one. Hence remember this, and return it
1346 step_back = direction == DIRECTION_UP;
1348 if (r == TEST_RIGHT) {
1349 if (direction == DIRECTION_DOWN)
1355 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1357 if (r == 0 && step_back)
1366 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1382 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1388 else if (p < needle)
1394 int journal_file_move_to_entry_by_offset(
1397 direction_t direction,
1401 return generic_array_bisect(f,
1402 le64toh(f->header->entry_array_offset),
1403 le64toh(f->header->n_entries),
1411 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1418 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1422 if (le64toh(o->entry.seqnum) == needle)
1424 else if (le64toh(o->entry.seqnum) < needle)
1430 int journal_file_move_to_entry_by_seqnum(
1433 direction_t direction,
1437 return generic_array_bisect(f,
1438 le64toh(f->header->entry_array_offset),
1439 le64toh(f->header->n_entries),
1446 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1453 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1457 if (le64toh(o->entry.realtime) == needle)
1459 else if (le64toh(o->entry.realtime) < needle)
1465 int journal_file_move_to_entry_by_realtime(
1468 direction_t direction,
1472 return generic_array_bisect(f,
1473 le64toh(f->header->entry_array_offset),
1474 le64toh(f->header->n_entries),
1476 test_object_realtime,
1481 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1488 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1492 if (le64toh(o->entry.monotonic) == needle)
1494 else if (le64toh(o->entry.monotonic) < needle)
1500 int journal_file_move_to_entry_by_monotonic(
1504 direction_t direction,
1508 char t[9+32+1] = "_BOOT_ID=";
1514 sd_id128_to_string(boot_id, t + 9);
1515 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1521 return generic_array_bisect_plus_one(f,
1522 le64toh(o->data.entry_offset),
1523 le64toh(o->data.entry_array_offset),
1524 le64toh(o->data.n_entries),
1526 test_object_monotonic,
1531 int journal_file_next_entry(
1533 Object *o, uint64_t p,
1534 direction_t direction,
1535 Object **ret, uint64_t *offset) {
1541 assert(p > 0 || !o);
1543 n = le64toh(f->header->n_entries);
1548 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1550 if (o->object.type != OBJECT_ENTRY)
1553 r = generic_array_bisect(f,
1554 le64toh(f->header->entry_array_offset),
1555 le64toh(f->header->n_entries),
1564 if (direction == DIRECTION_DOWN) {
1577 /* And jump to it */
1578 return generic_array_get(f,
1579 le64toh(f->header->entry_array_offset),
1584 int journal_file_skip_entry(
1586 Object *o, uint64_t p,
1588 Object **ret, uint64_t *offset) {
1597 if (o->object.type != OBJECT_ENTRY)
1600 r = generic_array_bisect(f,
1601 le64toh(f->header->entry_array_offset),
1602 le64toh(f->header->n_entries),
1611 /* Calculate new index */
1613 if ((uint64_t) -skip >= i)
1616 i = i - (uint64_t) -skip;
1618 i += (uint64_t) skip;
1620 n = le64toh(f->header->n_entries);
1627 return generic_array_get(f,
1628 le64toh(f->header->entry_array_offset),
1633 int journal_file_next_entry_for_data(
1635 Object *o, uint64_t p,
1636 uint64_t data_offset,
1637 direction_t direction,
1638 Object **ret, uint64_t *offset) {
1645 assert(p > 0 || !o);
1647 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1651 n = le64toh(d->data.n_entries);
1656 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1658 if (o->object.type != OBJECT_ENTRY)
1661 r = generic_array_bisect_plus_one(f,
1662 le64toh(d->data.entry_offset),
1663 le64toh(d->data.entry_array_offset),
1664 le64toh(d->data.n_entries),
1674 if (direction == DIRECTION_DOWN) {
1688 return generic_array_get_plus_one(f,
1689 le64toh(d->data.entry_offset),
1690 le64toh(d->data.entry_array_offset),
1695 int journal_file_move_to_entry_by_offset_for_data(
1697 uint64_t data_offset,
1699 direction_t direction,
1700 Object **ret, uint64_t *offset) {
1707 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1711 return generic_array_bisect_plus_one(f,
1712 le64toh(d->data.entry_offset),
1713 le64toh(d->data.entry_array_offset),
1714 le64toh(d->data.n_entries),
1721 int journal_file_move_to_entry_by_monotonic_for_data(
1723 uint64_t data_offset,
1726 direction_t direction,
1727 Object **ret, uint64_t *offset) {
1729 char t[9+32+1] = "_BOOT_ID=";
1736 /* First, seek by time */
1737 sd_id128_to_string(boot_id, t + 9);
1738 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1744 r = generic_array_bisect_plus_one(f,
1745 le64toh(o->data.entry_offset),
1746 le64toh(o->data.entry_array_offset),
1747 le64toh(o->data.n_entries),
1749 test_object_monotonic,
1755 /* And now, continue seeking until we find an entry that
1756 * exists in both bisection arrays */
1762 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1766 r = generic_array_bisect_plus_one(f,
1767 le64toh(d->data.entry_offset),
1768 le64toh(d->data.entry_array_offset),
1769 le64toh(d->data.n_entries),
1777 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1781 r = generic_array_bisect_plus_one(f,
1782 le64toh(o->data.entry_offset),
1783 le64toh(o->data.entry_array_offset),
1784 le64toh(o->data.n_entries),
1808 int journal_file_move_to_entry_by_seqnum_for_data(
1810 uint64_t data_offset,
1812 direction_t direction,
1813 Object **ret, uint64_t *offset) {
1820 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1824 return generic_array_bisect_plus_one(f,
1825 le64toh(d->data.entry_offset),
1826 le64toh(d->data.entry_array_offset),
1827 le64toh(d->data.n_entries),
1834 int journal_file_move_to_entry_by_realtime_for_data(
1836 uint64_t data_offset,
1838 direction_t direction,
1839 Object **ret, uint64_t *offset) {
1846 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1850 return generic_array_bisect_plus_one(f,
1851 le64toh(d->data.entry_offset),
1852 le64toh(d->data.entry_array_offset),
1853 le64toh(d->data.n_entries),
1855 test_object_realtime,
1860 void journal_file_dump(JournalFile *f) {
1867 journal_file_print_header(f);
1869 p = le64toh(f->header->header_size);
1871 r = journal_file_move_to_object(f, -1, p, &o);
1875 switch (o->object.type) {
1878 printf("Type: OBJECT_UNUSED\n");
1882 printf("Type: OBJECT_DATA\n");
1886 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1887 (unsigned long long) le64toh(o->entry.seqnum),
1888 (unsigned long long) le64toh(o->entry.monotonic),
1889 (unsigned long long) le64toh(o->entry.realtime));
1892 case OBJECT_FIELD_HASH_TABLE:
1893 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1896 case OBJECT_DATA_HASH_TABLE:
1897 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1900 case OBJECT_ENTRY_ARRAY:
1901 printf("Type: OBJECT_ENTRY_ARRAY\n");
1905 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1906 (unsigned long long) le64toh(o->tag.seqnum),
1907 (unsigned long long) le64toh(o->tag.epoch));
1911 if (o->object.flags & OBJECT_COMPRESSED)
1912 printf("Flags: COMPRESSED\n");
1914 if (p == le64toh(f->header->tail_object_offset))
1917 p = p + ALIGN64(le64toh(o->object.size));
1922 log_error("File corrupt");
1925 void journal_file_print_header(JournalFile *f) {
1926 char a[33], b[33], c[33];
1927 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1929 char bytes[FORMAT_BYTES_MAX];
1933 printf("File Path: %s\n"
1937 "Sequential Number ID: %s\n"
1939 "Compatible Flags:%s%s\n"
1940 "Incompatible Flags:%s%s\n"
1941 "Header size: %llu\n"
1942 "Arena size: %llu\n"
1943 "Data Hash Table Size: %llu\n"
1944 "Field Hash Table Size: %llu\n"
1945 "Rotate Suggested: %s\n"
1946 "Head Sequential Number: %llu\n"
1947 "Tail Sequential Number: %llu\n"
1948 "Head Realtime Timestamp: %s\n"
1949 "Tail Realtime Timestamp: %s\n"
1951 "Entry Objects: %llu\n",
1953 sd_id128_to_string(f->header->file_id, a),
1954 sd_id128_to_string(f->header->machine_id, b),
1955 sd_id128_to_string(f->header->boot_id, c),
1956 sd_id128_to_string(f->header->seqnum_id, c),
1957 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1958 f->header->state == STATE_ONLINE ? "ONLINE" :
1959 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1960 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1961 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1962 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1963 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1964 (unsigned long long) le64toh(f->header->header_size),
1965 (unsigned long long) le64toh(f->header->arena_size),
1966 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1967 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1968 yes_no(journal_file_rotate_suggested(f)),
1969 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1970 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1971 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1972 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1973 (unsigned long long) le64toh(f->header->n_objects),
1974 (unsigned long long) le64toh(f->header->n_entries));
1976 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1977 printf("Data Objects: %llu\n"
1978 "Data Hash Table Fill: %.1f%%\n",
1979 (unsigned long long) le64toh(f->header->n_data),
1980 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1982 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1983 printf("Field Objects: %llu\n"
1984 "Field Hash Table Fill: %.1f%%\n",
1985 (unsigned long long) le64toh(f->header->n_fields),
1986 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1988 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1989 printf("Tag Objects: %llu\n",
1990 (unsigned long long) le64toh(f->header->n_tags));
1991 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1992 printf("Entry Array Objects: %llu\n",
1993 (unsigned long long) le64toh(f->header->n_entry_arrays));
1995 if (fstat(f->fd, &st) >= 0)
1996 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
/* Opens the journal file fname and sets up a JournalFile object for
 * it. Callers in this file pass, in order: fname, flags, mode,
 * compress, seal, metrics, mmap_cache, template, ret.
 *
 * Visible steps: check access mode and file name suffix, allocate the
 * object, open and stat the file, initialize the header for a newly
 * created file, mmap and (for existing files) verify the header, pick
 * the metrics, refresh the header, set up HMAC, create the hash tables
 * for a new file and finally map both hash tables. */
1999 int journal_file_open(
2005 JournalMetrics *metrics,
2006 MMapCache *mmap_cache,
2007 JournalFile *template,
2008 JournalFile **ret) {
2012 bool newly_created = false;
/* Detect access modes other than O_RDONLY and O_RDWR. */
2016 if ((flags & O_ACCMODE) != O_RDONLY &&
2017 (flags & O_ACCMODE) != O_RDWR)
/* Detect file names carrying neither the ".journal" nor the
 * ".journal~" suffix. */
2020 if (!endswith(fname, ".journal") &&
2021 !endswith(fname, ".journal~"))
2024 f = new0(JournalFile, 1);
/* Derive the mapping protection and the writable flag from the open
 * flags, and remember the requested compression setting. */
2032 f->prot = prot_from_flags(flags);
2033 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2035 f->compress = compress;
/* f->mmap becomes either a reference to the caller's cache or a newly
 * created cache. */
2042 f->mmap = mmap_cache_ref(mmap_cache);
2044 f->mmap = mmap_cache_new();
2051 f->path = strdup(fname);
/* O_CLOEXEC is always added to the flags stored in f->flags. */
2057 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2063 if (fstat(f->fd, &f->last_stat) < 0) {
/* A zero-sized file that was opened writable is treated as newly
 * created; the header is then initialized from the template. */
2068 if (f->last_stat.st_size == 0 && f->writable) {
2069 newly_created = true;
2072 /* Try to load the FSPRG state, and if we can't, then
2073 * just don't do sealing */
2075 r = journal_file_fss_load(f);
2081 r = journal_file_init_header(f, template);
/* Stat again, so that f->last_stat is taken after the header
 * initialization above. */
2085 if (fstat(f->fd, &f->last_stat) < 0) {
/* Detect files too small to hold even the minimal header. */
2091 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
/* Map the header from file offset 0 as a shared mapping. */
2096 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2097 if (f->header == MAP_FAILED) {
/* Only files that already existed get their header verified. */
2103 if (!newly_created) {
2104 r = journal_file_verify_header(f);
/* For already existing files opened writable the FSPRG state is
 * loaded at this point instead. */
2110 if (!newly_created && f->writable) {
2111 r = journal_file_fss_load(f);
/* Metrics: either the caller's metrics after journal_default_metrics()
 * processed them in place, or else a copy of the template's metrics. */
2119 journal_default_metrics(metrics, f->fd);
2120 f->metrics = *metrics;
2121 } else if (template)
2122 f->metrics = template->metrics;
2124 r = journal_file_refresh_header(f);
2130 r = journal_file_hmac_setup(f);
/* A newly created file needs its hash tables set up, followed by the
 * first tag. */
2135 if (newly_created) {
2136 r = journal_file_setup_field_hash_table(f);
2140 r = journal_file_setup_data_hash_table(f);
2145 r = journal_file_append_first_tag(f);
2151 r = journal_file_map_field_hash_table(f);
2155 r = journal_file_map_data_hash_table(f);
/* NOTE(review): presumably the failure path, which tears down the
 * partially set up object - confirm. */
2165 journal_file_close(f);
/* Rotates a journal file. Visible steps: build an archive name from
 * the current path, the seqnum ID and the tail sequence number and
 * tail realtime timestamp of the header, rename the file to it, mark
 * the old header STATE_ARCHIVED, open a new file at the original path
 * using the old file as template, and close the old file.
 *
 * compress and seal are handed through to journal_file_open() for the
 * new file. */
2170 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2173 JournalFile *old_file, *new_file = NULL;
/* Detect files that were not opened writable. */
2181 if (!old_file->writable)
/* Detect paths that do not end in ".journal". */
2184 if (!endswith(old_file->path, ".journal"))
2187 l = strlen(old_file->path);
/* Room for the old path (l bytes, including the 8 bytes of ".journal"
 * which the format string below appends again), one byte at p[l - 8],
 * the 32 character ID string, two times "-" plus 16 hex digits, and
 * the terminating NUL. */
2189 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
/* Copy the path without its 8 character ".journal" suffix. */
2193 memcpy(p, old_file->path, l - 8);
/* The ID string starts one byte after the copied prefix; the byte in
 * between is not written by any statement visible here. */
2195 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
/* Append "-<tail seqnum>-<tail realtime>.journal" right after the 32
 * ID characters. */
2196 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2197 "-%016llx-%016llx.journal",
2198 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2199 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2201 r = rename(old_file->path, p);
2207 old_file->header->state = STATE_ARCHIVED;
/* Open the replacement at the old path, reusing the old file's flags,
 * mode and mmap cache. No metrics are passed (NULL); the old file is
 * passed as template. */
2209 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2210 journal_file_close(old_file);
/* Wrapper around journal_file_open() that retries once. If the first
 * attempt returns one of the error codes tested below, the existing
 * file is renamed to a "<name>@<hex>-<hex>.journal~" path and
 * journal_file_open() is called a second time with the same
 * arguments. The checks on the access mode, O_CREAT and the ".journal"
 * suffix sit between the first attempt and the rename. */
2216 int journal_file_open_reliably(
2222 JournalMetrics *metrics,
2223 MMapCache *mmap_cache,
2224 JournalFile *template,
2225 JournalFile **ret) {
2231 r = journal_file_open(fname, flags, mode, compress, seal,
2232 metrics, mmap_cache, template, ret);
/* True if r is none of the results the retry logic below deals with. */
2233 if (r != -EBADMSG && /* corrupted */
2234 r != -ENODATA && /* truncated */
2235 r != -EHOSTDOWN && /* other machine */
2236 r != -EPROTONOSUPPORT && /* incompatible feature */
2237 r != -EBUSY && /* unclean shutdown */
2238 r != -ESHUTDOWN /* already archived */)
/* Read-only access requested. */
2241 if ((flags & O_ACCMODE) == O_RDONLY)
/* Caller did not ask for the file to be created. */
2244 if (!(flags & O_CREAT))
/* Name does not end in ".journal". */
2247 if (!endswith(fname, ".journal"))
2250 /* The file is corrupted. Rotate it away and try it again (but only once) */
/* The new name embeds the current CLOCK_REALTIME value as the first of
 * the two 16 digit hex fields. */
2253 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2255 (unsigned long long) now(CLOCK_REALTIME),
2259 r = rename(fname, p);
2264 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
/* Second and last attempt, with unchanged arguments. */
2266 return journal_file_open(fname, flags, mode, compress, seal,
2267 metrics, mmap_cache, template, ret);
/* Copies the entry object o, located at offset p in file from, into
 * file to. Each data object referenced by the entry is looked up in
 * from, uncompressed if it carries OBJECT_COMPRESSED, and appended to
 * to; the resulting item list is then appended as a new entry with
 * the timestamps of the source entry.
 *
 * seqnum, ret and offset are passed through unchanged to
 * journal_file_append_entry_internal(). */
2271 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2273 uint64_t q, xor_hash = 0;
/* Carry over both timestamps of the source entry. */
2286 ts.monotonic = le64toh(o->entry.monotonic);
2287 ts.realtime = le64toh(o->entry.realtime);
/* Detect a source entry whose monotonic timestamp is older than the
 * tail entry of the destination. */
2289 if (to->tail_entry_monotonic_valid &&
2290 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
/* The item array for the new entry is allocated on the stack. */
2293 n = journal_file_entry_n_items(o);
2294 items = alloca(sizeof(EntryItem) * n);
2296 for (i = 0; i < n; i++) {
/* Remember offset and hash of item i before o is repointed to the
 * data object. */
2303 q = le64toh(o->entry.items[i].object_offset);
2304 le_hash = o->entry.items[i].hash;
2306 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The hash stored in the entry item has to match the one stored in
 * the data object itself. */
2310 if (le_hash != o->data.hash)
/* Payload length: object size minus the offset of the payload within
 * the object. */
2313 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2316 /* We hit the limit on 32bit machines */
2317 if ((uint64_t) t != l)
2320 if (o->object.flags & OBJECT_COMPRESSED) {
/* Uncompress into the per-file buffer of from and use that as the
 * data to append. */
2324 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2327 data = from->compress_buffer;
/* NOTE(review): presumably the branch taken when compression support
 * is not compiled in - confirm. */
2330 return -EPROTONOSUPPORT;
2333 data = o->data.payload;
2335 r = journal_file_append_data(to, data, l, &u, &h);
/* Accumulate the XOR of all data hashes and record where the data
 * object ended up in the destination. */
2339 xor_hash ^= le64toh(u->data.hash);
2340 items[i].object_offset = htole64(h);
2341 items[i].hash = u->data.hash;
/* o was repointed to a data object above; move it back to the entry
 * at offset p. */
2343 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2348 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
/* Completes the metrics in *m in place. Fields set to (uint64_t) -1
 * count as unset and are replaced by defaults, partly derived from
 * the size of the file system that fd lives on; the values are page
 * aligned and clamped against the limits defined at the top of this
 * file. The final values are logged at debug level. */
2351 void journal_default_metrics(JournalMetrics *m, int fd) {
2352 uint64_t fs_size = 0;
2354 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
/* fs_size stays 0 if fstatvfs() fails.
 * NOTE(review): the product is computed in the type of the statvfs
 * fields before being widened to uint64_t; on targets where both are
 * 32 bit this might overflow - confirm. */
2359 if (fstatvfs(fd, &ss) >= 0)
2360 fs_size = ss.f_frsize * ss.f_blocks;
/* max_use: when unset, 10% of the file system size, kept within
 * DEFAULT_MAX_USE_LOWER..DEFAULT_MAX_USE_UPPER. */
2362 if (m->max_use == (uint64_t) -1) {
2365 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2367 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2368 m->max_use = DEFAULT_MAX_USE_UPPER;
2370 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2371 m->max_use = DEFAULT_MAX_USE_LOWER;
2373 m->max_use = DEFAULT_MAX_USE_LOWER;
/* Page align and enforce room for at least two minimal files. */
2375 m->max_use = PAGE_ALIGN(m->max_use);
2377 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2378 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
/* max_size: when unset, one eighth of max_use, capped at
 * DEFAULT_MAX_SIZE_UPPER. */
2381 if (m->max_size == (uint64_t) -1) {
2382 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2384 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2385 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2387 m->max_size = PAGE_ALIGN(m->max_size);
2389 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2390 m->max_size = JOURNAL_FILE_SIZE_MIN;
/* max_use is raised if it cannot hold two files of max_size. */
2392 if (m->max_size*2 > m->max_use)
2393 m->max_use = m->max_size*2;
/* min_size: when unset, the minimal journal file size. */
2395 if (m->min_size == (uint64_t) -1)
2396 m->min_size = JOURNAL_FILE_SIZE_MIN;
2398 m->min_size = PAGE_ALIGN(m->min_size);
2400 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2401 m->min_size = JOURNAL_FILE_SIZE_MIN;
/* max_size is raised to min_size if it is smaller. */
2403 if (m->min_size > m->max_size)
2404 m->max_size = m->min_size;
/* keep_free: when unset, 5% of the file system size capped at
 * DEFAULT_KEEP_FREE_UPPER, or DEFAULT_KEEP_FREE. */
2407 if (m->keep_free == (uint64_t) -1) {
2410 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2412 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2413 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2416 m->keep_free = DEFAULT_KEEP_FREE;
2419 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2420 format_bytes(a, sizeof(a), m->max_use),
2421 format_bytes(b, sizeof(b), m->max_size),
2422 format_bytes(c, sizeof(c), m->min_size),
2423 format_bytes(d, sizeof(d), m->keep_free));
/* Reports the realtime range covered by the file: *from receives the
 * head entry's realtime timestamp and *to the tail entry's, both
 * converted from the little endian header fields. A header field of
 * 0 is tested for before the respective value is stored. */
2426 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2431 if (f->header->head_entry_realtime == 0)
2434 *from = le64toh(f->header->head_entry_realtime);
2438 if (f->header->tail_entry_realtime == 0)
2441 *to = le64toh(f->header->tail_entry_realtime);
/* Reports the monotonic timestamp range of the entries that belong to
 * the boot boot_id. The "_BOOT_ID=<id>" data object is looked up;
 * *from is read from the entry at that object's entry_offset, *to
 * from the entry obtained for index n_entries - 1. */
2447 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* 9 bytes for the "_BOOT_ID=" prefix, 32 for the ID string written at
 * t + 9 below, 1 for the terminating NUL. */
2448 char t[9+32+1] = "_BOOT_ID=";
2456 sd_id128_to_string(boot_id, t + 9);
/* p receives the offset of the data object; it is needed further
 * down, after o has been repointed to an entry. */
2458 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
/* Detect a data object that no entry refers to. */
2462 if (le64toh(o->data.n_entries) <= 0)
2466 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2470 *from = le64toh(o->entry.monotonic);
/* Go back to the data object: o points to an entry at this point. */
2474 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Index n_entries - 1 is presumably the last entry linked to this
 * data object. */
2478 r = generic_array_get_plus_one(f,
2479 le64toh(o->data.entry_offset),
2480 le64toh(o->data.entry_array_offset),
2481 le64toh(o->data.n_entries)-1,
2486 *to = le64toh(o->entry.monotonic);
/* Decides whether rotating the file is advisable. Three conditions
 * are tested, and each one logs its reason at debug level: the
 * on-disk header is smaller than the current Header structure, the
 * data hash table is more than 75% full, or the field hash table is
 * more than 75% full. */
2492 bool journal_file_rotate_suggested(JournalFile *f) {
2495 /* If we gained new header fields we gained new features,
2496 * hence suggest a rotation */
2497 if (le64toh(f->header->header_size) < sizeof(Header)) {
2498 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2502 /* Let's check if the hash tables grew over a certain fill
2503 * level (75%, borrowing this value from Java's hash table
2504 * implementation), and if so suggest a rotation. To calculate
2505 * the fill level we need the n_data field, which only exists
2506 * in newer versions. */
/* n * 4 > items * 3 is the integer form of n / items > 75%, with
 * items being the table size in bytes divided by sizeof(HashItem). */
2508 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2509 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2510 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2512 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2513 (unsigned long long) le64toh(f->header->n_data),
2514 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2515 (unsigned long long) (f->last_stat.st_size),
2516 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
/* Same 75% test for the field hash table. */
2520 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2521 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2522 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2524 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2525 (unsigned long long) le64toh(f->header->n_fields),
2526 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));