1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
/* Sizes in bytes of the data and field hash tables (2047 resp. 333
 * HashItem slots). The data value serves as the lower bound when the
 * data hash table is sized; the field value is the size the field
 * hash table is created with. */
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
/* Payloads of at least this many bytes are candidates for compression
 * when a data object is appended */
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
45 /* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
53 /* This is the upper bound if we deduce the keep_free value from the file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57 /* This is the keep_free value when we can't determine the system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
/* Shuts down a JournalFile and releases what it holds.
 *
 * Visible steps, in order: append a final tag if the file is sealed and
 * writable; drop the mmap cache's windows for this fd; switch a writable
 * header from STATE_ONLINE to STATE_OFFLINE (an archived state is left
 * alone); unmap the header; close the fd; drop the mmap cache reference;
 * free the compression buffer; release the FSS file mapping or the
 * FSPRG state; close the HMAC handle.
 *
 * NOTE(review): this listing omits lines of the function (the body of
 * the "writable && fd >= 0" branch and several guards among them), so
 * the description above is incomplete -- confirm against the full file. */
64 void journal_file_close(JournalFile *f) {
68 /* Write the final tag */
69 if (f->seal && f->writable)
70 journal_file_append_tag(f);
73 /* Sync everything to disk, before we mark the file offline */
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
77 if (f->writable && f->fd >= 0)
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
83 f->header->state = STATE_OFFLINE;
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
89 close_nointr_nofail(f->fd);
94 mmap_cache_unref(f->mmap);
97 free(f->compress_buffer);
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
109 gcry_md_close(f->hmac);
/* Builds a fresh file header in a local Header and writes it to the
 * start of the file with pwrite().
 *
 * The header gets the file signature, its own 64-bit-aligned size, the
 * COMPRESSED incompatible flag when f->compress is set, the SEALED flag
 * when f->seal is set, and a random file ID. Two alternatives for the
 * sequence number ID are visible: copy seqnum_id and tail_entry_seqnum
 * from the template's header, or reuse the new file ID.
 *
 * NOTE(review): the condition selecting between those alternatives, the
 * target of the SEALED assignment and all error handling are on lines
 * not shown here -- presumably "template != NULL" and
 * h.compatible_flags; confirm. */
115 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
123 memcpy(h.signature, HEADER_SIGNATURE, 8);
124 h.header_size = htole64(ALIGN64(sizeof(h)));
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
132 r = sd_id128_randomize(&h.file_id);
137 h.seqnum_id = template->header->seqnum_id;
138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
140 h.seqnum_id = h.file_id;
142 k = pwrite(f->fd, &h, sizeof(h), 0);
/* Updates the mapped header for the current machine and boot and marks
 * the file online.
 *
 * Stores the local machine ID in the header, fetches the current boot
 * ID, and if it equals the boot ID already in the header, flags the
 * stored tail monotonic timestamp as valid. The header then receives
 * the current boot ID and STATE_ONLINE, and the header page is flushed
 * synchronously with msync().
 *
 * NOTE(review): error handling for the two ID lookups and the return
 * value are on lines not shown here. */
152 static int journal_file_refresh_header(JournalFile *f) {
158 r = sd_id128_get_machine(&f->header->machine_id);
162 r = sd_id128_get_boot(&boot_id);
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
169 f->header->boot_id = boot_id;
171 f->header->state = STATE_ONLINE;
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
/* Sanity-checks the mapped header of an existing file and copies the
 * compression/sealing flags from it into f.
 *
 * Checks visible in this listing:
 *  - signature matches HEADER_SIGNATURE;
 *  - no unknown incompatible flags, and (per the comment below, in
 *    write mode) no unknown compatible flags: -EPROTONOSUPPORT;
 *  - state is below _STATE_MAX;
 *  - header_size is at least HEADER_SIZE_MIN, and a sealed file's
 *    header is large enough to contain n_entry_arrays;
 *  - header_size + arena_size does not exceed the fstat()ed file size,
 *    and tail_object_offset does not exceed header_size + arena_size;
 *  - the hash table, tail object and entry array offsets pass VALID64()
 *    and are not below header_size;
 *  - the header's machine ID equals the local machine ID;
 *  - the state is inspected: ONLINE and unknown states are logged.
 *
 * NOTE(review): each flag test appears twice (masked and "!= 0"),
 * presumably the two arms of a compile-time feature switch whose
 * preprocessor lines are not shown. The error codes of most checks and
 * the guard around the machine ID/state section are also not visible. */
180 static int journal_file_verify_header(JournalFile *f) {
183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
190 return -EPROTONOSUPPORT;
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
201 return -EPROTONOSUPPORT;
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
208 if (f->header->state >= _STATE_MAX)
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
238 sd_id128_t machine_id;
241 r = sd_id128_get_machine(&machine_id);
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
248 state = f->header->state;
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
253 } else if (state == STATE_ARCHIVED)
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
264 f->seal = JOURNAL_HEADER_SEALED(f->header);
/* Makes sure the file is large enough to hold the byte range
 * [offset, offset+size), growing it if necessary.
 *
 * The wanted size is offset+size rounded up to a page and never less
 * than the header size. If it does not exceed the current size nothing
 * needs to grow. Otherwise the request is checked against
 * metrics.max_size (when non-zero) and, for sizes above
 * metrics.min_size with a non-zero metrics.keep_free, against the free
 * space reported by fstatvfs() minus keep_free. Only the delta between
 * old and new size is passed to posix_fallocate(); afterwards the
 * cached fstat() data is refreshed and the header's arena_size is set
 * to the new size minus the header size.
 *
 * NOTE(review): the assignment of old_size (presumably header_size +
 * arena_size, per the expression below) and every return statement are
 * on lines not shown here. */
269 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
270 uint64_t old_size, new_size;
275 /* We assume that this file is not sparse, and we know that
276 * for sure, since we always call posix_fallocate()
280 le64toh(f->header->header_size) +
281 le64toh(f->header->arena_size);
283 new_size = PAGE_ALIGN(offset + size);
284 if (new_size < le64toh(f->header->header_size))
285 new_size = le64toh(f->header->header_size);
287 if (new_size <= old_size)
290 if (f->metrics.max_size > 0 &&
291 new_size > f->metrics.max_size)
294 if (new_size > f->metrics.min_size &&
295 f->metrics.keep_free > 0) {
298 if (fstatvfs(f->fd, &svfs) >= 0) {
301 available = svfs.f_bfree * svfs.f_bsize;
303 if (available >= f->metrics.keep_free)
304 available -= f->metrics.keep_free;
308 if (new_size - old_size > available)
313 /* Note that the glibc fallocate() fallback is very
314 inefficient, hence we try to minimize the allocation area
316 r = posix_fallocate(f->fd, old_size, new_size - old_size);
320 if (fstat(f->fd, &f->last_stat) < 0)
323 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
/* Maps the byte range [offset, offset+size) of the file through the
 * mmap cache and hands the resulting pointer back via ret.
 *
 * The range is validated against the cached fstat() size first; if it
 * looks out of range the size is refreshed once before giving up.
 * Callers pass sizes that were read from the file itself, so a sum
 * offset+size that wraps around 64 bits is treated as out of range as
 * well instead of slipping past the end-of-file comparison.
 *
 * Returns -EADDRNOTAVAIL if fstat() fails or the range (still) does
 * not fit into the file, otherwise whatever mmap_cache_get() returns.
 *
 * NOTE(review): the statements between the signature and the range
 * check are on lines not shown in this listing. */
328 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
335 /* Avoid SIGBUS on invalid accesses; a wrapped offset + size counts as invalid, too */
336 if (offset + size < offset || offset + size > (uint64_t) f->last_stat.st_size) {
337 /* Hmm, out of range? Let's refresh the fstat() data
338 * first, before we trust that check. */
340 if (fstat(f->fd, &f->last_stat) < 0 ||
341 offset + size < offset || offset + size > (uint64_t) f->last_stat.st_size)
342 return -EADDRNOTAVAIL;
345 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
/* Returns the smallest valid size of object o, looked up by its type.
 *
 * Types beyond the end of the table, or with no table entry, fall back
 * to the size of the generic ObjectHeader. The table is only ever read,
 * hence it is const. */
348 static uint64_t minimum_header_size(Object *o) {
350 static const uint64_t table[] = {
351 [OBJECT_DATA] = sizeof(DataObject),
352 [OBJECT_FIELD] = sizeof(FieldObject),
353 [OBJECT_ENTRY] = sizeof(EntryObject),
354 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
356 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
357 [OBJECT_TAG] = sizeof(TagObject),
360 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
361 return sizeof(ObjectHeader);
363 return table[o->object.type];
/* Maps the object stored at offset and validates its header.
 *
 * The offset must pass VALID64(). First only an ObjectHeader is mapped
 * (using the requested type as mmap context, or context 0 for types
 * outside 1.._OBJECT_TYPE_MAX-1). The object's declared size must be at
 * least sizeof(ObjectHeader) and at least minimum_header_size() for its
 * type, its type must be above OBJECT_UNUSED, and if type >= 0 was
 * requested the stored type must match. Objects larger than the bare
 * header are then mapped again at full size.
 *
 * NOTE(review): the error codes of the checks and the store to *ret
 * are on lines not shown here. */
366 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
376 /* Objects may only be located at multiple of 64 bit */
377 if (!VALID64(offset))
380 /* One context for each type, plus one catch-all for the rest */
381 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
383 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
388 s = le64toh(o->object.size);
390 if (s < sizeof(ObjectHeader))
393 if (o->object.type <= OBJECT_UNUSED)
396 if (s < minimum_header_size(o))
399 if (type >= 0 && o->object.type != type)
402 if (s > sizeof(ObjectHeader)) {
403 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
/* Picks the sequence number for the next entry and records it in the
 * header.
 *
 * The candidate is the header's tail_entry_seqnum plus one. The result
 * is written back as the new tail_entry_seqnum, and also as
 * head_entry_seqnum if that is still zero.
 *
 * NOTE(review): the handling of the external counter *seqnum described
 * in the comment below, and the return statement, are on lines not
 * shown here. */
414 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
419 r = le64toh(f->header->tail_entry_seqnum) + 1;
422 /* If an external seqnum counter was passed, we update
423 * both the local and the external one, and set it to
424 * the maximum of both */
432 f->header->tail_entry_seqnum = htole64(r);
434 if (f->header->head_entry_seqnum == 0)
435 f->header->head_entry_seqnum = htole64(r);
/* Appends a new object of the given type and size after the current
 * tail object.
 *
 * type must be a real object type and size at least
 * sizeof(ObjectHeader) (asserted). The write position starts from
 * tail_object_offset; two continuations are visible: start at
 * header_size, or map the tail object and skip past its 64-bit-aligned
 * size. Space is then allocated, the range is mapped, the object's type
 * and size are filled in, and the header's tail_object_offset and
 * n_objects are updated.
 *
 * NOTE(review): the condition choosing between the two start positions
 * (presumably "tail offset is zero"), the zeroing of the new object and
 * the stores to *ret / *offset are on lines not shown here. */
440 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
447 assert(type > 0 && type < _OBJECT_TYPE_MAX);
448 assert(size >= sizeof(ObjectHeader));
452 p = le64toh(f->header->tail_object_offset);
454 p = le64toh(f->header->header_size);
456 r = journal_file_move_to_object(f, -1, p, &tail);
460 p += ALIGN64(le64toh(tail->object.size));
463 r = journal_file_allocate(f, p, size);
467 r = journal_file_move_to(f, type, false, p, size, &t);
474 o->object.type = type;
475 o->object.size = htole64(size);
477 f->header->tail_object_offset = htole64(p);
478 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
/* Creates the data hash table object in a new file.
 *
 * The table size in bytes is derived from metrics.max_size (see the
 * comment below) and never smaller than DEFAULT_DATA_HASH_TABLE_SIZE.
 * The object is appended, its items are zeroed, and the header records
 * the offset of the first item and the table size in bytes.
 *
 * NOTE(review): the trailing arguments of the append call and the
 * error handling are on lines not shown here. */
486 static int journal_file_setup_data_hash_table(JournalFile *f) {
493 /* We estimate that we need 1 hash table entry per 768 of
494 journal file and we want to make sure we never get beyond
495 75% fill level. Calculate the hash table size for the
496 maximum file size based on these metrics. */
498 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
499 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
500 s = DEFAULT_DATA_HASH_TABLE_SIZE;
502 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
511 memset(o->hash_table.items, 0, s);
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
/* Creates the field hash table object in a new file.
 *
 * The table always has DEFAULT_FIELD_HASH_TABLE_SIZE bytes. The object
 * is appended, its items are zeroed, and the header records the offset
 * of the first item and the table size in bytes.
 *
 * NOTE(review): the trailing arguments of the append call and the
 * error handling are on lines not shown here. */
519 static int journal_file_setup_field_hash_table(JournalFile *f) {
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
534 memset(o->hash_table.items, 0, s);
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
/* Maps the data hash table, whose offset and size are taken from the
 * header, and remembers the mapping in f->data_hash_table.
 *
 * NOTE(review): the remaining arguments of the move_to call and the
 * error handling are on lines not shown here. */
542 static int journal_file_map_data_hash_table(JournalFile *f) {
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
552 r = journal_file_move_to(f,
553 OBJECT_DATA_HASH_TABLE,
560 f->data_hash_table = t;
/* Maps the field hash table, whose offset and size are taken from the
 * header, and remembers the mapping in f->field_hash_table.
 *
 * NOTE(review): the remaining arguments of the move_to call and the
 * error handling are on lines not shown here. */
564 static int journal_file_map_field_hash_table(JournalFile *f) {
571 p = le64toh(f->header->field_hash_table_offset);
572 s = le64toh(f->header->field_hash_table_size);
574 r = journal_file_move_to(f,
575 OBJECT_FIELD_HASH_TABLE,
582 f->field_hash_table = t;
/* Hooks the data object o (located at offset) into the data hash table
 * bucket selected by hash.
 *
 * Only OBJECT_DATA objects are handled. The object's chaining and entry
 * bookkeeping fields are reset to zero. The bucket index is hash modulo
 * the number of HashItems in the table. Two cases are visible: set the
 * bucket's head to this object, or map the bucket's previous tail
 * object and point its next_hash_offset here. The bucket's tail is then
 * set to this object and, if the header has the field, n_data is
 * incremented.
 *
 * NOTE(review): the branch condition between the two cases (presumably
 * "bucket tail offset is zero"), the error returns and the final return
 * are on lines not shown here. */
586 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
594 if (o->object.type != OBJECT_DATA)
597 /* This might alter the window we are looking at */
599 o->data.next_hash_offset = o->data.next_field_offset = 0;
600 o->data.entry_offset = o->data.entry_array_offset = 0;
601 o->data.n_entries = 0;
603 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
604 p = le64toh(f->data_hash_table[h].tail_hash_offset);
606 /* Only entry in the hash table is easy */
607 f->data_hash_table[h].head_hash_offset = htole64(offset);
609 /* Move back to the previous data object, to patch in
612 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
616 o->data.next_hash_offset = htole64(offset);
619 f->data_hash_table[h].tail_hash_offset = htole64(offset);
621 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
622 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
/* Looks up the data object whose payload equals data[0..size) using a
 * precomputed hash.
 *
 * The bucket is hash modulo the number of HashItems; the chain is
 * walked from the bucket's head via next_hash_offset. Objects with a
 * different stored hash are passed over. For compressed objects the
 * payload is decompressed into f->compress_buffer and compared there;
 * uncompressed objects match when their total size equals the expected
 * object size and the payload bytes are equal.
 *
 * NOTE(review): the loop construct, the handling of an empty hash table
 * and of matches (stores to *ret / *offset, return values) are on lines
 * not shown here; the -EPROTONOSUPPORT below is presumably the arm for
 * builds without compression support -- confirm. */
627 int journal_file_find_data_object_with_hash(
629 const void *data, uint64_t size, uint64_t hash,
630 Object **ret, uint64_t *offset) {
632 uint64_t p, osize, h;
636 assert(data || size == 0);
638 osize = offsetof(Object, data.payload) + size;
640 if (f->header->data_hash_table_size == 0)
643 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
644 p = le64toh(f->data_hash_table[h].head_hash_offset);
649 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
653 if (le64toh(o->data.hash) != hash)
656 if (o->object.flags & OBJECT_COMPRESSED) {
660 l = le64toh(o->object.size);
661 if (l <= offsetof(Object, data.payload))
664 l -= offsetof(Object, data.payload);
666 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
670 memcmp(f->compress_buffer, data, size) == 0) {
681 return -EPROTONOSUPPORT;
684 } else if (le64toh(o->object.size) == osize &&
685 memcmp(o->data.payload, data, size) == 0) {
697 p = le64toh(o->data.next_hash_offset);
/* Convenience wrapper: computes hash64() over data[0..size) and
 * delegates to journal_file_find_data_object_with_hash().
 *
 * data may only be NULL when size is zero (asserted). */
703 int journal_file_find_data_object(
705 const void *data, uint64_t size,
706 Object **ret, uint64_t *offset) {
711 assert(data || size == 0);
713 hash = hash64(data, size);
715 return journal_file_find_data_object_with_hash(f,
/* Appends a data object carrying data[0..size) and links it into the
 * data hash table.
 *
 * The payload is hashed and looked up first via
 * journal_file_find_data_object_with_hash(). A new OBJECT_DATA object
 * sized for the uncompressed payload is then appended and stamped with
 * the hash. Payloads of at least COMPRESSION_SIZE_THRESHOLD bytes may
 * be compressed straight into the object, in which case the object
 * size is shrunk and OBJECT_COMPRESSED is set; otherwise the bytes are
 * copied verbatim. After linking, the object is mapped again because
 * linking may move the mmap window, and it is fed to the HMAC.
 *
 * The debug message prints both sizes as unsigned long long, matching
 * the other size logging in this file, so 64-bit values are not
 * truncated where long is 32 bits wide.
 *
 * NOTE(review): the handling of the lookup result (presumably an early
 * return when the data already exists), the remaining compression
 * conditions and all error/return paths are on lines not shown here. */
720 static int journal_file_append_data(
722 const void *data, uint64_t size,
723 Object **ret, uint64_t *offset) {
729 bool compressed = false;
732 assert(data || size == 0);
734 hash = hash64(data, size);
736 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
750 osize = offsetof(Object, data.payload) + size;
751 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
755 o->data.hash = htole64(hash);
759 size >= COMPRESSION_SIZE_THRESHOLD) {
762 compressed = compress_blob(data, size, o->data.payload, &rsize);
765 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
766 o->object.flags |= OBJECT_COMPRESSED;
768 log_debug("Compressed data object %llu -> %llu", (unsigned long long) size, (unsigned long long) rsize);
773 if (!compressed && size > 0)
774 memcpy(o->data.payload, data, size);
776 r = journal_file_link_data(f, o, p, hash);
780 /* The linking might have altered the window, so let's
781 * refresh our pointer */
782 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
787 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
/* Returns how many EntryItems an OBJECT_ENTRY object holds, derived
 * from its stored size minus the fixed part before entry.items.
 *
 * NOTE(review): the value returned for objects of another type is on a
 * line not shown here. */
801 uint64_t journal_file_entry_n_items(Object *o) {
804 if (o->object.type != OBJECT_ENTRY)
807 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
/* Returns how many 64-bit slots an OBJECT_ENTRY_ARRAY object holds,
 * derived from its stored size minus the fixed part before
 * entry_array.items.
 *
 * NOTE(review): the value returned for objects of another type is on a
 * line not shown here. */
810 uint64_t journal_file_entry_array_n_items(Object *o) {
813 if (o->object.type != OBJECT_ENTRY_ARRAY)
816 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
/* Returns how many HashItems a data or field hash table object holds,
 * derived from its stored size minus the fixed part before
 * hash_table.items.
 *
 * NOTE(review): the value returned for objects of another type is on a
 * line not shown here. */
819 uint64_t journal_file_hash_table_n_items(Object *o) {
822 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
823 o->object.type != OBJECT_FIELD_HASH_TABLE)
826 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
/* Stores entry offset p at position *idx of a chain of entry array
 * objects and increments *idx.
 *
 * The chain is walked via next_entry_array_offset. When the wanted slot
 * lies inside an existing array, it is written there and *idx is bumped.
 * Otherwise a new OBJECT_ENTRY_ARRAY with n slots is appended, fed to
 * the HMAC, the offset is stored in it, and the new array is linked in
 * after the last array (ap). The header's n_entry_arrays counter is
 * incremented when the header has that field.
 *
 * *idx is kept in little-endian form, as it points into on-disk
 * structures.
 *
 * NOTE(review): the loop construct, the growth rule for n, the handling
 * of an empty chain (presumably a store through the `first` pointer)
 * and all returns are on lines not shown here. */
829 static int link_entry_into_array(JournalFile *f,
834 uint64_t n = 0, ap = 0, q, i, a, hidx;
843 i = hidx = le64toh(*idx);
846 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
850 n = journal_file_entry_array_n_items(o);
852 o->entry_array.items[i] = htole64(p);
853 *idx = htole64(hidx + 1);
859 a = le64toh(o->entry_array.next_entry_array_offset);
870 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
871 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
877 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
882 o->entry_array.items[i] = htole64(p);
887 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
891 o->entry_array.next_entry_array_offset = htole64(q);
894 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
895 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
897 *idx = htole64(hidx + 1);
/* Variant of link_entry_into_array() for lists that keep one entry
 * outside of the array chain.
 *
 * In the visible path the array index is *idx minus one (the separate
 * slot accounts for one entry), the offset is linked into the array
 * with that index, and *idx is incremented.
 *
 * NOTE(review): the parameter list and the branch that fills the
 * separate slot (presumably taken when *idx is zero) are on lines not
 * shown here. */
902 static int link_entry_into_array_plus_one(JournalFile *f,
921 i = htole64(le64toh(*idx) - 1);
922 r = link_entry_into_array(f, first, &i, p);
927 *idx = htole64(le64toh(*idx) + 1);
/* Links an entry into the entry list of the data object referenced by
 * item i of entry o.
 *
 * The item's object_offset is read, the data object at that offset is
 * mapped (reusing o for it), and the entry is added through
 * link_entry_into_array_plus_one() using the data object's
 * entry_offset and entry_array_offset fields.
 *
 * NOTE(review): the remaining arguments of that call (presumably the
 * n_entries counter and the entry offset) are on lines not shown. */
931 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
938 p = le64toh(o->entry.items[i].object_offset);
942 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
946 return link_entry_into_array_plus_one(f,
947 &o->data.entry_offset,
948 &o->data.entry_array_offset,
/* Makes a freshly written entry object reachable.
 *
 * Only OBJECT_ENTRY objects are handled. After a full memory barrier
 * the entry is added to the file-wide entry array (tracked by the
 * header's entry_array_offset / n_entries). The header's head realtime
 * timestamp is set if still zero, the tail realtime and monotonic
 * timestamps are updated, and the tail monotonic timestamp is flagged
 * valid. Finally every item of the entry is linked to its data object
 * via journal_file_link_entry_item().
 *
 * NOTE(review): error handling inside the loop (presumably re-mapping
 * the entry after each item) and all returns are on lines not shown. */
953 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
961 if (o->object.type != OBJECT_ENTRY)
964 __sync_synchronize();
966 /* Link up the entry itself */
967 r = link_entry_into_array(f,
968 &f->header->entry_array_offset,
969 &f->header->n_entries,
974 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
976 if (f->header->head_entry_realtime == 0)
977 f->header->head_entry_realtime = o->entry.realtime;
979 f->header->tail_entry_realtime = o->entry.realtime;
980 f->header->tail_entry_monotonic = o->entry.monotonic;
982 f->tail_entry_monotonic_valid = true;
984 /* Link up the items */
985 n = journal_file_entry_n_items(o);
986 for (i = 0; i < n; i++) {
987 r = journal_file_link_entry_item(f, o, offset, i);
/* Writes an entry object for the given items and links it into the
 * file.
 *
 * An OBJECT_ENTRY sized for n_items EntryItems is appended. It is
 * filled with the next sequence number, a copy of the items, the
 * realtime and monotonic timestamps from ts, the XOR hash, and the
 * boot ID currently in the header. The object is then fed to the HMAC
 * and linked via journal_file_link_entry().
 *
 * items may only be NULL when n_items is zero (asserted).
 *
 * NOTE(review): part of the parameter list, the error handling and the
 * stores to *ret / *offset are on lines not shown here. */
995 static int journal_file_append_entry_internal(
997 const dual_timestamp *ts,
999 const EntryItem items[], unsigned n_items,
1001 Object **ret, uint64_t *offset) {
1008 assert(items || n_items == 0);
1011 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1013 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1017 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1018 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1019 o->entry.realtime = htole64(ts->realtime);
1020 o->entry.monotonic = htole64(ts->monotonic);
1021 o->entry.xor_hash = htole64(xor_hash);
1022 o->entry.boot_id = f->header->boot_id;
1025 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1030 r = journal_file_link_entry(f, o, np);
/* Notifies inotify watchers that the file changed.
 *
 * Issues a full memory barrier and then truncates the file to the size
 * recorded in the cached fstat() data (see the comment below for the
 * reason). A failing ftruncate() is only logged. */
1043 void journal_file_post_change(JournalFile *f) {
1046 /* inotify() does not receive IN_MODIFY events from file
1047 * accesses done via mmap(). After each access we hence
1048 * trigger IN_MODIFY by truncating the journal file to its
1049 * current size which triggers IN_MODIFY. */
1051 __sync_synchronize();
1053 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1054 log_error("Failed to truncate file to its own size: %m");
/* Appends one log entry consisting of the n_iovec fields in iovec.
 *
 * A local timestamp can be taken with dual_timestamp_get(); the
 * entry's monotonic time is compared against the header's tail
 * monotonic timestamp when that is valid. A tag may be appended first
 * via journal_file_maybe_append_tag(). Each field is stored as a data
 * object; its offset and hash go into a stack-allocated EntryItem array
 * and its hash is folded into the entry's XOR hash. The entry itself is
 * written by journal_file_append_entry_internal(), followed by
 * journal_file_post_change().
 *
 * iovec may only be NULL when n_iovec is zero (asserted).
 *
 * NOTE(review): the condition for taking the local timestamp
 * (presumably ts == NULL), the outcome of the monotonic comparison and
 * all error/return paths are on lines not shown here. */
1057 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1061 uint64_t xor_hash = 0;
1062 struct dual_timestamp _ts;
1065 assert(iovec || n_iovec == 0);
1071 dual_timestamp_get(&_ts);
1075 if (f->tail_entry_monotonic_valid &&
1076 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1080 r = journal_file_maybe_append_tag(f, ts->realtime);
1085 /* alloca() can't take 0, hence let's allocate at least one */
1086 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1088 for (i = 0; i < n_iovec; i++) {
1092 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1096 xor_hash ^= le64toh(o->data.hash);
1097 items[i].object_offset = htole64(p);
1098 items[i].hash = o->data.hash;
1101 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1103 journal_file_post_change(f);
/* Fetches the entry stored at a given index of a chain of entry array
 * objects.
 *
 * The chain is walked array by array: each array is mapped, its slot
 * count n is determined, and either the slot's entry offset p is read
 * or the walk continues at next_entry_array_offset. A zero array or
 * entry offset ends the lookup; otherwise the entry object at p is
 * mapped.
 *
 * NOTE(review): the parameter list, the loop construct, the index
 * arithmetic and the return values are on lines not shown here. */
1108 static int generic_array_get(JournalFile *f,
1111 Object **ret, uint64_t *offset) {
1123 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1127 n = journal_file_entry_array_n_items(o);
1129 p = le64toh(o->entry_array.items[i]);
1134 a = le64toh(o->entry_array.next_entry_array_offset);
1137 if (a <= 0 || p <= 0)
1140 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
/* Variant of generic_array_get() for lists that keep one entry
 * (`extra`) outside of the array chain.
 *
 * Two paths are visible: map the entry at `extra` directly, or look up
 * index i-1 in the array chain starting at `first`.
 *
 * NOTE(review): the parameter list and the condition choosing between
 * the paths (presumably i == 0) are on lines not shown here. */
1153 static int generic_array_get_plus_one(JournalFile *f,
1157 Object **ret, uint64_t *offset) {
1166 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1179 return generic_array_get(f, first, i-1, ret, offset);
/* Searches a chain of entry array objects for the entry matching
 * `needle` according to the test_object callback.
 *
 * For each array in the chain the slot count k is determined and one
 * slot is probed with test_object() first; a TEST_FOUND result is
 * mapped to TEST_RIGHT for DIRECTION_DOWN and TEST_LEFT otherwise.
 * Within an array, the range [left, right) is bisected at
 * (left + right) / 2 using the same callback and mapping. For
 * DIRECTION_UP searches, subtract_one marks that the result is the slot
 * before the final position. The final entry offset comes from slot i,
 * slot i-1, or the previously remembered offset, and that entry object
 * is mapped; the reported index is t + i, minus one when subtract_one
 * is set.
 *
 * NOTE(review): large parts of this function (parameter list, loop
 * structure, updates of left/right/t/last_p, return values) are on
 * lines not shown here; treat this summary as partial. */
1188 static int generic_array_bisect(JournalFile *f,
1192 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1193 direction_t direction,
1198 uint64_t a, p, t = 0, i = 0, last_p = 0;
1199 bool subtract_one = false;
1200 Object *o, *array = NULL;
1204 assert(test_object);
1208 uint64_t left, right, k, lp;
1210 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1214 k = journal_file_entry_array_n_items(array);
1220 lp = p = le64toh(array->entry_array.items[i]);
1224 r = test_object(f, p, needle);
1228 if (r == TEST_FOUND)
1229 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1231 if (r == TEST_RIGHT) {
1235 if (left == right) {
1236 if (direction == DIRECTION_UP)
1237 subtract_one = true;
1243 assert(left < right);
1245 i = (left + right) / 2;
1246 p = le64toh(array->entry_array.items[i]);
1250 r = test_object(f, p, needle);
1254 if (r == TEST_FOUND)
1255 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1257 if (r == TEST_RIGHT)
1265 if (direction == DIRECTION_UP) {
1267 subtract_one = true;
1278 a = le64toh(array->entry_array.next_entry_array_offset);
1284 if (subtract_one && t == 0 && i == 0)
1287 if (subtract_one && i == 0)
1289 else if (subtract_one)
1290 p = le64toh(array->entry_array.items[i-1]);
1292 p = le64toh(array->entry_array.items[i]);
1294 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1305 *idx = t + i + (subtract_one ? -1 : 0);
/* Variant of generic_array_bisect() for lists that keep one entry
 * (`extra`) outside of the array chain.
 *
 * The extra entry is tested first with test_object(), mapping
 * TEST_FOUND to TEST_RIGHT/TEST_LEFT by direction as in
 * generic_array_bisect(). For DIRECTION_UP the extra entry is
 * remembered as a fallback (step_back). The remaining n-1 entries are
 * searched with generic_array_bisect(); if that finds nothing and
 * step_back is set, the extra entry itself is mapped.
 *
 * NOTE(review): the parameter list, the conditions around step_back and
 * the TEST_RIGHT/DIRECTION_DOWN case, and all returns are on lines not
 * shown here. */
1310 static int generic_array_bisect_plus_one(JournalFile *f,
1315 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1316 direction_t direction,
1322 bool step_back = false;
1326 assert(test_object);
1331 /* This bisects the array in object 'first', but first checks
1333 r = test_object(f, extra, needle);
1337 if (r == TEST_FOUND)
1338 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1340 /* if we are looking with DIRECTION_UP then we need to first
1341 see if in the actual array there is a matching entry, and
1342 return the last one of that. But if there isn't any we need
1343 to return this one. Hence remember this, and return it
1346 step_back = direction == DIRECTION_UP;
1348 if (r == TEST_RIGHT) {
1349 if (direction == DIRECTION_DOWN)
1355 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1357 if (r == 0 && step_back)
1366 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
/* Comparison helper with the signature of the test_object callback
 * taken by generic_array_bisect(): compares the entry offset p itself
 * against needle.
 *
 * NOTE(review): only the "p < needle" branch is visible; the other
 * branches and the returned constants are on lines not shown here. */
1382 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1388 else if (p < needle)
/* Seeks in the file-wide entry array (header entry_array_offset /
 * n_entries) via generic_array_bisect().
 *
 * NOTE(review): the parameter list and the needle/callback arguments
 * are on lines not shown here; by its name this seeks by entry
 * offset. */
1394 int journal_file_move_to_entry_by_offset(
1397 direction_t direction,
1401 return generic_array_bisect(f,
1402 le64toh(f->header->entry_array_offset),
1403 le64toh(f->header->n_entries),
/* Comparison helper with the signature of the test_object callback
 * taken by generic_array_bisect(): maps the entry at offset p and
 * compares its sequence number against needle.
 *
 * NOTE(review): the constants returned for equal / smaller / larger
 * are on lines not shown here. */
1411 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1418 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1422 if (le64toh(o->entry.seqnum) == needle)
1424 else if (le64toh(o->entry.seqnum) < needle)
/* Seeks in the file-wide entry array (header entry_array_offset /
 * n_entries) via generic_array_bisect().
 *
 * NOTE(review): the parameter list and the needle/callback arguments
 * are on lines not shown here; by its name this seeks by sequence
 * number. */
1430 int journal_file_move_to_entry_by_seqnum(
1433 direction_t direction,
1437 return generic_array_bisect(f,
1438 le64toh(f->header->entry_array_offset),
1439 le64toh(f->header->n_entries),
/* test_object callback for the bisection helpers: maps the entry at
 * offset p and compares its realtime timestamp against needle.
 *
 * NOTE(review): the constants returned for equal / smaller / larger
 * are on lines not shown here. */
1446 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1453 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1457 if (le64toh(o->entry.realtime) == needle)
1459 else if (le64toh(o->entry.realtime) < needle)
/* Seeks in the file-wide entry array (header entry_array_offset /
 * n_entries) by realtime timestamp, using generic_array_bisect() with
 * test_object_realtime as comparison callback.
 *
 * NOTE(review): the parameter list and the remaining call arguments
 * are on lines not shown here. */
1465 int journal_file_move_to_entry_by_realtime(
1468 direction_t direction,
1472 return generic_array_bisect(f,
1473 le64toh(f->header->entry_array_offset),
1474 le64toh(f->header->n_entries),
1476 test_object_realtime,
/* test_object callback for the bisection helpers: maps the entry at
 * offset p and compares its monotonic timestamp against needle.
 *
 * NOTE(review): the constants returned for equal / smaller / larger
 * are on lines not shown here. */
1481 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1488 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1492 if (le64toh(o->entry.monotonic) == needle)
1494 else if (le64toh(o->entry.monotonic) < needle)
/* Seeks by monotonic timestamp within one boot.
 *
 * Monotonic timestamps are only searched among the entries of a single
 * boot: the string "_BOOT_ID=" followed by the formatted boot ID is
 * looked up as a data object, and that object's entry list
 * (entry_offset / entry_array_offset / n_entries) is bisected with
 * test_object_monotonic.
 *
 * NOTE(review): the parameter list, the handling of a failed or empty
 * lookup and the remaining call arguments are on lines not shown. */
1500 int journal_file_move_to_entry_by_monotonic(
1504 direction_t direction,
1508 char t[9+32+1] = "_BOOT_ID=";
1514 sd_id128_to_string(boot_id, t + 9);
1515 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1521 return generic_array_bisect_plus_one(f,
1522 le64toh(o->data.entry_offset),
1523 le64toh(o->data.entry_array_offset),
1524 le64toh(o->data.n_entries),
1526 test_object_monotonic,
/* Moves to the entry following (or preceding) entry o at offset p in
 * the file-wide entry array.
 *
 * p must be non-zero whenever o is given (asserted). One visible path
 * starts at index 0 for DIRECTION_DOWN or n-1 otherwise, n being the
 * header's n_entries; the other requires o to be an OBJECT_ENTRY and
 * bisects the entry array to obtain its index, which is then adjusted
 * by direction. The resulting index is fetched with
 * generic_array_get().
 *
 * NOTE(review): the condition separating the two paths (presumably
 * "no current entry given"), the index adjustment, the bounds checks
 * and the returns are on lines not shown here. */
1531 int journal_file_next_entry(
1533 Object *o, uint64_t p,
1534 direction_t direction,
1535 Object **ret, uint64_t *offset) {
1541 assert(p > 0 || !o);
1543 n = le64toh(f->header->n_entries);
1548 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1550 if (o->object.type != OBJECT_ENTRY)
1553 r = generic_array_bisect(f,
1554 le64toh(f->header->entry_array_offset),
1555 le64toh(f->header->n_entries),
1564 if (direction == DIRECTION_DOWN) {
1577 /* And jump to it */
1578 return generic_array_get(f,
1579 le64toh(f->header->entry_array_offset),
/* Moves `skip` entries away from entry o at offset p in the file-wide
 * entry array.
 *
 * o must be an OBJECT_ENTRY; its index i is found by bisecting the
 * entry array. For a backwards skip, the magnitude is compared with i
 * before it is subtracted; for a forward skip it is added. n_entries is
 * read for a bounds check and the entry at the new index is fetched
 * with generic_array_get().
 *
 * NOTE(review): part of the parameter list (including `skip`), the
 * outcome of the underflow and upper-bound checks and the returns are
 * on lines not shown here. */
1584 int journal_file_skip_entry(
1586 Object *o, uint64_t p,
1588 Object **ret, uint64_t *offset) {
1597 if (o->object.type != OBJECT_ENTRY)
1600 r = generic_array_bisect(f,
1601 le64toh(f->header->entry_array_offset),
1602 le64toh(f->header->n_entries),
1611 /* Calculate new index */
1613 if ((uint64_t) -skip >= i)
1616 i = i - (uint64_t) -skip;
1618 i += (uint64_t) skip;
1620 n = le64toh(f->header->n_entries);
1627 return generic_array_get(f,
1628 le64toh(f->header->entry_array_offset),
/* Like journal_file_next_entry(), but restricted to the entries that
 * reference the data object at data_offset.
 *
 * p must be non-zero whenever o is given (asserted). The data object
 * is mapped and its n_entries read. One visible path starts at index 0
 * for DIRECTION_DOWN or n-1 otherwise; the other requires o to be an
 * OBJECT_ENTRY and bisects the data object's entry list to obtain its
 * index, which is then adjusted by direction. The result is fetched
 * with generic_array_get_plus_one().
 *
 * NOTE(review): the condition separating the two paths, the index
 * adjustment, the bounds checks and the returns are on lines not shown
 * here. */
1633 int journal_file_next_entry_for_data(
1635 Object *o, uint64_t p,
1636 uint64_t data_offset,
1637 direction_t direction,
1638 Object **ret, uint64_t *offset) {
1645 assert(p > 0 || !o);
1647 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1651 n = le64toh(d->data.n_entries);
1656 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1658 if (o->object.type != OBJECT_ENTRY)
1661 r = generic_array_bisect_plus_one(f,
1662 le64toh(d->data.entry_offset),
1663 le64toh(d->data.entry_array_offset),
1664 le64toh(d->data.n_entries),
1674 if (direction == DIRECTION_DOWN) {
1688 return generic_array_get_plus_one(f,
1689 le64toh(d->data.entry_offset),
1690 le64toh(d->data.entry_array_offset),
/* Seeks within the entry list of the data object at data_offset via
 * generic_array_bisect_plus_one().
 *
 * NOTE(review): part of the parameter list and the needle/callback
 * arguments are on lines not shown here; by its name this seeks by
 * entry offset. */
1695 int journal_file_move_to_entry_by_offset_for_data(
1697 uint64_t data_offset,
1699 direction_t direction,
1700 Object **ret, uint64_t *offset) {
1707 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1711 return generic_array_bisect_plus_one(f,
1712 le64toh(d->data.entry_offset),
1713 le64toh(d->data.entry_array_offset),
1714 le64toh(d->data.n_entries),
/* Seeks by monotonic timestamp among the entries that reference the
 * data object at data_offset.
 *
 * First the "_BOOT_ID=<id>" data object of the wanted boot is looked up
 * (its offset is kept in b) and its entry list is bisected with
 * test_object_monotonic. Then, as the comment below says, the search
 * alternates between the entry list of the data object at data_offset
 * and the entry list of the boot ID object at b until an entry present
 * in both is found.
 *
 * NOTE(review): the loop construct, the needles passed to the later
 * bisections, the termination test and all returns are on lines not
 * shown here. */
1721 int journal_file_move_to_entry_by_monotonic_for_data(
1723 uint64_t data_offset,
1726 direction_t direction,
1727 Object **ret, uint64_t *offset) {
1729 char t[9+32+1] = "_BOOT_ID=";
1736 /* First, seek by time */
1737 sd_id128_to_string(boot_id, t + 9);
1738 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1744 r = generic_array_bisect_plus_one(f,
1745 le64toh(o->data.entry_offset),
1746 le64toh(o->data.entry_array_offset),
1747 le64toh(o->data.n_entries),
1749 test_object_monotonic,
1755 /* And now, continue seeking until we find an entry that
1756 * exists in both bisection arrays */
1762 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1766 r = generic_array_bisect_plus_one(f,
1767 le64toh(d->data.entry_offset),
1768 le64toh(d->data.entry_array_offset),
1769 le64toh(d->data.n_entries),
1777 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1781 r = generic_array_bisect_plus_one(f,
1782 le64toh(o->data.entry_offset),
1783 le64toh(o->data.entry_array_offset),
1784 le64toh(o->data.n_entries),
/* Seeks within the entry list of the data object at data_offset via
 * generic_array_bisect_plus_one().
 *
 * NOTE(review): part of the parameter list and the needle/callback
 * arguments are on lines not shown here; by its name this seeks by
 * sequence number. */
1808 int journal_file_move_to_entry_by_seqnum_for_data(
1810 uint64_t data_offset,
1812 direction_t direction,
1813 Object **ret, uint64_t *offset) {
1820 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1824 return generic_array_bisect_plus_one(f,
1825 le64toh(d->data.entry_offset),
1826 le64toh(d->data.entry_array_offset),
1827 le64toh(d->data.n_entries),
/* Seeks by realtime timestamp within the entry list of the data object
 * at data_offset, using generic_array_bisect_plus_one() with
 * test_object_realtime as comparison callback.
 *
 * NOTE(review): part of the parameter list and the remaining call
 * arguments are on lines not shown here. */
1834 int journal_file_move_to_entry_by_realtime_for_data(
1836 uint64_t data_offset,
1838 direction_t direction,
1839 Object **ret, uint64_t *offset) {
1846 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1850 return generic_array_bisect_plus_one(f,
1851 le64toh(d->data.entry_offset),
1852 le64toh(d->data.entry_array_offset),
1853 le64toh(d->data.n_entries),
1855 test_object_realtime,
/* Prints the header followed by one line per object in the file to
 * stdout.
 *
 * Objects are visited in file order, starting right after the header
 * (header_size) and advancing by each object's 64-bit-aligned size. The
 * walk stops once the object at the header's tail_object_offset has
 * been printed. For entries and tags some of their fields are printed
 * as well, and compressed objects get an extra "Flags" line. Mapping
 * failures end up at the "File corrupt" error message.
 *
 * NOTE(review): the loop construct, several case labels and the jump
 * to the error message are on lines not shown here. */
1860 void journal_file_dump(JournalFile *f) {
1867 journal_file_print_header(f);
1869 p = le64toh(f->header->header_size);
1871 r = journal_file_move_to_object(f, -1, p, &o);
1875 switch (o->object.type) {
1878 printf("Type: OBJECT_UNUSED\n");
1882 printf("Type: OBJECT_DATA\n");
1886 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1887 (unsigned long long) le64toh(o->entry.seqnum),
1888 (unsigned long long) le64toh(o->entry.monotonic),
1889 (unsigned long long) le64toh(o->entry.realtime));
1892 case OBJECT_FIELD_HASH_TABLE:
1893 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1896 case OBJECT_DATA_HASH_TABLE:
1897 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1900 case OBJECT_ENTRY_ARRAY:
1901 printf("Type: OBJECT_ENTRY_ARRAY\n");
1905 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1906 (unsigned long long) le64toh(o->tag.seqnum),
1907 (unsigned long long) le64toh(o->tag.epoch));
1911 if (o->object.flags & OBJECT_COMPRESSED)
1912 printf("Flags: COMPRESSED\n");
1914 if (p == le64toh(f->header->tail_object_offset))
1917 p = p + ALIGN64(le64toh(o->object.size));
1922 log_error("File corrupt");
/* Prints a human-readable summary of the file header to stdout.
 *
 * The first printf() covers the IDs, state, flags, sizes, sequence
 * numbers, timestamps and object counts. Counters that were added to
 * the header format later (n_data, n_fields, n_tags, n_entry_arrays)
 * are only printed when the header is large enough to contain them.
 * The disk usage line is derived from fstat()'s st_blocks and is
 * skipped if fstat() fails.
 *
 * Every ID is formatted into its own buffer (a, b, c, d). All printf()
 * arguments are evaluated before anything is printed, so formatting two
 * IDs into the same buffer would make both "%s" conversions show
 * whichever ID was written last.
 *
 * NOTE(review): a few format lines and the first argument of the
 * printf() call are on lines not shown in this listing. */
1925 void journal_file_print_header(JournalFile *f) {
1926 char a[33], b[33], c[33], d[33];
1927 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1929 char bytes[FORMAT_BYTES_MAX];
1933 printf("File Path: %s\n"
1937 "Sequential Number ID: %s\n"
1939 "Compatible Flags:%s%s\n"
1940 "Incompatible Flags:%s%s\n"
1941 "Header size: %llu\n"
1942 "Arena size: %llu\n"
1943 "Data Hash Table Size: %llu\n"
1944 "Field Hash Table Size: %llu\n"
1945 "Rotate Suggested: %s\n"
1946 "Head Sequential Number: %llu\n"
1947 "Tail Sequential Number: %llu\n"
1948 "Head Realtime Timestamp: %s\n"
1949 "Tail Realtime Timestamp: %s\n"
1951 "Entry Objects: %llu\n",
1953 sd_id128_to_string(f->header->file_id, a),
1954 sd_id128_to_string(f->header->machine_id, b),
1955 sd_id128_to_string(f->header->boot_id, c),
1956 sd_id128_to_string(f->header->seqnum_id, d),
1957 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1958 f->header->state == STATE_ONLINE ? "ONLINE" :
1959 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1960 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1961 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1962 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1963 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1964 (unsigned long long) le64toh(f->header->header_size),
1965 (unsigned long long) le64toh(f->header->arena_size),
1966 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1967 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1968 yes_no(journal_file_rotate_suggested(f)),
1969 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1970 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1971 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1972 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1973 (unsigned long long) le64toh(f->header->n_objects),
1974 (unsigned long long) le64toh(f->header->n_entries));
1976 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1977 printf("Data Objects: %llu\n"
1978 "Data Hash Table Fill: %.1f%%\n",
1979 (unsigned long long) le64toh(f->header->n_data),
1980 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1982 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1983 printf("Field Objects: %llu\n"
1984 "Field Hash Table Fill: %.1f%%\n",
1985 (unsigned long long) le64toh(f->header->n_fields),
1986 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1988 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1989 printf("Tag Objects: %llu\n",
1990 (unsigned long long) le64toh(f->header->n_tags));
1991 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1992 printf("Entry Array Objects: %llu\n",
1993 (unsigned long long) le64toh(f->header->n_entry_arrays));
1995 if (fstat(f->fd, &st) >= 0)
1996 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
/* Open a journal file and prepare a JournalFile object for it.
 *
 * Only part of the parameter list is visible in this excerpt. The body also
 * uses fname, flags and compress, and journal_file_rotate() calls this
 * function as (path, flags, mode, compress, seal, metrics, mmap_cache,
 * template, &result).
 *
 * metrics:    run through journal_default_metrics() and copied into
 *             f->metrics (presumably only when non-NULL - the guarding
 *             condition is not visible here).
 * mmap_cache: passed to mmap_cache_ref(); mmap_cache_new() is the
 *             alternative (the selecting condition is not visible here).
 * template:   passed to journal_file_init_header() for an empty writable
 *             file, and used as the metrics source in the "else" branch.
 * ret:        NOTE(review): presumably receives the new object on success;
 *             the assignment is not part of this excerpt.
 *
 * NOTE(review): this excerpt omits lines (braces, some conditions, error
 * returns). The comments below describe only the visible statements. */
1999 int journal_file_open(
2005 JournalMetrics *metrics,
2006 MMapCache *mmap_cache,
2007 JournalFile *template,
2008 JournalFile **ret) {
2012 bool newly_created = false;
/* Check whether the access mode is something other than O_RDONLY or O_RDWR. */
2017 if ((flags & O_ACCMODE) != O_RDONLY &&
2018 (flags & O_ACCMODE) != O_RDWR)
/* Check whether the name lacks both the ".journal" and ".journal~" suffix. */
2021 if (!endswith(fname, ".journal") &&
2022 !endswith(fname, ".journal~"))
2025 f = new0(JournalFile, 1);
2033 f->prot = prot_from_flags(flags);
/* Anything other than read-only counts as writable. */
2034 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2036 f->compress = compress;
2043 f->mmap = mmap_cache_ref(mmap_cache);
2045 f->mmap = mmap_cache_new();
2052 f->path = strdup(fname);
/* O_CLOEXEC is always added to the stored flags. */
2058 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2064 if (fstat(f->fd, &f->last_stat) < 0) {
/* A zero-length file opened writable is treated as newly created and gets
 * a header from journal_file_init_header(). */
2069 if (f->last_stat.st_size == 0 && f->writable) {
2070 newly_created = true;
2073 /* Try to load the FSPRG state, and if we can't, then
2074 * just don't do sealing */
2076 r = journal_file_fss_load(f);
2082 r = journal_file_init_header(f, template);
/* Stat again: last_stat is refreshed after the header was initialised. */
2086 if (fstat(f->fd, &f->last_stat) < 0) {
/* Check whether the file is smaller than the minimum header size. */
2092 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
/* Map the header from file offset 0, shared, with the protection derived from flags. */
2097 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2098 if (f->header == MAP_FAILED) {
/* Pre-existing files get their header verified. */
2104 if (!newly_created) {
2105 r = journal_file_verify_header(f);
/* Pre-existing writable files load the FSS state here; new files did so above. */
2111 if (!newly_created && f->writable) {
2112 r = journal_file_fss_load(f);
/* Metrics come from the caller's struct (after journal_default_metrics()
 * has processed it), or else from the template if one was given. */
2120 journal_default_metrics(metrics, f->fd);
2121 f->metrics = *metrics;
2122 } else if (template)
2123 f->metrics = template->metrics;
2125 r = journal_file_refresh_header(f);
2131 r = journal_file_hmac_setup(f);
/* For a newly created file, call the setup helpers for both hash tables and
 * journal_file_append_first_tag(). */
2136 if (newly_created) {
2137 r = journal_file_setup_field_hash_table(f);
2141 r = journal_file_setup_data_hash_table(f);
2146 r = journal_file_append_first_tag(f);
2152 r = journal_file_map_field_hash_table(f);
2156 r = journal_file_map_data_hash_table(f);
/* NOTE(review): presumably the failure path - confirm against the full source. */
2164 journal_file_close(f);
/* Rotate a journal file: rename the current file to an archive name built
 * from its seqnum ID, tail sequence number and tail realtime timestamp, mark
 * it STATE_ARCHIVED, open a new file under the original path and close the
 * old one.
 *
 * f:        in/out pointer to the journal file (NOTE(review): the update of
 *           *f is not visible in this excerpt).
 * compress: forwarded to journal_file_open() for the new file.
 * seal:     forwarded to journal_file_open() for the new file.
 *
 * NOTE(review): this excerpt omits lines (braces, error returns and the
 * statement that writes the separator byte at p[l - 8]). */
2169 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2172 JournalFile *old_file, *new_file = NULL;
/* Rotation is only considered for writable files whose path ends in ".journal". */
2180 if (!old_file->writable)
2183 if (!endswith(old_file->path, ".journal"))
2186 l = strlen(old_file->path);
/* Buffer size: l already includes the 8 bytes of ".journal", which is
 * stripped and re-appended. The extra bytes are 1 separator + 32 ID
 * characters + "-" + 16 hex digits + "-" + 16 hex digits + NUL. */
2188 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
/* Copy the path without its ".journal" suffix. */
2192 memcpy(p, old_file->path, l - 8);
/* The ID string starts one byte after the copied prefix. */
2194 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
/* Append "-<tail seqnum>-<tail realtime>.journal" after the 32 ID characters. */
2195 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2196 "-%016llx-%016llx.journal",
2197 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2198 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2200 r = rename(old_file->path, p);
2206 old_file->header->state = STATE_ARCHIVED;
/* The new file reuses the old path, flags, mode and mmap cache, and uses
 * the old file as template. NULL is passed for metrics. */
2208 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2209 journal_file_close(old_file);
/* Wrapper around journal_file_open(): if opening fails with one of the
 * error codes listed below, the file is renamed to a name ending in
 * ".journal~" and journal_file_open() is called a second time with the same
 * arguments.
 *
 * All parameters are forwarded unchanged to journal_file_open(). Only part
 * of the parameter list is visible in this excerpt.
 *
 * NOTE(review): this excerpt omits lines (braces, returns, some asprintf()
 * arguments). The outcome of each "if" below is not visible; presumably the
 * first result is returned as-is when a retry is not applicable. */
2215 int journal_file_open_reliably(
2221 JournalMetrics *metrics,
2222 MMapCache *mmap_cache,
2223 JournalFile *template,
2224 JournalFile **ret) {
2230 r = journal_file_open(fname, flags, mode, compress, seal,
2231 metrics, mmap_cache, template, ret);
/* True when r is none of the error codes that qualify for a retry. */
2232 if (r != -EBADMSG && /* corrupted */
2233 r != -ENODATA && /* truncated */
2234 r != -EHOSTDOWN && /* other machine */
2235 r != -EPROTONOSUPPORT && /* incompatible feature */
2236 r != -EBUSY && /* unclean shutdown */
2237 r != -ESHUTDOWN /* already archived */)
/* Further checks: read-only open, no O_CREAT, or a name not ending in ".journal". */
2240 if ((flags & O_ACCMODE) == O_RDONLY)
2243 if (!(flags & O_CREAT))
2246 if (!endswith(fname, ".journal"))
2249 /* The file is corrupted. Rotate it away and try it again (but only once) */
/* New name pattern: <prefix>@<current realtime>-<second value>.journal~
 * (the prefix length and the second value are arguments not visible here). */
2252 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2254 (unsigned long long) now(CLOCK_REALTIME),
2258 r = rename(fname, p);
2263 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
/* Second and last attempt: the result is returned without further handling. */
2265 return journal_file_open(fname, flags, mode, compress, seal,
2266 metrics, mmap_cache, template, ret);
/* Copy one entry from journal file "from" into journal file "to".
 *
 * Each data object referenced by the entry is read from "from" (and
 * uncompressed if flagged OBJECT_COMPRESSED), then appended to "to" with
 * journal_file_append_data(). Finally a new entry referencing the appended
 * data is written with journal_file_append_entry_internal().
 *
 * from:   source journal file.
 * to:     destination journal file.
 * o:      the entry object to copy (read via o->entry.*).
 * p:      offset of that entry in "from"; used to re-resolve o at the end.
 * seqnum, ret, offset: forwarded unchanged to
 *         journal_file_append_entry_internal().
 *
 * Returns the result of journal_file_append_entry_internal() on the final
 * path. -EPROTONOSUPPORT is returned on one path in the compressed-object
 * branch (NOTE(review): presumably when decompression support is compiled
 * out - the surrounding lines are not visible).
 *
 * NOTE(review): this excerpt omits lines (braces, error checks, some
 * declarations and assignments). */
2270 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2272 uint64_t q, xor_hash = 0;
/* The source entry's timestamps are carried over to the new entry. */
2285 ts.monotonic = le64toh(o->entry.monotonic);
2286 ts.realtime = le64toh(o->entry.realtime);
/* Check whether the entry's monotonic time is older than the destination's
 * tail entry, when the destination's tail monotonic time is valid. */
2288 if (to->tail_entry_monotonic_valid &&
2289 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
/* The item array for the new entry lives on the stack and is sized by the
 * source entry's item count. */
2292 n = journal_file_entry_n_items(o);
2293 items = alloca(sizeof(EntryItem) * n);
2295 for (i = 0; i < n; i++) {
/* Remember offset and stored hash before o is redirected to the data object. */
2302 q = le64toh(o->entry.items[i].object_offset);
2303 le_hash = o->entry.items[i].hash;
2305 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The hash stored in the entry item must match the data object's hash. */
2309 if (le_hash != o->data.hash)
/* Payload length = object size minus the offset of the payload field. */
2312 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2315 /* We hit the limit on 32bit machines */
2316 if ((uint64_t) t != l)
2319 if (o->object.flags & OBJECT_COMPRESSED) {
/* Decompress into the source file's compress buffer and use that as the data. */
2323 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2326 data = from->compress_buffer;
2329 return -EPROTONOSUPPORT;
2332 data = o->data.payload;
2334 r = journal_file_append_data(to, data, l, &u, &h);
/* Accumulate the XOR of all appended data hashes, and record the new
 * object's offset and hash as item i of the new entry. */
2338 xor_hash ^= le64toh(u->data.hash);
2339 items[i].object_offset = htole64(h);
2340 items[i].hash = u->data.hash;
/* o was redirected to a data object above; point it back at the source
 * entry at offset p. */
2342 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2347 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
/* Fill in and sanitise the size limits in *m.
 *
 * A field equal to (uint64_t) -1 is treated as "not set" and gets a derived
 * default. The resulting values are logged at debug level.
 *
 * m:  metrics to adjust in place (max_use, max_size, min_size, keep_free).
 * fd: file descriptor handed to fstatvfs() to obtain the file system size.
 *     If fstatvfs() fails, fs_size stays 0.
 *
 * NOTE(review): this excerpt omits lines (braces, "else" keywords and the
 * conditions that choose between file-system-derived and fixed defaults;
 * presumably they test fs_size > 0). Which visible statements belong to
 * which branch should be confirmed against the full source. */
2350 void journal_default_metrics(JournalMetrics *m, int fd) {
2351 uint64_t fs_size = 0;
2353 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
/* File system size in bytes = fragment size * number of blocks. */
2358 if (fstatvfs(fd, &ss) >= 0)
2359 fs_size = ss.f_frsize * ss.f_blocks;
/* max_use: when unset, derive 10% of the file system size, clamped to
 * [DEFAULT_MAX_USE_LOWER, DEFAULT_MAX_USE_UPPER]; DEFAULT_MAX_USE_LOWER is
 * also the fixed fallback. */
2361 if (m->max_use == (uint64_t) -1) {
2364 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2366 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2367 m->max_use = DEFAULT_MAX_USE_UPPER;
2369 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2370 m->max_use = DEFAULT_MAX_USE_LOWER;
2372 m->max_use = DEFAULT_MAX_USE_LOWER;
/* Page-align max_use and keep it at or above two minimum-size files. */
2374 m->max_use = PAGE_ALIGN(m->max_use);
2376 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2377 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
/* max_size: when unset, one eighth of max_use, capped at DEFAULT_MAX_SIZE_UPPER. */
2380 if (m->max_size == (uint64_t) -1) {
2381 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2383 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2384 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2386 m->max_size = PAGE_ALIGN(m->max_size);
2388 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2389 m->max_size = JOURNAL_FILE_SIZE_MIN;
/* max_use is raised if needed so that it holds at least two files of max_size. */
2391 if (m->max_size*2 > m->max_use)
2392 m->max_use = m->max_size*2;
/* min_size: defaults to JOURNAL_FILE_SIZE_MIN and is never allowed below it. */
2394 if (m->min_size == (uint64_t) -1)
2395 m->min_size = JOURNAL_FILE_SIZE_MIN;
2397 m->min_size = PAGE_ALIGN(m->min_size);
2399 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2400 m->min_size = JOURNAL_FILE_SIZE_MIN;
/* If min_size exceeds max_size, max_size is raised to match. */
2402 if (m->min_size > m->max_size)
2403 m->max_size = m->min_size;
/* keep_free: when unset, 5% of the file system size capped at
 * DEFAULT_KEEP_FREE_UPPER; DEFAULT_KEEP_FREE is the fixed fallback. */
2406 if (m->keep_free == (uint64_t) -1) {
2409 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2411 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2412 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2415 m->keep_free = DEFAULT_KEEP_FREE;
2418 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2419 format_bytes(a, sizeof(a), m->max_use),
2420 format_bytes(b, sizeof(b), m->max_size),
2421 format_bytes(c, sizeof(c), m->min_size),
2422 format_bytes(d, sizeof(d), m->keep_free));
/* Report the realtime timestamps of the first and last entry as recorded in
 * the file header.
 *
 * f:    journal file whose header is read.
 * from: receives the header's head_entry_realtime, converted from little endian.
 * to:   receives the header's tail_entry_realtime, converted from little endian.
 *
 * NOTE(review): this excerpt omits lines (braces, returns, and presumably
 * NULL checks on from/to). What happens when a timestamp is 0 is not
 * visible here. */
2425 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
/* Comparing the raw little-endian field with 0 needs no byte swap. */
2430 if (f->header->head_entry_realtime == 0)
2433 *from = le64toh(f->header->head_entry_realtime);
2437 if (f->header->tail_entry_realtime == 0)
2440 *to = le64toh(f->header->tail_entry_realtime);
/* Report the monotonic timestamps of the first and last entry that
 * reference the "_BOOT_ID=<boot_id>" data object in journal file f.
 *
 * f:       journal file to search.
 * boot_id: boot ID whose entries are of interest.
 * from:    receives the monotonic time of the entry at the data object's
 *          entry_offset.
 * to:      receives the monotonic time of the entry selected by
 *          generic_array_get_plus_one() with index n_entries - 1.
 *
 * NOTE(review): this excerpt omits lines (braces, error checks, returns,
 * presumably NULL checks on from/to, and the trailing arguments of the
 * generic_array_get_plus_one() call). */
2446 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* 9 bytes for "_BOOT_ID=", 32 for the ID string, 1 for the terminating NUL. */
2447 char t[9+32+1] = "_BOOT_ID=";
/* Write the ID string directly after the 9-byte prefix. */
2455 sd_id128_to_string(boot_id, t + 9);
/* Look up the data object for this string; p receives its offset. */
2457 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
/* The value is unsigned, so this is true only when no entry references the object. */
2461 if (le64toh(o->data.n_entries) <= 0)
/* Redirect o to the entry at the data object's entry_offset. */
2465 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2469 *from = le64toh(o->entry.monotonic);
/* o now refers to an entry; point it back at the data object at offset p
 * before reading its data fields again. */
2473 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Index n_entries - 1 selects the last entry referencing this data object. */
2477 r = generic_array_get_plus_one(f,
2478 le64toh(o->data.entry_offset),
2479 le64toh(o->data.entry_array_offset),
2480 le64toh(o->data.n_entries)-1,
/* NOTE(review): presumably o was set to the located entry by the call above. */
2485 *to = le64toh(o->entry.monotonic);
2491 bool journal_file_rotate_suggested(JournalFile *f) {
2494 /* If we gained new header fields we gained new features,
2495 * hence suggest a rotation */
2496 if (le64toh(f->header->header_size) < sizeof(Header)) {
2497 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2501 /* Let's check if the hash tables grew over a certain fill
2502 * level (75%, borrowing this value from Java's hash table
2503 * implementation), and if so suggest a rotation. To calculate
2504 * the fill level we need the n_data field, which only exists
2505 * in newer versions. */
2507 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2508 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2509 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2511 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2512 (unsigned long long) le64toh(f->header->n_data),
2513 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2514 (unsigned long long) (f->last_stat.st_size),
2515 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2519 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2520 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2521 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2523 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2524 (unsigned long long) le64toh(f->header->n_fields),
2525 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));