1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
/* Default hash table sizes in bytes, written as a slot count multiplied by
 * sizeof(HashItem): 2047 slots for the data table, 333 for the field table. */
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
/* journal_file_append_data() tests the payload size against this value
 * (size >= threshold) as part of its compression condition. */
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
45 /* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
53 /* This is the upper bound if we deduce the keep_free value from the file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57 /* This is the keep_free value when we can't determine the file system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
/* Shuts down a JournalFile. The visible steps are: append a final tag when
 * the file is sealed and writable, call mmap_cache_close_fd() for the fd,
 * turn an ONLINE header state into OFFLINE on writable files, then unmap the
 * header, close the fd, drop the mmap cache reference and release the
 * compression buffer, the sealing state and the HMAC handle.
 * NOTE(review): this extract omits several original lines (guards, braces,
 * preprocessor conditionals), so the exact condition under which each
 * release call runs cannot be confirmed from here. */
64 void journal_file_close(JournalFile *f) {
68 /* Write the final tag */
69 if (f->seal && f->writable)
70 journal_file_append_tag(f);
73 /* Sync everything to disk, before we mark the file offline */
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
77 if (f->writable && f->fd >= 0)
81 /* Mark the file offline. Don't override the archived state if it already is set */
82 if (f->writable && f->header->state == STATE_ONLINE)
83 f->header->state = STATE_OFFLINE;
/* The header mapping covers one page-aligned Header, matching the length
 * used by the msync() in journal_file_refresh_header(). */
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
89 close_nointr_nofail(f->fd);
94 mmap_cache_unref(f->mmap);
97 free(f->compress_buffer);
/* Sealing state is released with munmap() when held in fss_file and with
 * free() when only fsprg_state is set. */
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
109 gcry_md_close(f->hmac);
/* Builds a fresh header in the local variable h and writes it to file offset
 * 0 with pwrite(). The visible fields are: the 8-byte signature, the 64-bit
 * aligned header size, the COMPRESSED and SEALED flag bits taken from
 * f->compress and f->seal, and a random file ID.
 * The sequence number ID and tail sequence number are copied from the
 * template header; the alternative assignment reuses the new file ID as the
 * sequence number ID.
 * NOTE(review): the condition choosing between those two assignments and all
 * error checks are on lines missing from this extract; presumably the
 * template branch is taken when template is non-NULL. */
115 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
123 memcpy(h.signature, HEADER_SIGNATURE, 8);
124 h.header_size = htole64(ALIGN64(sizeof(h)));
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
132 r = sd_id128_randomize(&h.file_id);
137 h.seqnum_id = template->header->seqnum_id;
138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
140 h.seqnum_id = h.file_id;
142 k = pwrite(f->fd, &h, sizeof(h), 0);
/* Brings the mapped header up to date for the running system: stores the
 * current machine ID and boot ID, sets the state to ONLINE and flushes the
 * header page synchronously with msync().
 * NOTE(review): the error checks after the two sd_id128 calls are not
 * visible in this extract. */
152 static int journal_file_refresh_header(JournalFile *f) {
158 r = sd_id128_get_machine(&f->header->machine_id);
162 r = sd_id128_get_boot(&boot_id);
/* When the header already carries the current boot ID, the stored tail
 * monotonic timestamp is flagged as valid; journal_file_append_entry()
 * consults that flag before comparing timestamps. */
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
169 f->header->boot_id = boot_id;
171 f->header->state = STATE_ONLINE;
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
/* Sanity-checks the mapped header before the file is used. Visible checks,
 * in order: signature bytes, incompatible flag bits, compatible flag bits,
 * state range, minimum header size, presence of n_entry_arrays on sealed
 * files, header plus arena fitting inside the file size from last_stat,
 * tail object offset inside header plus arena, 64-bit validity of the four
 * table and object offsets, and each of those offsets lying at or beyond the
 * end of the header. A machine ID comparison and state inspection follow,
 * and finally f->compress and f->seal are taken from the header flags.
 * Unknown flag bits are rejected with -EPROTONOSUPPORT.
 * NOTE(review): each flag check appears in two variants (one masking the
 * known bit, one requiring zero); these are presumably selected by
 * preprocessor conditionals that are missing from this extract, as are the
 * return statements of most other checks and the condition guarding the
 * machine ID and state section. */
180 static int journal_file_verify_header(JournalFile *f) {
183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
190 return -EPROTONOSUPPORT;
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
196 /* When open for writing we refuse to open files with
197 * compatible flags, too */
200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
201 return -EPROTONOSUPPORT;
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
208 if (f->header->state >= _STATE_MAX)
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
/* The declared header and arena must fit into the file as last stat()ed. */
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
/* None of the recorded offsets may point into the header itself. */
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
238 sd_id128_t machine_id;
241 r = sd_id128_get_machine(&machine_id);
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
248 state = f->header->state;
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
253 } else if (state == STATE_ARCHIVED)
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
263 f->seal = JOURNAL_HEADER_SEALED(f->header);
/* Grows the file so that the byte range starting at offset with length size
 * is backed by allocated space. The visible logic computes the page-aligned
 * new size (never below the header size), compares it with old_size, with
 * metrics.max_size when that is set, and with the free space reported by
 * fstatvfs() after subtracting metrics.keep_free. It then calls
 * posix_fallocate() for the range between old and new size, refreshes
 * f->last_stat with fstat() and stores the new arena size in the header.
 * NOTE(review): the assignment target of the header_size + arena_size sum is
 * missing from this extract (presumably old_size), as are the return values
 * of every check. */
268 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
269 uint64_t old_size, new_size;
274 /* We assume that this file is not sparse, and we know that
275 * for sure, since we always call posix_fallocate()
279 le64toh(f->header->header_size) +
280 le64toh(f->header->arena_size);
282 new_size = PAGE_ALIGN(offset + size);
283 if (new_size < le64toh(f->header->header_size))
284 new_size = le64toh(f->header->header_size);
286 if (new_size <= old_size)
289 if (f->metrics.max_size > 0 &&
290 new_size > f->metrics.max_size)
293 if (new_size > f->metrics.min_size &&
294 f->metrics.keep_free > 0) {
297 if (fstatvfs(f->fd, &svfs) >= 0) {
300 available = svfs.f_bfree * svfs.f_bsize;
302 if (available >= f->metrics.keep_free)
303 available -= f->metrics.keep_free;
307 if (new_size - old_size > available)
312 /* Note that the glibc fallocate() fallback is very
313 inefficient, hence we try to minimize the allocation area
315 r = posix_fallocate(f->fd, old_size, new_size - old_size);
319 if (fstat(f->fd, &f->last_stat) < 0)
322 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
/* Returns, through ret, a mapping of the file range starting at offset with
 * length size. The range is first checked against the cached file size in
 * f->last_stat; if it appears to lie beyond the end, the stat data is
 * refreshed once and the check repeated, failing with -EADDRNOTAVAIL when
 * the range is still out of bounds or fstat() fails. The mapping itself is
 * delegated to mmap_cache_get(), whose result is returned unchanged. */
327 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
334 /* Avoid SIGBUS on invalid accesses */
335 if (offset + size > (uint64_t) f->last_stat.st_size) {
336 /* Hmm, out of range? Let's refresh the fstat() data
337 * first, before we trust that check. */
339 if (fstat(f->fd, &f->last_stat) < 0 ||
340 offset + size > (uint64_t) f->last_stat.st_size)
341 return -EADDRNOTAVAIL;
344 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
/* Returns the smallest size an object of the type recorded in o may have,
 * using a per-type table of structure sizes. Types that lie outside the
 * table, or whose table slot is zero, fall back to sizeof(ObjectHeader). */
347 static uint64_t minimum_header_size(Object *o) {
/* Indexed by object type; slots without an initializer are zero. */
349 static uint64_t table[] = {
350 [OBJECT_DATA] = sizeof(DataObject),
351 [OBJECT_FIELD] = sizeof(FieldObject),
352 [OBJECT_ENTRY] = sizeof(EntryObject),
353 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
354 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
356 [OBJECT_TAG] = sizeof(TagObject),
359 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
360 return sizeof(ObjectHeader);
362 return table[o->object.type];
/* Maps the object located at offset and validates it. Only the object header
 * is mapped at first, to read the recorded size and type. The visible checks
 * cover: 64-bit validity of the offset, size at least sizeof(ObjectHeader),
 * type above OBJECT_UNUSED, size at least the per-type minimum, and, when
 * type is non-negative, equality of the stored type with the requested one.
 * Objects larger than the bare header are then mapped again in full, using
 * the stored type as the mapping context.
 * NOTE(review): the return values of the failed checks and the assignment
 * of ret are on lines missing from this extract. */
365 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
375 /* Objects may only be located at multiple of 64 bit */
376 if (!VALID64(offset))
379 /* One context for each type, plus one catch-all for the rest */
380 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
382 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
387 s = le64toh(o->object.size);
389 if (s < sizeof(ObjectHeader))
392 if (o->object.type <= OBJECT_UNUSED)
395 if (s < minimum_header_size(o))
/* A negative type disables the type match; callers in this file pass -1
 * when any object type is acceptable. */
398 if (type >= 0 && o->object.type != type)
401 if (s > sizeof(ObjectHeader)) {
402 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
/* Produces the sequence number for the next entry: one more than the tail
 * sequence number stored in the header. The result is written back as the
 * new tail, and also as the head sequence number when that is still zero.
 * NOTE(review): the code that merges in the external counter passed via
 * seqnum is described by the comment below but is not visible in this
 * extract, and neither is the return statement. */
413 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
418 r = le64toh(f->header->tail_entry_seqnum) + 1;
421 /* If an external seqnum counter was passed, we update
422 * both the local and the external one, and set it to
423 * the maximum of both */
431 f->header->tail_entry_seqnum = htole64(r);
433 if (f->header->head_entry_seqnum == 0)
434 f->header->head_entry_seqnum = htole64(r);
/* Appends a new, typed object of the given size to the end of the arena.
 * The insertion offset p is derived from the header: either the header size
 * itself, or the offset of the current tail object advanced by that object's
 * 64-bit aligned size. Space is reserved with journal_file_allocate(), the
 * range is mapped, type and size are written into the object header, and the
 * file header's tail_object_offset and n_objects are updated.
 * type must be a real object type and size must cover at least an
 * ObjectHeader (both asserted).
 * NOTE(review): the condition that selects between the two starting values
 * of p is missing from this extract (presumably a zero tail offset), as are
 * the error checks and the assignments of ret and offset. */
439 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
446 assert(type > 0 && type < _OBJECT_TYPE_MAX);
447 assert(size >= sizeof(ObjectHeader));
451 p = le64toh(f->header->tail_object_offset);
453 p = le64toh(f->header->header_size);
455 r = journal_file_move_to_object(f, -1, p, &tail);
459 p += ALIGN64(le64toh(tail->object.size));
462 r = journal_file_allocate(f, p, size);
466 r = journal_file_move_to(f, type, false, p, size, &t);
473 o->object.type = type;
474 o->object.size = htole64(size);
476 f->header->tail_object_offset = htole64(p);
477 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
/* Creates the data hash table object. The table size in bytes is derived
 * from metrics.max_size and raised to DEFAULT_DATA_HASH_TABLE_SIZE when the
 * computed value is smaller. The object is appended with room for the items
 * array, the items are zeroed, and the header records the offset of the
 * items array (not of the object) together with its size in bytes.
 * NOTE(review): the error check after journal_file_append_object() and the
 * trailing arguments of that call are not visible in this extract. */
485 static int journal_file_setup_data_hash_table(JournalFile *f) {
492 /* We estimate that we need 1 hash table entry per 768 of
493 journal file and we want to make sure we never get beyond
494 75% fill level. Calculate the hash table size for the
495 maximum file size based on these metrics. */
497 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
498 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
499 s = DEFAULT_DATA_HASH_TABLE_SIZE;
501 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
503 r = journal_file_append_object(f,
504 OBJECT_DATA_HASH_TABLE,
505 offsetof(Object, hash_table.items) + s,
510 memset(o->hash_table.items, 0, s);
512 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
513 f->header->data_hash_table_size = htole64(s);
/* Creates the field hash table object with the fixed size
 * DEFAULT_FIELD_HASH_TABLE_SIZE. The items are zeroed and the header records
 * the offset of the items array together with its size in bytes.
 * NOTE(review): the error check after journal_file_append_object() and the
 * trailing arguments of that call are not visible in this extract. */
518 static int journal_file_setup_field_hash_table(JournalFile *f) {
525 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
526 r = journal_file_append_object(f,
527 OBJECT_FIELD_HASH_TABLE,
528 offsetof(Object, hash_table.items) + s,
533 memset(o->hash_table.items, 0, s);
535 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
536 f->header->field_hash_table_size = htole64(s);
/* Reads the data hash table offset and size from the header, maps a range
 * through journal_file_move_to() with the OBJECT_DATA_HASH_TABLE context and
 * caches the resulting pointer in f->data_hash_table.
 * NOTE(review): the remaining arguments of the mapping call and its error
 * check are not visible in this extract; presumably p and s are passed as
 * offset and size. */
541 static int journal_file_map_data_hash_table(JournalFile *f) {
548 p = le64toh(f->header->data_hash_table_offset);
549 s = le64toh(f->header->data_hash_table_size);
551 r = journal_file_move_to(f,
552 OBJECT_DATA_HASH_TABLE,
559 f->data_hash_table = t;
/* Reads the field hash table offset and size from the header, maps a range
 * through journal_file_move_to() with the OBJECT_FIELD_HASH_TABLE context
 * and caches the resulting pointer in f->field_hash_table.
 * NOTE(review): the remaining arguments of the mapping call and its error
 * check are not visible in this extract; presumably p and s are passed as
 * offset and size. */
563 static int journal_file_map_field_hash_table(JournalFile *f) {
570 p = le64toh(f->header->field_hash_table_offset);
571 s = le64toh(f->header->field_hash_table_size);
573 r = journal_file_move_to(f,
574 OBJECT_FIELD_HASH_TABLE,
581 f->field_hash_table = t;
/* Hooks the data object o, stored at offset, into the data hash table. The
 * link fields and entry counters of o are reset, the bucket is selected as
 * hash modulo the number of HashItem slots, and the object becomes the new
 * tail of that bucket: the bucket head is set when the bucket was empty,
 * otherwise the previous tail object gets its next_hash_offset patched.
 * n_data in the header is incremented when the header has that field.
 * Objects that are not of type OBJECT_DATA are rejected.
 * NOTE(review): the test that distinguishes an empty bucket from a populated
 * one (presumably p == 0) and all return statements are missing from this
 * extract. */
585 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
593 if (o->object.type != OBJECT_DATA)
596 /* This might alter the window we are looking at */
598 o->data.next_hash_offset = o->data.next_field_offset = 0;
599 o->data.entry_offset = o->data.entry_array_offset = 0;
600 o->data.n_entries = 0;
602 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
603 p = le64toh(f->data_hash_table[h].tail_hash_offset);
605 /* Only entry in the hash table is easy */
606 f->data_hash_table[h].head_hash_offset = htole64(offset);
608 /* Move back to the previous data object, to patch in
611 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
615 o->data.next_hash_offset = htole64(offset);
618 f->data_hash_table[h].tail_hash_offset = htole64(offset);
620 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
621 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
/* Looks up a data object whose payload equals the size bytes at data, given
 * the precomputed hash of that payload. The bucket is hash modulo the number
 * of HashItem slots; the chain is walked via next_hash_offset. For each
 * object the stored hash is compared first. Compressed objects are
 * decompressed into f->compress_buffer before the memcmp(); uncompressed
 * ones are compared directly when their object size equals the expected
 * size. data may be NULL only when size is 0 (asserted).
 * NOTE(review): the loop construct, the hit and miss return values, and the
 * preprocessor conditional around the -EPROTONOSUPPORT return (presumably
 * the build without compression support) are missing from this extract. */
626 int journal_file_find_data_object_with_hash(
628 const void *data, uint64_t size, uint64_t hash,
629 Object **ret, uint64_t *offset) {
631 uint64_t p, osize, h;
635 assert(data || size == 0);
/* Object size an uncompressed match would have. */
637 osize = offsetof(Object, data.payload) + size;
639 if (f->header->data_hash_table_size == 0)
642 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
643 p = le64toh(f->data_hash_table[h].head_hash_offset);
648 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
652 if (le64toh(o->data.hash) != hash)
655 if (o->object.flags & OBJECT_COMPRESSED) {
659 l = le64toh(o->object.size);
660 if (l <= offsetof(Object, data.payload))
/* l becomes the length of the compressed payload only. */
663 l -= offsetof(Object, data.payload);
665 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
669 memcmp(f->compress_buffer, data, size) == 0) {
680 return -EPROTONOSUPPORT;
683 } else if (le64toh(o->object.size) == osize &&
684 memcmp(o->data.payload, data, size) == 0) {
696 p = le64toh(o->data.next_hash_offset);
/* Convenience wrapper: computes hash64() over the size bytes at data and
 * forwards to journal_file_find_data_object_with_hash(), returning its
 * result. data may be NULL only when size is 0 (asserted). */
702 int journal_file_find_data_object(
704 const void *data, uint64_t size,
705 Object **ret, uint64_t *offset) {
710 assert(data || size == 0);
712 hash = hash64(data, size);
714 return journal_file_find_data_object_with_hash(f,
/* Stores a payload as a data object. The payload is hashed and looked up
 * first with journal_file_find_data_object_with_hash(). A new OBJECT_DATA
 * object sized for the uncompressed payload is then appended and its hash
 * field set. When the size reaches COMPRESSION_SIZE_THRESHOLD, compression
 * into the object payload is attempted; on success the object size is
 * shrunk to the compressed length and OBJECT_COMPRESSED is set. Otherwise
 * the payload is copied verbatim. The object is then linked into the hash
 * table, mapped again, and fed to the HMAC.
 * NOTE(review): how the lookup result is handled (presumably an existing
 * object is returned without appending), the first half of the compression
 * condition and all error checks are missing from this extract. */
719 static int journal_file_append_data(
721 const void *data, uint64_t size,
722 Object **ret, uint64_t *offset) {
728 bool compressed = false;
731 assert(data || size == 0);
733 hash = hash64(data, size);
735 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
749 osize = offsetof(Object, data.payload) + size;
750 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
754 o->data.hash = htole64(hash);
758 size >= COMPRESSION_SIZE_THRESHOLD) {
761 compressed = compress_blob(data, size, o->data.payload, &rsize);
764 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
765 o->object.flags |= OBJECT_COMPRESSED;
767 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
/* Plain copy when compression was not attempted or did not succeed. */
772 if (!compressed && size > 0)
773 memcpy(o->data.payload, data, size);
775 r = journal_file_link_data(f, o, p, hash);
779 /* The linking might have altered the window, so let's
780 * refresh our pointer */
781 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
786 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
/* Returns the number of EntryItem slots in entry object o, computed as the
 * object size minus the offset of entry.items, divided by
 * sizeof(EntryItem). Objects of another type take an early exit whose
 * return value is not visible in this extract. */
800 uint64_t journal_file_entry_n_items(Object *o) {
803 if (o->object.type != OBJECT_ENTRY)
806 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
/* Returns the number of 64-bit slots in entry array object o, computed as
 * the object size minus the offset of entry_array.items, divided by
 * sizeof(uint64_t). Objects of another type take an early exit whose return
 * value is not visible in this extract. */
809 uint64_t journal_file_entry_array_n_items(Object *o) {
812 if (o->object.type != OBJECT_ENTRY_ARRAY)
815 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
/* Returns the number of HashItem slots in hash table object o, computed as
 * the object size minus the offset of hash_table.items, divided by
 * sizeof(HashItem). Accepts both data and field hash table objects; any
 * other type takes an early exit whose return value is not visible in this
 * extract. */
818 uint64_t journal_file_hash_table_n_items(Object *o) {
821 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
822 o->object.type != OBJECT_FIELD_HASH_TABLE)
825 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
/* Records the entry offset p at logical position *idx of a chain of entry
 * array objects. The chain is walked through next_entry_array_offset; when
 * an existing array has room the slot is written and *idx incremented.
 * Otherwise a new OBJECT_ENTRY_ARRAY object with n item slots is appended,
 * fed to the HMAC, filled at index i, linked from the previous array (at
 * offset ap), counted in n_entry_arrays when the header has that field, and
 * *idx is incremented. *idx is stored in little-endian form.
 * NOTE(review): the remaining parameters, the loop and its index
 * arithmetic, the rule that chooses n for a new array, and the handling of
 * an empty chain are on lines missing from this extract. */
828 static int link_entry_into_array(JournalFile *f,
833 uint64_t n = 0, ap = 0, q, i, a, hidx;
/* hidx keeps the original logical index; i is the working copy. */
842 i = hidx = le64toh(*idx);
845 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
849 n = journal_file_entry_array_n_items(o);
851 o->entry_array.items[i] = htole64(p);
852 *idx = htole64(hidx + 1);
858 a = le64toh(o->entry_array.next_entry_array_offset);
869 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
870 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
876 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
881 o->entry_array.items[i] = htole64(p);
886 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
890 o->entry_array.next_entry_array_offset = htole64(q);
893 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
894 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
896 *idx = htole64(hidx + 1);
/* Variant of link_entry_into_array() for lists that keep one element
 * outside the array chain. The visible code calls link_entry_into_array()
 * with an index one lower than *idx, using a local copy i so that the callee
 * does not update *idx itself, and then increments *idx by one.
 * NOTE(review): the remaining parameters and the branch that stores the
 * very first element (presumably when *idx is zero) are missing from this
 * extract. */
901 static int link_entry_into_array_plus_one(JournalFile *f,
920 i = htole64(le64toh(*idx) - 1);
921 r = link_entry_into_array(f, first, &i, p);
926 *idx = htole64(le64toh(*idx) + 1);
/* Links the entry at offset into the entry list of the data object
 * referenced by item i of entry o. The item's object_offset is read, the
 * data object is mapped (this overwrites the local pointer o), and the
 * entry is added through link_entry_into_array_plus_one() using the data
 * object's entry_offset and entry_array_offset fields.
 * NOTE(review): the remaining call arguments and the error checks are not
 * visible in this extract. */
930 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
937 p = le64toh(o->entry.items[i].object_offset);
941 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
945 return link_entry_into_array_plus_one(f,
946 &o->data.entry_offset,
947 &o->data.entry_array_offset,
/* Makes a freshly written entry object reachable. After a full memory
 * barrier the entry is added to the global entry array chain rooted in the
 * header (which also advances n_entries), the head and tail realtime
 * timestamps and the tail monotonic timestamp in the header are updated
 * from the entry, and each of the entry's items is linked to its data
 * object through journal_file_link_entry_item().
 * Objects that are not of type OBJECT_ENTRY are rejected.
 * NOTE(review): return values and per-call error checks are missing from
 * this extract. */
952 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
960 if (o->object.type != OBJECT_ENTRY)
963 __sync_synchronize();
965 /* Link up the entry itself */
966 r = link_entry_into_array(f,
967 &f->header->entry_array_offset,
968 &f->header->n_entries,
973 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
/* The head timestamp is only set once, while it is still zero. */
975 if (f->header->head_entry_realtime == 0)
976 f->header->head_entry_realtime = o->entry.realtime;
978 f->header->tail_entry_realtime = o->entry.realtime;
979 f->header->tail_entry_monotonic = o->entry.monotonic;
981 f->tail_entry_monotonic_valid = true;
983 /* Link up the items */
984 n = journal_file_entry_n_items(o);
985 for (i = 0; i < n; i++) {
986 r = journal_file_link_entry_item(f, o, offset, i);
/* Writes one entry object from already prepared items. The object is sized
 * for n_items EntryItem slots and appended as OBJECT_ENTRY. Its fields are
 * then filled in: sequence number from journal_file_entry_seqnum(), the
 * items array, realtime and monotonic timestamps from ts, the xor hash and
 * the boot ID from the header. The object is fed to the HMAC and linked
 * with journal_file_link_entry().
 * items may be NULL only when n_items is 0 (asserted).
 * NOTE(review): the remaining parameters, the error checks and the
 * assignments of ret and offset are missing from this extract. */
994 static int journal_file_append_entry_internal(
996 const dual_timestamp *ts,
998 const EntryItem items[], unsigned n_items,
1000 Object **ret, uint64_t *offset) {
1007 assert(items || n_items == 0);
1010 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1012 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1016 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1017 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1018 o->entry.realtime = htole64(ts->realtime);
1019 o->entry.monotonic = htole64(ts->monotonic);
1020 o->entry.xor_hash = htole64(xor_hash);
/* Both fields are stored in file byte order already, hence no conversion. */
1021 o->entry.boot_id = f->header->boot_id;
1024 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1029 r = journal_file_link_entry(f, o, np);
/* Signals watchers that the file changed. After a full memory barrier the
 * file is truncated to the size cached in f->last_stat, i.e. its current
 * size, purely for the notification side effect explained below. A failing
 * ftruncate() is logged and otherwise ignored. */
1042 void journal_file_post_change(JournalFile *f) {
1045 /* inotify() does not receive IN_MODIFY events from file
1046 * accesses done via mmap(). After each access we hence
1047 * trigger IN_MODIFY by truncating the journal file to its
1048 * current size which triggers IN_MODIFY. */
1050 __sync_synchronize();
1052 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1053 log_error("Failed to truncate file to its own size: %m");
/* Appends one log entry built from n_iovec payload buffers. A local
 * timestamp _ts is filled by dual_timestamp_get() (presumably when ts is
 * NULL). The monotonic timestamp is compared with the tail entry of this
 * file when that value is flagged valid. A tag may be appended first via
 * journal_file_maybe_append_tag(). Each iovec is then stored with
 * journal_file_append_data(); its offset and hash go into a stack-allocated
 * items array and its hash is folded into xor_hash. The entry itself is
 * written by journal_file_append_entry_internal() and
 * journal_file_post_change() is called afterwards.
 * iovec may be NULL only when n_iovec is 0 (asserted).
 * NOTE(review): the result of the timestamp comparison, the error checks
 * and the final return are missing from this extract. The items array is
 * obtained with alloca(), so its size grows with n_iovec on the stack. */
1056 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1060 uint64_t xor_hash = 0;
1061 struct dual_timestamp _ts;
1064 assert(iovec || n_iovec == 0);
1070 dual_timestamp_get(&_ts);
1074 if (f->tail_entry_monotonic_valid &&
1075 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1079 r = journal_file_maybe_append_tag(f, ts->realtime);
1084 /* alloca() can't take 0, hence let's allocate at least one */
1085 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1087 for (i = 0; i < n_iovec; i++) {
1091 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1095 xor_hash ^= le64toh(o->data.hash);
1096 items[i].object_offset = htole64(p);
1097 items[i].hash = o->data.hash;
1100 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1102 journal_file_post_change(f);
/* Fetches the entry stored at a logical index of an entry array chain. The
 * chain is walked array by array: each array is mapped, its item count n is
 * read, and either the wanted slot is read into p or the walk continues
 * with next_entry_array_offset. A zero array offset or zero entry offset
 * takes an early exit; otherwise the entry object at p is mapped.
 * NOTE(review): the remaining parameters, the loop and index arithmetic,
 * the return values and the assignments of ret and offset are missing from
 * this extract. */
1107 static int generic_array_get(JournalFile *f,
1110 Object **ret, uint64_t *offset) {
1122 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1126 n = journal_file_entry_array_n_items(o);
1128 p = le64toh(o->entry_array.items[i]);
1133 a = le64toh(o->entry_array.next_entry_array_offset);
1136 if (a <= 0 || p <= 0)
1139 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
/* Variant of generic_array_get() for lists that keep one element (extra)
 * outside the array chain. The visible code either maps the entry at extra
 * directly or forwards to generic_array_get() with the index lowered by
 * one.
 * NOTE(review): the remaining parameters and the condition selecting
 * between the two paths (presumably i == 0) are missing from this
 * extract. */
1152 static int generic_array_get_plus_one(JournalFile *f,
1156 Object **ret, uint64_t *offset) {
1165 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1178 return generic_array_get(f, first, i-1, ret, offset);
/* Searches a chain of entry arrays for the entry matching needle, using
 * the caller-supplied test_object callback to classify candidate offsets.
 * For each array in the chain one item is probed first; when the probe
 * reports TEST_RIGHT a bisection runs inside that array between left and
 * right, otherwise the walk appears to continue with
 * next_entry_array_offset. A TEST_FOUND result is turned into TEST_RIGHT
 * for DIRECTION_DOWN and TEST_LEFT for DIRECTION_UP. subtract_one selects
 * the item preceding index i as the result. The entry at the resulting
 * offset is mapped and *idx receives t + i, minus one when subtract_one is
 * set.
 * NOTE(review): the remaining parameters, the loop constructs, the updates
 * of left, right, t and last_p, and all return values are on lines missing
 * from this extract; the description above covers only what the visible
 * lines establish. */
1187 static int generic_array_bisect(JournalFile *f,
1191 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1192 direction_t direction,
1197 uint64_t a, p, t = 0, i = 0, last_p = 0;
1198 bool subtract_one = false;
1199 Object *o, *array = NULL;
1203 assert(test_object);
1207 uint64_t left, right, k, lp;
1209 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1213 k = journal_file_entry_array_n_items(array);
1219 lp = p = le64toh(array->entry_array.items[i]);
1223 r = test_object(f, p, needle);
1227 if (r == TEST_FOUND)
1228 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1230 if (r == TEST_RIGHT) {
1234 if (left == right) {
1235 if (direction == DIRECTION_UP)
1236 subtract_one = true;
1242 assert(left < right);
1244 i = (left + right) / 2;
1245 p = le64toh(array->entry_array.items[i]);
1249 r = test_object(f, p, needle);
1253 if (r == TEST_FOUND)
1254 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1256 if (r == TEST_RIGHT)
1264 if (direction == DIRECTION_UP) {
1266 subtract_one = true;
1277 a = le64toh(array->entry_array.next_entry_array_offset);
1283 if (subtract_one && t == 0 && i == 0)
1286 if (subtract_one && i == 0)
1288 else if (subtract_one)
1289 p = le64toh(array->entry_array.items[i-1]);
1291 p = le64toh(array->entry_array.items[i]);
1293 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1304 *idx = t + i + (subtract_one ? -1 : 0);
/* Variant of generic_array_bisect() for lists that keep one element (extra)
 * outside the array chain. extra is classified with test_object first; the
 * chain rooted at first is then searched with generic_array_bisect() using
 * n-1 as the element count. step_back is set for DIRECTION_UP searches and,
 * when the chain search yields 0, the element at extra is mapped and used
 * as the result instead.
 * NOTE(review): the remaining parameters, the return values, the index
 * adjustment of idx and parts of the branch structure are missing from this
 * extract. */
1309 static int generic_array_bisect_plus_one(JournalFile *f,
1314 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1315 direction_t direction,
1321 bool step_back = false;
1325 assert(test_object);
1330 /* This bisects the array in object 'first', but first checks
1332 r = test_object(f, extra, needle);
1336 if (r == TEST_FOUND)
1337 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1339 /* if we are looking with DIRECTION_UP then we need to first
1340 see if in the actual array there is a matching entry, and
1341 return the last one of that. But if there isn't any we need
1342 to return this one. Hence remember this, and return it
1345 step_back = direction == DIRECTION_UP;
1347 if (r == TEST_RIGHT) {
1348 if (direction == DIRECTION_DOWN)
1354 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1356 if (r == 0 && step_back)
1365 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
/* Comparison helper whose signature matches the test_object callback taken
 * by generic_array_bisect(): it compares the raw offset p with needle.
 * NOTE(review): only the p < needle branch is visible in this extract; the
 * other branches and the returned TEST_* codes are on missing lines. */
1381 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1387 else if (p < needle)
/* Seeks within the global entry list of the file: forwards to
 * generic_array_bisect() over the array chain at header entry_array_offset
 * with header n_entries elements, in the given direction.
 * NOTE(review): the remaining parameters and call arguments (needle and
 * comparison callback, presumably test_object_offset) are not visible in
 * this extract. */
1393 int journal_file_move_to_entry_by_offset(
1396 direction_t direction,
1400 return generic_array_bisect(f,
1401 le64toh(f->header->entry_array_offset),
1402 le64toh(f->header->n_entries),
/* Comparison helper whose signature matches the test_object callback taken
 * by generic_array_bisect(): it maps the entry at offset p and compares its
 * sequence number with needle (equal, then less-than).
 * NOTE(review): the returned TEST_* codes and the error check are on lines
 * missing from this extract. */
1410 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1417 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1421 if (le64toh(o->entry.seqnum) == needle)
1423 else if (le64toh(o->entry.seqnum) < needle)
/* Seeks within the global entry list of the file: forwards to
 * generic_array_bisect() over the array chain at header entry_array_offset
 * with header n_entries elements, in the given direction.
 * NOTE(review): the remaining parameters and call arguments (needle and
 * comparison callback, presumably test_object_seqnum) are not visible in
 * this extract. */
1429 int journal_file_move_to_entry_by_seqnum(
1432 direction_t direction,
1436 return generic_array_bisect(f,
1437 le64toh(f->header->entry_array_offset),
1438 le64toh(f->header->n_entries),
/* Comparison helper passed as the test_object callback by the realtime
 * seek functions in this file: it maps the entry at offset p and compares
 * its realtime timestamp with needle (equal, then less-than).
 * NOTE(review): the returned TEST_* codes and the error check are on lines
 * missing from this extract. */
1445 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1452 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1456 if (le64toh(o->entry.realtime) == needle)
1458 else if (le64toh(o->entry.realtime) < needle)
/* Seeks within the global entry list of the file by realtime timestamp:
 * forwards to generic_array_bisect() over the array chain at header
 * entry_array_offset with header n_entries elements, using
 * test_object_realtime as the comparison callback.
 * NOTE(review): the remaining parameters and call arguments are not visible
 * in this extract. */
1464 int journal_file_move_to_entry_by_realtime(
1467 direction_t direction,
1471 return generic_array_bisect(f,
1472 le64toh(f->header->entry_array_offset),
1473 le64toh(f->header->n_entries),
1475 test_object_realtime,
/* Comparison helper passed as the test_object callback by the monotonic
 * seek functions in this file: it maps the entry at offset p and compares
 * its monotonic timestamp with needle (equal, then less-than).
 * NOTE(review): the returned TEST_* codes and the error check are on lines
 * missing from this extract. */
1480 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1487 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1491 if (le64toh(o->entry.monotonic) == needle)
1493 else if (le64toh(o->entry.monotonic) < needle)
/* Seeks by monotonic timestamp within one boot. A lookup key is built in t
 * by writing the boot ID string after the _BOOT_ID= prefix, the matching
 * data object is located with journal_file_find_data_object(), and the
 * entry list of that data object is searched with
 * generic_array_bisect_plus_one() using test_object_monotonic.
 * NOTE(review): the remaining parameters, the handling of a failed or empty
 * lookup and the other call arguments are not visible in this extract. */
1499 int journal_file_move_to_entry_by_monotonic(
1503 direction_t direction,
/* 9 bytes of prefix, 32 bytes for the ID string written at t + 9, and one
 * terminating byte. */
1507 char t[9+32+1] = "_BOOT_ID=";
1513 sd_id128_to_string(boot_id, t + 9);
1514 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1520 return generic_array_bisect_plus_one(f,
1521 le64toh(o->data.entry_offset),
1522 le64toh(o->data.entry_array_offset),
1523 le64toh(o->data.n_entries),
1525 test_object_monotonic,
/* Steps to a neighbouring entry in the global entry list. Without a current
 * entry the start index is 0 for DIRECTION_DOWN and n - 1 otherwise, where
 * n is header n_entries. With a current entry o at offset p, its index is
 * located with generic_array_bisect() and then adjusted according to
 * direction. The entry at the resulting index is fetched with
 * generic_array_get().
 * p must be non-zero whenever o is given (asserted); o must be an
 * OBJECT_ENTRY.
 * NOTE(review): the branch conditions, the index adjustment, the bounds
 * handling and the return values are on lines missing from this extract. */
1530 int journal_file_next_entry(
1532 Object *o, uint64_t p,
1533 direction_t direction,
1534 Object **ret, uint64_t *offset) {
1540 assert(p > 0 || !o);
1542 n = le64toh(f->header->n_entries);
1547 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1549 if (o->object.type != OBJECT_ENTRY)
1552 r = generic_array_bisect(f,
1553 le64toh(f->header->entry_array_offset),
1554 le64toh(f->header->n_entries),
1563 if (direction == DIRECTION_DOWN) {
1576 /* And jump to it */
1577 return generic_array_get(f,
1578 le64toh(f->header->entry_array_offset),
/* Moves a signed number of entries (skip) away from entry o at offset p in
 * the global entry list. The index of o is located with
 * generic_array_bisect(); a negative skip is subtracted after the visible
 * check that its magnitude does not reach past index i, a positive skip is
 * added. The entry at the new index is fetched with generic_array_get().
 * o must be an OBJECT_ENTRY.
 * NOTE(review): the remaining parameter (skip), the clamping applied at
 * both ends of the list and the return values are on lines missing from
 * this extract. */
1583 int journal_file_skip_entry(
1585 Object *o, uint64_t p,
1587 Object **ret, uint64_t *offset) {
1596 if (o->object.type != OBJECT_ENTRY)
1599 r = generic_array_bisect(f,
1600 le64toh(f->header->entry_array_offset),
1601 le64toh(f->header->n_entries),
1610 /* Calculate new index */
1612 if ((uint64_t) -skip >= i)
1615 i = i - (uint64_t) -skip;
1617 i += (uint64_t) skip;
1619 n = le64toh(f->header->n_entries);
1626 return generic_array_get(f,
1627 le64toh(f->header->entry_array_offset),
/* Steps to a neighbouring entry within the entry list of one data object.
 * The data object at data_offset is mapped and its n_entries read. Without
 * a current entry the start index is 0 for DIRECTION_DOWN and n - 1
 * otherwise. With a current entry o at offset p, its index is located with
 * generic_array_bisect_plus_one() and adjusted according to direction. The
 * result is fetched with generic_array_get_plus_one().
 * p must be non-zero whenever o is given (asserted); o must be an
 * OBJECT_ENTRY.
 * NOTE(review): the branch conditions, the index adjustment, the bounds
 * handling and the return values are on lines missing from this extract. */
1632 int journal_file_next_entry_for_data(
1634 Object *o, uint64_t p,
1635 uint64_t data_offset,
1636 direction_t direction,
1637 Object **ret, uint64_t *offset) {
1644 assert(p > 0 || !o);
1646 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1650 n = le64toh(d->data.n_entries);
1655 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1657 if (o->object.type != OBJECT_ENTRY)
1660 r = generic_array_bisect_plus_one(f,
1661 le64toh(d->data.entry_offset),
1662 le64toh(d->data.entry_array_offset),
1663 le64toh(d->data.n_entries),
1673 if (direction == DIRECTION_DOWN) {
1687 return generic_array_get_plus_one(f,
1688 le64toh(d->data.entry_offset),
1689 le64toh(d->data.entry_array_offset),
/* Seeks within the entry list of the data object at data_offset: the data
 * object is mapped and its entry_offset, entry_array_offset and n_entries
 * are handed to generic_array_bisect_plus_one().
 * NOTE(review): the remaining parameters and call arguments (needle and
 * comparison callback, presumably test_object_offset) and the error check
 * are not visible in this extract. */
1694 int journal_file_move_to_entry_by_offset_for_data(
1696 uint64_t data_offset,
1698 direction_t direction,
1699 Object **ret, uint64_t *offset) {
1706 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1710 return generic_array_bisect_plus_one(f,
1711 le64toh(d->data.entry_offset),
1712 le64toh(d->data.entry_array_offset),
1713 le64toh(d->data.n_entries),
/* Seeks by monotonic timestamp within one boot, restricted to entries that
 * reference the data object at data_offset. The _BOOT_ID= data object for
 * boot_id is located (its offset is kept in b) and its entry list is
 * searched by monotonic time. The function then alternates between
 * searching the entry list of the data object at data_offset and the entry
 * list of the boot ID object at b, until both agree on one entry, as the
 * existing comment below describes.
 * NOTE(review): the loop, the needles passed to the later searches, the
 * termination test and all return values are on lines missing from this
 * extract. */
1720 int journal_file_move_to_entry_by_monotonic_for_data(
1722 uint64_t data_offset,
1725 direction_t direction,
1726 Object **ret, uint64_t *offset) {
/* 9 bytes of prefix, 32 bytes for the ID string written at t + 9, and one
 * terminating byte. */
1728 char t[9+32+1] = "_BOOT_ID=";
1735 /* First, seek by time */
1736 sd_id128_to_string(boot_id, t + 9);
1737 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1743 r = generic_array_bisect_plus_one(f,
1744 le64toh(o->data.entry_offset),
1745 le64toh(o->data.entry_array_offset),
1746 le64toh(o->data.n_entries),
1748 test_object_monotonic,
1754 /* And now, continue seeking until we find an entry that
1755 * exists in both bisection arrays */
1761 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1765 r = generic_array_bisect_plus_one(f,
1766 le64toh(d->data.entry_offset),
1767 le64toh(d->data.entry_array_offset),
1768 le64toh(d->data.n_entries),
/* The boot ID object is mapped again by offset before its fields are
 * read. */
1776 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1780 r = generic_array_bisect_plus_one(f,
1781 le64toh(o->data.entry_offset),
1782 le64toh(o->data.entry_array_offset),
1783 le64toh(o->data.n_entries),
/* Seeks within the entry list of the data object at data_offset: the data
 * object is mapped and its entry_offset, entry_array_offset and n_entries
 * are handed to generic_array_bisect_plus_one().
 * NOTE(review): the remaining parameters and call arguments (needle and
 * comparison callback, presumably test_object_seqnum) and the error check
 * are not visible in this extract. */
1807 int journal_file_move_to_entry_by_seqnum_for_data(
1809 uint64_t data_offset,
1811 direction_t direction,
1812 Object **ret, uint64_t *offset) {
1819 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1823 return generic_array_bisect_plus_one(f,
1824 le64toh(d->data.entry_offset),
1825 le64toh(d->data.entry_array_offset),
1826 le64toh(d->data.n_entries),
/* Seeks by realtime timestamp within the entry list of the data object at
 * data_offset: the data object is mapped and its entry_offset,
 * entry_array_offset and n_entries are handed to
 * generic_array_bisect_plus_one() with test_object_realtime as the
 * comparison callback.
 * NOTE(review): the remaining parameters and call arguments and the error
 * check are not visible in this extract. */
1833 int journal_file_move_to_entry_by_realtime_for_data(
1835 uint64_t data_offset,
1837 direction_t direction,
1838 Object **ret, uint64_t *offset) {
1845 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1849 return generic_array_bisect_plus_one(f,
1850 le64toh(d->data.entry_offset),
1851 le64toh(d->data.entry_array_offset),
1852 le64toh(d->data.n_entries),
1854 test_object_realtime,
/* Debug helper that prints the header followed by one line per object.
 * Starting right after the header, each object is mapped without a type
 * restriction and its type name is printed; entries additionally show
 * sequence number and both timestamps, tags show sequence number and epoch.
 * A COMPRESSED flag line is printed for objects carrying that flag. The
 * walk advances by the 64-bit aligned object size and tests for the object
 * at header tail_object_offset.
 * NOTE(review): the loop construct, several case labels, the action taken
 * at the tail object and the jump target that leads to the File corrupt
 * message are on lines missing from this extract. */
1859 void journal_file_dump(JournalFile *f) {
1866 journal_file_print_header(f);
1868 p = le64toh(f->header->header_size);
1870 r = journal_file_move_to_object(f, -1, p, &o);
1874 switch (o->object.type) {
1877 printf("Type: OBJECT_UNUSED\n");
1881 printf("Type: OBJECT_DATA\n");
1885 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1886 (unsigned long long) le64toh(o->entry.seqnum),
1887 (unsigned long long) le64toh(o->entry.monotonic),
1888 (unsigned long long) le64toh(o->entry.realtime));
1891 case OBJECT_FIELD_HASH_TABLE:
1892 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1895 case OBJECT_DATA_HASH_TABLE:
1896 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1899 case OBJECT_ENTRY_ARRAY:
1900 printf("Type: OBJECT_ENTRY_ARRAY\n");
1904 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1905 (unsigned long long) le64toh(o->tag.seqnum),
1906 (unsigned long long) le64toh(o->tag.epoch));
1910 if (o->object.flags & OBJECT_COMPRESSED)
1911 printf("Flags: COMPRESSED\n");
1913 if (p == le64toh(f->header->tail_object_offset))
1916 p = p + ALIGN64(le64toh(o->object.size));
1921 log_error("File corrupt");
/* Prints a human-readable summary of the journal header to stdout: path,
 * the four 128-bit IDs, state, flag names, header and arena sizes, hash
 * table slot counts, the rotation hint, head and tail sequence numbers and
 * realtime timestamps, and object and entry counts. Counters that were added
 * to the header format later (n_data, n_fields, n_tags, n_entry_arrays) are
 * printed only when the header is large enough to contain them. Disk usage
 * is derived from st_blocks * 512 when fstat() succeeds.
 * NOTE(review): a few format and argument lines of the first printf() are
 * missing from this extract. */
1924 void journal_file_print_header(JournalFile *f) {
/* One scratch buffer per ID. All four sd_id128_to_string() results are
 * consumed by the same printf() call, so they must not share storage;
 * previously the boot ID and the sequential number ID were both formatted
 * into c and therefore printed identically. */
1925 char a[33], b[33], c[33], d[33];
1926 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1928 char bytes[FORMAT_BYTES_MAX];
1932 printf("File Path: %s\n"
1936 "Sequential Number ID: %s\n"
1938 "Compatible Flags:%s%s\n"
1939 "Incompatible Flags:%s%s\n"
1940 "Header size: %llu\n"
1941 "Arena size: %llu\n"
1942 "Data Hash Table Size: %llu\n"
1943 "Field Hash Table Size: %llu\n"
1944 "Rotate Suggested: %s\n"
1945 "Head Sequential Number: %llu\n"
1946 "Tail Sequential Number: %llu\n"
1947 "Head Realtime Timestamp: %s\n"
1948 "Tail Realtime Timestamp: %s\n"
1950 "Entry Objects: %llu\n",
1952 sd_id128_to_string(f->header->file_id, a),
1953 sd_id128_to_string(f->header->machine_id, b),
1954 sd_id128_to_string(f->header->boot_id, c),
1955 sd_id128_to_string(f->header->seqnum_id, d),
1956 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1957 f->header->state == STATE_ONLINE ? "ONLINE" :
1958 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1959 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1960 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1961 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1962 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1963 (unsigned long long) le64toh(f->header->header_size),
1964 (unsigned long long) le64toh(f->header->arena_size),
1965 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1966 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1967 yes_no(journal_file_rotate_suggested(f)),
1968 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1969 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1970 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1971 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1972 (unsigned long long) le64toh(f->header->n_objects),
1973 (unsigned long long) le64toh(f->header->n_entries));
/* Fill level is the object count relative to the number of table slots. */
1975 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1976 printf("Data Objects: %llu\n"
1977 "Data Hash Table Fill: %.1f%%\n",
1978 (unsigned long long) le64toh(f->header->n_data),
1979 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1981 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1982 printf("Field Objects: %llu\n"
1983 "Field Hash Table Fill: %.1f%%\n",
1984 (unsigned long long) le64toh(f->header->n_fields),
1985 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1987 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1988 printf("Tag Objects: %llu\n",
1989 (unsigned long long) le64toh(f->header->n_tags));
1990 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1991 printf("Entry Array Objects: %llu\n",
1992 (unsigned long long) le64toh(f->header->n_entry_arrays));
1994 if (fstat(f->fd, &st) >= 0)
1995 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
/* Open the journal file fname and set up a JournalFile object for it.
 * An empty file opened writable is treated as newly created and gets a
 * fresh header and hash tables; an existing file has its header
 * verified instead.
 *
 * NOTE(review): part of the parameter list, the error-handling branches
 * and the final assignment to *ret are on lines not shown in this
 * listing; the comments below describe only the visible statements. */
1998 int journal_file_open(
2004 JournalMetrics *metrics,
2005 MMapCache *mmap_cache,
2006 JournalFile *template,
2007 JournalFile **ret) {
2011 bool newly_created = false;
/* Access modes other than O_RDONLY and O_RDWR are singled out here
 * (presumably rejected -- the guarded statement is not visible). */
2016 if ((flags & O_ACCMODE) != O_RDONLY &&
2017 (flags & O_ACCMODE) != O_RDWR)
/* Likewise for names that end in neither ".journal" nor ".journal~". */
2020 if (!endswith(fname, ".journal") &&
2021 !endswith(fname, ".journal~"))
2024 f = new0(JournalFile, 1);
2032 f->prot = prot_from_flags(flags);
2033 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2035 f->compress = compress;
/* The mmap cache is either a new reference to the caller's cache or a
 * freshly created one (presumably chosen by whether mmap_cache is
 * NULL -- the condition is not visible). */
2042 f->mmap = mmap_cache_ref(mmap_cache);
2044 f->mmap = mmap_cache_new();
2051 f->path = strdup(fname);
2057 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2063 if (fstat(f->fd, &f->last_stat) < 0) {
/* A zero-sized file that we may write to is initialized from scratch. */
2068 if (f->last_stat.st_size == 0 && f->writable) {
2069 newly_created = true;
2072 /* Try to load the FSPRG state, and if we can't, then
2073 * just don't do sealing */
2075 r = journal_file_fss_load(f);
2081 r = journal_file_init_header(f, template);
/* Stat again after the header has been initialized, refreshing
 * f->last_stat. */
2085 if (fstat(f->fd, &f->last_stat) < 0) {
/* The file must be at least as large as the smallest known header. */
2091 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
/* Map the header as a shared mapping at file offset 0, with the
 * protection derived from the open flags. */
2096 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2097 if (f->header == MAP_FAILED) {
/* Pre-existing files get their header verified; when they are also
 * writable the FSS state is loaded for them here. */
2103 if (!newly_created) {
2104 r = journal_file_verify_header(f);
2110 if (!newly_created && f->writable) {
2111 r = journal_file_fss_load(f);
/* Metrics: fill in defaults for the caller's metrics and copy them, or
 * otherwise inherit the template's metrics if a template was given. */
2119 journal_default_metrics(metrics, f->fd);
2120 f->metrics = *metrics;
2121 } else if (template)
2122 f->metrics = template->metrics;
2124 r = journal_file_refresh_header(f);
2130 r = journal_file_hmac_setup(f);
/* A new file needs its hash tables created and a first tag appended
 * before the tables can be mapped below. */
2135 if (newly_created) {
2136 r = journal_file_setup_field_hash_table(f);
2140 r = journal_file_setup_data_hash_table(f);
2145 r = journal_file_append_first_tag(f);
2151 r = journal_file_map_field_hash_table(f);
2155 r = journal_file_map_data_hash_table(f);
/* Presumably the failure path: release the partially set up object. */
2163 journal_file_close(f);
/* Rotate the journal file *f: rename the current file to an archive
 * name built from its path, seqnum ID, tail sequence number and tail
 * realtime timestamp, mark it archived, open a new file at the original
 * path using the old file as template, and close the old file.
 *
 * NOTE(review): old_file presumably aliases *f (the assignment is on a
 * line not shown), as are the error checks and the update of *f. */
2168 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2171 JournalFile *old_file, *new_file = NULL;
/* Only writable files whose name ends in ".journal" are considered
 * (the statements guarded by these checks are not visible). */
2179 if (!old_file->writable)
2182 if (!endswith(old_file->path, ".journal"))
2185 l = strlen(old_file->path);
/* Size of the new name: the old path, one byte at p[l - 8] (presumably
 * a separator written on a hidden line), 32 characters of ID, and two
 * "-%016llx" fields plus NUL. The ".journal" suffix dropped below and
 * re-added by snprintf() keeps the total within l + 68 bytes. */
2187 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
/* Copy the path without its 8-character ".journal" suffix. */
2191 memcpy(p, old_file->path, l - 8);
2193 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2194 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2195 "-%016llx-%016llx.journal",
2196 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2197 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2199 r = rename(old_file->path, p);
2205 old_file->header->state = STATE_ARCHIVED;
/* old_file->path still holds the original name, so this opens a file at
 * the path that was just renamed away. No explicit metrics are passed;
 * the old file serves as template and its mmap cache is reused. */
2207 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2208 journal_file_close(old_file);
/* Wrapper around journal_file_open() that makes one recovery attempt:
 * if opening fails with one of the error codes listed below, the file
 * is renamed to a "...@<hex>-<hex>.journal~" name and the open is
 * retried once at the original path.
 *
 * NOTE(review): the return statements guarded by the checks below and
 * some of the asprintf() arguments are on lines not shown here. */
2214 int journal_file_open_reliably(
2220 JournalMetrics *metrics,
2221 MMapCache *mmap_cache,
2222 JournalFile *template,
2223 JournalFile **ret) {
2229 r = journal_file_open(fname, flags, mode, compress, seal,
2230 metrics, mmap_cache, template, ret);
/* Any result other than these error codes takes the guarded (hidden)
 * statement, presumably returning r unchanged. */
2231 if (r != -EBADMSG && /* corrupted */
2232 r != -ENODATA && /* truncated */
2233 r != -EHOSTDOWN && /* other machine */
2234 r != -EPROTONOSUPPORT && /* incompatible feature */
2235 r != -EBUSY && /* unclean shutdown */
2236 r != -ESHUTDOWN /* already archived */)
/* Three further preconditions for the recovery path: not read-only,
 * O_CREAT requested, and the name ends in ".journal". */
2239 if ((flags & O_ACCMODE) == O_RDONLY)
2242 if (!(flags & O_CREAT))
2245 if (!endswith(fname, ".journal"))
2248 /* The file is corrupted. Rotate it away and try it again (but only once) */
/* The replacement name embeds the current realtime clock value. */
2251 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2253 (unsigned long long) now(CLOCK_REALTIME),
2257 r = rename(fname, p);
2262 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
/* Second and last attempt; its result is returned as is. */
2264 return journal_file_open(fname, flags, mode, compress, seal,
2265 metrics, mmap_cache, template, ret);
/* Copy the entry object o, located at offset p in journal file from,
 * into journal file to. Each item of the entry is resolved to its data
 * object in from, the payload is appended to to, and the resulting
 * offsets and hashes are collected into a new item array that is
 * finally appended as an entry. seqnum, ret and offset are passed
 * through to journal_file_append_entry_internal().
 *
 * NOTE(review): local declarations, error returns and the exact loop
 * boundaries are on lines not shown in this listing. */
2269 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2271 uint64_t q, xor_hash = 0;
2284 ts.monotonic = le64toh(o->entry.monotonic);
2285 ts.realtime = le64toh(o->entry.realtime);
/* Compare against the destination's tail monotonic timestamp, if that
 * is valid (the consequence of the check is not visible). */
2287 if (to->tail_entry_monotonic_valid &&
2288 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2291 n = journal_file_entry_n_items(o);
/* The new item array lives on the stack; its size is taken from the
 * entry being copied. */
2292 items = alloca(sizeof(EntryItem) * n);
2294 for (i = 0; i < n; i++) {
2301 q = le64toh(o->entry.items[i].object_offset);
2302 le_hash = o->entry.items[i].hash;
/* From here on o refers to the data object, not to the entry. */
2304 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The hash stored in the entry item must match the data object's. */
2308 if (le_hash != o->data.hash)
/* Payload length: object size minus the offset of the payload. */
2311 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2314 /* We hit the limit on 32bit machines */
2315 if ((uint64_t) t != l)
/* Compressed payloads are decompressed into from's compress buffer;
 * the -EPROTONOSUPPORT return presumably belongs to a build without
 * compression support (the conditional is not visible). */
2318 if (o->object.flags & OBJECT_COMPRESSED) {
2322 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2325 data = from->compress_buffer;
2328 return -EPROTONOSUPPORT;
2331 data = o->data.payload;
2333 r = journal_file_append_data(to, data, l, &u, &h);
/* Accumulate the XOR of all data hashes and record the item using the
 * offset and hash of the data object in the destination file. */
2337 xor_hash ^= le64toh(u->data.hash);
2338 items[i].object_offset = htole64(h);
2339 items[i].hash = u->data.hash;
/* o was redirected to a data object above; point it back at the entry
 * object at offset p. */
2341 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2346 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
/* Fill in and sanitize the journal size metrics in m. Fields set to
 * (uint64_t) -1 are treated as unset and derived from the size of the
 * file system that fd lives on (or from fixed fallbacks if that size is
 * unknown). Values are page-aligned and clamped so that
 * min_size <= max_size and max_use >= 2 * max_size. The result is
 * logged at debug level.
 *
 * NOTE(review): some of the if/else nesting is on lines not shown in
 * this listing; the branch comments below are hedged accordingly. */
2349 void journal_default_metrics(JournalMetrics *m, int fd) {
2350 uint64_t fs_size = 0;
2352 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
/* fs_size stays 0 if the file system cannot be queried. */
2357 if (fstatvfs(fd, &ss) >= 0)
2358 fs_size = ss.f_frsize * ss.f_blocks;
/* max_use unset: 10% of the file system, clamped to
 * [DEFAULT_MAX_USE_LOWER, DEFAULT_MAX_USE_UPPER]; the lone assignment of
 * DEFAULT_MAX_USE_LOWER is presumably the fallback for an unknown file
 * system size. */
2360 if (m->max_use == (uint64_t) -1) {
2363 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2365 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2366 m->max_use = DEFAULT_MAX_USE_UPPER;
2368 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2369 m->max_use = DEFAULT_MAX_USE_LOWER;
2371 m->max_use = DEFAULT_MAX_USE_LOWER;
/* Presumably the branch for an explicitly configured max_use:
 * page-align it and enforce room for at least two minimum-sized files. */
2373 m->max_use = PAGE_ALIGN(m->max_use);
2375 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2376 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
/* max_size unset: one eighth of max_use, capped at
 * DEFAULT_MAX_SIZE_UPPER. */
2379 if (m->max_size == (uint64_t) -1) {
2380 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2382 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2383 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2385 m->max_size = PAGE_ALIGN(m->max_size);
2387 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2388 m->max_size = JOURNAL_FILE_SIZE_MIN;
/* max_use must cover at least two files of max_size. */
2390 if (m->max_size*2 > m->max_use)
2391 m->max_use = m->max_size*2;
2393 if (m->min_size == (uint64_t) -1)
2394 m->min_size = JOURNAL_FILE_SIZE_MIN;
2396 m->min_size = PAGE_ALIGN(m->min_size);
2398 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2399 m->min_size = JOURNAL_FILE_SIZE_MIN;
/* If min_size exceeds max_size, max_size is raised to match. */
2401 if (m->min_size > m->max_size)
2402 m->max_size = m->min_size;
/* keep_free unset: 5% of the file system, capped at
 * DEFAULT_KEEP_FREE_UPPER; DEFAULT_KEEP_FREE is presumably the fallback
 * for an unknown file system size. */
2405 if (m->keep_free == (uint64_t) -1) {
2408 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2410 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2411 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2414 m->keep_free = DEFAULT_KEEP_FREE;
2417 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2418 format_bytes(a, sizeof(a), m->max_use),
2419 format_bytes(b, sizeof(b), m->max_size),
2420 format_bytes(c, sizeof(c), m->min_size),
2421 format_bytes(d, sizeof(d), m->keep_free));
/* Report the realtime timestamps of the first and last entry of f, as
 * recorded in the header's head_entry_realtime and tail_entry_realtime
 * fields, through *from and *to.
 *
 * NOTE(review): a stored value of 0 is special-cased for both fields;
 * the statement it guards and the return values are on lines not shown
 * here (presumably "no entries yet"). */
2424 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
/* Comparing the raw field with 0 needs no byte-order conversion. */
2429 if (f->header->head_entry_realtime == 0)
2432 *from = le64toh(f->header->head_entry_realtime);
2436 if (f->header->tail_entry_realtime == 0)
2439 *to = le64toh(f->header->tail_entry_realtime);
/* Report, through *from and *to, the monotonic timestamps of two
 * entries in f that reference the data object "_BOOT_ID=<boot_id>":
 * the one at the data object's entry_offset and the one at index
 * n_entries - 1 of its entry list (presumably the first and the last
 * entry of that boot).
 *
 * NOTE(review): error handling, NULL checks on from/to and the return
 * values are on lines not shown in this listing. */
2445 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* 9 bytes of "_BOOT_ID=" prefix, 32 characters of ID, and the NUL. */
2446 char t[9+32+1] = "_BOOT_ID=";
2454 sd_id128_to_string(boot_id, t + 9);
/* Look up the data object for this field; p receives its offset. */
2456 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
/* A data object without entries is special-cased (the guarded
 * statement is not visible). */
2460 if (le64toh(o->data.n_entries) <= 0)
/* o now refers to the entry at the data object's entry_offset. */
2464 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2468 *from = le64toh(o->entry.monotonic);
/* o was redirected to an entry above; go back to the data object at p
 * before reading its entry list. */
2472 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2476 r = generic_array_get_plus_one(f,
2477 le64toh(o->data.entry_offset),
2478 le64toh(o->data.entry_array_offset),
2479 le64toh(o->data.n_entries)-1,
2484 *to = le64toh(o->entry.monotonic);
2490 bool journal_file_rotate_suggested(JournalFile *f) {
2493 /* If we gained new header fields we gained new features,
2494 * hence suggest a rotation */
2495 if (le64toh(f->header->header_size) < sizeof(Header)) {
2496 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2500 /* Let's check if the hash tables grew over a certain fill
2501 * level (75%, borrowing this value from Java's hash table
2502 * implementation), and if so suggest a rotation. To calculate
2503 * the fill level we need the n_data field, which only exists
2504 * in newer versions. */
2506 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2507 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2508 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2510 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2511 (unsigned long long) le64toh(f->header->n_data),
2512 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2513 (unsigned long long) (f->last_stat.st_size),
2514 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2518 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2519 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2520 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2522 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2523 (unsigned long long) le64toh(f->header->n_fields),
2524 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));