1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
45 /* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
53 /* This is the upper bound if we deduce the keep_free value from the file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57 /* This is the keep_free value when we can't determine the file system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
/* Tears down a JournalFile. For writable files a final seal tag is
 * appended when sealing is enabled, and a header that is still ONLINE
 * is flipped to OFFLINE (an archived state is left alone). Afterwards
 * the header mapping, the file descriptor, the mmap cache reference,
 * the compression buffer, the FSS/FSPRG state and the HMAC context
 * are released.
 * NOTE(review): several statements of this function (guards, the body
 * of the sync branch, closing braces) are not visible in this view. */
64 void journal_file_close(JournalFile *f) {
68 /* Write the final tag */
69 if (f->seal && f->writable)
70 journal_file_append_tag(f);
73 /* Sync everything to disk, before we mark the file offline */
74 if (f->mmap && f->fd >= 0)
75 mmap_cache_close_fd(f->mmap, f->fd);
77 if (f->writable && f->fd >= 0)
81 /* Mark the file offline. Don't override the archived state if it is already set */
82 if (f->writable && f->header->state == STATE_ONLINE)
83 f->header->state = STATE_OFFLINE;
/* The header was mapped with a page-aligned length, so unmap the same amount */
85 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
89 close_nointr_nofail(f->fd);
94 mmap_cache_unref(f->mmap);
97 free(f->compress_buffer);
102 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
103 else if (f->fsprg_state)
104 free(f->fsprg_state);
109 gcry_md_close(f->hmac);
115 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
123 memcpy(h.signature, HEADER_SIGNATURE, 8);
124 h.header_size = htole64(ALIGN64(sizeof(h)));
126 h.incompatible_flags =
127 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
130 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
132 r = sd_id128_randomize(&h.file_id);
137 h.seqnum_id = template->header->seqnum_id;
138 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
140 h.seqnum_id = h.file_id;
142 k = pwrite(f->fd, &h, sizeof(h), 0);
152 static int journal_file_refresh_header(JournalFile *f) {
158 r = sd_id128_get_machine(&f->header->machine_id);
162 r = sd_id128_get_boot(&boot_id);
166 if (sd_id128_equal(boot_id, f->header->boot_id))
167 f->tail_entry_monotonic_valid = true;
169 f->header->boot_id = boot_id;
171 f->header->state = STATE_ONLINE;
173 /* Sync the online state to disk */
174 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
/* Sanity-checks the mapped file header before the file is used:
 *   - the 8-byte signature,
 *   - incompatible and compatible flag bits,
 *   - the state value being below _STATE_MAX,
 *   - header_size being at least HEADER_SIZE_MIN,
 *   - a sealed header being large enough to contain n_entry_arrays,
 *   - header_size + arena_size fitting into the size recorded in
 *     f->last_stat,
 *   - tail_object_offset lying within header_size + arena_size,
 *   - the hash table, tail object and entry array offsets passing
 *     VALID64 and not pointing into the header.
 * It then compares the machine ID, inspects the state, and copies the
 * compressed/sealed header flags into f->compress and f->seal.
 * NOTE(review): the error returned by most failed checks, and the
 * condition guarding the machine-ID/state part, are on lines that are
 * not visible in this view. */
180 static int journal_file_verify_header(JournalFile *f) {
183 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
186 /* In both read and write mode we refuse to open files with
187 * incompatible flags we don't know */
/* NOTE(review): the two variants of each flag check below are
 * presumably alternatives selected at build time; the preprocessor
 * lines are not visible here -- confirm. */
189 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
190 return -EPROTONOSUPPORT;
192 if (f->header->incompatible_flags != 0)
193 return -EPROTONOSUPPORT;
196 /* When open for writing we refuse to open files with
197 * unknown compatible flags, too */
200 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
201 return -EPROTONOSUPPORT;
203 if (f->header->compatible_flags != 0)
204 return -EPROTONOSUPPORT;
208 if (f->header->state >= _STATE_MAX)
211 /* The first addition was n_data, so check that we are at least this large */
212 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
215 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
/* Header plus arena must not extend beyond the file size we last stat'ed */
218 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
221 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
224 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226 !VALID64(le64toh(f->header->tail_object_offset)) ||
227 !VALID64(le64toh(f->header->entry_array_offset)))
/* None of the recorded offsets may point into the header itself */
230 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
238 sd_id128_t machine_id;
241 r = sd_id128_get_machine(&machine_id);
245 if (!sd_id128_equal(machine_id, f->header->machine_id))
248 state = f->header->state;
250 if (state == STATE_ONLINE) {
251 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
253 } else if (state == STATE_ARCHIVED)
255 else if (state != STATE_OFFLINE) {
256 log_debug("Journal file %s has unknown state %u.", f->path, state);
/* Take over the feature flags recorded in the header */
261 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
264 f->seal = JOURNAL_HEADER_SEALED(f->header);
/* Makes sure the file is large enough to hold 'size' bytes at
 * 'offset'. The target size is page-aligned and never smaller than the
 * header; it is compared against the current header_size + arena_size,
 * the max_size metric and -- via fstatvfs() -- the free space left
 * after honouring keep_free. The file is then grown with
 * posix_fallocate(), last_stat is refreshed and arena_size is updated
 * in the header.
 * NOTE(review): the values returned on the individual branches are on
 * lines not visible in this view. */
269 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
270 uint64_t old_size, new_size;
275 /* We assume that this file is not sparse, and we know that
276 * for sure, since we always call posix_fallocate() */
280 le64toh(f->header->header_size) +
281 le64toh(f->header->arena_size);
283 new_size = PAGE_ALIGN(offset + size);
284 if (new_size < le64toh(f->header->header_size))
285 new_size = le64toh(f->header->header_size);
287 if (new_size <= old_size)
290 if (f->metrics.max_size > 0 &&
291 new_size > f->metrics.max_size)
294 if (new_size > f->metrics.min_size &&
295 f->metrics.keep_free > 0) {
298 if (fstatvfs(f->fd, &svfs) >= 0) {
/* Widen before multiplying: f_bfree and f_bsize may be 32 bit wide,
 * in which case the product would wrap on large file systems */
301 available = (uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize;
303 if (available >= f->metrics.keep_free)
304 available -= f->metrics.keep_free;
308 if (new_size - old_size > available)
313 /* Note that the glibc fallocate() fallback is very
314 inefficient, hence we try to minimize the allocation area */
316 r = posix_fallocate(f->fd, old_size, new_size - old_size);
320 if (fstat(f->fd, &f->last_stat) < 0)
323 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
/* Returns in *ret a mapping of 'size' bytes of the file at 'offset',
 * obtained from the mmap cache for the given context. The requested
 * range is checked against the known file size first (refreshing the
 * fstat() data once if it looks out of range); a range that still does
 * not fit yields -EADDRNOTAVAIL. The range check is written so that it
 * cannot be defeated by 'offset + size' wrapping around. */
328 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
335 /* Avoid SIGBUS on invalid accesses */
336 if (size > (uint64_t) f->last_stat.st_size || offset > (uint64_t) f->last_stat.st_size - size) {
337 /* Hmm, out of range? Let's refresh the fstat() data
338 * first, before we trust that check. */
340 if (fstat(f->fd, &f->last_stat) < 0 ||
341 size > (uint64_t) f->last_stat.st_size || offset > (uint64_t) f->last_stat.st_size - size)
342 return -EADDRNOTAVAIL;
345 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
/* Returns the smallest size an object of o's type may have, looked up
 * in a per-type table of structure sizes. Types beyond the end of the
 * table, or without an entry in it, fall back to sizeof(ObjectHeader). */
348 static uint64_t minimum_header_size(Object *o) {
350 static uint64_t table[] = {
351 [OBJECT_DATA] = sizeof(DataObject),
352 [OBJECT_FIELD] = sizeof(FieldObject),
353 [OBJECT_ENTRY] = sizeof(EntryObject),
354 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
355 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
356 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
357 [OBJECT_TAG] = sizeof(TagObject),
/* Slots not named above are zero-initialized, which the check below treats as "no entry" */
360 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
361 return sizeof(ObjectHeader);
363 return table[o->object.type];
/* Maps the object located at 'offset'. Only the ObjectHeader is mapped
 * at first; its size and type are then checked (size at least the
 * header, type above OBJECT_UNUSED, size at least the minimum for that
 * type, and -- when 'type' is non-negative -- a matching type). Objects
 * larger than the bare header are mapped again with their full size.
 * NOTE(review): the error codes of the individual checks and the
 * assignment to *ret are on lines not visible in this view. */
366 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
376 /* Objects may only be located at multiples of 64 bit */
377 if (!VALID64(offset))
380 /* One context for each type, plus one catch-all for the rest */
381 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
383 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
388 s = le64toh(o->object.size);
390 if (s < sizeof(ObjectHeader))
393 if (o->object.type <= OBJECT_UNUSED)
396 if (s < minimum_header_size(o))
/* A negative 'type' means the caller accepts any object type */
399 if (type >= 0 && o->object.type != type)
402 if (s > sizeof(ObjectHeader)) {
403 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
414 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
419 r = le64toh(f->header->tail_entry_seqnum) + 1;
422 /* If an external seqnum counter was passed, we update
423 * both the local and the external one, and set it to
424 * the maximum of both */
432 f->header->tail_entry_seqnum = htole64(r);
434 if (f->header->head_entry_seqnum == 0)
435 f->header->head_entry_seqnum = htole64(r);
440 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
447 assert(type > 0 && type < _OBJECT_TYPE_MAX);
448 assert(size >= sizeof(ObjectHeader));
452 p = le64toh(f->header->tail_object_offset);
454 p = le64toh(f->header->header_size);
456 r = journal_file_move_to_object(f, -1, p, &tail);
460 p += ALIGN64(le64toh(tail->object.size));
463 r = journal_file_allocate(f, p, size);
467 r = journal_file_move_to(f, type, false, p, size, &t);
474 o->object.type = type;
475 o->object.size = htole64(size);
477 f->header->tail_object_offset = htole64(p);
478 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
486 static int journal_file_setup_data_hash_table(JournalFile *f) {
493 /* We estimate that we need 1 hash table entry per 768 bytes of
494 journal file and we want to make sure we never get beyond
495 75% fill level. Calculate the hash table size for the
496 maximum file size based on these metrics. */
498 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
499 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
500 s = DEFAULT_DATA_HASH_TABLE_SIZE;
502 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
504 r = journal_file_append_object(f,
505 OBJECT_DATA_HASH_TABLE,
506 offsetof(Object, hash_table.items) + s,
511 memset(o->hash_table.items, 0, s);
513 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514 f->header->data_hash_table_size = htole64(s);
519 static int journal_file_setup_field_hash_table(JournalFile *f) {
526 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527 r = journal_file_append_object(f,
528 OBJECT_FIELD_HASH_TABLE,
529 offsetof(Object, hash_table.items) + s,
534 memset(o->hash_table.items, 0, s);
536 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537 f->header->field_hash_table_size = htole64(s);
542 static int journal_file_map_data_hash_table(JournalFile *f) {
549 p = le64toh(f->header->data_hash_table_offset);
550 s = le64toh(f->header->data_hash_table_size);
552 r = journal_file_move_to(f,
553 OBJECT_DATA_HASH_TABLE,
560 f->data_hash_table = t;
564 static int journal_file_map_field_hash_table(JournalFile *f) {
571 p = le64toh(f->header->field_hash_table_offset);
572 s = le64toh(f->header->field_hash_table_size);
574 r = journal_file_move_to(f,
575 OBJECT_FIELD_HASH_TABLE,
582 f->field_hash_table = t;
586 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
593 assert(o->object.type == OBJECT_DATA);
595 /* This might alter the window we are looking at */
597 o->data.next_hash_offset = o->data.next_field_offset = 0;
598 o->data.entry_offset = o->data.entry_array_offset = 0;
599 o->data.n_entries = 0;
601 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
602 p = le64toh(f->data_hash_table[h].tail_hash_offset);
604 /* Only entry in the hash table is easy */
605 f->data_hash_table[h].head_hash_offset = htole64(offset);
607 /* Move back to the previous data object, to patch in the pointer to the new one */
610 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
614 o->data.next_hash_offset = htole64(offset);
617 f->data_hash_table[h].tail_hash_offset = htole64(offset);
619 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
620 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
625 int journal_file_find_data_object_with_hash(
627 const void *data, uint64_t size, uint64_t hash,
628 Object **ret, uint64_t *offset) {
630 uint64_t p, osize, h;
634 assert(data || size == 0);
636 osize = offsetof(Object, data.payload) + size;
638 if (f->header->data_hash_table_size == 0)
641 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
642 p = le64toh(f->data_hash_table[h].head_hash_offset);
647 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
651 if (le64toh(o->data.hash) != hash)
654 if (o->object.flags & OBJECT_COMPRESSED) {
658 l = le64toh(o->object.size);
659 if (l <= offsetof(Object, data.payload))
662 l -= offsetof(Object, data.payload);
664 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
668 memcmp(f->compress_buffer, data, size) == 0) {
679 return -EPROTONOSUPPORT;
682 } else if (le64toh(o->object.size) == osize &&
683 memcmp(o->data.payload, data, size) == 0) {
695 p = le64toh(o->data.next_hash_offset);
701 int journal_file_find_data_object(
703 const void *data, uint64_t size,
704 Object **ret, uint64_t *offset) {
709 assert(data || size == 0);
711 hash = hash64(data, size);
713 return journal_file_find_data_object_with_hash(f,
718 static int journal_file_append_data(
720 const void *data, uint64_t size,
721 Object **ret, uint64_t *offset) {
727 bool compressed = false;
730 assert(data || size == 0);
732 hash = hash64(data, size);
734 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
748 osize = offsetof(Object, data.payload) + size;
749 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
753 o->data.hash = htole64(hash);
757 size >= COMPRESSION_SIZE_THRESHOLD) {
760 compressed = compress_blob(data, size, o->data.payload, &rsize);
763 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
764 o->object.flags |= OBJECT_COMPRESSED;
766 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
771 if (!compressed && size > 0)
772 memcpy(o->data.payload, data, size);
774 r = journal_file_link_data(f, o, p, hash);
779 r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
784 /* The linking might have altered the window, so let's
785 * refresh our pointer */
786 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Number of EntryItem slots in an entry object: the object size minus
 * the fixed part in front of entry.items, divided by the item size. */
799 uint64_t journal_file_entry_n_items(Object *o) {
801 assert(o->object.type == OBJECT_ENTRY);
803 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
/* Number of 64-bit slots in an entry array object: the object size
 * minus the fixed part in front of entry_array.items, divided by
 * sizeof(uint64_t). */
806 uint64_t journal_file_entry_array_n_items(Object *o) {
808 assert(o->object.type == OBJECT_ENTRY_ARRAY);
810 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
/* Number of HashItem buckets in a data or field hash table object: the
 * object size minus the fixed part in front of hash_table.items,
 * divided by the bucket size. */
813 uint64_t journal_file_hash_table_n_items(Object *o) {
815 assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
816 o->object.type == OBJECT_FIELD_HASH_TABLE);
818 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
821 static int link_entry_into_array(JournalFile *f,
826 uint64_t n = 0, ap = 0, q, i, a, hidx;
835 i = hidx = le64toh(*idx);
838 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
842 n = journal_file_entry_array_n_items(o);
844 o->entry_array.items[i] = htole64(p);
845 *idx = htole64(hidx + 1);
851 a = le64toh(o->entry_array.next_entry_array_offset);
862 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
863 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
869 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
874 o->entry_array.items[i] = htole64(p);
879 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
883 o->entry_array.next_entry_array_offset = htole64(q);
886 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
887 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
889 *idx = htole64(hidx + 1);
894 static int link_entry_into_array_plus_one(JournalFile *f,
913 i = htole64(le64toh(*idx) - 1);
914 r = link_entry_into_array(f, first, &i, p);
919 *idx = htole64(le64toh(*idx) + 1);
923 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
930 p = le64toh(o->entry.items[i].object_offset);
934 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
938 return link_entry_into_array_plus_one(f,
939 &o->data.entry_offset,
940 &o->data.entry_array_offset,
945 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
952 assert(o->object.type == OBJECT_ENTRY);
954 __sync_synchronize();
956 /* Link up the entry itself */
957 r = link_entry_into_array(f,
958 &f->header->entry_array_offset,
959 &f->header->n_entries,
964 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
966 if (f->header->head_entry_realtime == 0)
967 f->header->head_entry_realtime = o->entry.realtime;
969 f->header->tail_entry_realtime = o->entry.realtime;
970 f->header->tail_entry_monotonic = o->entry.monotonic;
972 f->tail_entry_monotonic_valid = true;
974 /* Link up the items */
975 n = journal_file_entry_n_items(o);
976 for (i = 0; i < n; i++) {
977 r = journal_file_link_entry_item(f, o, offset, i);
985 static int journal_file_append_entry_internal(
987 const dual_timestamp *ts,
989 const EntryItem items[], unsigned n_items,
991 Object **ret, uint64_t *offset) {
998 assert(items || n_items == 0);
1001 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1003 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1007 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1008 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1009 o->entry.realtime = htole64(ts->realtime);
1010 o->entry.monotonic = htole64(ts->monotonic);
1011 o->entry.xor_hash = htole64(xor_hash);
1012 o->entry.boot_id = f->header->boot_id;
1015 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1020 r = journal_file_link_entry(f, o, np);
/* Signals a change of the journal file to anyone watching it, by
 * truncating the file to the size recorded in f->last_stat. A failing
 * ftruncate() is logged and otherwise ignored. */
1033 void journal_file_post_change(JournalFile *f) {
1036 /* inotify() does not receive IN_MODIFY events from file
1037 * accesses done via mmap(). After each access we hence
1038 * truncate the journal file to its current size, which
1039 * triggers IN_MODIFY. */
1041 __sync_synchronize();
1043 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1044 log_error("Failed to truncate file to its own size: %m");
1047 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1051 uint64_t xor_hash = 0;
1052 struct dual_timestamp _ts;
1055 assert(iovec || n_iovec == 0);
1061 dual_timestamp_get(&_ts);
1065 if (f->tail_entry_monotonic_valid &&
1066 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1070 r = journal_file_maybe_append_tag(f, ts->realtime);
1075 /* alloca() can't take 0, hence let's allocate at least one */
1076 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1078 for (i = 0; i < n_iovec; i++) {
1082 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1086 xor_hash ^= le64toh(o->data.hash);
1087 items[i].object_offset = htole64(p);
1088 items[i].hash = o->data.hash;
1091 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1093 journal_file_post_change(f);
1098 static int generic_array_get(JournalFile *f,
1101 Object **ret, uint64_t *offset) {
1113 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1117 n = journal_file_entry_array_n_items(o);
1119 p = le64toh(o->entry_array.items[i]);
1124 a = le64toh(o->entry_array.next_entry_array_offset);
1127 if (a <= 0 || p <= 0)
1130 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1143 static int generic_array_get_plus_one(JournalFile *f,
1147 Object **ret, uint64_t *offset) {
1156 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1169 return generic_array_get(f, first, i-1, ret, offset);
1178 static int generic_array_bisect(JournalFile *f,
1182 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1183 direction_t direction,
1188 uint64_t a, p, t = 0, i = 0, last_p = 0;
1189 bool subtract_one = false;
1190 Object *o, *array = NULL;
1194 assert(test_object);
1198 uint64_t left, right, k, lp;
1200 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1204 k = journal_file_entry_array_n_items(array);
1210 lp = p = le64toh(array->entry_array.items[i]);
1214 r = test_object(f, p, needle);
1218 if (r == TEST_FOUND)
1219 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1221 if (r == TEST_RIGHT) {
1225 if (left == right) {
1226 if (direction == DIRECTION_UP)
1227 subtract_one = true;
1233 assert(left < right);
1235 i = (left + right) / 2;
1236 p = le64toh(array->entry_array.items[i]);
1240 r = test_object(f, p, needle);
1244 if (r == TEST_FOUND)
1245 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1247 if (r == TEST_RIGHT)
1255 if (direction == DIRECTION_UP) {
1257 subtract_one = true;
1268 a = le64toh(array->entry_array.next_entry_array_offset);
1274 if (subtract_one && t == 0 && i == 0)
1277 if (subtract_one && i == 0)
1279 else if (subtract_one)
1280 p = le64toh(array->entry_array.items[i-1]);
1282 p = le64toh(array->entry_array.items[i]);
1284 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1295 *idx = t + i + (subtract_one ? -1 : 0);
1300 static int generic_array_bisect_plus_one(JournalFile *f,
1305 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1306 direction_t direction,
1312 bool step_back = false;
1316 assert(test_object);
1321 /* This bisects the array in object 'first', but first checks the 'extra' entry */
1323 r = test_object(f, extra, needle);
1327 if (r == TEST_FOUND)
1328 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1330 /* If we are looking with DIRECTION_UP then we need to first
1331 see if in the actual array there is a matching entry, and
1332 return the last one of that. But if there isn't any we need
1333 to return this one. Hence remember this, and return it below. */
1336 step_back = direction == DIRECTION_UP;
1338 if (r == TEST_RIGHT) {
1339 if (direction == DIRECTION_DOWN)
1345 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1347 if (r == 0 && step_back)
1356 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1372 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1378 else if (p < needle)
1384 int journal_file_move_to_entry_by_offset(
1387 direction_t direction,
1391 return generic_array_bisect(f,
1392 le64toh(f->header->entry_array_offset),
1393 le64toh(f->header->n_entries),
1401 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1408 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1412 if (le64toh(o->entry.seqnum) == needle)
1414 else if (le64toh(o->entry.seqnum) < needle)
1420 int journal_file_move_to_entry_by_seqnum(
1423 direction_t direction,
1427 return generic_array_bisect(f,
1428 le64toh(f->header->entry_array_offset),
1429 le64toh(f->header->n_entries),
1436 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1443 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1447 if (le64toh(o->entry.realtime) == needle)
1449 else if (le64toh(o->entry.realtime) < needle)
1455 int journal_file_move_to_entry_by_realtime(
1458 direction_t direction,
1462 return generic_array_bisect(f,
1463 le64toh(f->header->entry_array_offset),
1464 le64toh(f->header->n_entries),
1466 test_object_realtime,
1471 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1478 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1482 if (le64toh(o->entry.monotonic) == needle)
1484 else if (le64toh(o->entry.monotonic) < needle)
1490 int journal_file_move_to_entry_by_monotonic(
1494 direction_t direction,
1498 char t[9+32+1] = "_BOOT_ID=";
1504 sd_id128_to_string(boot_id, t + 9);
1505 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1511 return generic_array_bisect_plus_one(f,
1512 le64toh(o->data.entry_offset),
1513 le64toh(o->data.entry_array_offset),
1514 le64toh(o->data.n_entries),
1516 test_object_monotonic,
1521 int journal_file_next_entry(
1523 Object *o, uint64_t p,
1524 direction_t direction,
1525 Object **ret, uint64_t *offset) {
1531 assert(p > 0 || !o);
1533 n = le64toh(f->header->n_entries);
1538 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1540 if (o->object.type != OBJECT_ENTRY)
1543 r = generic_array_bisect(f,
1544 le64toh(f->header->entry_array_offset),
1545 le64toh(f->header->n_entries),
1554 if (direction == DIRECTION_DOWN) {
1567 /* And jump to it */
1568 return generic_array_get(f,
1569 le64toh(f->header->entry_array_offset),
1574 int journal_file_skip_entry(
1576 Object *o, uint64_t p,
1578 Object **ret, uint64_t *offset) {
1587 if (o->object.type != OBJECT_ENTRY)
1590 r = generic_array_bisect(f,
1591 le64toh(f->header->entry_array_offset),
1592 le64toh(f->header->n_entries),
1601 /* Calculate new index */
1603 if ((uint64_t) -skip >= i)
1606 i = i - (uint64_t) -skip;
1608 i += (uint64_t) skip;
1610 n = le64toh(f->header->n_entries);
1617 return generic_array_get(f,
1618 le64toh(f->header->entry_array_offset),
1623 int journal_file_next_entry_for_data(
1625 Object *o, uint64_t p,
1626 uint64_t data_offset,
1627 direction_t direction,
1628 Object **ret, uint64_t *offset) {
1635 assert(p > 0 || !o);
1637 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1641 n = le64toh(d->data.n_entries);
1646 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1648 if (o->object.type != OBJECT_ENTRY)
1651 r = generic_array_bisect_plus_one(f,
1652 le64toh(d->data.entry_offset),
1653 le64toh(d->data.entry_array_offset),
1654 le64toh(d->data.n_entries),
1664 if (direction == DIRECTION_DOWN) {
1678 return generic_array_get_plus_one(f,
1679 le64toh(d->data.entry_offset),
1680 le64toh(d->data.entry_array_offset),
1685 int journal_file_move_to_entry_by_offset_for_data(
1687 uint64_t data_offset,
1689 direction_t direction,
1690 Object **ret, uint64_t *offset) {
1697 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1701 return generic_array_bisect_plus_one(f,
1702 le64toh(d->data.entry_offset),
1703 le64toh(d->data.entry_array_offset),
1704 le64toh(d->data.n_entries),
1711 int journal_file_move_to_entry_by_monotonic_for_data(
1713 uint64_t data_offset,
1716 direction_t direction,
1717 Object **ret, uint64_t *offset) {
1719 char t[9+32+1] = "_BOOT_ID=";
1726 /* First, seek by time */
1727 sd_id128_to_string(boot_id, t + 9);
1728 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1734 r = generic_array_bisect_plus_one(f,
1735 le64toh(o->data.entry_offset),
1736 le64toh(o->data.entry_array_offset),
1737 le64toh(o->data.n_entries),
1739 test_object_monotonic,
1745 /* And now, continue seeking until we find an entry that
1746 * exists in both bisection arrays */
1752 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1756 r = generic_array_bisect_plus_one(f,
1757 le64toh(d->data.entry_offset),
1758 le64toh(d->data.entry_array_offset),
1759 le64toh(d->data.n_entries),
1767 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1771 r = generic_array_bisect_plus_one(f,
1772 le64toh(o->data.entry_offset),
1773 le64toh(o->data.entry_array_offset),
1774 le64toh(o->data.n_entries),
1798 int journal_file_move_to_entry_by_seqnum_for_data(
1800 uint64_t data_offset,
1802 direction_t direction,
1803 Object **ret, uint64_t *offset) {
1810 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1814 return generic_array_bisect_plus_one(f,
1815 le64toh(d->data.entry_offset),
1816 le64toh(d->data.entry_array_offset),
1817 le64toh(d->data.n_entries),
1824 int journal_file_move_to_entry_by_realtime_for_data(
1826 uint64_t data_offset,
1828 direction_t direction,
1829 Object **ret, uint64_t *offset) {
1836 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1840 return generic_array_bisect_plus_one(f,
1841 le64toh(d->data.entry_offset),
1842 le64toh(d->data.entry_array_offset),
1843 le64toh(d->data.n_entries),
1845 test_object_realtime,
1850 void journal_file_dump(JournalFile *f) {
1857 journal_file_print_header(f);
1859 p = le64toh(f->header->header_size);
1861 r = journal_file_move_to_object(f, -1, p, &o);
1865 switch (o->object.type) {
1868 printf("Type: OBJECT_UNUSED\n");
1872 printf("Type: OBJECT_DATA\n");
1876 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1877 (unsigned long long) le64toh(o->entry.seqnum),
1878 (unsigned long long) le64toh(o->entry.monotonic),
1879 (unsigned long long) le64toh(o->entry.realtime));
1882 case OBJECT_FIELD_HASH_TABLE:
1883 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1886 case OBJECT_DATA_HASH_TABLE:
1887 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1890 case OBJECT_ENTRY_ARRAY:
1891 printf("Type: OBJECT_ENTRY_ARRAY\n");
1895 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1896 (unsigned long long) le64toh(o->tag.seqnum),
1897 (unsigned long long) le64toh(o->tag.epoch));
1901 if (o->object.flags & OBJECT_COMPRESSED)
1902 printf("Flags: COMPRESSED\n");
1904 if (p == le64toh(f->header->tail_object_offset))
1907 p = p + ALIGN64(le64toh(o->object.size));
1912 log_error("File corrupt");
/* Prints a human-readable summary of the file header to stdout: IDs,
 * state, flags, sizes, sequence numbers, timestamps and object
 * counters. The counters added in later header revisions (n_data,
 * n_fields, n_tags, n_entry_arrays) are only shown if the header
 * contains them, and the disk usage only if fstat() succeeds.
 *
 * Every ID formatted within the single printf() call needs its own
 * buffer: all arguments are evaluated before anything is printed, so
 * two IDs sharing one buffer would print as the same string. */
1915 void journal_file_print_header(JournalFile *f) {
1916 char a[33], b[33], c[33], d[33];
1917 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1919 char bytes[FORMAT_BYTES_MAX];
1923 printf("File Path: %s\n"
1927 "Sequential Number ID: %s\n"
1929 "Compatible Flags:%s%s\n"
1930 "Incompatible Flags:%s%s\n"
1931 "Header size: %llu\n"
1932 "Arena size: %llu\n"
1933 "Data Hash Table Size: %llu\n"
1934 "Field Hash Table Size: %llu\n"
1935 "Rotate Suggested: %s\n"
1936 "Head Sequential Number: %llu\n"
1937 "Tail Sequential Number: %llu\n"
1938 "Head Realtime Timestamp: %s\n"
1939 "Tail Realtime Timestamp: %s\n"
1941 "Entry Objects: %llu\n",
1943 sd_id128_to_string(f->header->file_id, a),
1944 sd_id128_to_string(f->header->machine_id, b),
1945 sd_id128_to_string(f->header->boot_id, c),
1946 sd_id128_to_string(f->header->seqnum_id, d),
1947 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1948 f->header->state == STATE_ONLINE ? "ONLINE" :
1949 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1950 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1951 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1952 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1953 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1954 (unsigned long long) le64toh(f->header->header_size),
1955 (unsigned long long) le64toh(f->header->arena_size),
1956 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1957 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1958 yes_no(journal_file_rotate_suggested(f)),
1959 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1960 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1961 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1962 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1963 (unsigned long long) le64toh(f->header->n_objects),
1964 (unsigned long long) le64toh(f->header->n_entries));
1966 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1967 printf("Data Objects: %llu\n"
1968 "Data Hash Table Fill: %.1f%%\n",
1969 (unsigned long long) le64toh(f->header->n_data),
1970 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1972 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1973 printf("Field Objects: %llu\n"
1974 "Field Hash Table Fill: %.1f%%\n",
1975 (unsigned long long) le64toh(f->header->n_fields),
1976 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1978 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1979 printf("Tag Objects: %llu\n",
1980 (unsigned long long) le64toh(f->header->n_tags));
1981 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1982 printf("Entry Array Objects: %llu\n",
1983 (unsigned long long) le64toh(f->header->n_entry_arrays));
1985 if (fstat(f->fd, &st) >= 0)
1986 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
/* Open the journal file fname and, when it is an empty file opened for
 * writing, initialize it as a new journal.
 *
 * Only what is visible in this excerpt is described:
 *  - the access mode in flags is checked against O_RDONLY and O_RDWR,
 *    and fname is checked for the suffixes .journal and .journal~;
 *  - compress is stored in the file object;
 *  - metrics are normalized with journal_default_metrics() and copied
 *    into the file object under a condition not visible here
 *    (presumably when metrics is non-NULL); otherwise the metrics of
 *    template are used if template is given;
 *  - template is also handed to journal_file_init_header() for new
 *    files.
 * NOTE(review): the error codes of the individual failure paths and
 * the assignment to ret are not visible in this excerpt. */
1989 int journal_file_open(
1995 JournalMetrics *metrics,
1996 MMapCache *mmap_cache,
1997 JournalFile *template,
1998 JournalFile **ret) {
2002 bool newly_created = false;
/* Access mode check: only O_RDONLY and O_RDWR pass. */
2006 if ((flags & O_ACCMODE) != O_RDONLY &&
2007 (flags & O_ACCMODE) != O_RDWR)
/* File name check: one of the two journal suffixes is required. */
2010 if (!endswith(fname, ".journal") &&
2011 !endswith(fname, ".journal~"))
2014 f = new0(JournalFile, 1);
2022 f->prot = prot_from_flags(flags);
2023 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2024 f->compress = compress;
/* Either take a reference on the mmap cache passed in or create a new
 * one. NOTE(review): the selecting condition is not visible here,
 * presumably whether mmap_cache is non-NULL. */
2028 f->mmap = mmap_cache_ref(mmap_cache);
2030 f->mmap = mmap_cache_new();
2037 f->path = strdup(fname);
2043 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2049 if (fstat(f->fd, &f->last_stat) < 0) {
/* A zero-sized file opened for writing is treated as newly created and
 * gets its header initialized. */
2054 if (f->last_stat.st_size == 0 && f->writable) {
2055 newly_created = true;
2058 /* Try to load the FSPRG state, and if we can't, then
2059 * just don't do sealing */
2060 r = journal_file_fss_load(f);
2065 r = journal_file_init_header(f, template);
/* Refresh the cached stat data after the header was initialized. */
2069 if (fstat(f->fd, &f->last_stat) < 0) {
/* The file has to be at least as large as the minimal header. */
2075 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
/* Map the header at file offset 0 as a shared mapping, with the
 * protection derived from flags. */
2080 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2081 if (f->header == MAP_FAILED) {
/* Pre-existing files get their header verified. */
2087 if (!newly_created) {
2088 r = journal_file_verify_header(f);
/* For pre-existing writable files the FSS state is loaded here; new
 * files already had it loaded above. */
2094 if (!newly_created && f->writable) {
2095 r = journal_file_fss_load(f);
2103 journal_default_metrics(metrics, f->fd);
2104 f->metrics = *metrics;
2105 } else if (template)
2106 f->metrics = template->metrics;
2108 r = journal_file_refresh_header(f);
2114 r = journal_file_hmac_setup(f);
/* New files get both hash tables set up and a first tag appended. */
2119 if (newly_created) {
2120 r = journal_file_setup_field_hash_table(f);
2124 r = journal_file_setup_data_hash_table(f);
2129 r = journal_file_append_first_tag(f);
/* Map both hash tables. */
2135 r = journal_file_map_field_hash_table(f);
2139 r = journal_file_map_data_hash_table(f);
/* NOTE(review): presumably the failure path; its label is not visible
 * in this excerpt. */
2149 journal_file_close(f);
/* Rotate the journal file *f: rename it to an archive name, mark its
 * header as archived, open a fresh file under the original path and
 * close the old one.
 *
 * The archive name is the old path with its trailing .journal
 * (8 characters) removed, followed by one separator character, the
 * seqnum ID, the tail sequence number and the tail realtime timestamp
 * (both as 16-digit hex) and a new .journal suffix.
 *
 * compress and seal are passed on to journal_file_open() for the new
 * file.
 * NOTE(review): return values, error handling and the update of *f
 * are not visible in this excerpt. */
2154 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2157 JournalFile *old_file, *new_file = NULL;
/* Writability and the .journal suffix of the old file are checked. */
2165 if (!old_file->writable)
2168 if (!endswith(old_file->path, ".journal"))
2171 l = strlen(old_file->path);
/* l already counts the 8 characters of the old suffix, which pay for
 * the new suffix. On top of that: 1 separator, 32 ID characters, two
 * times 1 separator plus 16 hex digits, and the terminating NUL. */
2173 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
/* Copy the path without its 8 character suffix. */
2177 memcpy(p, old_file->path, l - 8);
/* The ID goes one byte behind the copied prefix. NOTE(review): the
 * byte at p + l - 8 is presumably set to a separator on a line not
 * visible here. */
2179 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
/* The size passed to snprintf() is exactly the rest of the buffer:
 * two separators, two 16-digit numbers, 8 suffix characters, NUL. */
2180 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2181 "-%016llx-%016llx.journal",
2182 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2183 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2185 r = rename(old_file->path, p);
2191 old_file->header->state = STATE_ARCHIVED;
/* The replacement is opened under the original path with the flags
 * and mode of the old file, no explicit metrics, the mmap cache of the
 * old file, and the old file as template. The old file is closed only
 * afterwards. */
2193 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2194 journal_file_close(old_file);
/* Variant of journal_file_open() that, after certain failures, renames
 * the existing file away and tries the open a second time.
 *
 * All parameters are handed to journal_file_open() unchanged, both for
 * the first attempt and for the retry.
 * NOTE(review): what is returned when one of the intermediate checks
 * declines the retry is not visible in this excerpt; presumably the
 * result of the first open attempt. */
2200 int journal_file_open_reliably(
2206 JournalMetrics *metrics,
2207 MMapCache *mmap_cache,
2208 JournalFile *template,
2209 JournalFile **ret) {
2215 r = journal_file_open(fname, flags, mode, compress, seal,
2216 metrics, mmap_cache, template, ret);
/* The first result is compared against the errors that qualify for
 * the rename-and-retry treatment. */
2217 if (r != -EBADMSG && /* corrupted */
2218 r != -ENODATA && /* truncated */
2219 r != -EHOSTDOWN && /* other machine */
2220 r != -EPROTONOSUPPORT && /* incompatible feature */
2221 r != -EBUSY && /* unclean shutdown */
2222 r != -ESHUTDOWN /* already archived */)
/* Further checks before a file is replaced: read-only access, a
 * missing O_CREAT flag, and a name that does not end in .journal. */
2225 if ((flags & O_ACCMODE) == O_RDONLY)
2228 if (!(flags & O_CREAT))
2231 if (!endswith(fname, ".journal"))
2234 /* The file is corrupted. Rotate it away and try it again (but only once) */
/* The new name ends in .journal~ and embeds the current realtime
 * clock value as 16-digit hex. NOTE(review): the remaining format
 * arguments are not visible in this excerpt. */
2237 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2239 (unsigned long long) now(CLOCK_REALTIME),
2243 r = rename(fname, p);
2248 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
/* Second and last attempt, with the same arguments as the first. */
2250 return journal_file_open(fname, flags, mode, compress, seal,
2251 metrics, mmap_cache, template, ret);
/* Copy the entry object o, located at offset p in journal file from,
 * into journal file to.
 *
 * Every data object referenced by the entry is looked up in from,
 * uncompressed if flagged as compressed, and appended to to with
 * journal_file_append_data(). The entry itself is then appended with
 * journal_file_append_entry_internal(), carrying over the monotonic
 * and realtime timestamps of the source entry. seqnum, ret and offset
 * are passed through to that call.
 * NOTE(review): most error returns are not visible in this excerpt. */
2255 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2257 uint64_t q, xor_hash = 0;
2270 ts.monotonic = le64toh(o->entry.monotonic);
2271 ts.realtime = le64toh(o->entry.realtime);
/* The monotonic timestamp of the source entry is compared against the
 * tail of the destination, if the destination has a valid one. */
2273 if (to->tail_entry_monotonic_valid &&
2274 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
/* NOTE(review): n is derived from the entry object read from the
 * file and sizes a stack allocation without a visible upper bound -
 * confirm that a bound is enforced elsewhere. */
2277 n = journal_file_entry_n_items(o);
2278 items = alloca(sizeof(EntryItem) * n);
2280 for (i = 0; i < n; i++) {
/* Offset and hash of the i-th data object as recorded in the entry. */
2287 q = le64toh(o->entry.items[i].object_offset);
2288 le_hash = o->entry.items[i].hash;
/* From here on o refers to the data object, not to the entry. */
2290 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The hash stored in the entry has to match the one of the data
 * object. */
2294 if (le_hash != o->data.hash)
/* Payload length = object size minus the offset of the payload within
 * the object. */
2297 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2300 /* We hit the limit on 32bit machines */
2301 if ((uint64_t) t != l)
/* Compressed payloads are expanded into the compress buffer of the
 * source file before being appended. */
2304 if (o->object.flags & OBJECT_COMPRESSED) {
2308 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2311 data = from->compress_buffer;
/* NOTE(review): presumably the build without compression support; the
 * guarding preprocessor lines are not visible here. */
2314 return -EPROTONOSUPPORT;
2317 data = o->data.payload;
2319 r = journal_file_append_data(to, data, l, &u, &h);
/* Accumulate the XOR of the data hashes and record the offset and
 * hash of the copy for the new entry. */
2323 xor_hash ^= le64toh(u->data.hash);
2324 items[i].object_offset = htole64(h);
2325 items[i].hash = u->data.hash;
/* Switch o back to the entry object at p for the next iteration. */
2327 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2332 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
/* Fill in and sanitize the size limits in m, using the file system
 * that fd refers to as the reference for derived defaults.
 *
 * A field holding (uint64_t) -1 is treated as unset and receives a
 * default. All fields are modified in place, and the resulting values
 * are logged at debug level. */
2335 void journal_default_metrics(JournalMetrics *m, int fd) {
2336 uint64_t fs_size = 0;
2338 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
/* File system size in bytes; remains 0 when fstatvfs() fails. */
2343 if (fstatvfs(fd, &ss) >= 0)
2344 fs_size = ss.f_frsize * ss.f_blocks;
/* max_use unset: take a share of the file system size and clamp it
 * between DEFAULT_MAX_USE_LOWER and DEFAULT_MAX_USE_UPPER. */
2346 if (m->max_use == (uint64_t) -1) {
2349 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2351 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2352 m->max_use = DEFAULT_MAX_USE_UPPER;
2354 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2355 m->max_use = DEFAULT_MAX_USE_LOWER;
/* NOTE(review): presumably the fallback for an unknown file system
 * size; the condition is not visible in this excerpt. */
2357 m->max_use = DEFAULT_MAX_USE_LOWER;
/* NOTE(review): presumably the branch for a max_use supplied by the
 * caller: page-align it and make it at least twice the minimum file
 * size. */
2359 m->max_use = PAGE_ALIGN(m->max_use);
2361 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2362 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
/* max_size unset: an eighth of max_use, capped at
 * DEFAULT_MAX_SIZE_UPPER. */
2365 if (m->max_size == (uint64_t) -1) {
2366 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2368 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2369 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2371 m->max_size = PAGE_ALIGN(m->max_size);
2373 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2374 m->max_size = JOURNAL_FILE_SIZE_MIN;
/* max_use is raised if needed so that it covers two files of
 * max_size. */
2376 if (m->max_size*2 > m->max_use)
2377 m->max_use = m->max_size*2;
/* min_size unset: the minimum journal file size. */
2379 if (m->min_size == (uint64_t) -1)
2380 m->min_size = JOURNAL_FILE_SIZE_MIN;
2382 m->min_size = PAGE_ALIGN(m->min_size);
2384 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2385 m->min_size = JOURNAL_FILE_SIZE_MIN;
/* A min_size above max_size wins: max_size is raised to it. */
2387 if (m->min_size > m->max_size)
2388 m->max_size = m->min_size;
/* keep_free unset: a share of the file system size, capped at
 * DEFAULT_KEEP_FREE_UPPER, or DEFAULT_KEEP_FREE as the alternative.
 * NOTE(review): the condition choosing between the two is not visible
 * in this excerpt. */
2391 if (m->keep_free == (uint64_t) -1) {
2394 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2396 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2397 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2400 m->keep_free = DEFAULT_KEEP_FREE;
2403 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2404 format_bytes(a, sizeof(a), m->max_use),
2405 format_bytes(b, sizeof(b), m->max_size),
2406 format_bytes(c, sizeof(c), m->min_size),
2407 format_bytes(d, sizeof(d), m->keep_free));
/* Report the realtime range covered by journal file f: *from receives
 * the realtime timestamp of the head entry and *to that of the tail
 * entry, both taken from the file header.
 * NOTE(review): a header timestamp of 0 is special-cased; the value
 * returned in that case, and whether from and to may be NULL, is not
 * visible in this excerpt. */
2410 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2415 if (f->header->head_entry_realtime == 0)
2418 *from = le64toh(f->header->head_entry_realtime);
2422 if (f->header->tail_entry_realtime == 0)
2425 *to = le64toh(f->header->tail_entry_realtime);
/* Report the monotonic timestamp range that journal file f holds for
 * the boot boot_id.
 *
 * The data object for the field _BOOT_ID=<boot_id> is looked up; *from
 * receives the monotonic timestamp of the entry at its entry_offset,
 * and *to that of the entry found at index n_entries-1 of the entry
 * list of the data object.
 * NOTE(review): return values are not visible in this excerpt. */
2431 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* 9 characters of field name and equals sign, 32 characters of ID,
 * and the terminating NUL. */
2432 char t[9+32+1] = "_BOOT_ID=";
/* The ID is written directly behind the 9 character prefix. */
2440 sd_id128_to_string(boot_id, t + 9);
/* o is the data object for this boot and p its offset. */
2442 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
/* n_entries is unsigned, so this tests for a data object without any
 * entries. */
2446 if (le64toh(o->data.n_entries) <= 0)
/* From here on o refers to the entry at entry_offset. */
2450 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2454 *from = le64toh(o->entry.monotonic);
/* Switch o back to the data object at p before reading its entry
 * list. */
2458 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2462 r = generic_array_get_plus_one(f,
2463 le64toh(o->data.entry_offset),
2464 le64toh(o->data.entry_array_offset),
2465 le64toh(o->data.n_entries)-1,
2470 *to = le64toh(o->entry.monotonic);
/* Check whether journal file f looks like it should be rotated.
 *
 * The checks visible here are: a header smaller than the current
 * Header structure, a data hash table filled beyond three quarters,
 * and a field hash table filled beyond three quarters. Each of them
 * logs its reason at debug level.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably true is returned when one of the checks fires. */
2476 bool journal_file_rotate_suggested(JournalFile *f) {
2479 /* If we gained new header fields we gained new features,
2480 * hence suggest a rotation */
2481 if (le64toh(f->header->header_size) < sizeof(Header)) {
2482 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2486 /* Let's check if the hash tables grew over a certain fill
2487 * level (75%, borrowing this value from Java's hash table
2488 * implementation), and if so suggest a rotation. To calculate
2489 * the fill level we need the n_data field, which only exists
2490 * in newer versions. */
/* The comparison n * 4 > items * 3 expresses a fill level above 75%
 * in integer arithmetic; items is the table size in bytes divided by
 * sizeof(HashItem). */
2492 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2493 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2494 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2496 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2497 (unsigned long long) le64toh(f->header->n_data),
2498 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2499 (unsigned long long) (f->last_stat.st_size),
/* n_data cannot be 0 here, since the enclosing condition requires
 * n_data * 4 to be greater than a non-negative value. */
2500 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
/* Same fill level test for the field hash table. */
2504 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2505 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2506 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2508 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2509 (unsigned long long) le64toh(f->header->n_fields),
2510 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));