1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
31 #include <attr/xattr.h>
34 #include "journal-def.h"
35 #include "journal-file.h"
36 #include "journal-authenticate.h"
/* Default sizes, in bytes, of the data and field hash tables, expressed
 * as a fixed number of HashItem slots each. */
41 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
42 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
/* journal_file_append_data() compares the payload size against this
 * value (size >= threshold) before it attempts compression. */
44 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
46 /* This is the minimum journal file size */
47 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
49 /* These are the lower and upper bounds if we deduce the max_use value
50 * from the file system size */
51 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
52 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
54 /* This is the upper bound if we deduce max_size from max_use */
55 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
57 /* This is the upper bound if we deduce the keep_free value from the file system size */
59 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
61 /* This is the keep_free value when we can't determine the system size */
63 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MB */
65 /* n_data was the first entry we added after the initial file format design */
66 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
/* Shut down a JournalFile and release what it holds.
 *
 * In the order visible here: append a final tag when the file is both
 * sealed and writable, drop the mmap cache's windows for the fd, flip a
 * writable file from STATE_ONLINE to STATE_OFFLINE, unmap the header,
 * close the fd, unreference the mmap cache, free the compression buffer,
 * release the sealing state (either unmapping fss_file or freeing
 * fsprg_state) and close the HMAC handle.
 *
 * NOTE(review): this extract omits several lines of the function (guards
 * around some of the releases and whatever follows the last call), so
 * the list above covers only the statements that are visible. */
68 void journal_file_close(JournalFile *f) {
72 /* Write the final tag */
73 if (f->seal && f->writable)
74 journal_file_append_tag(f);
77 /* Sync everything to disk, before we mark the file offline */
78 if (f->mmap && f->fd >= 0)
79 mmap_cache_close_fd(f->mmap, f->fd);
81 if (f->writable && f->fd >= 0)
/* NOTE(review): the statement guarded by the condition above is not
 * visible in this extract; given the "Sync everything" comment it is
 * presumably a sync call on f->fd -- confirm against the full file. */
85 /* Mark the file offline. Don't override the archived state if it already is set */
86 if (f->writable && f->header->state == STATE_ONLINE)
87 f->header->state = STATE_OFFLINE;
89 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
93 close_nointr_nofail(f->fd);
98 mmap_cache_unref(f->mmap);
101 free(f->compress_buffer);
106 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
107 else if (f->fsprg_state)
108 free(f->fsprg_state);
113 gcry_md_close(f->hmac);
/* Build a fresh on-disk header in a local Header `h` and write it to the
 * start of the file with pwrite() at offset 0.
 *
 * The header gets the file signature, its own 64-bit-aligned size, the
 * COMPRESSED incompatible flag when f->compress is set, the SEALED
 * compatible flag when f->seal is set, and a randomized file_id. The
 * sequence-number identity is either copied from `template` (seqnum_id
 * and tail_entry_seqnum) or set to the new file_id.
 *
 * NOTE(review): the condition choosing between the two seqnum branches,
 * the error checks and the return statements are not visible in this
 * extract. */
119 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
127 memcpy(h.signature, HEADER_SIGNATURE, 8);
128 h.header_size = htole64(ALIGN64(sizeof(h)));
130 h.incompatible_flags =
131 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
134 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
136 r = sd_id128_randomize(&h.file_id);
141 h.seqnum_id = template->header->seqnum_id;
142 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
144 h.seqnum_id = h.file_id;
146 k = pwrite(f->fd, &h, sizeof(h), 0);
/* Update the mapped header for a new writing session: store the local
 * machine ID, compare the current boot ID with the one recorded in the
 * header (a match marks the tail monotonic timestamp as valid), record
 * the current boot ID, set the state to STATE_ONLINE and msync() the
 * header page(s) synchronously.
 *
 * NOTE(review): error handling after the two sd_id128_get_*() calls and
 * the return statement are not visible in this extract. */
156 static int journal_file_refresh_header(JournalFile *f) {
162 r = sd_id128_get_machine(&f->header->machine_id);
166 r = sd_id128_get_boot(&boot_id);
170 if (sd_id128_equal(boot_id, f->header->boot_id))
171 f->tail_entry_monotonic_valid = true;
173 f->header->boot_id = boot_id;
175 f->header->state = STATE_ONLINE;
177 /* Sync the online state to disk */
178 msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
/* Sanity-check the mapped header before the file is used.
 *
 * Visible checks: signature; unknown incompatible flags (and, per the
 * comment below, unknown compatible flags when writing) are rejected
 * with -EPROTONOSUPPORT; state must be below _STATE_MAX; header_size
 * must reach HEADER_SIZE_MIN; a sealed file must contain n_entry_arrays;
 * header_size + arena_size must fit in the stat'ed file size; the tail
 * object must lie inside header + arena; the four stored offsets must
 * satisfy VALID64() and must not point into the header. A further part
 * compares the machine ID and inspects the state field. Finally
 * f->compress and f->seal are taken from the header flags.
 *
 * NOTE(review): most return statements, the condition selecting the
 * write-mode checks, and the preprocessor lines are missing from this
 * extract. */
184 static int journal_file_verify_header(JournalFile *f) {
187 if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
190 /* In both read and write mode we refuse to open files with
191 * incompatible flags we don't know */
/* NOTE(review): the next two checks look like alternative branches of a
 * conditional-compilation block whose directives are not visible here:
 * one tolerates the COMPRESSED flag, the other rejects any flag. */
193 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
194 return -EPROTONOSUPPORT;
196 if (f->header->incompatible_flags != 0)
197 return -EPROTONOSUPPORT;
200 /* When open for writing we refuse to open files with
201 * compatible flags, too */
204 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
205 return -EPROTONOSUPPORT;
207 if (f->header->compatible_flags != 0)
208 return -EPROTONOSUPPORT;
212 if (f->header->state >= _STATE_MAX)
215 /* The first addition was n_data, so check that we are at least this large */
216 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
219 if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
222 if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
225 if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
228 if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
229 !VALID64(le64toh(f->header->field_hash_table_offset)) ||
230 !VALID64(le64toh(f->header->tail_object_offset)) ||
231 !VALID64(le64toh(f->header->entry_array_offset)))
234 if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
235 le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
236 le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
237 le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
242 sd_id128_t machine_id;
245 r = sd_id128_get_machine(&machine_id);
249 if (!sd_id128_equal(machine_id, f->header->machine_id))
252 state = f->header->state;
254 if (state == STATE_ONLINE) {
255 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
257 } else if (state == STATE_ARCHIVED)
259 else if (state != STATE_OFFLINE) {
260 log_debug("Journal file %s has unknown state %u.", f->path, state);
/* Adopt the compression and sealing settings recorded in the header. */
265 f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
267 f->seal = JOURNAL_HEADER_SEALED(f->header);
/* Make sure the file is large enough to hold `size` bytes at `offset`.
 *
 * Visible logic: the current size is header_size + arena_size; the
 * wanted size is PAGE_ALIGN(offset + size), but never less than the
 * header size. Nothing happens when the file is already large enough.
 * The request is checked against metrics.max_size and, above
 * metrics.min_size with keep_free set, against the free space reported
 * by fstatvfs() minus keep_free. The file is then grown with
 * posix_fallocate(), re-stat'ed, and arena_size is updated.
 *
 * NOTE(review): the return statements and some declarations are not
 * visible in this extract, and two comments in the body are truncated
 * (their closing lines are missing); they are left untouched here.
 * NOTE(review): `svfs.f_bfree * svfs.f_bsize` is multiplied in the
 * operands' own types; whether that can overflow before widening depends
 * on the platform's statvfs field widths -- confirm. */
272 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
273 uint64_t old_size, new_size;
278 /* We assume that this file is not sparse, and we know that
279 * for sure, since we always call posix_fallocate()
283 le64toh(f->header->header_size) +
284 le64toh(f->header->arena_size);
286 new_size = PAGE_ALIGN(offset + size);
287 if (new_size < le64toh(f->header->header_size))
288 new_size = le64toh(f->header->header_size);
290 if (new_size <= old_size)
293 if (f->metrics.max_size > 0 &&
294 new_size > f->metrics.max_size)
297 if (new_size > f->metrics.min_size &&
298 f->metrics.keep_free > 0) {
301 if (fstatvfs(f->fd, &svfs) >= 0) {
304 available = svfs.f_bfree * svfs.f_bsize;
306 if (available >= f->metrics.keep_free)
307 available -= f->metrics.keep_free;
311 if (new_size - old_size > available)
316 /* Note that the glibc fallocate() fallback is very
317 inefficient, hence we try to minimize the allocation area
319 r = posix_fallocate(f->fd, old_size, new_size - old_size);
323 if (fstat(f->fd, &f->last_stat) < 0)
326 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
/* Obtain a memory window covering [offset, offset + size) of the file.
 *
 * The range is first checked against the cached stat data; if it looks
 * out of range the stat data is refreshed once, and -EADDRNOTAVAIL is
 * returned when fstat() fails or the range is still beyond the end of
 * the file. Otherwise the request is passed to mmap_cache_get(), whose
 * result is returned; the mapped address is delivered through `ret`. */
331 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
338 /* Avoid SIGBUS on invalid accesses */
339 if (offset + size > (uint64_t) f->last_stat.st_size) {
340 /* Hmm, out of range? Let's refresh the fstat() data
341 * first, before we trust that check. */
343 if (fstat(f->fd, &f->last_stat) < 0 ||
344 offset + size > (uint64_t) f->last_stat.st_size)
345 return -EADDRNOTAVAIL;
348 return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
/* Return the smallest size an object of o's type may have: the size of
 * the type-specific object struct from the lookup table, or
 * sizeof(ObjectHeader) when the type is outside the table or has no
 * entry in it. */
351 static uint64_t minimum_header_size(Object *o) {
353 static uint64_t table[] = {
354 [OBJECT_DATA] = sizeof(DataObject),
355 [OBJECT_FIELD] = sizeof(FieldObject),
356 [OBJECT_ENTRY] = sizeof(EntryObject),
357 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
358 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
359 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
360 [OBJECT_TAG] = sizeof(TagObject),
/* Unknown types and table gaps fall back to the generic header size. */
363 if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
364 return sizeof(ObjectHeader);
366 return table[o->object.type];
/* Map the object located at `offset` and validate its header.
 *
 * Only the ObjectHeader is mapped at first. The stored size must be at
 * least sizeof(ObjectHeader) and at least the minimum for the object's
 * type, the type must be above OBJECT_UNUSED, and when `type` is
 * non-negative it must match the stored type. If the object is larger
 * than its header it is mapped again at full size.
 *
 * NOTE(review): the error codes returned by the failing checks and the
 * final assignment to *ret are not visible in this extract. */
369 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
379 /* Objects may only be located at multiple of 64 bit */
380 if (!VALID64(offset))
383 /* One context for each type, plus one catch-all for the rest */
384 context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
386 r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
391 s = le64toh(o->object.size);
393 if (s < sizeof(ObjectHeader))
396 if (o->object.type <= OBJECT_UNUSED)
399 if (s < minimum_header_size(o))
402 if (type >= 0 && o->object.type != type)
/* Second mapping: now that the real size is known, cover the whole object. */
405 if (s > sizeof(ObjectHeader)) {
406 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
/* Pick the sequence number for the next entry: one more than the tail
 * sequence number stored in the header. The chosen value is written back
 * as the new tail, and also as the head when no head sequence number has
 * been recorded yet.
 *
 * NOTE(review): the code that reconciles the value with the optional
 * external counter `seqnum` (described by the comment below) and the
 * return statement are not visible in this extract. */
417 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
422 r = le64toh(f->header->tail_entry_seqnum) + 1;
425 /* If an external seqnum counter was passed, we update
426 * both the local and the external one, and set it to
427 * the maximum of both */
435 f->header->tail_entry_seqnum = htole64(r);
437 if (f->header->head_entry_seqnum == 0)
438 f->header->head_entry_seqnum = htole64(r);
/* Append a new object of `type` and `size` bytes after the current tail
 * object.
 *
 * The new position starts from tail_object_offset, or from header_size
 * (presumably when no tail object exists yet -- the selecting condition
 * is not visible here); otherwise the tail object is mapped and the
 * position advances by its 64-bit-aligned size. Space is then allocated,
 * the new object is mapped, its type and size are filled in, and the
 * header's tail_object_offset and n_objects are updated.
 *
 * NOTE(review): error checks, any clearing of the new object, the
 * assignments to *ret / *offset and the return are not visible. */
443 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
450 assert(type > 0 && type < _OBJECT_TYPE_MAX);
451 assert(size >= sizeof(ObjectHeader));
455 p = le64toh(f->header->tail_object_offset);
457 p = le64toh(f->header->header_size);
459 r = journal_file_move_to_object(f, -1, p, &tail);
463 p += ALIGN64(le64toh(tail->object.size));
466 r = journal_file_allocate(f, p, size);
470 r = journal_file_move_to(f, type, false, p, size, &t);
477 o->object.type = type;
478 o->object.size = htole64(size);
480 f->header->tail_object_offset = htole64(p);
481 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
/* Create the data hash table object for a new file.
 *
 * The table size in bytes is derived from metrics.max_size using the
 * ratio explained in the comment below, with
 * DEFAULT_DATA_HASH_TABLE_SIZE as the lower bound. The object is
 * appended, its items are zeroed, and the header records the offset of
 * the items array (not of the object header) together with the size. */
489 static int journal_file_setup_data_hash_table(JournalFile *f) {
496 /* We estimate that we need 1 hash table entry per 768 of
497 journal file and we want to make sure we never get beyond
498 75% fill level. Calculate the hash table size for the
499 maximum file size based on these metrics. */
501 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
502 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
503 s = DEFAULT_DATA_HASH_TABLE_SIZE;
505 log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
507 r = journal_file_append_object(f,
508 OBJECT_DATA_HASH_TABLE,
509 offsetof(Object, hash_table.items) + s,
514 memset(o->hash_table.items, 0, s);
516 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
517 f->header->data_hash_table_size = htole64(s);
/* Create the field hash table object for a new file, always using
 * DEFAULT_FIELD_HASH_TABLE_SIZE bytes of items. The items are zeroed and
 * the header records the offset of the items array and its size. */
522 static int journal_file_setup_field_hash_table(JournalFile *f) {
529 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
530 r = journal_file_append_object(f,
531 OBJECT_FIELD_HASH_TABLE,
532 offsetof(Object, hash_table.items) + s,
537 memset(o->hash_table.items, 0, s);
539 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
540 f->header->field_hash_table_size = htole64(s);
/* Map the data hash table, using the offset and size stored in the
 * header, and remember the mapping in f->data_hash_table.
 * NOTE(review): the remaining arguments of the journal_file_move_to()
 * call are not visible in this extract. */
545 static int journal_file_map_data_hash_table(JournalFile *f) {
552 p = le64toh(f->header->data_hash_table_offset);
553 s = le64toh(f->header->data_hash_table_size);
555 r = journal_file_move_to(f,
556 OBJECT_DATA_HASH_TABLE,
563 f->data_hash_table = t;
/* Map the field hash table, using the offset and size stored in the
 * header, and remember the mapping in f->field_hash_table.
 * NOTE(review): the remaining arguments of the journal_file_move_to()
 * call are not visible in this extract. */
567 static int journal_file_map_field_hash_table(JournalFile *f) {
574 p = le64toh(f->header->field_hash_table_offset);
575 s = le64toh(f->header->field_hash_table_size);
577 r = journal_file_move_to(f,
578 OBJECT_FIELD_HASH_TABLE,
585 f->field_hash_table = t;
/* Insert the data object `o` (located at `offset`) into the data hash
 * table bucket selected by `hash`.
 *
 * The object's chain and entry bookkeeping fields are reset first. The
 * bucket index is hash modulo the number of HashItem slots. The object
 * becomes the bucket head (presumably when the bucket is empty -- the
 * selecting condition is not visible) or is chained after the current
 * tail via next_hash_offset; in both cases it becomes the bucket's new
 * tail. n_data is incremented when the header has that field.
 *
 * NOTE(review): the modulo divides by data_hash_table_size /
 * sizeof(HashItem); nothing visible in this function guards against a
 * table size smaller than one HashItem -- confirm it is validated
 * elsewhere.
 * NOTE(review): one comment in the body is truncated in this extract and
 * is left untouched. */
589 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
597 if (o->object.type != OBJECT_DATA)
600 /* This might alter the window we are looking at */
602 o->data.next_hash_offset = o->data.next_field_offset = 0;
603 o->data.entry_offset = o->data.entry_array_offset = 0;
604 o->data.n_entries = 0;
606 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
607 p = le64toh(f->data_hash_table[h].tail_hash_offset);
609 /* Only entry in the hash table is easy */
610 f->data_hash_table[h].head_hash_offset = htole64(offset);
612 /* Move back to the previous data object, to patch in
615 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
619 o->data.next_hash_offset = htole64(offset);
622 f->data_hash_table[h].tail_hash_offset = htole64(offset);
624 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
625 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
/* Look up a data object whose payload equals `data` (`size` bytes),
 * given its precomputed `hash`.
 *
 * The hash selects a bucket; the bucket's chain is followed through
 * next_hash_offset. Objects with a different stored hash are passed
 * over. A compressed object is uncompressed into f->compress_buffer
 * before comparing; an uncompressed one matches when its object size
 * equals the expected size and the payload bytes are identical.
 *
 * NOTE(review): the loop construct, the return values for found / not
 * found, and the conditional-compilation lines around the compressed
 * branch (including the path returning -EPROTONOSUPPORT) are not visible
 * in this extract.
 * NOTE(review): the emptiness check below tests for a size of exactly 0;
 * a non-zero size below sizeof(HashItem) would still make the modulo
 * divisor zero -- confirm such sizes are rejected elsewhere. */
630 int journal_file_find_data_object_with_hash(
632 const void *data, uint64_t size, uint64_t hash,
633 Object **ret, uint64_t *offset) {
635 uint64_t p, osize, h;
639 assert(data || size == 0);
/* Size an uncompressed object with this payload would have. */
641 osize = offsetof(Object, data.payload) + size;
643 if (f->header->data_hash_table_size == 0)
646 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
647 p = le64toh(f->data_hash_table[h].head_hash_offset);
652 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
656 if (le64toh(o->data.hash) != hash)
659 if (o->object.flags & OBJECT_COMPRESSED) {
663 l = le64toh(o->object.size);
664 if (l <= offsetof(Object, data.payload))
667 l -= offsetof(Object, data.payload);
669 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
673 memcmp(f->compress_buffer, data, size) == 0) {
684 return -EPROTONOSUPPORT;
687 } else if (le64toh(o->object.size) == osize &&
688 memcmp(o->data.payload, data, size) == 0) {
/* Advance to the next object in the same bucket. */
700 p = le64toh(o->data.next_hash_offset);
/* Convenience wrapper: hash `data` with hash64() and hand the lookup to
 * journal_file_find_data_object_with_hash(), returning its result. */
706 int journal_file_find_data_object(
708 const void *data, uint64_t size,
709 Object **ret, uint64_t *offset) {
714 assert(data || size == 0);
716 hash = hash64(data, size);
718 return journal_file_find_data_object_with_hash(f,
/* Append a data object for `data` (`size` bytes).
 *
 * The payload is hashed and looked up first; what happens on a hit is
 * not visible here (presumably the existing object is reused -- confirm).
 * Otherwise a new OBJECT_DATA is appended and its hash stored. Payloads
 * of at least COMPRESSION_SIZE_THRESHOLD bytes may be compressed
 * directly into the object, in which case the object size is reduced and
 * OBJECT_COMPRESSED is set; if not compressed, the payload is copied
 * verbatim. The object is then linked into the hash table, re-mapped,
 * and fed to the HMAC.
 *
 * NOTE(review): the full compression condition, error checks, and the
 * assignments to *ret / *offset are not visible in this extract. */
723 static int journal_file_append_data(
725 const void *data, uint64_t size,
726 Object **ret, uint64_t *offset) {
732 bool compressed = false;
735 assert(data || size == 0);
737 hash = hash64(data, size);
739 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
753 osize = offsetof(Object, data.payload) + size;
754 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
758 o->data.hash = htole64(hash);
762 size >= COMPRESSION_SIZE_THRESHOLD) {
765 compressed = compress_blob(data, size, o->data.payload, &rsize);
768 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
769 o->object.flags |= OBJECT_COMPRESSED;
771 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
776 if (!compressed && size > 0)
777 memcpy(o->data.payload, data, size);
779 r = journal_file_link_data(f, o, p, hash);
783 /* The linking might have altered the window, so let's
784 * refresh our pointer */
785 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
790 r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
/* Number of EntryItem slots in an entry object, computed from the object
 * size minus the offset of the items array.
 * NOTE(review): the value returned for non-entry objects is not visible
 * in this extract. */
804 uint64_t journal_file_entry_n_items(Object *o) {
807 if (o->object.type != OBJECT_ENTRY)
810 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
/* Number of 64-bit offset slots in an entry array object, computed from
 * the object size minus the offset of the items array.
 * NOTE(review): the value returned for other object types is not visible
 * in this extract. */
813 uint64_t journal_file_entry_array_n_items(Object *o) {
816 if (o->object.type != OBJECT_ENTRY_ARRAY)
819 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
/* Number of HashItem slots in a data or field hash table object,
 * computed from the object size minus the offset of the items array.
 * NOTE(review): the value returned for other object types is not visible
 * in this extract. */
822 uint64_t journal_file_hash_table_n_items(Object *o) {
825 if (o->object.type != OBJECT_DATA_HASH_TABLE &&
826 o->object.type != OBJECT_FIELD_HASH_TABLE)
829 return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
/* Store entry offset `p` at logical index *idx of a chained list of
 * entry array objects, then increment *idx.
 *
 * Visible flow: each array in the chain is mapped and its capacity read;
 * if the slot exists the offset is stored there and *idx is bumped.
 * Otherwise the chain is followed via next_entry_array_offset. When the
 * chain is exhausted a new OBJECT_ENTRY_ARRAY is appended, fed to the
 * HMAC, given the offset, and hooked onto the previous array `ap`;
 * n_entry_arrays is incremented when the header has that field.
 *
 * NOTE(review): the loop construct, the index arithmetic between arrays,
 * the sizing rule for the new array, and the handling of an empty chain
 * (updating *first) are not visible in this extract. */
832 static int link_entry_into_array(JournalFile *f,
837 uint64_t n = 0, ap = 0, q, i, a, hidx;
846 i = hidx = le64toh(*idx);
849 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
853 n = journal_file_entry_array_n_items(o);
855 o->entry_array.items[i] = htole64(p);
856 *idx = htole64(hidx + 1);
862 a = le64toh(o->entry_array.next_entry_array_offset);
873 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
874 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
880 r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
885 o->entry_array.items[i] = htole64(p);
890 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
894 o->entry_array.next_entry_array_offset = htole64(q);
897 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
898 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
900 *idx = htole64(hidx + 1);
/* Variant of link_entry_into_array() for lists that keep one element
 * outside the array chain: the array is addressed with *idx - 1, and
 * *idx itself is incremented afterwards.
 * NOTE(review): the branch that stores the very first element directly
 * (without touching the array) is not visible in this extract. */
905 static int link_entry_into_array_plus_one(JournalFile *f,
924 i = htole64(le64toh(*idx) - 1);
925 r = link_entry_into_array(f, first, &i, p);
930 *idx = htole64(le64toh(*idx) + 1);
/* Register the entry at `offset` with the data object referenced by item
 * `i` of entry `o`: the data object is mapped and the entry is added to
 * that object's entry list via link_entry_into_array_plus_one().
 * Note that `o` is reused to point at the data object after the lookup. */
934 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
941 p = le64toh(o->entry.items[i].object_offset);
945 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
949 return link_entry_into_array_plus_one(f,
950 &o->data.entry_offset,
951 &o->data.entry_array_offset,
/* Make a freshly written entry object reachable.
 *
 * After a full memory barrier the entry is added to the file-wide entry
 * array (header entry_array_offset / n_entries). The header's realtime
 * and monotonic tail timestamps are updated (and the head realtime
 * timestamp when it is still zero), the tail monotonic timestamp is
 * flagged valid, and the entry is then linked to each data object it
 * references.
 *
 * NOTE(review): error checks inside the loop and the return statements
 * are not visible in this extract. */
956 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
964 if (o->object.type != OBJECT_ENTRY)
967 __sync_synchronize();
969 /* Link up the entry itself */
970 r = link_entry_into_array(f,
971 &f->header->entry_array_offset,
972 &f->header->n_entries,
977 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
979 if (f->header->head_entry_realtime == 0)
980 f->header->head_entry_realtime = o->entry.realtime;
982 f->header->tail_entry_realtime = o->entry.realtime;
983 f->header->tail_entry_monotonic = o->entry.monotonic;
985 f->tail_entry_monotonic_valid = true;
987 /* Link up the items */
988 n = journal_file_entry_n_items(o);
989 for (i = 0; i < n; i++) {
990 r = journal_file_link_entry_item(f, o, offset, i);
/* Write an entry object for already-appended data items.
 *
 * An OBJECT_ENTRY large enough for `n_items` EntryItems is appended and
 * filled with a new sequence number, the item list, both timestamps from
 * `ts`, `xor_hash` and the header's boot ID. The object is fed to the
 * HMAC and then linked into the entry arrays.
 *
 * NOTE(review): error checks and the assignments to *ret / *offset are
 * not visible in this extract. */
998 static int journal_file_append_entry_internal(
1000 const dual_timestamp *ts,
1002 const EntryItem items[], unsigned n_items,
1004 Object **ret, uint64_t *offset) {
1011 assert(items || n_items == 0);
1014 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1016 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1020 o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1021 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1022 o->entry.realtime = htole64(ts->realtime);
1023 o->entry.monotonic = htole64(ts->monotonic);
1024 o->entry.xor_hash = htole64(xor_hash);
1025 o->entry.boot_id = f->header->boot_id;
1028 r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1033 r = journal_file_link_entry(f, o, np);
/* Notify watchers that the file changed: after a full memory barrier the
 * file is ftruncate()d to the size recorded in f->last_stat, which keeps
 * its length but (per the comment below) generates IN_MODIFY. A failure
 * is only logged. */
1046 void journal_file_post_change(JournalFile *f) {
1049 /* inotify() does not receive IN_MODIFY events from file
1050 * accesses done via mmap(). After each access we hence
1051 * trigger IN_MODIFY by truncating the journal file to its
1052 * current size which triggers IN_MODIFY. */
1054 __sync_synchronize();
1056 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1057 log_error("Failed to truncate file to its own size: %m");
/* qsort() comparator for EntryItem: orders items by their object_offset
 * (converted from little-endian).
 * NOTE(review): the return statements are not visible in this extract. */
1060 static int entry_item_cmp(const void *_a, const void *_b) {
1061 const EntryItem *a = _a, *b = _b;
1063 if (le64toh(a->object_offset) < le64toh(b->object_offset))
1065 if (le64toh(a->object_offset) > le64toh(b->object_offset))
/* Append one journal entry made of `n_iovec` fields.
 *
 * A local timestamp `_ts` can be filled with dual_timestamp_get()
 * (presumably when `ts` is NULL -- the condition is not visible). An
 * entry whose monotonic time lies before the known-valid tail monotonic
 * time is treated specially (the action taken is not visible here). A
 * tag may be appended first; then every iovec is stored as a data
 * object, its hash XORed into xor_hash and its offset/hash recorded in a
 * stack-allocated item array. The items are sorted by file offset, the
 * entry object is written, and journal_file_post_change() is called.
 *
 * NOTE(review): the item array comes from alloca() and so grows the
 * stack with n_iovec; no bound on n_iovec is visible in this extract. */
1070 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1074 uint64_t xor_hash = 0;
1075 struct dual_timestamp _ts;
1078 assert(iovec || n_iovec == 0);
1084 dual_timestamp_get(&_ts);
1088 if (f->tail_entry_monotonic_valid &&
1089 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1093 r = journal_file_maybe_append_tag(f, ts->realtime);
1098 /* alloca() can't take 0, hence let's allocate at least one */
1099 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1101 for (i = 0; i < n_iovec; i++) {
1105 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1109 xor_hash ^= le64toh(o->data.hash);
1110 items[i].object_offset = htole64(p);
1111 items[i].hash = o->data.hash;
1114 /* Order by the position on disk, in order to improve seek
1115 * times for rotating media. */
1116 qsort(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1118 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1120 journal_file_post_change(f);
/* Fetch the entry stored at a logical index of a chained entry array.
 *
 * Each array in the chain is mapped and its capacity read; the wanted
 * slot is read from the array that contains the index, otherwise the
 * chain is followed through next_entry_array_offset. If the chain ended
 * or the stored offset is zero the lookup stops (return value not
 * visible); otherwise the entry object at that offset is mapped.
 *
 * NOTE(review): the loop construct, the index bookkeeping and the
 * return / output assignments are not visible in this extract. */
1125 static int generic_array_get(JournalFile *f,
1128 Object **ret, uint64_t *offset) {
1140 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1144 n = journal_file_entry_array_n_items(o);
1146 p = le64toh(o->entry_array.items[i]);
1151 a = le64toh(o->entry_array.next_entry_array_offset);
1154 if (a <= 0 || p <= 0)
1157 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
/* Like generic_array_get(), for lists that keep one element (`extra`)
 * outside the array chain: the extra entry is mapped directly, while any
 * other index is looked up in the array at position i-1.
 * NOTE(review): the condition that selects the extra element (presumably
 * i == 0) is not visible in this extract. */
1170 static int generic_array_get_plus_one(JournalFile *f,
1174 Object **ret, uint64_t *offset) {
1183 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1196 return generic_array_get(f, first, i-1, ret, offset);
/* Binary-search a chained entry array for `needle`, using the callback
 * `test_object` to compare the entry at a given offset with the needle.
 *
 * Visible structure: for each array in the chain, one element is tested
 * first (`lp = p = items[i]`) to decide whether the target lies in this
 * array or a later one; within the chosen array the range
 * [left, right] is halved until it collapses. A TEST_FOUND result is
 * folded into TEST_RIGHT or TEST_LEFT depending on `direction`, so the
 * search settles on the boundary match rather than an arbitrary one.
 * For DIRECTION_UP the result may be the element before the position
 * found (`subtract_one`). The resulting entry is mapped and its logical
 * index (t + i, minus one when stepping back) is stored in *idx.
 *
 * NOTE(review): large parts of this function (loop constructs, updates
 * of left/right/t/last_p, and all returns) are missing from this
 * extract; the description covers only what the visible lines establish. */
1205 static int generic_array_bisect(JournalFile *f,
1209 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1210 direction_t direction,
1215 uint64_t a, p, t = 0, i = 0, last_p = 0;
1216 bool subtract_one = false;
1217 Object *o, *array = NULL;
1221 assert(test_object);
1225 uint64_t left, right, k, lp;
1227 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1231 k = journal_file_entry_array_n_items(array);
1237 lp = p = le64toh(array->entry_array.items[i]);
1241 r = test_object(f, p, needle);
1245 if (r == TEST_FOUND)
1246 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1248 if (r == TEST_RIGHT) {
1252 if (left == right) {
1253 if (direction == DIRECTION_UP)
1254 subtract_one = true;
1260 assert(left < right);
1262 i = (left + right) / 2;
1263 p = le64toh(array->entry_array.items[i]);
1267 r = test_object(f, p, needle);
1271 if (r == TEST_FOUND)
1272 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1274 if (r == TEST_RIGHT)
1282 if (direction == DIRECTION_UP) {
1284 subtract_one = true;
1295 a = le64toh(array->entry_array.next_entry_array_offset);
1301 if (subtract_one && t == 0 && i == 0)
1304 if (subtract_one && i == 0)
1306 else if (subtract_one)
1307 p = le64toh(array->entry_array.items[i-1]);
1309 p = le64toh(array->entry_array.items[i]);
1311 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1322 *idx = t + i + (subtract_one ? -1 : 0);
/* Bisection for lists that keep one element (`extra`) outside the array
 * chain. The extra element is tested first with `test_object`; a
 * TEST_FOUND result is folded into TEST_RIGHT / TEST_LEFT according to
 * `direction`. The array chain is then searched with
 * generic_array_bisect() over n-1 elements, and for DIRECTION_UP the
 * extra element can serve as the fallback result (`step_back`) when the
 * array search yields nothing.
 *
 * NOTE(review): many lines of this function (returns, index adjustment,
 * output assignments) are missing from this extract, and two comments in
 * the body are truncated; they are left untouched here. */
1327 static int generic_array_bisect_plus_one(JournalFile *f,
1332 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1333 direction_t direction,
1339 bool step_back = false;
1343 assert(test_object);
1348 /* This bisects the array in object 'first', but first checks
1350 r = test_object(f, extra, needle);
1354 if (r == TEST_FOUND)
1355 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1357 /* if we are looking with DIRECTION_UP then we need to first
1358 see if in the actual array there is a matching entry, and
1359 return the last one of that. But if there isn't any we need
1360 to return this one. Hence remember this, and return it
1363 step_back = direction == DIRECTION_UP;
1365 if (r == TEST_RIGHT) {
1366 if (direction == DIRECTION_DOWN)
1372 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1374 if (r == 0 && step_back)
1383 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
/* Bisection callback comparing an entry's file offset `p` directly with
 * `needle`.
 * NOTE(review): only one comparison is visible in this extract; the
 * equality test and the returned TEST_* values are not. */
1399 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1405 else if (p < needle)
/* Seek in the file-wide entry array (header entry_array_offset,
 * n_entries) with generic_array_bisect().
 * NOTE(review): the needle and callback arguments are not visible in
 * this extract; by its name this variant presumably seeks by file
 * offset -- confirm. */
1411 int journal_file_move_to_entry_by_offset(
1414 direction_t direction,
1418 return generic_array_bisect(f,
1419 le64toh(f->header->entry_array_offset),
1420 le64toh(f->header->n_entries),
/* Bisection callback: maps the entry at offset `p` and compares its
 * sequence number with `needle`.
 * NOTE(review): the returned TEST_* values are not visible in this
 * extract. */
1428 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1435 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1439 if (le64toh(o->entry.seqnum) == needle)
1441 else if (le64toh(o->entry.seqnum) < needle)
/* Seek in the file-wide entry array (header entry_array_offset,
 * n_entries) with generic_array_bisect().
 * NOTE(review): the needle and callback arguments are not visible in
 * this extract; by its name this variant presumably seeks by sequence
 * number -- confirm. */
1447 int journal_file_move_to_entry_by_seqnum(
1450 direction_t direction,
1454 return generic_array_bisect(f,
1455 le64toh(f->header->entry_array_offset),
1456 le64toh(f->header->n_entries),
/* Bisection callback: maps the entry at offset `p` and compares its
 * realtime timestamp with `needle`.
 * NOTE(review): the returned TEST_* values are not visible in this
 * extract. */
1463 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1470 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1474 if (le64toh(o->entry.realtime) == needle)
1476 else if (le64toh(o->entry.realtime) < needle)
/* Seek in the file-wide entry array (header entry_array_offset,
 * n_entries) by realtime timestamp, bisecting with
 * test_object_realtime(). */
1482 int journal_file_move_to_entry_by_realtime(
1485 direction_t direction,
1489 return generic_array_bisect(f,
1490 le64toh(f->header->entry_array_offset),
1491 le64toh(f->header->n_entries),
1493 test_object_realtime,
/* Bisection callback: maps the entry at offset `p` and compares its
 * monotonic timestamp with `needle`.
 * NOTE(review): the returned TEST_* values are not visible in this
 * extract. */
1498 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1505 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1509 if (le64toh(o->entry.monotonic) == needle)
1511 else if (le64toh(o->entry.monotonic) < needle)
/* Seek by monotonic timestamp within one boot.
 *
 * Monotonic timestamps are looked up per boot: the function builds the
 * field string "_BOOT_ID=<boot id>" (9 prefix bytes + 32 characters
 * written by sd_id128_to_string() + NUL), finds the matching data
 * object, and bisects that object's own entry list with
 * test_object_monotonic().
 * NOTE(review): handling of a failed or empty lookup is not visible in
 * this extract. */
1517 int journal_file_move_to_entry_by_monotonic(
1521 direction_t direction,
1525 char t[9+32+1] = "_BOOT_ID=";
1531 sd_id128_to_string(boot_id, t + 9);
1532 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1538 return generic_array_bisect_plus_one(f,
1539 le64toh(o->data.entry_offset),
1540 le64toh(o->data.entry_array_offset),
1541 le64toh(o->data.n_entries),
1543 test_object_monotonic,
/* Step to the entry following or preceding a given one in the file-wide
 * entry array.
 *
 * With no current entry the index starts at 0 for DIRECTION_DOWN or at
 * n - 1 for DIRECTION_UP. Otherwise the current entry (which must be an
 * OBJECT_ENTRY) is located by bisection and the index is then adjusted
 * according to `direction`. The entry at the resulting index is fetched
 * with generic_array_get().
 *
 * NOTE(review): the branch conditions, the bisection arguments, the
 * index adjustment and the bounds checks are not visible in this
 * extract. */
1548 int journal_file_next_entry(
1550 Object *o, uint64_t p,
1551 direction_t direction,
1552 Object **ret, uint64_t *offset) {
1558 assert(p > 0 || !o);
1560 n = le64toh(f->header->n_entries);
1565 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1567 if (o->object.type != OBJECT_ENTRY)
1570 r = generic_array_bisect(f,
1571 le64toh(f->header->entry_array_offset),
1572 le64toh(f->header->n_entries),
1581 if (direction == DIRECTION_DOWN) {
1594 /* And jump to it */
1595 return generic_array_get(f,
1596 le64toh(f->header->entry_array_offset),
/* Move a signed number of entries (`skip`) away from a given entry in
 * the file-wide entry array.
 *
 * The current entry (which must be an OBJECT_ENTRY) is located by
 * bisection to obtain its index `i`. For a backwards skip the magnitude
 * is subtracted from `i`, with a separate branch when it would reach
 * past the start; for a forwards skip it is added. The entry at the new
 * index is fetched with generic_array_get().
 *
 * NOTE(review): the branch conditions, the clamping against n_entries
 * and the bisection arguments are not visible in this extract. */
1601 int journal_file_skip_entry(
1603 Object *o, uint64_t p,
1605 Object **ret, uint64_t *offset) {
1614 if (o->object.type != OBJECT_ENTRY)
1617 r = generic_array_bisect(f,
1618 le64toh(f->header->entry_array_offset),
1619 le64toh(f->header->n_entries),
1628 /* Calculate new index */
1630 if ((uint64_t) -skip >= i)
1633 i = i - (uint64_t) -skip;
1635 i += (uint64_t) skip;
1637 n = le64toh(f->header->n_entries);
1644 return generic_array_get(f,
1645 le64toh(f->header->entry_array_offset),
/* Same stepping logic as journal_file_next_entry(), restricted to the
 * entries that reference the data object at `data_offset`.
 *
 * The data object is mapped and its n_entries read. Without a current
 * entry the index starts at 0 (DIRECTION_DOWN) or n - 1 (DIRECTION_UP);
 * otherwise the current entry is located in the data object's entry list
 * by bisection and the index adjusted by direction. The result is
 * fetched with generic_array_get_plus_one().
 *
 * NOTE(review): branch conditions, index adjustment and bounds checks
 * are not visible in this extract. */
1650 int journal_file_next_entry_for_data(
1652 Object *o, uint64_t p,
1653 uint64_t data_offset,
1654 direction_t direction,
1655 Object **ret, uint64_t *offset) {
1662 assert(p > 0 || !o);
1664 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1668 n = le64toh(d->data.n_entries);
1673 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1675 if (o->object.type != OBJECT_ENTRY)
1678 r = generic_array_bisect_plus_one(f,
1679 le64toh(d->data.entry_offset),
1680 le64toh(d->data.entry_array_offset),
1681 le64toh(d->data.n_entries),
1691 if (direction == DIRECTION_DOWN) {
1705 return generic_array_get_plus_one(f,
1706 le64toh(d->data.entry_offset),
1707 le64toh(d->data.entry_array_offset),
/* Seek within the entry list of the data object at `data_offset`: the
 * data object is mapped and its entry list (entry_offset,
 * entry_array_offset, n_entries) is bisected.
 * NOTE(review): the needle and callback arguments are not visible in
 * this extract; by its name this variant presumably seeks by file
 * offset -- confirm. */
1712 int journal_file_move_to_entry_by_offset_for_data(
1714 uint64_t data_offset,
1716 direction_t direction,
1717 Object **ret, uint64_t *offset) {
1724 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1728 return generic_array_bisect_plus_one(f,
1729 le64toh(d->data.entry_offset),
1730 le64toh(d->data.entry_array_offset),
1731 le64toh(d->data.n_entries),
/* Seek by monotonic timestamp among the entries that reference the data
 * object at `data_offset`, within one boot.
 *
 * Step one finds the "_BOOT_ID=<boot id>" data object (remembering its
 * offset in `b`) and bisects its entry list by monotonic time. Step two
 * alternates between the target data object's entry list and the boot-ID
 * object's entry list, re-mapping each data object before bisecting it,
 * until (per the comment below) an entry present in both lists is found.
 *
 * NOTE(review): the loop construct, the needles passed to the later
 * bisections, the termination test and the returns are not visible in
 * this extract. */
1738 int journal_file_move_to_entry_by_monotonic_for_data(
1740 uint64_t data_offset,
1743 direction_t direction,
1744 Object **ret, uint64_t *offset) {
1746 char t[9+32+1] = "_BOOT_ID=";
1753 /* First, seek by time */
1754 sd_id128_to_string(boot_id, t + 9);
1755 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1761 r = generic_array_bisect_plus_one(f,
1762 le64toh(o->data.entry_offset),
1763 le64toh(o->data.entry_array_offset),
1764 le64toh(o->data.n_entries),
1766 test_object_monotonic,
1772 /* And now, continue seeking until we find an entry that
1773 * exists in both bisection arrays */
1779 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1783 r = generic_array_bisect_plus_one(f,
1784 le64toh(d->data.entry_offset),
1785 le64toh(d->data.entry_array_offset),
1786 le64toh(d->data.n_entries),
1794 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1798 r = generic_array_bisect_plus_one(f,
1799 le64toh(o->data.entry_offset),
1800 le64toh(o->data.entry_array_offset),
1801 le64toh(o->data.n_entries),
/* Seek within the entry list of the data object at `data_offset`: the
 * data object is mapped and its entry list (entry_offset,
 * entry_array_offset, n_entries) is bisected.
 * NOTE(review): the needle and callback arguments are not visible in
 * this extract; by its name this variant presumably seeks by sequence
 * number -- confirm. */
1825 int journal_file_move_to_entry_by_seqnum_for_data(
1827 uint64_t data_offset,
1829 direction_t direction,
1830 Object **ret, uint64_t *offset) {
1837 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1841 return generic_array_bisect_plus_one(f,
1842 le64toh(d->data.entry_offset),
1843 le64toh(d->data.entry_array_offset),
1844 le64toh(d->data.n_entries),
/* Seek by realtime timestamp within the entry list of the data object at
 * `data_offset`: the data object is mapped and its entry list is
 * bisected with test_object_realtime(). */
1851 int journal_file_move_to_entry_by_realtime_for_data(
1853 uint64_t data_offset,
1855 direction_t direction,
1856 Object **ret, uint64_t *offset) {
1863 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1867 return generic_array_bisect_plus_one(f,
1868 le64toh(d->data.entry_offset),
1869 le64toh(d->data.entry_array_offset),
1870 le64toh(d->data.n_entries),
1872 test_object_realtime,
/* Debugging aid: print the header, then walk every object in the file
 * and print one line describing its type (with sequence number and
 * timestamps for entries, sequence number and epoch for tags) plus a
 * "Flags: COMPRESSED" line where applicable.
 *
 * The walk starts right after the header, advances by each object's
 * 64-bit-aligned size, and stops once the object at tail_object_offset
 * has been printed. "File corrupt" is logged on the failure path.
 *
 * NOTE(review): the loop construct, several case labels and the jump to
 * the failure path are not visible in this extract. */
1877 void journal_file_dump(JournalFile *f) {
1884 journal_file_print_header(f);
1886 p = le64toh(f->header->header_size);
1888 r = journal_file_move_to_object(f, -1, p, &o);
1892 switch (o->object.type) {
1895 printf("Type: OBJECT_UNUSED\n");
1899 printf("Type: OBJECT_DATA\n");
1903 printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1904 (unsigned long long) le64toh(o->entry.seqnum),
1905 (unsigned long long) le64toh(o->entry.monotonic),
1906 (unsigned long long) le64toh(o->entry.realtime));
1909 case OBJECT_FIELD_HASH_TABLE:
1910 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1913 case OBJECT_DATA_HASH_TABLE:
1914 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1917 case OBJECT_ENTRY_ARRAY:
1918 printf("Type: OBJECT_ENTRY_ARRAY\n");
1922 printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1923 (unsigned long long) le64toh(o->tag.seqnum),
1924 (unsigned long long) le64toh(o->tag.epoch));
1928 if (o->object.flags & OBJECT_COMPRESSED)
1929 printf("Flags: COMPRESSED\n");
1931 if (p == le64toh(f->header->tail_object_offset))
1934 p = p + ALIGN64(le64toh(o->object.size));
1939 log_error("File corrupt");
/* Print a human-readable summary of the journal file header to stdout:
 * path, IDs, state, flags, sizes, hash table dimensions, rotation hint,
 * head/tail sequence numbers and realtime timestamps, and object counts.
 * Counters added to the header format later (n_data, n_fields, n_tags,
 * n_entry_arrays) are printed only when the header contains them, and
 * the disk usage line only when fstat() succeeds.
 *
 * NOTE(review): a few lines of this function (some format-string rows
 * and the first printf() argument) are not visible in this extract and
 * are left as they are. */
1942 void journal_file_print_header(JournalFile *f) {
/* One scratch buffer per ID formatted in the printf() call below. All
 * argument expressions are evaluated before printf() runs, so every ID
 * needs its own buffer; sharing one buffer between the boot ID and the
 * sequential-number ID made both "%s" fields show the same value. */
1943 char a[33], b[33], c[33], d[33];
1944 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1946 char bytes[FORMAT_BYTES_MAX];
1950 printf("File Path: %s\n"
1954 "Sequential Number ID: %s\n"
1956 "Compatible Flags:%s%s\n"
1957 "Incompatible Flags:%s%s\n"
1958 "Header size: %llu\n"
1959 "Arena size: %llu\n"
1960 "Data Hash Table Size: %llu\n"
1961 "Field Hash Table Size: %llu\n"
1962 "Rotate Suggested: %s\n"
1963 "Head Sequential Number: %llu\n"
1964 "Tail Sequential Number: %llu\n"
1965 "Head Realtime Timestamp: %s\n"
1966 "Tail Realtime Timestamp: %s\n"
1968 "Entry Objects: %llu\n",
1970 sd_id128_to_string(f->header->file_id, a),
1971 sd_id128_to_string(f->header->machine_id, b),
1972 sd_id128_to_string(f->header->boot_id, c),
1973 sd_id128_to_string(f->header->seqnum_id, d),
1974 f->header->state == STATE_OFFLINE ? "OFFLINE" :
1975 f->header->state == STATE_ONLINE ? "ONLINE" :
1976 f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1977 JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1978 (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1979 JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1980 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1981 (unsigned long long) le64toh(f->header->header_size),
1982 (unsigned long long) le64toh(f->header->arena_size),
1983 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1984 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1985 yes_no(journal_file_rotate_suggested(f, 0)),
1986 (unsigned long long) le64toh(f->header->head_entry_seqnum),
1987 (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1988 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1989 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1990 (unsigned long long) le64toh(f->header->n_objects),
1991 (unsigned long long) le64toh(f->header->n_entries));
/* Fill level = stored objects relative to the number of hash buckets. */
1993 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1994 printf("Data Objects: %llu\n"
1995 "Data Hash Table Fill: %.1f%%\n",
1996 (unsigned long long) le64toh(f->header->n_data),
1997 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1999 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2000 printf("Field Objects: %llu\n"
2001 "Field Hash Table Fill: %.1f%%\n",
2002 (unsigned long long) le64toh(f->header->n_fields),
2003 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2005 if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2006 printf("Tag Objects: %llu\n",
2007 (unsigned long long) le64toh(f->header->n_tags));
2008 if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2009 printf("Entry Array Objects: %llu\n",
2010 (unsigned long long) le64toh(f->header->n_entry_arrays));
/* st_blocks is multiplied by 512 to obtain the on-disk size in bytes. */
2012 if (fstat(f->fd, &st) >= 0)
2013 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
/* Open a journal file and set up a JournalFile object for it.
 *
 * Visible steps: validate the access mode and file name suffix, allocate
 * the object, open and fstat() the file, initialize the header if the
 * file is empty and writable, mmap() and (for pre-existing files) verify
 * the header, pick up metrics, refresh the header, set up HMAC state,
 * create the hash tables for new files and finally map both hash tables.
 *
 * NOTE(review): the error checks between the calls and the return
 * statements are not visible here; the journal_file_close(f) at the end
 * is presumably the common failure path -- confirm. */
2016 int journal_file_open(
2022 JournalMetrics *metrics,
2023 MMapCache *mmap_cache,
2024 JournalFile *template,
2025 JournalFile **ret) {
2029 bool newly_created = false;
/* Only O_RDONLY and O_RDWR access modes pass this check. */
2034 if ((flags & O_ACCMODE) != O_RDONLY &&
2035 (flags & O_ACCMODE) != O_RDWR)
/* The file name has to end in ".journal" or ".journal~". */
2038 if (!endswith(fname, ".journal") &&
2039 !endswith(fname, ".journal~"))
2042 f = new0(JournalFile, 1);
2050 f->prot = prot_from_flags(flags);
2051 f->writable = (flags & O_ACCMODE) != O_RDONLY;
2053 f->compress = compress;
/* Either take a reference on the caller's mmap cache or create a new
 * one; presumably selected by whether mmap_cache is non-NULL -- the
 * condition is not visible here. */
2060 f->mmap = mmap_cache_ref(mmap_cache);
2062 f->mmap = mmap_cache_new();
2069 f->path = strdup(fname);
2075 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2081 if (fstat(f->fd, &f->last_stat) < 0) {
/* An empty file opened for writing is treated as a new journal file. */
2086 if (f->last_stat.st_size == 0 && f->writable) {
2090 /* Let's attach the creation time to the journal file,
2091 * so that the vacuuming code knows the age of this
2092 * file even if the file might end up corrupted one
2093 * day... Ideally we'd just use the creation time many
2094 * file systems maintain for each file, but there is
2095 * currently no usable API to query this, hence let's
2096 * emulate this via extended attributes. If extended
2097 * attributes are not supported we'll just skip this,
2098 * and rely solely on mtime/atime/ctime of the file.*/
/* The timestamp is stored little-endian; the fsetxattr() result is
 * not checked, matching the best-effort intent described above. */
2100 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2101 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2105 /* Try to load the FSPRG state, and if we can't, then
2106 * just don't do sealing */
2108 r = journal_file_fss_load(f);
2114 r = journal_file_init_header(f, template);
/* Re-read the file status after the header has been initialized. */
2118 if (fstat(f->fd, &f->last_stat) < 0) {
2123 newly_created = true;
/* The file must be at least as large as the smallest known header. */
2126 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
/* Map the header from offset 0 as a shared mapping, rounded up to whole pages. */
2131 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2132 if (f->header == MAP_FAILED) {
/* Pre-existing files get their header verified ... */
2138 if (!newly_created) {
2139 r = journal_file_verify_header(f);
/* ... and, when writable, their FSS state loaded (new files did this
 * above, before the header was initialized). */
2145 if (!newly_created && f->writable) {
2146 r = journal_file_fss_load(f);
/* Metrics: fill in defaults and copy the caller's metrics, or else
 * inherit the template's. The first branch is presumably taken when
 * metrics is non-NULL -- the condition is not visible here. */
2154 journal_default_metrics(metrics, f->fd);
2155 f->metrics = *metrics;
2156 } else if (template)
2157 f->metrics = template->metrics;
2159 r = journal_file_refresh_header(f);
2165 r = journal_file_hmac_setup(f);
/* A new file needs its hash tables created and the first tag appended
 * before the tables can be mapped. */
2170 if (newly_created) {
2171 r = journal_file_setup_field_hash_table(f);
2175 r = journal_file_setup_data_hash_table(f);
2180 r = journal_file_append_first_tag(f);
2186 r = journal_file_map_field_hash_table(f);
2190 r = journal_file_map_data_hash_table(f);
2198 journal_file_close(f);
/* Rotate a journal file: rename the current file to an archive name
 * built from its seqnum ID, head entry sequence number and head entry
 * realtime timestamp, mark it STATE_ARCHIVED, and open a fresh file
 * under the original path using the old file as template.
 *
 * Only writable files whose path ends in ".journal" pass the checks
 * below (what is returned otherwise is not visible here). */
2203 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2206 JournalFile *old_file, *new_file = NULL;
2214 if (!old_file->writable)
2217 if (!endswith(old_file->path, ".journal"))
2220 l = strlen(old_file->path);
/* Room for: the old path (l, including its 8-char ".journal" suffix,
 * which is replaced and re-appended), 1 separator, 32 ID chars, '-',
 * 16 hex digits, '-', 16 hex digits and the trailing NUL. */
2222 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
/* Copy the path without its ".journal" suffix; the ID string goes one
 * byte further on, leaving p[l-8] for a separator character. */
2226 memcpy(p, old_file->path, l - 8);
2228 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
/* Append "-<head seqnum>-<head realtime>.journal" after the 32-char ID. */
2229 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2230 "-%016llx-%016llx.journal",
2231 (unsigned long long) le64toh((*f)->header->head_entry_seqnum),
2232 (unsigned long long) le64toh((*f)->header->head_entry_realtime));
2234 r = rename(old_file->path, p);
2240 old_file->header->state = STATE_ARCHIVED;
/* Open the replacement under the old path, reusing the old file's
 * flags, mode and mmap cache and passing it as template; no explicit
 * metrics are supplied. The old file is closed right afterwards. */
2242 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2243 journal_file_close(old_file);
/* Like journal_file_open(), but if the first attempt fails with one of
 * a fixed set of errors, rename the existing file out of the way (to a
 * name ending in ".journal~") and try once more under the original
 * name.
 *
 * The retry only applies to opens that are not read-only, that include
 * O_CREAT, and whose file name ends in ".journal"; what is returned
 * when one of these checks fails is not visible here (presumably the
 * original result -- confirm). */
2249 int journal_file_open_reliably(
2255 JournalMetrics *metrics,
2256 MMapCache *mmap_cache,
2257 JournalFile *template,
2258 JournalFile **ret) {
2264 r = journal_file_open(fname, flags, mode, compress, seal,
2265 metrics, mmap_cache, template, ret);
/* Any result other than the errors listed here is not handled by the
 * rename-and-retry logic. */
2266 if (r != -EBADMSG && /* corrupted */
2267 r != -ENODATA && /* truncated */
2268 r != -EHOSTDOWN && /* other machine */
2269 r != -EPROTONOSUPPORT && /* incompatible feature */
2270 r != -EBUSY && /* unclean shutdown */
2271 r != -ESHUTDOWN /* already archived */)
2274 if ((flags & O_ACCMODE) == O_RDONLY)
2277 if (!(flags & O_CREAT))
2280 if (!endswith(fname, ".journal"))
2283 /* The file is corrupted. Rotate it away and try it again (but only once) */
/* New name: a prefix of fname, '@', two 16-digit hex values (one of
 * them the current realtime clock) and the ".journal~" suffix. */
2286 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2288 (unsigned long long) now(CLOCK_REALTIME),
2292 r = rename(fname, p);
2297 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
/* Second and last attempt, with the same arguments. */
2299 return journal_file_open(fname, flags, mode, compress, seal,
2300 metrics, mmap_cache, template, ret);
/* Copy the entry object o, located at offset p in file "from", into
 * file "to".
 *
 * Each data object the entry references is looked up in "from",
 * decompressed if it is stored compressed, and appended to "to". A new
 * entry with the original monotonic/realtime timestamps, referencing the
 * appended data objects, is then appended to "to"; seqnum, ret and
 * offset are passed through to journal_file_append_entry_internal(). */
2304 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2306 uint64_t q, xor_hash = 0;
2319 ts.monotonic = le64toh(o->entry.monotonic);
2320 ts.realtime = le64toh(o->entry.realtime);
/* Detect an entry whose monotonic timestamp lies before the current
 * tail of the destination (the action taken is not visible here). */
2322 if (to->tail_entry_monotonic_valid &&
2323 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
/* NOTE(review): n is derived from the entry object read from the file
 * and sizes a stack allocation; no upper bound is visible here --
 * confirm that n is limited elsewhere. */
2326 n = journal_file_entry_n_items(o);
2327 items = alloca(sizeof(EntryItem) * n);
2329 for (i = 0; i < n; i++) {
2336 q = le64toh(o->entry.items[i].object_offset);
2337 le_hash = o->entry.items[i].hash;
/* From here on o points at the data object, not at the entry. */
2339 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The hash stored in the entry item must match the data object's own. */
2343 if (le_hash != o->data.hash)
/* Payload length = object size minus the data object header. */
2346 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2349 /* We hit the limit on 32bit machines */
2350 if ((uint64_t) t != l)
2353 if (o->object.flags & OBJECT_COMPRESSED) {
/* Decompress into the source file's scratch buffer and use that as
 * the payload. */
2357 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2360 data = from->compress_buffer;
/* Presumably the branch taken when compression support is not
 * compiled in -- the surrounding preprocessor lines are not visible. */
2363 return -EPROTONOSUPPORT;
2366 data = o->data.payload;
2368 r = journal_file_append_data(to, data, l, &u, &h);
/* Accumulate the XOR of all data hashes and record where each data
 * object landed in the destination file. */
2372 xor_hash ^= le64toh(u->data.hash);
2373 items[i].object_offset = htole64(h);
2374 items[i].hash = u->data.hash;
/* Point o back at the source entry, since it was re-targeted to the
 * data object above. */
2376 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2381 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
/* Fill in and sanitize the size limits in *m.
 *
 * A field equal to (uint64_t) -1 is treated as unset and gets a default,
 * derived where possible from the size of the file system that fd lives
 * on. All values are page-aligned and clamped so that
 * min_size >= JOURNAL_FILE_SIZE_MIN, max_size >= min_size and
 * max_use >= 2 * max_size. The resulting values are logged at debug
 * level.
 *
 * NOTE(review): several if/else lines are not visible here, so the
 * branch notes below are inferred from the statements that are. */
2384 void journal_default_metrics(JournalMetrics *m, int fd) {
2385 uint64_t fs_size = 0;
2387 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
/* Total file system size in bytes; stays 0 if fstatvfs() fails. */
2392 if (fstatvfs(fd, &ss) >= 0)
2393 fs_size = ss.f_frsize * ss.f_blocks;
/* max_use unset: 10% of the file system, clamped to
 * [DEFAULT_MAX_USE_LOWER, DEFAULT_MAX_USE_UPPER]; falls back to the
 * lower bound (presumably when the file system size is unknown). */
2395 if (m->max_use == (uint64_t) -1) {
2398 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2400 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2401 m->max_use = DEFAULT_MAX_USE_UPPER;
2403 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2404 m->max_use = DEFAULT_MAX_USE_LOWER;
2406 m->max_use = DEFAULT_MAX_USE_LOWER;
/* Presumably the branch for an explicitly configured max_use:
 * page-align it and allow room for at least two minimum-size files. */
2408 m->max_use = PAGE_ALIGN(m->max_use);
2410 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2411 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
/* max_size unset: one eighth of max_use, capped at DEFAULT_MAX_SIZE_UPPER. */
2414 if (m->max_size == (uint64_t) -1) {
2415 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2417 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2418 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2420 m->max_size = PAGE_ALIGN(m->max_size);
2422 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2423 m->max_size = JOURNAL_FILE_SIZE_MIN;
/* max_use must be able to hold at least two files of max_size. */
2425 if (m->max_size*2 > m->max_use)
2426 m->max_use = m->max_size*2;
2428 if (m->min_size == (uint64_t) -1)
2429 m->min_size = JOURNAL_FILE_SIZE_MIN;
2431 m->min_size = PAGE_ALIGN(m->min_size);
2433 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2434 m->min_size = JOURNAL_FILE_SIZE_MIN;
/* If the limits conflict, min_size wins and max_size is raised. */
2436 if (m->min_size > m->max_size)
2437 m->max_size = m->min_size;
/* keep_free unset: 5% of the file system, capped at
 * DEFAULT_KEEP_FREE_UPPER; otherwise DEFAULT_KEEP_FREE (presumably
 * when the file system size is unknown). */
2440 if (m->keep_free == (uint64_t) -1) {
2443 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2445 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2446 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2449 m->keep_free = DEFAULT_KEEP_FREE;
2452 log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2453 format_bytes(a, sizeof(a), m->max_use),
2454 format_bytes(b, sizeof(b), m->max_size),
2455 format_bytes(c, sizeof(c), m->min_size),
2456 format_bytes(d, sizeof(d), m->keep_free));
/* Report the realtime timestamps of the first and last entry of the
 * file, as recorded in its header: *from receives head_entry_realtime
 * and *to receives tail_entry_realtime, both converted from
 * little-endian. A header value of 0 is checked for separately before
 * each assignment (the resulting action is not visible here). */
2459 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2464 if (f->header->head_entry_realtime == 0)
2467 *from = le64toh(f->header->head_entry_realtime);
2471 if (f->header->tail_entry_realtime == 0)
2474 *to = le64toh(f->header->tail_entry_realtime);
/* Report the monotonic timestamps of the first and last entry in this
 * file that belong to the given boot.
 *
 * The entries are located through the data object for the field
 * "_BOOT_ID=<boot_id>": *from is taken from the entry at the data
 * object's entry_offset, *to from the entry at index n_entries-1 of the
 * data object's entry list. */
2480 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* 9 bytes of "_BOOT_ID=", 32 ID characters and the trailing NUL. */
2481 char t[9+32+1] = "_BOOT_ID=";
2489 sd_id128_to_string(boot_id, t + 9);
/* Afterwards o is the data object and p its offset in the file. */
2491 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
/* n_entries is unsigned after le64toh(), so this tests for zero. */
2495 if (le64toh(o->data.n_entries) <= 0)
/* o now points at the entry the data object references directly. */
2499 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2503 *from = le64toh(o->entry.monotonic);
/* Go back to the data object, since o was re-targeted to the entry. */
2507 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Look up the last entry (index n_entries-1) linked from the data object. */
2511 r = generic_array_get_plus_one(f,
2512 le64toh(o->data.entry_offset),
2513 le64toh(o->data.entry_array_offset),
2514 le64toh(o->data.n_entries)-1,
2519 *to = le64toh(o->entry.monotonic);
2525 bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
2528 /* If we gained new header fields we gained new features,
2529 * hence suggest a rotation */
2530 if (le64toh(f->header->header_size) < sizeof(Header)) {
2531 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2535 /* Let's check if the hash tables grew over a certain fill
2536 * level (75%, borrowing this value from Java's hash table
2537 * implementation), and if so suggest a rotation. To calculate
2538 * the fill level we need the n_data field, which only exists
2539 * in newer versions. */
2541 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2542 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2543 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2545 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2546 (unsigned long long) le64toh(f->header->n_data),
2547 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2548 (unsigned long long) (f->last_stat.st_size),
2549 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2553 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2554 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2555 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2557 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2558 (unsigned long long) le64toh(f->header->n_fields),
2559 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2563 if (max_file_usec > 0) {
2566 h = le64toh(f->header->head_entry_realtime);
2567 t = now(CLOCK_REALTIME);
2569 if (h > 0 && t > h + max_file_usec)