1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2011 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
26 #include <sys/statvfs.h>
30 #include "journal-def.h"
31 #include "journal-file.h"
/* Default sizes, in bytes, of the data and field hash tables: a fixed
 * number of HashItem slots each. */
35 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
36 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
/* Size a mapping window is grown to when a smaller range is requested (8 MiB) */
38 #define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
/* Data payloads of at least this many bytes are handed to the compressor */
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL) /* 64 KiB */
45 /* These are the lower and upper bounds if we deduce the max_use value
46 * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL) /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL) /* 128 MiB */
53 /* This is the upper bound if we deduce the keep_free value from the
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57 /* This is the keep_free value when we can't determine the system
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL) /* 1 MiB */
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
/* Rounds x up to the next multiple of 8 bytes (i.e. 64 bit) */
64 #define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
/* Evaluates to true if the header size recorded in header h is large enough
 * to include the given Header member in full. */
66 #define JOURNAL_HEADER_CONTAINS(h, field) \
67 (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
/* The 8 magic bytes found at the very start of a journal file. This is a
 * plain byte array without a trailing NUL; it is copied and compared with an
 * explicit length of 8. */
69 static const char signature[] = { 'L', 'P', 'K', 'S', 'H', 'H', 'R', 'H' };
/* Tears down an open journal file. In the statements visible here it
 * switches a writable file whose header says STATE_ONLINE back to
 * STATE_OFFLINE, unmaps the header mapping and every window that is
 * currently mapped, closes the file descriptor and frees the buffer used
 * for (de)compression. */
71 void journal_file_close(JournalFile *f) {
77 /* Mark the file offline. Don't override the archived state if it already is set */
78 if (f->writable && f->header->state == STATE_ONLINE)
79 f->header->state = STATE_OFFLINE;
/* The header was mapped with the page-aligned size of Header, unmap the same range */
81 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
/* Release every per-type window that currently has a mapping */
84 for (t = 0; t < _WINDOW_MAX; t++)
85 if (f->windows[t].ptr)
86 munmap(f->windows[t].ptr, f->windows[t].size);
89 close_nointr_nofail(f->fd);
94 free(f->compress_buffer);
100 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
108 memcpy(h.signature, signature, 8);
109 h.header_size = htole64(ALIGN64(sizeof(h)));
111 r = sd_id128_randomize(&h.file_id);
116 h.seqnum_id = template->header->seqnum_id;
117 h.tail_seqnum = template->header->tail_seqnum;
119 h.seqnum_id = h.file_id;
121 k = pwrite(f->fd, &h, sizeof(h), 0);
/* Stamps the mapped header with the identity of the running system: stores
 * the local machine ID and the current boot ID, and marks the file as
 * STATE_ONLINE. */
131 static int journal_file_refresh_header(JournalFile *f) {
137 r = sd_id128_get_machine(&f->header->machine_id);
141 r = sd_id128_get_boot(&boot_id);
/* If the file was last written during this very boot, the monotonic
 * timestamp of its last entry is flagged as valid for this boot. */
145 if (sd_id128_equal(boot_id, f->header->boot_id))
146 f->tail_entry_monotonic_valid = true;
148 f->header->boot_id = boot_id;
150 f->header->state = STATE_ONLINE;
/* Full memory barrier after the header stores */
152 __sync_synchronize();
/* Sanity-checks the mapped header of an existing journal file: compares the
 * 8 signature bytes, rejects files carrying incompatible feature flags with
 * -EPROTONOSUPPORT, verifies that the header is at least HEADER_SIZE_MIN
 * bytes and that header plus arena fit into the file size from the last
 * fstat(), compares the machine ID and inspects the state field.
 *
 * incompatible_flags is a 32 bit little-endian field (it is written with
 * htole32() when compression is enabled), hence it has to be decoded with
 * le32toh(); decoding it as a 64 bit value yields wrong bits on big-endian
 * hosts.
 *
 * NOTE(review): the two incompatible_flags checks below look like the two
 * alternatives of a build-time conditional (with/without compression
 * support) whose preprocessor lines are not visible here -- confirm. */
157 static int journal_file_verify_header(JournalFile *f) {
160 if (memcmp(f->header, signature, 8))
/* Any incompatible flag other than COMPRESSED is unsupported */
164 if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
165 return -EPROTONOSUPPORT;
/* Comparison against zero is byte-order independent, no conversion needed */
167 if (f->header->incompatible_flags != 0)
168 return -EPROTONOSUPPORT;
171 /* The first addition was n_data, so check that we are at least this large */
172 if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
/* The file must be large enough to hold the header and the whole arena */
175 if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
180 sd_id128_t machine_id;
183 r = sd_id128_get_machine(&machine_id);
187 if (!sd_id128_equal(machine_id, f->header->machine_id))
190 state = f->header->state;
192 if (state == STATE_ONLINE) {
193 log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
195 } else if (state == STATE_ARCHIVED)
197 else if (state != STATE_OFFLINE) {
198 log_debug("Journal file %s has unknown state %u.", f->path, state);
206 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
207 uint64_t old_size, new_size;
212 /* We assume that this file is not sparse, and we know that
213 * for sure, since we always call posix_fallocate()
217 le64toh(f->header->header_size) +
218 le64toh(f->header->arena_size);
220 new_size = PAGE_ALIGN(offset + size);
221 if (new_size < le64toh(f->header->header_size))
222 new_size = le64toh(f->header->header_size);
224 if (new_size <= old_size)
227 if (f->metrics.max_size > 0 &&
228 new_size > f->metrics.max_size)
231 if (new_size > f->metrics.min_size &&
232 f->metrics.keep_free > 0) {
235 if (fstatvfs(f->fd, &svfs) >= 0) {
238 available = svfs.f_bfree * svfs.f_bsize;
240 if (available >= f->metrics.keep_free)
241 available -= f->metrics.keep_free;
245 if (new_size - old_size > available)
250 /* Note that the glibc fallocate() fallback is very
251 inefficient, hence we try to minimize the allocation area
253 r = posix_fallocate(f->fd, old_size, new_size - old_size);
257 if (fstat(f->fd, &f->last_stat) < 0)
260 f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
265 static int journal_file_map(
274 uint64_t woffset, wsize;
281 woffset = offset & ~((uint64_t) page_size() - 1ULL);
282 wsize = size + (offset - woffset);
283 wsize = PAGE_ALIGN(wsize);
285 /* Avoid SIGBUS on invalid accesses */
286 if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
287 return -EADDRNOTAVAIL;
289 window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
290 if (window == MAP_FAILED)
302 *ret = (uint8_t*) window + (offset - woffset);
/* Makes the byte range [offset, offset+size) of the journal file accessible
 * through the mapping window of type wt and returns a pointer to its first
 * byte in *ret. Ranges that lie beyond the end of the file, even after
 * refreshing the cached fstat() data, are refused with -EADDRNOTAVAIL. */
307 static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
316 assert(wt < _WINDOW_MAX);
318 if (offset + size > (uint64_t) f->last_stat.st_size) {
319 /* Hmm, out of range? Let's refresh the fstat() data
320 * first, before we trust that check. */
322 if (fstat(f->fd, &f->last_stat) < 0 ||
323 offset + size > (uint64_t) f->last_stat.st_size)
324 return -EADDRNOTAVAIL;
/* Fast path: the window of this type already covers the requested range,
 * so just compute the pointer into the existing mapping. */
329 if (_likely_(w->ptr &&
330 w->offset <= offset &&
331 w->offset + w->size >= offset + size)) {
333 *ret = (uint8_t*) w->ptr + (offset - w->offset);
/* Otherwise drop the old mapping and reset the window bookkeeping */
338 if (munmap(w->ptr, w->size) < 0)
342 w->size = w->offset = 0;
345 if (size < DEFAULT_WINDOW_SIZE) {
346 /* If the default window size is larger than what was
347 * asked for extend the mapping a bit in the hope to
348 * minimize needed remappings later on. We add half
349 * the window space before and half behind the
350 * requested mapping */
352 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
358 size = DEFAULT_WINDOW_SIZE;
/* Never let the window reach beyond the end of the file as last seen by fstat() */
362 if (offset + size > (uint64_t) f->last_stat.st_size)
363 size = (uint64_t) f->last_stat.st_size - offset;
366 return -EADDRNOTAVAIL;
368 r = journal_file_map(f,
370 &w->ptr, &w->offset, &w->size,
/* p points to the start of the new mapping; skip the delta bytes that
 * were added in front of the requested range. */
376 *ret = (uint8_t*) p + delta;
380 static bool verify_hash(Object *o) {
385 if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
386 h1 = le64toh(o->data.hash);
387 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
388 } else if (o->object.type == OBJECT_FIELD) {
389 h1 = le64toh(o->field.hash);
390 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
397 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
405 assert(type < _OBJECT_TYPE_MAX);
407 r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
412 s = le64toh(o->object.size);
414 if (s < sizeof(ObjectHeader))
417 if (type >= 0 && o->object.type != type)
420 if (s > sizeof(ObjectHeader)) {
421 r = journal_file_move_to(f, o->object.type, offset, s, &t);
/* Allocates the sequence number for the next entry: one more than the tail
 * sequence number stored in the header. The result is written back as the
 * new tail sequence number, and also as the head sequence number if none
 * has been recorded yet (i.e. the field is still 0). */
435 static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
440 r = le64toh(f->header->tail_seqnum) + 1;
443 /* If an external seqnum counter was passed, we update
444 * both the local and the external one, and set it to
445 * the maximum of both */
453 f->header->tail_seqnum = htole64(r);
/* A zero head_seqnum means no entry has been numbered in this file so far */
455 if (f->header->head_seqnum == 0)
456 f->header->head_seqnum = htole64(r);
461 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
468 assert(size >= sizeof(ObjectHeader));
472 p = le64toh(f->header->tail_object_offset);
474 p = le64toh(f->header->header_size);
476 r = journal_file_move_to_object(f, -1, p, &tail);
480 p += ALIGN64(le64toh(tail->object.size));
483 r = journal_file_allocate(f, p, size);
487 r = journal_file_move_to(f, type, p, size, &t);
494 o->object.type = type;
495 o->object.size = htole64(size);
497 f->header->tail_object_offset = htole64(p);
498 f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
/* Appends the data hash table object to a journal file and records its
 * location in the header. The table is sized from metrics.max_size but never
 * smaller than DEFAULT_DATA_HASH_TABLE_SIZE; all of its items are zeroed.
 * The header stores the offset of the first item (not of the enclosing
 * object) and the table size in bytes. */
506 static int journal_file_setup_data_hash_table(JournalFile *f) {
513 /* We estimate that we need 1 hash table entry per 768 bytes of
514 journal file and we want to make sure we never get beyond
515 75% fill level. Calculate the hash table size for the
516 maximum file size based on these metrics. */
518 s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
519 if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
520 s = DEFAULT_DATA_HASH_TABLE_SIZE;
522 log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
/* The object consists of the object header fields followed by s bytes of items */
524 r = journal_file_append_object(f,
525 OBJECT_DATA_HASH_TABLE,
526 offsetof(Object, hash_table.items) + s,
531 memset(o->hash_table.items, 0, s);
533 f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
534 f->header->data_hash_table_size = htole64(s);
539 static int journal_file_setup_field_hash_table(JournalFile *f) {
546 s = DEFAULT_FIELD_HASH_TABLE_SIZE;
547 r = journal_file_append_object(f,
548 OBJECT_FIELD_HASH_TABLE,
549 offsetof(Object, hash_table.items) + s,
554 memset(o->hash_table.items, 0, s);
556 f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
557 f->header->field_hash_table_size = htole64(s);
562 static int journal_file_map_data_hash_table(JournalFile *f) {
569 p = le64toh(f->header->data_hash_table_offset);
570 s = le64toh(f->header->data_hash_table_size);
572 r = journal_file_move_to(f,
573 WINDOW_DATA_HASH_TABLE,
579 f->data_hash_table = t;
583 static int journal_file_map_field_hash_table(JournalFile *f) {
590 p = le64toh(f->header->field_hash_table_offset);
591 s = le64toh(f->header->field_hash_table_size);
593 r = journal_file_move_to(f,
594 WINDOW_FIELD_HASH_TABLE,
600 f->field_hash_table = t;
604 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
611 assert(o->object.type == OBJECT_DATA);
613 /* This might alter the window we are looking at */
615 o->data.next_hash_offset = o->data.next_field_offset = 0;
616 o->data.entry_offset = o->data.entry_array_offset = 0;
617 o->data.n_entries = 0;
619 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
620 p = le64toh(f->data_hash_table[h].tail_hash_offset);
622 /* Only entry in the hash table is easy */
623 f->data_hash_table[h].head_hash_offset = htole64(offset);
625 /* Move back to the previous data object, to patch in
628 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
632 o->data.next_hash_offset = htole64(offset);
635 f->data_hash_table[h].tail_hash_offset = htole64(offset);
637 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
638 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
643 int journal_file_find_data_object_with_hash(
645 const void *data, uint64_t size, uint64_t hash,
646 Object **ret, uint64_t *offset) {
648 uint64_t p, osize, h;
652 assert(data || size == 0);
654 osize = offsetof(Object, data.payload) + size;
656 if (f->header->data_hash_table_size == 0)
659 h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
660 p = le64toh(f->data_hash_table[h].head_hash_offset);
665 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
669 if (le64toh(o->data.hash) != hash)
672 if (o->object.flags & OBJECT_COMPRESSED) {
676 l = le64toh(o->object.size);
677 if (l <= offsetof(Object, data.payload))
680 l -= offsetof(Object, data.payload);
682 if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
686 memcmp(f->compress_buffer, data, size) == 0) {
697 return -EPROTONOSUPPORT;
700 } else if (le64toh(o->object.size) == osize &&
701 memcmp(o->data.payload, data, size) == 0) {
713 p = le64toh(o->data.next_hash_offset);
719 int journal_file_find_data_object(
721 const void *data, uint64_t size,
722 Object **ret, uint64_t *offset) {
727 assert(data || size == 0);
729 hash = hash64(data, size);
731 return journal_file_find_data_object_with_hash(f,
736 static int journal_file_append_data(
738 const void *data, uint64_t size,
739 Object **ret, uint64_t *offset) {
745 bool compressed = false;
748 assert(data || size == 0);
750 hash = hash64(data, size);
752 r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
766 osize = offsetof(Object, data.payload) + size;
767 r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
771 o->data.hash = htole64(hash);
775 size >= COMPRESSION_SIZE_THRESHOLD) {
778 compressed = compress_blob(data, size, o->data.payload, &rsize);
781 o->object.size = htole64(offsetof(Object, data.payload) + rsize);
782 o->object.flags |= OBJECT_COMPRESSED;
784 f->header->incompatible_flags = htole32(le32toh(f->header->incompatible_flags) | HEADER_INCOMPATIBLE_COMPRESSED);
786 log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
791 if (!compressed && size > 0)
792 memcpy(o->data.payload, data, size);
794 r = journal_file_link_data(f, o, p, hash);
798 /* The linking might have altered the window, so let's
799 * refresh our pointer */
800 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Returns the number of EntryItem slots in the entry object o, derived from
 * the object's recorded size minus the fixed part that precedes the items
 * array. The object must be of type OBJECT_ENTRY. */
813 uint64_t journal_file_entry_n_items(Object *o) {
815 assert(o->object.type == OBJECT_ENTRY);
817 return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
/* Returns how many 64 bit offsets fit into the entry array object o, derived
 * from the object's recorded size minus the fixed part that precedes the
 * items array. The object must be of type OBJECT_ENTRY_ARRAY. */
820 static uint64_t journal_file_entry_array_n_items(Object *o) {
822 assert(o->object.type == OBJECT_ENTRY_ARRAY);
824 return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
827 static int link_entry_into_array(JournalFile *f,
832 uint64_t n = 0, ap = 0, q, i, a, hidx;
841 i = hidx = le64toh(*idx);
844 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
848 n = journal_file_entry_array_n_items(o);
850 o->entry_array.items[i] = htole64(p);
851 *idx = htole64(hidx + 1);
857 a = le64toh(o->entry_array.next_entry_array_offset);
868 r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
869 offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
874 o->entry_array.items[i] = htole64(p);
879 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
883 o->entry_array.next_entry_array_offset = htole64(q);
886 *idx = htole64(hidx + 1);
891 static int link_entry_into_array_plus_one(JournalFile *f,
910 i = htole64(le64toh(*idx) - 1);
911 r = link_entry_into_array(f, first, &i, p);
916 *idx = htole64(le64toh(*idx) + 1);
920 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
927 p = le64toh(o->entry.items[i].object_offset);
931 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
935 return link_entry_into_array_plus_one(f,
936 &o->data.entry_offset,
937 &o->data.entry_array_offset,
942 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
949 assert(o->object.type == OBJECT_ENTRY);
951 __sync_synchronize();
953 /* Link up the entry itself */
954 r = link_entry_into_array(f,
955 &f->header->entry_array_offset,
956 &f->header->n_entries,
961 /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
963 if (f->header->head_entry_realtime == 0)
964 f->header->head_entry_realtime = o->entry.realtime;
966 f->header->tail_entry_realtime = o->entry.realtime;
967 f->header->tail_entry_monotonic = o->entry.monotonic;
969 f->tail_entry_monotonic_valid = true;
971 /* Link up the items */
972 n = journal_file_entry_n_items(o);
973 for (i = 0; i < n; i++) {
974 r = journal_file_link_entry_item(f, o, offset, i);
982 static int journal_file_append_entry_internal(
984 const dual_timestamp *ts,
986 const EntryItem items[], unsigned n_items,
988 Object **ret, uint64_t *offset) {
995 assert(items || n_items == 0);
998 osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1000 r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1004 o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
1005 memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1006 o->entry.realtime = htole64(ts->realtime);
1007 o->entry.monotonic = htole64(ts->monotonic);
1008 o->entry.xor_hash = htole64(xor_hash);
1009 o->entry.boot_id = f->header->boot_id;
1011 r = journal_file_link_entry(f, o, np);
/* Notifies inotify watchers that the journal file changed. After a memory
 * barrier the file is truncated to the size recorded by the last fstat(),
 * which leaves the contents untouched; a failing ftruncate() is only
 * logged. */
1024 void journal_file_post_change(JournalFile *f) {
1027 /* inotify() does not receive IN_MODIFY events from file
1028 * accesses done via mmap(). After each access we hence
1029 * trigger IN_MODIFY by truncating the journal file to its
1030 * current size which triggers IN_MODIFY. */
1032 __sync_synchronize();
1034 if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1035 log_error("Failed to truncate file to its own size: %m");
1038 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1042 uint64_t xor_hash = 0;
1043 struct dual_timestamp _ts;
1046 assert(iovec || n_iovec == 0);
1052 dual_timestamp_get(&_ts);
1056 if (f->tail_entry_monotonic_valid &&
1057 ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1060 /* alloca() can't take 0, hence let's allocate at least one */
1061 items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1063 for (i = 0; i < n_iovec; i++) {
1067 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1071 xor_hash ^= le64toh(o->data.hash);
1072 items[i].object_offset = htole64(p);
1073 items[i].hash = o->data.hash;
1076 r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1078 journal_file_post_change(f);
1083 static int generic_array_get(JournalFile *f,
1086 Object **ret, uint64_t *offset) {
1098 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1102 n = journal_file_entry_array_n_items(o);
1104 p = le64toh(o->entry_array.items[i]);
1109 a = le64toh(o->entry_array.next_entry_array_offset);
1112 if (a <= 0 || p <= 0)
1115 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1128 static int generic_array_get_plus_one(JournalFile *f,
1132 Object **ret, uint64_t *offset) {
1141 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1154 return generic_array_get(f, first, i-1, ret, offset);
1163 static int generic_array_bisect(JournalFile *f,
1167 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1168 direction_t direction,
1173 uint64_t a, p, t = 0, i = 0, last_p = 0;
1174 bool subtract_one = false;
1175 Object *o, *array = NULL;
1179 assert(test_object);
1183 uint64_t left, right, k, lp;
1185 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1189 k = journal_file_entry_array_n_items(array);
1195 lp = p = le64toh(array->entry_array.items[i]);
1199 r = test_object(f, p, needle);
1203 if (r == TEST_FOUND)
1204 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1206 if (r == TEST_RIGHT) {
1210 if (left == right) {
1211 if (direction == DIRECTION_UP)
1212 subtract_one = true;
1218 assert(left < right);
1220 i = (left + right) / 2;
1221 p = le64toh(array->entry_array.items[i]);
1225 r = test_object(f, p, needle);
1229 if (r == TEST_FOUND)
1230 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1232 if (r == TEST_RIGHT)
1240 if (direction == DIRECTION_UP) {
1242 subtract_one = true;
1253 a = le64toh(array->entry_array.next_entry_array_offset);
1259 if (subtract_one && t == 0 && i == 0)
1262 if (subtract_one && i == 0)
1264 else if (subtract_one)
1265 p = le64toh(array->entry_array.items[i-1]);
1267 p = le64toh(array->entry_array.items[i]);
1269 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1280 *idx = t + i + (subtract_one ? -1 : 0);
1285 static int generic_array_bisect_plus_one(JournalFile *f,
1290 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1291 direction_t direction,
1297 bool step_back = false;
1301 assert(test_object);
1306 /* This bisects the array in object 'first', but first checks
1308 r = test_object(f, extra, needle);
1312 if (r == TEST_FOUND)
1313 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1315 /* if we are looking with DIRECTION_UP then we need to first
1316 see if in the actual array there is a matching entry, and
1317 return the last one of that. But if there isn't any we need
1318 to return this one. Hence remember this, and return it
1321 step_back = direction == DIRECTION_UP;
1323 if (r == TEST_RIGHT) {
1324 if (direction == DIRECTION_DOWN)
1330 r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1332 if (r == 0 && step_back)
1341 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1357 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1363 else if (p < needle)
1369 int journal_file_move_to_entry_by_offset(
1372 direction_t direction,
1376 return generic_array_bisect(f,
1377 le64toh(f->header->entry_array_offset),
1378 le64toh(f->header->n_entries),
1386 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1393 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1397 if (le64toh(o->entry.seqnum) == needle)
1399 else if (le64toh(o->entry.seqnum) < needle)
1405 int journal_file_move_to_entry_by_seqnum(
1408 direction_t direction,
1412 return generic_array_bisect(f,
1413 le64toh(f->header->entry_array_offset),
1414 le64toh(f->header->n_entries),
1421 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1428 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1432 if (le64toh(o->entry.realtime) == needle)
1434 else if (le64toh(o->entry.realtime) < needle)
1440 int journal_file_move_to_entry_by_realtime(
1443 direction_t direction,
1447 return generic_array_bisect(f,
1448 le64toh(f->header->entry_array_offset),
1449 le64toh(f->header->n_entries),
1451 test_object_realtime,
1456 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1463 r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1467 if (le64toh(o->entry.monotonic) == needle)
1469 else if (le64toh(o->entry.monotonic) < needle)
1475 int journal_file_move_to_entry_by_monotonic(
1479 direction_t direction,
1483 char t[9+32+1] = "_BOOT_ID=";
1489 sd_id128_to_string(boot_id, t + 9);
1490 r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1496 return generic_array_bisect_plus_one(f,
1497 le64toh(o->data.entry_offset),
1498 le64toh(o->data.entry_array_offset),
1499 le64toh(o->data.n_entries),
1501 test_object_monotonic,
1506 int journal_file_next_entry(
1508 Object *o, uint64_t p,
1509 direction_t direction,
1510 Object **ret, uint64_t *offset) {
1516 assert(p > 0 || !o);
1518 n = le64toh(f->header->n_entries);
1523 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1525 if (o->object.type != OBJECT_ENTRY)
1528 r = generic_array_bisect(f,
1529 le64toh(f->header->entry_array_offset),
1530 le64toh(f->header->n_entries),
1539 if (direction == DIRECTION_DOWN) {
1552 /* And jump to it */
1553 return generic_array_get(f,
1554 le64toh(f->header->entry_array_offset),
1559 int journal_file_skip_entry(
1561 Object *o, uint64_t p,
1563 Object **ret, uint64_t *offset) {
1572 if (o->object.type != OBJECT_ENTRY)
1575 r = generic_array_bisect(f,
1576 le64toh(f->header->entry_array_offset),
1577 le64toh(f->header->n_entries),
1586 /* Calculate new index */
1588 if ((uint64_t) -skip >= i)
1591 i = i - (uint64_t) -skip;
1593 i += (uint64_t) skip;
1595 n = le64toh(f->header->n_entries);
1602 return generic_array_get(f,
1603 le64toh(f->header->entry_array_offset),
1608 int journal_file_next_entry_for_data(
1610 Object *o, uint64_t p,
1611 uint64_t data_offset,
1612 direction_t direction,
1613 Object **ret, uint64_t *offset) {
1620 assert(p > 0 || !o);
1622 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1626 n = le64toh(d->data.n_entries);
1631 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1633 if (o->object.type != OBJECT_ENTRY)
1636 r = generic_array_bisect_plus_one(f,
1637 le64toh(d->data.entry_offset),
1638 le64toh(d->data.entry_array_offset),
1639 le64toh(d->data.n_entries),
1649 if (direction == DIRECTION_DOWN) {
1663 return generic_array_get_plus_one(f,
1664 le64toh(d->data.entry_offset),
1665 le64toh(d->data.entry_array_offset),
1670 int journal_file_move_to_entry_by_offset_for_data(
1672 uint64_t data_offset,
1674 direction_t direction,
1675 Object **ret, uint64_t *offset) {
1682 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1686 return generic_array_bisect_plus_one(f,
1687 le64toh(d->data.entry_offset),
1688 le64toh(d->data.entry_array_offset),
1689 le64toh(d->data.n_entries),
1696 int journal_file_move_to_entry_by_monotonic_for_data(
1698 uint64_t data_offset,
1701 direction_t direction,
1702 Object **ret, uint64_t *offset) {
1704 char t[9+32+1] = "_BOOT_ID=";
1711 /* First, seek by time */
1712 sd_id128_to_string(boot_id, t + 9);
1713 r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1719 r = generic_array_bisect_plus_one(f,
1720 le64toh(o->data.entry_offset),
1721 le64toh(o->data.entry_array_offset),
1722 le64toh(o->data.n_entries),
1724 test_object_monotonic,
1730 /* And now, continue seeking until we find an entry that
1731 * exists in both bisection arrays */
1737 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1741 r = generic_array_bisect_plus_one(f,
1742 le64toh(d->data.entry_offset),
1743 le64toh(d->data.entry_array_offset),
1744 le64toh(d->data.n_entries),
1752 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1756 r = generic_array_bisect_plus_one(f,
1757 le64toh(o->data.entry_offset),
1758 le64toh(o->data.entry_array_offset),
1759 le64toh(o->data.n_entries),
1783 int journal_file_move_to_entry_by_seqnum_for_data(
1785 uint64_t data_offset,
1787 direction_t direction,
1788 Object **ret, uint64_t *offset) {
1795 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1799 return generic_array_bisect_plus_one(f,
1800 le64toh(d->data.entry_offset),
1801 le64toh(d->data.entry_array_offset),
1802 le64toh(d->data.n_entries),
1809 int journal_file_move_to_entry_by_realtime_for_data(
1811 uint64_t data_offset,
1813 direction_t direction,
1814 Object **ret, uint64_t *offset) {
1821 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1825 return generic_array_bisect_plus_one(f,
1826 le64toh(d->data.entry_offset),
1827 le64toh(d->data.entry_array_offset),
1828 le64toh(d->data.n_entries),
1830 test_object_realtime,
1835 void journal_file_dump(JournalFile *f) {
1842 journal_file_print_header(f);
1844 p = le64toh(f->header->header_size);
1846 r = journal_file_move_to_object(f, -1, p, &o);
1850 switch (o->object.type) {
1853 printf("Type: OBJECT_UNUSED\n");
1857 printf("Type: OBJECT_DATA\n");
1861 printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1862 (unsigned long long) le64toh(o->entry.seqnum),
1863 (unsigned long long) le64toh(o->entry.monotonic),
1864 (unsigned long long) le64toh(o->entry.realtime));
1867 case OBJECT_FIELD_HASH_TABLE:
1868 printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1871 case OBJECT_DATA_HASH_TABLE:
1872 printf("Type: OBJECT_DATA_HASH_TABLE\n");
1875 case OBJECT_ENTRY_ARRAY:
1876 printf("Type: OBJECT_ENTRY_ARRAY\n");
1879 case OBJECT_SIGNATURE:
1880 printf("Type: OBJECT_SIGNATURE\n");
1884 if (o->object.flags & OBJECT_COMPRESSED)
1885 printf("Flags: COMPRESSED\n");
1887 if (p == le64toh(f->header->tail_object_offset))
1890 p = p + ALIGN64(le64toh(o->object.size));
1895 log_error("File corrupt");
/* Prints a human-readable summary of the journal file header to stdout:
 * path, IDs, state, feature flags, sizes, object counters, sequence numbers
 * and the realtime timestamps of the first and last entry. The data/field
 * object counters and hash table fill levels are only printed if the header
 * of this file is large enough to contain them.
 *
 * Every ID that is formatted in the same printf() call needs a buffer of its
 * own: all arguments are evaluated before anything is printed, so sharing a
 * buffer between two sd_id128_to_string() calls makes both conversions show
 * whichever ID was formatted last.
 *
 * incompatible_flags is a 32 bit little-endian field and is converted with
 * le32toh() before its bits are tested.
 * NOTE(review): compatible_flags is tested without conversion; it presumably
 * needs the same treatment -- confirm its width in the Header definition. */
1898 void journal_file_print_header(JournalFile *f) {
1899 char a[33], b[33], c[33], d[33];
1900 char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1904 printf("File Path: %s\n"
1908 "Sequential Number ID: %s\n"
1910 "Compatible Flags:%s%s\n"
1911 "Incompatible Flags:%s%s\n"
1912 "Header size: %llu\n"
1913 "Arena size: %llu\n"
1914 "Data Hash Table Size: %llu\n"
1915 "Field Hash Table Size: %llu\n"
1917 "Entry Objects: %llu\n"
1918 "Rotate Suggested: %s\n"
1919 "Head Sequential Number: %llu\n"
1920 "Tail Sequential Number: %llu\n"
1921 "Head Realtime Timestamp: %s\n"
1922 "Tail Realtime Timestamp: %s\n",
1924 sd_id128_to_string(f->header->file_id, a),
1925 sd_id128_to_string(f->header->machine_id, b),
1926 sd_id128_to_string(f->header->boot_id, c),
1927 sd_id128_to_string(f->header->seqnum_id, d),
1928 f->header->state == STATE_OFFLINE ? "offline" :
1929 f->header->state == STATE_ONLINE ? "online" :
1930 f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
1931 (f->header->compatible_flags & HEADER_COMPATIBLE_SIGNED) ? " SIGNED" : "",
1932 (f->header->compatible_flags & ~HEADER_COMPATIBLE_SIGNED) ? " ???" : "",
1933 (le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1934 (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1935 (unsigned long long) le64toh(f->header->header_size),
1936 (unsigned long long) le64toh(f->header->arena_size),
1937 (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1938 (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1939 (unsigned long long) le64toh(f->header->n_objects),
1940 (unsigned long long) le64toh(f->header->n_entries),
1941 yes_no(journal_file_rotate_suggested(f)),
1942 (unsigned long long) le64toh(f->header->head_seqnum),
1943 (unsigned long long) le64toh(f->header->tail_seqnum),
1944 format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1945 format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
1947 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1948 printf("Data Objects: %llu\n"
1949 "Data Hash Table Fill: %.1f%%\n",
1950 (unsigned long long) le64toh(f->header->n_data),
1951 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1953 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1954 printf("Field Objects: %llu\n"
1955 "Field Hash Table Fill: %.1f%%\n",
1956 (unsigned long long) le64toh(f->header->n_fields),
1957 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
/* Opens (and, for an empty writable file, initializes) the journal file
 * `fname` and hands the resulting JournalFile back through `ret`.
 *
 * `metrics` supplies size limits for the file; when it is not used and a
 * `template` file is given, the template's metrics are copied instead.
 * `template` is also passed on to journal_file_init_header() when a new
 * header has to be written.
 *
 * NOTE(review): the statements that act on the checks below (error returns
 * and cleanup) are not part of this excerpt; comments describe only the
 * visible tests and calls. */
1960 int journal_file_open(
1964 JournalMetrics *metrics,
1965 JournalFile *template,
1966 JournalFile **ret) {
1970 bool newly_created = false;
/* The access mode is tested against the two supported values,
 * O_RDONLY and O_RDWR. */
1974 if ((flags & O_ACCMODE) != O_RDONLY &&
1975 (flags & O_ACCMODE) != O_RDWR)
/* The file name is tested for the ".journal" suffix. */
1978 if (!endswith(fname, ".journal"))
1981 f = new0(JournalFile, 1);
/* Anything that is not read-only counts as writable; the mmap protection
 * bits are derived from the same open flags. */
1988 f->writable = (flags & O_ACCMODE) != O_RDONLY;
1989 f->prot = prot_from_flags(flags);
1992 f->compress = template->compress;
1994 f->path = strdup(fname);
/* O_CLOEXEC is always added to the flags used for open(). */
2000 f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2006 if (fstat(f->fd, &f->last_stat) < 0) {
/* A zero-sized file opened writable is treated as freshly created and
 * gets a header written via journal_file_init_header(). */
2011 if (f->last_stat.st_size == 0 && f->writable) {
2012 newly_created = true;
2014 r = journal_file_init_header(f, template);
/* Stat again so that last_stat is refreshed after the header was written. */
2018 if (fstat(f->fd, &f->last_stat) < 0) {
/* The file size is compared against HEADER_SIZE_MIN, the smallest header
 * layout this code knows about. */
2024 if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
/* Map the header (page-aligned size of the current Header struct) shared
 * with the file, from offset 0. */
2029 f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2030 if (f->header == MAP_FAILED) {
/* Only pre-existing files have their header verified. */
2036 if (!newly_created) {
2037 r = journal_file_verify_header(f);
/* Fill in defaults in the caller's metrics (based on the file system
 * behind f->fd) and copy them; otherwise fall back to the template's. */
2044 journal_default_metrics(metrics, f->fd);
2045 f->metrics = *metrics;
2046 } else if (template)
2047 f->metrics = template->metrics;
2049 r = journal_file_refresh_header(f);
/* New files get their field and data hash tables set up ... */
2054 if (newly_created) {
2056 r = journal_file_setup_field_hash_table(f);
2060 r = journal_file_setup_data_hash_table(f);
/* ... and both hash tables are then mapped. */
2065 r = journal_file_map_field_hash_table(f);
2069 r = journal_file_map_data_hash_table(f);
/* NOTE(review): presumably the failure path, releasing everything that
 * was set up on f — confirm against the full function. */
2079 journal_file_close(f);
2084 int journal_file_rotate(JournalFile **f) {
2087 JournalFile *old_file, *new_file = NULL;
2095 if (!old_file->writable)
2098 if (!endswith(old_file->path, ".journal"))
2101 l = strlen(old_file->path);
2103 p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2107 memcpy(p, old_file->path, l - 8);
2109 sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2110 snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2111 "-%016llx-%016llx.journal",
2112 (unsigned long long) le64toh((*f)->header->tail_seqnum),
2113 (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2115 r = rename(old_file->path, p);
2121 old_file->header->state = STATE_ARCHIVED;
2123 r = journal_file_open(old_file->path, old_file->flags, old_file->mode, NULL, old_file, &new_file);
2124 journal_file_close(old_file);
2130 int journal_file_open_reliably(
2134 JournalMetrics *metrics,
2135 JournalFile *template,
2136 JournalFile **ret) {
2142 r = journal_file_open(fname, flags, mode, metrics, template, ret);
2143 if (r != -EBADMSG && /* corrupted */
2144 r != -ENODATA && /* truncated */
2145 r != -EHOSTDOWN && /* other machine */
2146 r != -EPROTONOSUPPORT && /* incompatible feature */
2147 r != -EBUSY && /* unclean shutdown */
2148 r != -ESHUTDOWN /* already archived */)
2151 if ((flags & O_ACCMODE) == O_RDONLY)
2154 if (!(flags & O_CREAT))
2157 /* The file is corrupted. Rotate it away and try it again (but only once) */
2160 if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2162 (unsigned long long) now(CLOCK_REALTIME),
2166 r = rename(fname, p);
2171 log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2173 return journal_file_open(fname, flags, mode, metrics, template, ret);
/* One candidate file collected by journal_directory_vacuum(); an array of
 * these is sorted with vacuum_compare() to decide the deletion order. */
2176 struct vacuum_info {
/* Sequence number ID parsed from the file name; vacuum_compare() only
 * looks at it when have_seqnum is set. */
2181 sd_id128_t seqnum_id;
2187 static int vacuum_compare(const void *_a, const void *_b) {
2188 const struct vacuum_info *a, *b;
2193 if (a->have_seqnum && b->have_seqnum &&
2194 sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
2195 if (a->seqnum < b->seqnum)
2197 else if (a->seqnum > b->seqnum)
2203 if (a->realtime < b->realtime)
2205 else if (a->realtime > b->realtime)
2207 else if (a->have_seqnum && b->have_seqnum)
2208 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
2210 return strcmp(a->filename, b->filename);
/* Deletes archived and corrupted journal files from `directory` until the
 * files found add up to at most `max_use` bytes and the file system has at
 * least `min_free` bytes available.
 *
 * Two kinds of regular files are considered, recognized by name:
 *   archived:  ...@<32 hex seqnum id>-<16 hex seqnum>-<16 hex realtime>.journal
 *   corrupted: ...@<16 hex realtime>-<16 hex value>.journal~
 * Candidates are sorted with vacuum_compare() and removed in that order. */
2213 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2216 struct vacuum_info *list = NULL;
2217 unsigned n_list = 0, n_allocated = 0, i;
2225 d = opendir(directory);
2231 struct dirent buf, *de;
2235 unsigned long long seqnum = 0, realtime;
2236 sd_id128_t seqnum_id;
2239 k = readdir_r(d, &buf, &de);
/* Symlinks are not followed; only regular files are candidates. */
2248 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2251 if (!S_ISREG(st.st_mode))
2254 q = strlen(de->d_name);
2256 if (endswith(de->d_name, ".journal")) {
2258 /* Vacuum archived files */
/* Minimum length of the fixed-width suffix:
 * '@' + 32 + '-' + 16 + '-' + 16 + ".journal" (8). */
2260 if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
/* Check the three separators, counting back from the end of the name. */
2263 if (de->d_name[q-8-16-1] != '-' ||
2264 de->d_name[q-8-16-1-16-1] != '-' ||
2265 de->d_name[q-8-16-1-16-1-32-1] != '@')
/* Keep an unmodified copy of the name; the dirent buffer is edited in
 * place right below. */
2268 p = strdup(de->d_name);
/* Terminate the name after the 32-digit ID so it can be parsed as a
 * string of its own. */
2274 de->d_name[q-8-16-1-16-1] = 0;
2275 if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
/* Sequence number and realtime stamp follow the separator that was just
 * overwritten. */
2280 if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2287 } else if (endswith(de->d_name, ".journal~")) {
2288 unsigned long long tmp;
2290 /* Vacuum corrupted files */
/* Minimum length of the fixed-width suffix:
 * '@' + 16 + '-' + 16 + ".journal~" (8 + 1). */
2292 if (q < 1 + 16 + 1 + 16 + 8 + 1)
2295 if (de->d_name[q-1-8-16-1] != '-' ||
2296 de->d_name[q-1-8-16-1-16-1] != '@')
2299 p = strdup(de->d_name);
/* Only the realtime stamp is kept; the second number is parsed into tmp
 * and not used in the visible code. */
2305 if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
/* NOTE(review): seqnum_id is not assigned on this path but is still
 * copied into the list entry below; vacuum_compare() ignores it when
 * have_seqnum is false. */
2310 have_seqnum = false;
/* Grow the candidate array geometrically, starting at 8 entries. */
2314 if (n_list >= n_allocated) {
2315 struct vacuum_info *j;
2317 n_allocated = MAX(n_allocated * 2U, 8U);
2318 j = realloc(list, n_allocated * sizeof(struct vacuum_info));
/* The list entry takes ownership of the duplicated name. */
2328 list[n_list].filename = p;
/* Disk usage is taken from the allocated block count, scaled by 512. */
2329 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2330 list[n_list].seqnum = seqnum;
2331 list[n_list].realtime = realtime;
2332 list[n_list].seqnum_id = seqnum_id;
2333 list[n_list].have_seqnum = have_seqnum;
2335 sum += list[n_list].usage;
2341 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2343 for(i = 0; i < n_list; i++) {
/* Free space is queried again on every iteration so that the effect of
 * earlier deletions is taken into account. */
2346 if (fstatvfs(dirfd(d), &ss) < 0) {
/* Both limits are tested together: total usage within max_use and
 * available space of at least min_free. */
2351 if (sum <= max_use &&
2352 (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2355 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2356 log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2357 sum -= list[i].usage;
/* A file that is already gone (ENOENT) is not reported. */
2358 } else if (errno != ENOENT)
2359 log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
/* Release the duplicated file names held by the list entries. */
2363 for (i = 0; i < n_list; i++)
2364 free(list[i].filename);
/* Copies the entry object `o`, located at offset `p` in `from`, into `to`.
 *
 * Every data object the entry references is looked up in `from`,
 * uncompressed if necessary, and appended to `to`; the entry itself is
 * then appended via journal_file_append_entry_internal(), which also
 * receives `seqnum`, `ret` and `offset`. */
2374 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2376 uint64_t q, xor_hash = 0;
/* The new entry keeps the time stamps of the source entry. */
2389 ts.monotonic = le64toh(o->entry.monotonic);
2390 ts.realtime = le64toh(o->entry.realtime);
/* Compare against the monotonic stamp of the last entry in the
 * destination, if that stamp is known to be valid. */
2392 if (to->tail_entry_monotonic_valid &&
2393 ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2396 n = journal_file_entry_n_items(o);
/* NOTE(review): the item array lives on the stack and its size comes
 * from the entry object; no upper bound is visible here. */
2397 items = alloca(sizeof(EntryItem) * n);
2399 for (i = 0; i < n; i++) {
/* Remember offset and hash of the referenced data object before `o` is
 * redirected to that data object. */
2406 q = le64toh(o->entry.items[i].object_offset);
2407 le_hash = o->entry.items[i].hash;
2409 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
/* The hash stored in the entry item is checked against the one in the
 * data object. */
2413 if (le_hash != o->data.hash)
/* Payload length: object size minus the data object header. */
2416 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2419 /* We hit the limit on 32bit machines */
2420 if ((uint64_t) t != l)
2423 if (o->object.flags & OBJECT_COMPRESSED) {
/* Decompress into the source file's scratch buffer and copy from there. */
2427 if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2430 data = from->compress_buffer;
2433 return -EPROTONOSUPPORT;
2436 data = o->data.payload;
/* Append the payload to the destination; `u` and `h` receive the
 * resulting data object and its offset. */
2438 r = journal_file_append_data(to, data, l, &u, &h);
/* The entry's XOR hash is accumulated from the hashes of its data
 * objects as stored in the destination. */
2442 xor_hash ^= le64toh(u->data.hash);
2443 items[i].object_offset = htole64(h);
2444 items[i].hash = u->data.hash;
/* `o` currently points at the data object; go back to the entry at `p`
 * so that the next iteration can read its item list again. */
2446 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2451 return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2454 void journal_default_metrics(JournalMetrics *m, int fd) {
2455 uint64_t fs_size = 0;
2457 char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2462 if (fstatvfs(fd, &ss) >= 0)
2463 fs_size = ss.f_frsize * ss.f_blocks;
2465 if (m->max_use == (uint64_t) -1) {
2468 m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2470 if (m->max_use > DEFAULT_MAX_USE_UPPER)
2471 m->max_use = DEFAULT_MAX_USE_UPPER;
2473 if (m->max_use < DEFAULT_MAX_USE_LOWER)
2474 m->max_use = DEFAULT_MAX_USE_LOWER;
2476 m->max_use = DEFAULT_MAX_USE_LOWER;
2478 m->max_use = PAGE_ALIGN(m->max_use);
2480 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2481 m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2484 if (m->max_size == (uint64_t) -1) {
2485 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2487 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2488 m->max_size = DEFAULT_MAX_SIZE_UPPER;
2490 m->max_size = PAGE_ALIGN(m->max_size);
2492 if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2493 m->max_size = JOURNAL_FILE_SIZE_MIN;
2495 if (m->max_size*2 > m->max_use)
2496 m->max_use = m->max_size*2;
2498 if (m->min_size == (uint64_t) -1)
2499 m->min_size = JOURNAL_FILE_SIZE_MIN;
2501 m->min_size = PAGE_ALIGN(m->min_size);
2503 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2504 m->min_size = JOURNAL_FILE_SIZE_MIN;
2506 if (m->min_size > m->max_size)
2507 m->max_size = m->min_size;
2510 if (m->keep_free == (uint64_t) -1) {
2513 m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2515 if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2516 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2519 m->keep_free = DEFAULT_KEEP_FREE;
2522 log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2523 format_bytes(a, sizeof(a), m->max_use),
2524 format_bytes(b, sizeof(b), m->max_size),
2525 format_bytes(c, sizeof(c), m->min_size),
2526 format_bytes(d, sizeof(d), m->keep_free));
2529 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2534 if (f->header->head_entry_realtime == 0)
2537 *from = le64toh(f->header->head_entry_realtime);
2541 if (f->header->tail_entry_realtime == 0)
2544 *to = le64toh(f->header->tail_entry_realtime);
/* Looks up the monotonic stamps of entries in `f` that belong to boot
 * `boot_id`, by way of the "_BOOT_ID=<id>" data object and the entries it
 * links to. `*from` is taken from the entry at the data object's
 * entry_offset, `*to` from the entry at index n_entries-1 of its entry
 * list. */
2550 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
/* Room for "_BOOT_ID=" (9), the 32-character ID string and a NUL. */
2551 char t[9+32+1] = "_BOOT_ID=";
2559 sd_id128_to_string(boot_id, t + 9);
/* Find the data object for this boot ID; `p` receives its offset so it
 * can be looked up again below. */
2561 r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2565 if (le64toh(o->data.n_entries) <= 0)
/* entry_offset of the data object leads to the entry used for `from`. */
2569 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2573 *from = le64toh(o->entry.monotonic);
/* `o` may have been redirected to an entry above; return to the data
 * object before reading its fields again. */
2577 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
/* Fetch the entry at index n_entries-1 of the data object's entry list. */
2581 r = generic_array_get_plus_one(f,
2582 le64toh(o->data.entry_offset),
2583 le64toh(o->data.entry_array_offset),
2584 le64toh(o->data.n_entries)-1,
2589 *to = le64toh(o->entry.monotonic);
/* Tells whether `f` should be rotated: either because its header is older
 * than the current Header layout, or because one of its hash tables is
 * more than 75% full. Each reason is logged at debug level. */
2595 bool journal_file_rotate_suggested(JournalFile *f) {
2598 /* If we gained new header fields we gained new features,
2599 * hence suggest a rotation */
2600 if (le64toh(f->header->header_size) < sizeof(Header)) {
2601 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2605 /* Let's check if the hash tables grew over a certain fill
2606 * level (75%, borrowing this value from Java's hash table
2607 * implementation), and if so suggest a rotation. To calculate
2608 * the fill level we need the n_data field, which only exists
2609 * in newer versions. */
/* The 75% test is done in integers: n * 4 > buckets * 3, where the
 * bucket count is the table size divided by sizeof(HashItem). */
2611 if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2612 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2613 log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2615 100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2616 (unsigned long long) le64toh(f->header->n_data),
2617 (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2618 (unsigned long long) (f->last_stat.st_size),
2619 (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
/* Same fill-level test for the field hash table, guarded by the presence
 * of n_fields in the header. */
2623 if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2624 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2625 log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2627 100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2628 (unsigned long long) le64toh(f->header->n_fields),
2629 (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));