chiark / gitweb /
0e4889378c8699eb07825c3f7e8b8b88588d1f42
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "lookup3.h"
33 #include "compress.h"
34 #include "fsprg.h"
35
/* Sizes, in bytes, of the data and field hash tables created for new
 * files (2047 resp. 333 HashItem slots). The data hash table may be
 * created larger than this, never smaller. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Mapping requests smaller than this are widened to this many bytes */
#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)

/* Data payloads shorter than this many bytes are never compressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence every valid header is at least large enough to reach
 * up to (but not including) that field. */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)

/* Evaluates to true if the header_size recorded in header h is large
 * enough to fully contain the specified field */
#define JOURNAL_HEADER_CONTAINS(h, field) \
        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
69
70 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
71
72 void journal_file_close(JournalFile *f) {
73         int t;
74
75         assert(f);
76
77         /* Sync everything to disk, before we mark the file offline */
78         for (t = 0; t < _WINDOW_MAX; t++)
79                 if (f->windows[t].ptr)
80                         munmap(f->windows[t].ptr, f->windows[t].size);
81
82         if (f->writable && f->fd >= 0)
83                 fdatasync(f->fd);
84
85         if (f->header) {
86                 /* Mark the file offline. Don't override the archived state if it already is set */
87                 if (f->writable && f->header->state == STATE_ONLINE)
88                         f->header->state = STATE_OFFLINE;
89
90                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
91         }
92
93         if (f->fd >= 0)
94                 close_nointr_nofail(f->fd);
95
96         free(f->path);
97
98 #ifdef HAVE_XZ
99         free(f->compress_buffer);
100 #endif
101
102 #ifdef HAVE_GCRYPT
103         if (f->fsprg_header)
104                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
105
106         if (f->hmac)
107                 gcry_md_close(f->hmac);
108 #endif
109
110         free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114         Header h;
115         ssize_t k;
116         int r;
117
118         assert(f);
119
120         zero(h);
121         memcpy(h.signature, HEADER_SIGNATURE, 8);
122         h.header_size = htole64(ALIGN64(sizeof(h)));
123
124         h.incompatible_flags =
125                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127         h.compatible_flags =
128                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
129
130         r = sd_id128_randomize(&h.file_id);
131         if (r < 0)
132                 return r;
133
134         if (template) {
135                 h.seqnum_id = template->header->seqnum_id;
136                 h.tail_seqnum = template->header->tail_seqnum;
137         } else
138                 h.seqnum_id = h.file_id;
139
140         k = pwrite(f->fd, &h, sizeof(h), 0);
141         if (k < 0)
142                 return -errno;
143
144         if (k != sizeof(h))
145                 return -EIO;
146
147         return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151         int r;
152         sd_id128_t boot_id;
153
154         assert(f);
155
156         r = sd_id128_get_machine(&f->header->machine_id);
157         if (r < 0)
158                 return r;
159
160         r = sd_id128_get_boot(&boot_id);
161         if (r < 0)
162                 return r;
163
164         if (sd_id128_equal(boot_id, f->header->boot_id))
165                 f->tail_entry_monotonic_valid = true;
166
167         f->header->boot_id = boot_id;
168
169         f->header->state = STATE_ONLINE;
170
171         /* Sync the online state to disk */
172         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173         fdatasync(f->fd);
174
175         return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179         assert(f);
180
181         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182                 return -EBADMSG;
183
184         /* In both read and write mode we refuse to open files with
185          * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188                 return -EPROTONOSUPPORT;
189 #else
190         if (f->header->incompatible_flags != 0)
191                 return -EPROTONOSUPPORT;
192 #endif
193
194         /* When open for writing we refuse to open files with
195          * compatible flags, too */
196         if (f->writable) {
197 #ifdef HAVE_GCRYPT
198                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
199                         return -EPROTONOSUPPORT;
200 #else
201                 if (f->header->compatible_flags != 0)
202                         return -EPROTONOSUPPORT;
203 #endif
204         }
205
206         /* The first addition was n_data, so check that we are at least this large */
207         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
208                 return -EBADMSG;
209
210         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
211                 return -ENODATA;
212
213         if (f->writable) {
214                 uint8_t state;
215                 sd_id128_t machine_id;
216                 int r;
217
218                 r = sd_id128_get_machine(&machine_id);
219                 if (r < 0)
220                         return r;
221
222                 if (!sd_id128_equal(machine_id, f->header->machine_id))
223                         return -EHOSTDOWN;
224
225                 state = f->header->state;
226
227                 if (state == STATE_ONLINE) {
228                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
229                         return -EBUSY;
230                 } else if (state == STATE_ARCHIVED)
231                         return -ESHUTDOWN;
232                 else if (state != STATE_OFFLINE) {
233                         log_debug("Journal file %s has unknown state %u.", f->path, state);
234                         return -EBUSY;
235                 }
236         }
237
238         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
239         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
240
241         return 0;
242 }
243
244 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
245         uint64_t old_size, new_size;
246         int r;
247
248         assert(f);
249
250         /* We assume that this file is not sparse, and we know that
251          * for sure, since we always call posix_fallocate()
252          * ourselves */
253
254         old_size =
255                 le64toh(f->header->header_size) +
256                 le64toh(f->header->arena_size);
257
258         new_size = PAGE_ALIGN(offset + size);
259         if (new_size < le64toh(f->header->header_size))
260                 new_size = le64toh(f->header->header_size);
261
262         if (new_size <= old_size)
263                 return 0;
264
265         if (f->metrics.max_size > 0 &&
266             new_size > f->metrics.max_size)
267                 return -E2BIG;
268
269         if (new_size > f->metrics.min_size &&
270             f->metrics.keep_free > 0) {
271                 struct statvfs svfs;
272
273                 if (fstatvfs(f->fd, &svfs) >= 0) {
274                         uint64_t available;
275
276                         available = svfs.f_bfree * svfs.f_bsize;
277
278                         if (available >= f->metrics.keep_free)
279                                 available -= f->metrics.keep_free;
280                         else
281                                 available = 0;
282
283                         if (new_size - old_size > available)
284                                 return -E2BIG;
285                 }
286         }
287
288         /* Note that the glibc fallocate() fallback is very
289            inefficient, hence we try to minimize the allocation area
290            as we can. */
291         r = posix_fallocate(f->fd, old_size, new_size - old_size);
292         if (r != 0)
293                 return -r;
294
295         if (fstat(f->fd, &f->last_stat) < 0)
296                 return -errno;
297
298         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
299
300         return 0;
301 }
302
303 static int journal_file_map(
304                 JournalFile *f,
305                 uint64_t offset,
306                 uint64_t size,
307                 void **_window,
308                 uint64_t *_woffset,
309                 uint64_t *_wsize,
310                 void **ret) {
311
312         uint64_t woffset, wsize;
313         void *window;
314
315         assert(f);
316         assert(size > 0);
317         assert(ret);
318
319         woffset = offset & ~((uint64_t) page_size() - 1ULL);
320         wsize = size + (offset - woffset);
321         wsize = PAGE_ALIGN(wsize);
322
323         /* Avoid SIGBUS on invalid accesses */
324         if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
325                 return -EADDRNOTAVAIL;
326
327         window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
328         if (window == MAP_FAILED)
329                 return -errno;
330
331         if (_window)
332                 *_window = window;
333
334         if (_woffset)
335                 *_woffset = woffset;
336
337         if (_wsize)
338                 *_wsize = wsize;
339
340         *ret = (uint8_t*) window + (offset - woffset);
341
342         return 0;
343 }
344
345 static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
346         void *p = NULL;
347         uint64_t delta;
348         int r;
349         Window *w;
350
351         assert(f);
352         assert(ret);
353         assert(wt >= 0);
354         assert(wt < _WINDOW_MAX);
355
356         if (offset + size > (uint64_t) f->last_stat.st_size) {
357                 /* Hmm, out of range? Let's refresh the fstat() data
358                  * first, before we trust that check. */
359
360                 if (fstat(f->fd, &f->last_stat) < 0 ||
361                     offset + size > (uint64_t) f->last_stat.st_size)
362                         return -EADDRNOTAVAIL;
363         }
364
365         w = f->windows + wt;
366
367         if (_likely_(w->ptr &&
368                      w->offset <= offset &&
369                      w->offset + w->size >= offset + size)) {
370
371                 *ret = (uint8_t*) w->ptr + (offset - w->offset);
372                 return 0;
373         }
374
375         if (w->ptr) {
376                 if (munmap(w->ptr, w->size) < 0)
377                         return -errno;
378
379                 w->ptr = NULL;
380                 w->size = w->offset = 0;
381         }
382
383         if (size < DEFAULT_WINDOW_SIZE) {
384                 /* If the default window size is larger then what was
385                  * asked for extend the mapping a bit in the hope to
386                  * minimize needed remappings later on. We add half
387                  * the window space before and half behind the
388                  * requested mapping */
389
390                 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
391
392                 if (delta > offset)
393                         delta = offset;
394
395                 offset -= delta;
396                 size = DEFAULT_WINDOW_SIZE;
397         } else
398                 delta = 0;
399
400         if (offset + size > (uint64_t) f->last_stat.st_size)
401                 size = (uint64_t) f->last_stat.st_size - offset;
402
403         if (size <= 0)
404                 return -EADDRNOTAVAIL;
405
406         r = journal_file_map(f,
407                              offset, size,
408                              &w->ptr, &w->offset, &w->size,
409                              &p);
410
411         if (r < 0)
412                 return r;
413
414         *ret = (uint8_t*) p + delta;
415         return 0;
416 }
417
418 static bool verify_hash(Object *o) {
419         uint64_t h1, h2;
420
421         assert(o);
422
423         if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
424                 h1 = le64toh(o->data.hash);
425                 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
426         } else if (o->object.type == OBJECT_FIELD) {
427                 h1 = le64toh(o->field.hash);
428                 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
429         } else
430                 return true;
431
432         return h1 == h2;
433 }
434
435 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
436         int r;
437         void *t;
438         Object *o;
439         uint64_t s;
440
441         assert(f);
442         assert(ret);
443         assert(type < _OBJECT_TYPE_MAX);
444
445         r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
446         if (r < 0)
447                 return r;
448
449         o = (Object*) t;
450         s = le64toh(o->object.size);
451
452         if (s < sizeof(ObjectHeader))
453                 return -EBADMSG;
454
455         if (type >= 0 && o->object.type != type)
456                 return -EBADMSG;
457
458         if (s > sizeof(ObjectHeader)) {
459                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
460                 if (r < 0)
461                         return r;
462
463                 o = (Object*) t;
464         }
465
466         if (!verify_hash(o))
467                 return -EBADMSG;
468
469         *ret = o;
470         return 0;
471 }
472
473 static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
474         uint64_t r;
475
476         assert(f);
477
478         r = le64toh(f->header->tail_seqnum) + 1;
479
480         if (seqnum) {
481                 /* If an external seqnum counter was passed, we update
482                  * both the local and the external one, and set it to
483                  * the maximum of both */
484
485                 if (*seqnum + 1 > r)
486                         r = *seqnum + 1;
487
488                 *seqnum = r;
489         }
490
491         f->header->tail_seqnum = htole64(r);
492
493         if (f->header->head_seqnum == 0)
494                 f->header->head_seqnum = htole64(r);
495
496         return r;
497 }
498
499 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
500         int r;
501         uint64_t p;
502         Object *tail, *o;
503         void *t;
504
505         assert(f);
506         assert(size >= sizeof(ObjectHeader));
507         assert(offset);
508         assert(ret);
509
510         p = le64toh(f->header->tail_object_offset);
511         if (p == 0)
512                 p = le64toh(f->header->header_size);
513         else {
514                 r = journal_file_move_to_object(f, -1, p, &tail);
515                 if (r < 0)
516                         return r;
517
518                 p += ALIGN64(le64toh(tail->object.size));
519         }
520
521         r = journal_file_allocate(f, p, size);
522         if (r < 0)
523                 return r;
524
525         r = journal_file_move_to(f, type, p, size, &t);
526         if (r < 0)
527                 return r;
528
529         o = (Object*) t;
530
531         zero(o->object);
532         o->object.type = type;
533         o->object.size = htole64(size);
534
535         f->header->tail_object_offset = htole64(p);
536         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
537
538         *ret = o;
539         *offset = p;
540
541         return 0;
542 }
543
544 static int journal_file_setup_data_hash_table(JournalFile *f) {
545         uint64_t s, p;
546         Object *o;
547         int r;
548
549         assert(f);
550
551         /* We estimate that we need 1 hash table entry per 768 of
552            journal file and we want to make sure we never get beyond
553            75% fill level. Calculate the hash table size for the
554            maximum file size based on these metrics. */
555
556         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
557         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
558                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
559
560         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
561
562         r = journal_file_append_object(f,
563                                        OBJECT_DATA_HASH_TABLE,
564                                        offsetof(Object, hash_table.items) + s,
565                                        &o, &p);
566         if (r < 0)
567                 return r;
568
569         memset(o->hash_table.items, 0, s);
570
571         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
572         f->header->data_hash_table_size = htole64(s);
573
574         return 0;
575 }
576
577 static int journal_file_setup_field_hash_table(JournalFile *f) {
578         uint64_t s, p;
579         Object *o;
580         int r;
581
582         assert(f);
583
584         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
585         r = journal_file_append_object(f,
586                                        OBJECT_FIELD_HASH_TABLE,
587                                        offsetof(Object, hash_table.items) + s,
588                                        &o, &p);
589         if (r < 0)
590                 return r;
591
592         memset(o->hash_table.items, 0, s);
593
594         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
595         f->header->field_hash_table_size = htole64(s);
596
597         return 0;
598 }
599
600 static int journal_file_map_data_hash_table(JournalFile *f) {
601         uint64_t s, p;
602         void *t;
603         int r;
604
605         assert(f);
606
607         p = le64toh(f->header->data_hash_table_offset);
608         s = le64toh(f->header->data_hash_table_size);
609
610         r = journal_file_move_to(f,
611                                  WINDOW_DATA_HASH_TABLE,
612                                  p, s,
613                                  &t);
614         if (r < 0)
615                 return r;
616
617         f->data_hash_table = t;
618         return 0;
619 }
620
621 static int journal_file_map_field_hash_table(JournalFile *f) {
622         uint64_t s, p;
623         void *t;
624         int r;
625
626         assert(f);
627
628         p = le64toh(f->header->field_hash_table_offset);
629         s = le64toh(f->header->field_hash_table_size);
630
631         r = journal_file_move_to(f,
632                                  WINDOW_FIELD_HASH_TABLE,
633                                  p, s,
634                                  &t);
635         if (r < 0)
636                 return r;
637
638         f->field_hash_table = t;
639         return 0;
640 }
641
642 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
643         uint64_t p, h;
644         int r;
645
646         assert(f);
647         assert(o);
648         assert(offset > 0);
649         assert(o->object.type == OBJECT_DATA);
650
651         /* This might alter the window we are looking at */
652
653         o->data.next_hash_offset = o->data.next_field_offset = 0;
654         o->data.entry_offset = o->data.entry_array_offset = 0;
655         o->data.n_entries = 0;
656
657         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
658         p = le64toh(f->data_hash_table[h].tail_hash_offset);
659         if (p == 0) {
660                 /* Only entry in the hash table is easy */
661                 f->data_hash_table[h].head_hash_offset = htole64(offset);
662         } else {
663                 /* Move back to the previous data object, to patch in
664                  * pointer */
665
666                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
667                 if (r < 0)
668                         return r;
669
670                 o->data.next_hash_offset = htole64(offset);
671         }
672
673         f->data_hash_table[h].tail_hash_offset = htole64(offset);
674
675         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
676                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
677
678         return 0;
679 }
680
681 int journal_file_find_data_object_with_hash(
682                 JournalFile *f,
683                 const void *data, uint64_t size, uint64_t hash,
684                 Object **ret, uint64_t *offset) {
685
686         uint64_t p, osize, h;
687         int r;
688
689         assert(f);
690         assert(data || size == 0);
691
692         osize = offsetof(Object, data.payload) + size;
693
694         if (f->header->data_hash_table_size == 0)
695                 return -EBADMSG;
696
697         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
698         p = le64toh(f->data_hash_table[h].head_hash_offset);
699
700         while (p > 0) {
701                 Object *o;
702
703                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
704                 if (r < 0)
705                         return r;
706
707                 if (le64toh(o->data.hash) != hash)
708                         goto next;
709
710                 if (o->object.flags & OBJECT_COMPRESSED) {
711 #ifdef HAVE_XZ
712                         uint64_t l, rsize;
713
714                         l = le64toh(o->object.size);
715                         if (l <= offsetof(Object, data.payload))
716                                 return -EBADMSG;
717
718                         l -= offsetof(Object, data.payload);
719
720                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
721                                 return -EBADMSG;
722
723                         if (rsize == size &&
724                             memcmp(f->compress_buffer, data, size) == 0) {
725
726                                 if (ret)
727                                         *ret = o;
728
729                                 if (offset)
730                                         *offset = p;
731
732                                 return 1;
733                         }
734 #else
735                         return -EPROTONOSUPPORT;
736 #endif
737
738                 } else if (le64toh(o->object.size) == osize &&
739                            memcmp(o->data.payload, data, size) == 0) {
740
741                         if (ret)
742                                 *ret = o;
743
744                         if (offset)
745                                 *offset = p;
746
747                         return 1;
748                 }
749
750         next:
751                 p = le64toh(o->data.next_hash_offset);
752         }
753
754         return 0;
755 }
756
757 int journal_file_find_data_object(
758                 JournalFile *f,
759                 const void *data, uint64_t size,
760                 Object **ret, uint64_t *offset) {
761
762         uint64_t hash;
763
764         assert(f);
765         assert(data || size == 0);
766
767         hash = hash64(data, size);
768
769         return journal_file_find_data_object_with_hash(f,
770                                                        data, size, hash,
771                                                        ret, offset);
772 }
773
774 static int journal_file_append_data(
775                 JournalFile *f,
776                 const void *data, uint64_t size,
777                 Object **ret, uint64_t *offset) {
778
779         uint64_t hash, p;
780         uint64_t osize;
781         Object *o;
782         int r;
783         bool compressed = false;
784
785         assert(f);
786         assert(data || size == 0);
787
788         hash = hash64(data, size);
789
790         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
791         if (r < 0)
792                 return r;
793         else if (r > 0) {
794
795                 if (ret)
796                         *ret = o;
797
798                 if (offset)
799                         *offset = p;
800
801                 return 0;
802         }
803
804         osize = offsetof(Object, data.payload) + size;
805         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
806         if (r < 0)
807                 return r;
808
809         o->data.hash = htole64(hash);
810
811 #ifdef HAVE_XZ
812         if (f->compress &&
813             size >= COMPRESSION_SIZE_THRESHOLD) {
814                 uint64_t rsize;
815
816                 compressed = compress_blob(data, size, o->data.payload, &rsize);
817
818                 if (compressed) {
819                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
820                         o->object.flags |= OBJECT_COMPRESSED;
821
822                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
823                 }
824         }
825 #endif
826
827         if (!compressed && size > 0)
828                 memcpy(o->data.payload, data, size);
829
830         r = journal_file_link_data(f, o, p, hash);
831         if (r < 0)
832                 return r;
833
834         /* The linking might have altered the window, so let's
835          * refresh our pointer */
836         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
837         if (r < 0)
838                 return r;
839
840         if (ret)
841                 *ret = o;
842
843         if (offset)
844                 *offset = p;
845
846         return 0;
847 }
848
849 uint64_t journal_file_entry_n_items(Object *o) {
850         assert(o);
851         assert(o->object.type == OBJECT_ENTRY);
852
853         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
854 }
855
856 static uint64_t journal_file_entry_array_n_items(Object *o) {
857         assert(o);
858         assert(o->object.type == OBJECT_ENTRY_ARRAY);
859
860         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
861 }
862
863 static int link_entry_into_array(JournalFile *f,
864                                  le64_t *first,
865                                  le64_t *idx,
866                                  uint64_t p) {
867         int r;
868         uint64_t n = 0, ap = 0, q, i, a, hidx;
869         Object *o;
870
871         assert(f);
872         assert(first);
873         assert(idx);
874         assert(p > 0);
875
876         a = le64toh(*first);
877         i = hidx = le64toh(*idx);
878         while (a > 0) {
879
880                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
881                 if (r < 0)
882                         return r;
883
884                 n = journal_file_entry_array_n_items(o);
885                 if (i < n) {
886                         o->entry_array.items[i] = htole64(p);
887                         *idx = htole64(hidx + 1);
888                         return 0;
889                 }
890
891                 i -= n;
892                 ap = a;
893                 a = le64toh(o->entry_array.next_entry_array_offset);
894         }
895
896         if (hidx > n)
897                 n = (hidx+1) * 2;
898         else
899                 n = n * 2;
900
901         if (n < 4)
902                 n = 4;
903
904         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
905                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
906                                        &o, &q);
907         if (r < 0)
908                 return r;
909
910         o->entry_array.items[i] = htole64(p);
911
912         if (ap == 0)
913                 *first = htole64(q);
914         else {
915                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
916                 if (r < 0)
917                         return r;
918
919                 o->entry_array.next_entry_array_offset = htole64(q);
920         }
921
922         *idx = htole64(hidx + 1);
923
924         return 0;
925 }
926
927 static int link_entry_into_array_plus_one(JournalFile *f,
928                                           le64_t *extra,
929                                           le64_t *first,
930                                           le64_t *idx,
931                                           uint64_t p) {
932
933         int r;
934
935         assert(f);
936         assert(extra);
937         assert(first);
938         assert(idx);
939         assert(p > 0);
940
941         if (*idx == 0)
942                 *extra = htole64(p);
943         else {
944                 le64_t i;
945
946                 i = htole64(le64toh(*idx) - 1);
947                 r = link_entry_into_array(f, first, &i, p);
948                 if (r < 0)
949                         return r;
950         }
951
952         *idx = htole64(le64toh(*idx) + 1);
953         return 0;
954 }
955
956 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
957         uint64_t p;
958         int r;
959         assert(f);
960         assert(o);
961         assert(offset > 0);
962
963         p = le64toh(o->entry.items[i].object_offset);
964         if (p == 0)
965                 return -EINVAL;
966
967         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
968         if (r < 0)
969                 return r;
970
971         return link_entry_into_array_plus_one(f,
972                                               &o->data.entry_offset,
973                                               &o->data.entry_array_offset,
974                                               &o->data.n_entries,
975                                               offset);
976 }
977
978 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
979         uint64_t n, i;
980         int r;
981
982         assert(f);
983         assert(o);
984         assert(offset > 0);
985         assert(o->object.type == OBJECT_ENTRY);
986
987         __sync_synchronize();
988
989         /* Link up the entry itself */
990         r = link_entry_into_array(f,
991                                   &f->header->entry_array_offset,
992                                   &f->header->n_entries,
993                                   offset);
994         if (r < 0)
995                 return r;
996
997         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
998
999         if (f->header->head_entry_realtime == 0)
1000                 f->header->head_entry_realtime = o->entry.realtime;
1001
1002         f->header->tail_entry_realtime = o->entry.realtime;
1003         f->header->tail_entry_monotonic = o->entry.monotonic;
1004
1005         f->tail_entry_monotonic_valid = true;
1006
1007         /* Link up the items */
1008         n = journal_file_entry_n_items(o);
1009         for (i = 0; i < n; i++) {
1010                 r = journal_file_link_entry_item(f, o, offset, i);
1011                 if (r < 0)
1012                         return r;
1013         }
1014
1015         return 0;
1016 }
1017
1018 static int journal_file_append_entry_internal(
1019                 JournalFile *f,
1020                 const dual_timestamp *ts,
1021                 uint64_t xor_hash,
1022                 const EntryItem items[], unsigned n_items,
1023                 uint64_t *seqnum,
1024                 Object **ret, uint64_t *offset) {
1025         uint64_t np;
1026         uint64_t osize;
1027         Object *o;
1028         int r;
1029
1030         assert(f);
1031         assert(items || n_items == 0);
1032         assert(ts);
1033
1034         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1035
1036         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1037         if (r < 0)
1038                 return r;
1039
1040         o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
1041         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1042         o->entry.realtime = htole64(ts->realtime);
1043         o->entry.monotonic = htole64(ts->monotonic);
1044         o->entry.xor_hash = htole64(xor_hash);
1045         o->entry.boot_id = f->header->boot_id;
1046
1047         r = journal_file_link_entry(f, o, np);
1048         if (r < 0)
1049                 return r;
1050
1051         if (ret)
1052                 *ret = o;
1053
1054         if (offset)
1055                 *offset = np;
1056
1057         return 0;
1058 }
1059
1060 void journal_file_post_change(JournalFile *f) {
1061         assert(f);
1062
1063         /* inotify() does not receive IN_MODIFY events from file
1064          * accesses done via mmap(). After each access we hence
1065          * trigger IN_MODIFY by truncating the journal file to its
1066          * current size which triggers IN_MODIFY. */
1067
1068         __sync_synchronize();
1069
1070         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1071                 log_error("Failed to to truncate file to its own size: %m");
1072 }
1073
1074 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1075         unsigned i;
1076         EntryItem *items;
1077         int r;
1078         uint64_t xor_hash = 0;
1079         struct dual_timestamp _ts;
1080
1081         assert(f);
1082         assert(iovec || n_iovec == 0);
1083
1084         if (!f->writable)
1085                 return -EPERM;
1086
1087         if (!ts) {
1088                 dual_timestamp_get(&_ts);
1089                 ts = &_ts;
1090         }
1091
1092         if (f->tail_entry_monotonic_valid &&
1093             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1094                 return -EINVAL;
1095
1096         r = journal_file_maybe_append_tag(f, ts->realtime);
1097         if (r < 0)
1098                 return r;
1099
1100         /* alloca() can't take 0, hence let's allocate at least one */
1101         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1102
1103         for (i = 0; i < n_iovec; i++) {
1104                 uint64_t p;
1105                 Object *o;
1106
1107                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1108                 if (r < 0)
1109                         return r;
1110
1111                 xor_hash ^= le64toh(o->data.hash);
1112                 items[i].object_offset = htole64(p);
1113                 items[i].hash = o->data.hash;
1114         }
1115
1116         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1117
1118         journal_file_post_change(f);
1119
1120         return r;
1121 }
1122
1123 static int generic_array_get(JournalFile *f,
1124                              uint64_t first,
1125                              uint64_t i,
1126                              Object **ret, uint64_t *offset) {
1127
1128         Object *o;
1129         uint64_t p = 0, a;
1130         int r;
1131
1132         assert(f);
1133
1134         a = first;
1135         while (a > 0) {
1136                 uint64_t n;
1137
1138                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1139                 if (r < 0)
1140                         return r;
1141
1142                 n = journal_file_entry_array_n_items(o);
1143                 if (i < n) {
1144                         p = le64toh(o->entry_array.items[i]);
1145                         break;
1146                 }
1147
1148                 i -= n;
1149                 a = le64toh(o->entry_array.next_entry_array_offset);
1150         }
1151
1152         if (a <= 0 || p <= 0)
1153                 return 0;
1154
1155         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1156         if (r < 0)
1157                 return r;
1158
1159         if (ret)
1160                 *ret = o;
1161
1162         if (offset)
1163                 *offset = p;
1164
1165         return 1;
1166 }
1167
1168 static int generic_array_get_plus_one(JournalFile *f,
1169                                       uint64_t extra,
1170                                       uint64_t first,
1171                                       uint64_t i,
1172                                       Object **ret, uint64_t *offset) {
1173
1174         Object *o;
1175
1176         assert(f);
1177
1178         if (i == 0) {
1179                 int r;
1180
1181                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1182                 if (r < 0)
1183                         return r;
1184
1185                 if (ret)
1186                         *ret = o;
1187
1188                 if (offset)
1189                         *offset = extra;
1190
1191                 return 1;
1192         }
1193
1194         return generic_array_get(f, first, i-1, ret, offset);
1195 }
1196
/* Verdicts a test_object() callback returns to the bisection code */
enum {
        TEST_FOUND = 0, /* the probed entry equals the needle */
        TEST_LEFT  = 1, /* the probed entry is smaller than the needle */
        TEST_RIGHT = 2  /* the probed entry is larger than the needle */
};
1202
1203 static int generic_array_bisect(JournalFile *f,
1204                                 uint64_t first,
1205                                 uint64_t n,
1206                                 uint64_t needle,
1207                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1208                                 direction_t direction,
1209                                 Object **ret,
1210                                 uint64_t *offset,
1211                                 uint64_t *idx) {
1212
1213         uint64_t a, p, t = 0, i = 0, last_p = 0;
1214         bool subtract_one = false;
1215         Object *o, *array = NULL;
1216         int r;
1217
1218         assert(f);
1219         assert(test_object);
1220
1221         a = first;
1222         while (a > 0) {
1223                 uint64_t left, right, k, lp;
1224
1225                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1226                 if (r < 0)
1227                         return r;
1228
1229                 k = journal_file_entry_array_n_items(array);
1230                 right = MIN(k, n);
1231                 if (right <= 0)
1232                         return 0;
1233
1234                 i = right - 1;
1235                 lp = p = le64toh(array->entry_array.items[i]);
1236                 if (p <= 0)
1237                         return -EBADMSG;
1238
1239                 r = test_object(f, p, needle);
1240                 if (r < 0)
1241                         return r;
1242
1243                 if (r == TEST_FOUND)
1244                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1245
1246                 if (r == TEST_RIGHT) {
1247                         left = 0;
1248                         right -= 1;
1249                         for (;;) {
1250                                 if (left == right) {
1251                                         if (direction == DIRECTION_UP)
1252                                                 subtract_one = true;
1253
1254                                         i = left;
1255                                         goto found;
1256                                 }
1257
1258                                 assert(left < right);
1259
1260                                 i = (left + right) / 2;
1261                                 p = le64toh(array->entry_array.items[i]);
1262                                 if (p <= 0)
1263                                         return -EBADMSG;
1264
1265                                 r = test_object(f, p, needle);
1266                                 if (r < 0)
1267                                         return r;
1268
1269                                 if (r == TEST_FOUND)
1270                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1271
1272                                 if (r == TEST_RIGHT)
1273                                         right = i;
1274                                 else
1275                                         left = i + 1;
1276                         }
1277                 }
1278
1279                 if (k > n) {
1280                         if (direction == DIRECTION_UP) {
1281                                 i = n;
1282                                 subtract_one = true;
1283                                 goto found;
1284                         }
1285
1286                         return 0;
1287                 }
1288
1289                 last_p = lp;
1290
1291                 n -= k;
1292                 t += k;
1293                 a = le64toh(array->entry_array.next_entry_array_offset);
1294         }
1295
1296         return 0;
1297
1298 found:
1299         if (subtract_one && t == 0 && i == 0)
1300                 return 0;
1301
1302         if (subtract_one && i == 0)
1303                 p = last_p;
1304         else if (subtract_one)
1305                 p = le64toh(array->entry_array.items[i-1]);
1306         else
1307                 p = le64toh(array->entry_array.items[i]);
1308
1309         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1310         if (r < 0)
1311                 return r;
1312
1313         if (ret)
1314                 *ret = o;
1315
1316         if (offset)
1317                 *offset = p;
1318
1319         if (idx)
1320                 *idx = t + i + (subtract_one ? -1 : 0);
1321
1322         return 1;
1323 }
1324
1325 static int generic_array_bisect_plus_one(JournalFile *f,
1326                                          uint64_t extra,
1327                                          uint64_t first,
1328                                          uint64_t n,
1329                                          uint64_t needle,
1330                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1331                                          direction_t direction,
1332                                          Object **ret,
1333                                          uint64_t *offset,
1334                                          uint64_t *idx) {
1335
1336         int r;
1337         bool step_back = false;
1338         Object *o;
1339
1340         assert(f);
1341         assert(test_object);
1342
1343         if (n <= 0)
1344                 return 0;
1345
1346         /* This bisects the array in object 'first', but first checks
1347          * an extra  */
1348         r = test_object(f, extra, needle);
1349         if (r < 0)
1350                 return r;
1351
1352         if (r == TEST_FOUND)
1353                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1354
1355         /* if we are looking with DIRECTION_UP then we need to first
1356            see if in the actual array there is a matching entry, and
1357            return the last one of that. But if there isn't any we need
1358            to return this one. Hence remember this, and return it
1359            below. */
1360         if (r == TEST_LEFT)
1361                 step_back = direction == DIRECTION_UP;
1362
1363         if (r == TEST_RIGHT) {
1364                 if (direction == DIRECTION_DOWN)
1365                         goto found;
1366                 else
1367                         return 0;
1368         }
1369
1370         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1371
1372         if (r == 0 && step_back)
1373                 goto found;
1374
1375         if (r > 0 && idx)
1376                 (*idx) ++;
1377
1378         return r;
1379
1380 found:
1381         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1382         if (r < 0)
1383                 return r;
1384
1385         if (ret)
1386                 *ret = o;
1387
1388         if (offset)
1389                 *offset = extra;
1390
1391         if (idx)
1392                 *idx = 0;
1393
1394         return 1;
1395 }
1396
1397 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1398         assert(f);
1399         assert(p > 0);
1400
1401         if (p == needle)
1402                 return TEST_FOUND;
1403         else if (p < needle)
1404                 return TEST_LEFT;
1405         else
1406                 return TEST_RIGHT;
1407 }
1408
1409 int journal_file_move_to_entry_by_offset(
1410                 JournalFile *f,
1411                 uint64_t p,
1412                 direction_t direction,
1413                 Object **ret,
1414                 uint64_t *offset) {
1415
1416         return generic_array_bisect(f,
1417                                     le64toh(f->header->entry_array_offset),
1418                                     le64toh(f->header->n_entries),
1419                                     p,
1420                                     test_object_offset,
1421                                     direction,
1422                                     ret, offset, NULL);
1423 }
1424
1425
1426 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1427         Object *o;
1428         int r;
1429
1430         assert(f);
1431         assert(p > 0);
1432
1433         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1434         if (r < 0)
1435                 return r;
1436
1437         if (le64toh(o->entry.seqnum) == needle)
1438                 return TEST_FOUND;
1439         else if (le64toh(o->entry.seqnum) < needle)
1440                 return TEST_LEFT;
1441         else
1442                 return TEST_RIGHT;
1443 }
1444
1445 int journal_file_move_to_entry_by_seqnum(
1446                 JournalFile *f,
1447                 uint64_t seqnum,
1448                 direction_t direction,
1449                 Object **ret,
1450                 uint64_t *offset) {
1451
1452         return generic_array_bisect(f,
1453                                     le64toh(f->header->entry_array_offset),
1454                                     le64toh(f->header->n_entries),
1455                                     seqnum,
1456                                     test_object_seqnum,
1457                                     direction,
1458                                     ret, offset, NULL);
1459 }
1460
1461 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1462         Object *o;
1463         int r;
1464
1465         assert(f);
1466         assert(p > 0);
1467
1468         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1469         if (r < 0)
1470                 return r;
1471
1472         if (le64toh(o->entry.realtime) == needle)
1473                 return TEST_FOUND;
1474         else if (le64toh(o->entry.realtime) < needle)
1475                 return TEST_LEFT;
1476         else
1477                 return TEST_RIGHT;
1478 }
1479
1480 int journal_file_move_to_entry_by_realtime(
1481                 JournalFile *f,
1482                 uint64_t realtime,
1483                 direction_t direction,
1484                 Object **ret,
1485                 uint64_t *offset) {
1486
1487         return generic_array_bisect(f,
1488                                     le64toh(f->header->entry_array_offset),
1489                                     le64toh(f->header->n_entries),
1490                                     realtime,
1491                                     test_object_realtime,
1492                                     direction,
1493                                     ret, offset, NULL);
1494 }
1495
1496 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1497         Object *o;
1498         int r;
1499
1500         assert(f);
1501         assert(p > 0);
1502
1503         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1504         if (r < 0)
1505                 return r;
1506
1507         if (le64toh(o->entry.monotonic) == needle)
1508                 return TEST_FOUND;
1509         else if (le64toh(o->entry.monotonic) < needle)
1510                 return TEST_LEFT;
1511         else
1512                 return TEST_RIGHT;
1513 }
1514
1515 int journal_file_move_to_entry_by_monotonic(
1516                 JournalFile *f,
1517                 sd_id128_t boot_id,
1518                 uint64_t monotonic,
1519                 direction_t direction,
1520                 Object **ret,
1521                 uint64_t *offset) {
1522
1523         char t[9+32+1] = "_BOOT_ID=";
1524         Object *o;
1525         int r;
1526
1527         assert(f);
1528
1529         sd_id128_to_string(boot_id, t + 9);
1530         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1531         if (r < 0)
1532                 return r;
1533         if (r == 0)
1534                 return -ENOENT;
1535
1536         return generic_array_bisect_plus_one(f,
1537                                              le64toh(o->data.entry_offset),
1538                                              le64toh(o->data.entry_array_offset),
1539                                              le64toh(o->data.n_entries),
1540                                              monotonic,
1541                                              test_object_monotonic,
1542                                              direction,
1543                                              ret, offset, NULL);
1544 }
1545
1546 int journal_file_next_entry(
1547                 JournalFile *f,
1548                 Object *o, uint64_t p,
1549                 direction_t direction,
1550                 Object **ret, uint64_t *offset) {
1551
1552         uint64_t i, n;
1553         int r;
1554
1555         assert(f);
1556         assert(p > 0 || !o);
1557
1558         n = le64toh(f->header->n_entries);
1559         if (n <= 0)
1560                 return 0;
1561
1562         if (!o)
1563                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1564         else {
1565                 if (o->object.type != OBJECT_ENTRY)
1566                         return -EINVAL;
1567
1568                 r = generic_array_bisect(f,
1569                                          le64toh(f->header->entry_array_offset),
1570                                          le64toh(f->header->n_entries),
1571                                          p,
1572                                          test_object_offset,
1573                                          DIRECTION_DOWN,
1574                                          NULL, NULL,
1575                                          &i);
1576                 if (r <= 0)
1577                         return r;
1578
1579                 if (direction == DIRECTION_DOWN) {
1580                         if (i >= n - 1)
1581                                 return 0;
1582
1583                         i++;
1584                 } else {
1585                         if (i <= 0)
1586                                 return 0;
1587
1588                         i--;
1589                 }
1590         }
1591
1592         /* And jump to it */
1593         return generic_array_get(f,
1594                                  le64toh(f->header->entry_array_offset),
1595                                  i,
1596                                  ret, offset);
1597 }
1598
1599 int journal_file_skip_entry(
1600                 JournalFile *f,
1601                 Object *o, uint64_t p,
1602                 int64_t skip,
1603                 Object **ret, uint64_t *offset) {
1604
1605         uint64_t i, n;
1606         int r;
1607
1608         assert(f);
1609         assert(o);
1610         assert(p > 0);
1611
1612         if (o->object.type != OBJECT_ENTRY)
1613                 return -EINVAL;
1614
1615         r = generic_array_bisect(f,
1616                                  le64toh(f->header->entry_array_offset),
1617                                  le64toh(f->header->n_entries),
1618                                  p,
1619                                  test_object_offset,
1620                                  DIRECTION_DOWN,
1621                                  NULL, NULL,
1622                                  &i);
1623         if (r <= 0)
1624                 return r;
1625
1626         /* Calculate new index */
1627         if (skip < 0) {
1628                 if ((uint64_t) -skip >= i)
1629                         i = 0;
1630                 else
1631                         i = i - (uint64_t) -skip;
1632         } else
1633                 i  += (uint64_t) skip;
1634
1635         n = le64toh(f->header->n_entries);
1636         if (n <= 0)
1637                 return -EBADMSG;
1638
1639         if (i >= n)
1640                 i = n-1;
1641
1642         return generic_array_get(f,
1643                                  le64toh(f->header->entry_array_offset),
1644                                  i,
1645                                  ret, offset);
1646 }
1647
1648 int journal_file_next_entry_for_data(
1649                 JournalFile *f,
1650                 Object *o, uint64_t p,
1651                 uint64_t data_offset,
1652                 direction_t direction,
1653                 Object **ret, uint64_t *offset) {
1654
1655         uint64_t n, i;
1656         int r;
1657         Object *d;
1658
1659         assert(f);
1660         assert(p > 0 || !o);
1661
1662         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1663         if (r < 0)
1664                 return r;
1665
1666         n = le64toh(d->data.n_entries);
1667         if (n <= 0)
1668                 return n;
1669
1670         if (!o)
1671                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1672         else {
1673                 if (o->object.type != OBJECT_ENTRY)
1674                         return -EINVAL;
1675
1676                 r = generic_array_bisect_plus_one(f,
1677                                                   le64toh(d->data.entry_offset),
1678                                                   le64toh(d->data.entry_array_offset),
1679                                                   le64toh(d->data.n_entries),
1680                                                   p,
1681                                                   test_object_offset,
1682                                                   DIRECTION_DOWN,
1683                                                   NULL, NULL,
1684                                                   &i);
1685
1686                 if (r <= 0)
1687                         return r;
1688
1689                 if (direction == DIRECTION_DOWN) {
1690                         if (i >= n - 1)
1691                                 return 0;
1692
1693                         i++;
1694                 } else {
1695                         if (i <= 0)
1696                                 return 0;
1697
1698                         i--;
1699                 }
1700
1701         }
1702
1703         return generic_array_get_plus_one(f,
1704                                           le64toh(d->data.entry_offset),
1705                                           le64toh(d->data.entry_array_offset),
1706                                           i,
1707                                           ret, offset);
1708 }
1709
1710 int journal_file_move_to_entry_by_offset_for_data(
1711                 JournalFile *f,
1712                 uint64_t data_offset,
1713                 uint64_t p,
1714                 direction_t direction,
1715                 Object **ret, uint64_t *offset) {
1716
1717         int r;
1718         Object *d;
1719
1720         assert(f);
1721
1722         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1723         if (r < 0)
1724                 return r;
1725
1726         return generic_array_bisect_plus_one(f,
1727                                              le64toh(d->data.entry_offset),
1728                                              le64toh(d->data.entry_array_offset),
1729                                              le64toh(d->data.n_entries),
1730                                              p,
1731                                              test_object_offset,
1732                                              direction,
1733                                              ret, offset, NULL);
1734 }
1735
1736 int journal_file_move_to_entry_by_monotonic_for_data(
1737                 JournalFile *f,
1738                 uint64_t data_offset,
1739                 sd_id128_t boot_id,
1740                 uint64_t monotonic,
1741                 direction_t direction,
1742                 Object **ret, uint64_t *offset) {
1743
1744         char t[9+32+1] = "_BOOT_ID=";
1745         Object *o, *d;
1746         int r;
1747         uint64_t b, z;
1748
1749         assert(f);
1750
1751         /* First, seek by time */
1752         sd_id128_to_string(boot_id, t + 9);
1753         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1754         if (r < 0)
1755                 return r;
1756         if (r == 0)
1757                 return -ENOENT;
1758
1759         r = generic_array_bisect_plus_one(f,
1760                                           le64toh(o->data.entry_offset),
1761                                           le64toh(o->data.entry_array_offset),
1762                                           le64toh(o->data.n_entries),
1763                                           monotonic,
1764                                           test_object_monotonic,
1765                                           direction,
1766                                           NULL, &z, NULL);
1767         if (r <= 0)
1768                 return r;
1769
1770         /* And now, continue seeking until we find an entry that
1771          * exists in both bisection arrays */
1772
1773         for (;;) {
1774                 Object *qo;
1775                 uint64_t p, q;
1776
1777                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1778                 if (r < 0)
1779                         return r;
1780
1781                 r = generic_array_bisect_plus_one(f,
1782                                                   le64toh(d->data.entry_offset),
1783                                                   le64toh(d->data.entry_array_offset),
1784                                                   le64toh(d->data.n_entries),
1785                                                   z,
1786                                                   test_object_offset,
1787                                                   direction,
1788                                                   NULL, &p, NULL);
1789                 if (r <= 0)
1790                         return r;
1791
1792                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1793                 if (r < 0)
1794                         return r;
1795
1796                 r = generic_array_bisect_plus_one(f,
1797                                                   le64toh(o->data.entry_offset),
1798                                                   le64toh(o->data.entry_array_offset),
1799                                                   le64toh(o->data.n_entries),
1800                                                   p,
1801                                                   test_object_offset,
1802                                                   direction,
1803                                                   &qo, &q, NULL);
1804
1805                 if (r <= 0)
1806                         return r;
1807
1808                 if (p == q) {
1809                         if (ret)
1810                                 *ret = qo;
1811                         if (offset)
1812                                 *offset = q;
1813
1814                         return 1;
1815                 }
1816
1817                 z = q;
1818         }
1819
1820         return 0;
1821 }
1822
1823 int journal_file_move_to_entry_by_seqnum_for_data(
1824                 JournalFile *f,
1825                 uint64_t data_offset,
1826                 uint64_t seqnum,
1827                 direction_t direction,
1828                 Object **ret, uint64_t *offset) {
1829
1830         Object *d;
1831         int r;
1832
1833         assert(f);
1834
1835         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1836         if (r < 0)
1837                 return r;
1838
1839         return generic_array_bisect_plus_one(f,
1840                                              le64toh(d->data.entry_offset),
1841                                              le64toh(d->data.entry_array_offset),
1842                                              le64toh(d->data.n_entries),
1843                                              seqnum,
1844                                              test_object_seqnum,
1845                                              direction,
1846                                              ret, offset, NULL);
1847 }
1848
1849 int journal_file_move_to_entry_by_realtime_for_data(
1850                 JournalFile *f,
1851                 uint64_t data_offset,
1852                 uint64_t realtime,
1853                 direction_t direction,
1854                 Object **ret, uint64_t *offset) {
1855
1856         Object *d;
1857         int r;
1858
1859         assert(f);
1860
1861         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1862         if (r < 0)
1863                 return r;
1864
1865         return generic_array_bisect_plus_one(f,
1866                                              le64toh(d->data.entry_offset),
1867                                              le64toh(d->data.entry_array_offset),
1868                                              le64toh(d->data.n_entries),
1869                                              realtime,
1870                                              test_object_realtime,
1871                                              direction,
1872                                              ret, offset, NULL);
1873 }
1874
1875 static void *fsprg_state(JournalFile *f) {
1876         uint64_t a, b;
1877         assert(f);
1878
1879         if (!f->authenticate)
1880                 return NULL;
1881
1882         a = le64toh(f->fsprg_header->header_size);
1883         b = le64toh(f->fsprg_header->state_size);
1884
1885         if (a + b > f->fsprg_size)
1886                 return NULL;
1887
1888         return (uint8_t*) f->fsprg_header + a;
1889 }
1890
1891 static int journal_file_append_tag(JournalFile *f) {
1892         Object *o;
1893         uint64_t p;
1894         int r;
1895
1896         assert(f);
1897
1898         if (!f->authenticate)
1899                 return 0;
1900
1901         if (!f->hmac_running)
1902                 return 0;
1903
1904         log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1905
1906         assert(f->hmac);
1907
1908         r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1909         if (r < 0)
1910                 return r;
1911
1912         /* Get the HMAC tag and store it in the object */
1913         memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1914         f->hmac_running = false;
1915
1916         return 0;
1917 }
1918
1919 static int journal_file_hmac_start(JournalFile *f) {
1920         uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1921
1922         assert(f);
1923
1924         if (!f->authenticate)
1925                 return 0;
1926
1927         if (f->hmac_running)
1928                 return 0;
1929
1930         /* Prepare HMAC for next cycle */
1931         gcry_md_reset(f->hmac);
1932         FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1933         gcry_md_setkey(f->hmac, key, sizeof(key));
1934
1935         f->hmac_running = true;
1936
1937         return 0;
1938 }
1939
1940 static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1941         uint64_t t;
1942
1943         assert(f);
1944         assert(epoch);
1945         assert(f->authenticate);
1946
1947         if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1948             le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1949                 return -ENOTSUP;
1950
1951         if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1952                 return -ESTALE;
1953
1954         t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1955         t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1956
1957         *epoch = t;
1958         return 0;
1959 }
1960
1961 static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1962         uint64_t goal, epoch;
1963         int r;
1964         assert(f);
1965
1966         if (!f->authenticate)
1967                 return 0;
1968
1969         r = journal_file_get_epoch(f, realtime, &goal);
1970         if (r < 0)
1971                 return r;
1972
1973         epoch = FSPRG_GetEpoch(fsprg_state(f));
1974         if (epoch > goal)
1975                 return -ESTALE;
1976
1977         return epoch != goal;
1978 }
1979
1980 static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1981         uint64_t goal, epoch;
1982         int r;
1983
1984         assert(f);
1985
1986         if (!f->authenticate)
1987                 return 0;
1988
1989         r = journal_file_get_epoch(f, realtime, &goal);
1990         if (r < 0)
1991                 return r;
1992
1993         epoch = FSPRG_GetEpoch(fsprg_state(f));
1994         if (epoch < goal)
1995                 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
1996
1997         for (;;) {
1998                 if (epoch > goal)
1999                         return -ESTALE;
2000                 if (epoch == goal)
2001                         return 0;
2002
2003                 FSPRG_Evolve(fsprg_state(f));
2004                 epoch = FSPRG_GetEpoch(fsprg_state(f));
2005         }
2006 }
2007
2008 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
2009         int r;
2010
2011         assert(f);
2012
2013         if (!f->authenticate)
2014                 return 0;
2015
2016         r = journal_file_need_evolve(f, realtime);
2017         if (r <= 0)
2018                 return 0;
2019
2020         r = journal_file_append_tag(f);
2021         if (r < 0)
2022                 return r;
2023
2024         r = journal_file_evolve(f, realtime);
2025         if (r < 0)
2026                 return r;
2027
2028         r = journal_file_hmac_start(f);
2029         if (r < 0)
2030                 return r;
2031
2032         return 0;
2033 }
2034
2035 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2036         int r;
2037         Object *o;
2038
2039         assert(f);
2040
2041         if (!f->authenticate)
2042                 return 0;
2043
2044         r = journal_file_hmac_start(f);
2045         if (r < 0)
2046                 return r;
2047
2048         r = journal_file_move_to_object(f, type, p, &o);
2049         if (r < 0)
2050                 return r;
2051
2052         gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2053
2054         switch (o->object.type) {
2055
2056         case OBJECT_DATA:
2057                 /* All but: entry_array_offset, n_entries are mutable */
2058                 gcry_md_write(f->hmac, &o->data.hash, offsetof(DataObject, entry_array_offset) - offsetof(DataObject, hash));
2059                 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2060                 break;
2061
2062         case OBJECT_ENTRY:
2063                 /* All */
2064                 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2065                 break;
2066
2067         case OBJECT_FIELD_HASH_TABLE:
2068         case OBJECT_DATA_HASH_TABLE:
2069         case OBJECT_ENTRY_ARRAY:
2070                 /* Nothing: everything is mutable */
2071                 break;
2072
2073         case OBJECT_TAG:
2074                 /* All */
2075                 gcry_md_write(f->hmac, o->tag.tag, le64toh(o->object.size) - offsetof(TagObject, tag));
2076                 break;
2077
2078         default:
2079                 return -EINVAL;
2080         }
2081
2082         return 0;
2083 }
2084
2085 static int journal_file_hmac_put_header(JournalFile *f) {
2086         int r;
2087
2088         assert(f);
2089
2090         if (!f->authenticate)
2091                 return 0;
2092
2093         r = journal_file_hmac_start(f);
2094         if (r < 0)
2095                 return r;
2096
2097         /* All but state+reserved, boot_id, arena_size,
2098          * tail_object_offset, n_objects, n_entries, tail_seqnum,
2099          * head_entry_realtime, tail_entry_realtime,
2100          * tail_entry_monotonic, n_data, n_fields, header_tag */
2101
2102         gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2103         gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2104         gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2105         gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2106         gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2107
2108         return 0;
2109 }
2110
2111 static int journal_file_load_fsprg(JournalFile *f) {
2112         int r, fd = -1;
2113         char *p = NULL;
2114         struct stat st;
2115         FSPRGHeader *m = NULL;
2116         sd_id128_t machine;
2117
2118         assert(f);
2119
2120         if (!f->authenticate)
2121                 return 0;
2122
2123         r = sd_id128_get_machine(&machine);
2124         if (r < 0)
2125                 return r;
2126
2127         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2128                      SD_ID128_FORMAT_VAL(machine)) < 0)
2129                 return -ENOMEM;
2130
2131         fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2132         if (fd < 0) {
2133                 log_error("Failed to open %s: %m", p);
2134                 r = -errno;
2135                 goto finish;
2136         }
2137
2138         if (fstat(fd, &st) < 0) {
2139                 r = -errno;
2140                 goto finish;
2141         }
2142
2143         if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2144                 r = -ENODATA;
2145                 goto finish;
2146         }
2147
2148         m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2149         if (m == MAP_FAILED) {
2150                 m = NULL;
2151                 r = -errno;
2152                 goto finish;
2153         }
2154
2155         if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2156                 r = -EBADMSG;
2157                 goto finish;
2158         }
2159
2160         if (m->incompatible_flags != 0) {
2161                 r = -EPROTONOSUPPORT;
2162                 goto finish;
2163         }
2164
2165         if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2166                 r = -EBADMSG;
2167                 goto finish;
2168         }
2169
2170         if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2171                 r = -EBADMSG;
2172                 goto finish;
2173         }
2174
2175         f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2176         if ((uint64_t) st.st_size < f->fsprg_size) {
2177                 r = -ENODATA;
2178                 goto finish;
2179         }
2180
2181         if (!sd_id128_equal(machine, m->machine_id)) {
2182                 r = -EHOSTDOWN;
2183                 goto finish;
2184         }
2185
2186         if (le64toh(m->fsprg_start_usec) <= 0 ||
2187             le64toh(m->fsprg_interval_usec) <= 0) {
2188                 r = -EBADMSG;
2189                 goto finish;
2190         }
2191
2192         f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2193         if (f->fsprg_header == MAP_FAILED) {
2194                 f->fsprg_header = NULL;
2195                 r = -errno;
2196                 goto finish;
2197         }
2198
2199         r = 0;
2200
2201 finish:
2202         if (m)
2203                 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2204
2205         if (fd >= 0)
2206                 close_nointr_nofail(fd);
2207
2208         free(p);
2209         return r;
2210 }
2211
2212 static int journal_file_setup_hmac(JournalFile *f) {
2213         gcry_error_t e;
2214
2215         if (!f->authenticate)
2216                 return 0;
2217
2218         e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2219         if (e != 0)
2220                 return -ENOTSUP;
2221
2222         return 0;
2223 }
2224
2225 static int journal_file_append_first_tag(JournalFile *f) {
2226         int r;
2227         uint64_t p;
2228
2229         if (!f->authenticate)
2230                 return 0;
2231
2232         log_debug("Calculating first tag...");
2233
2234         r = journal_file_hmac_put_header(f);
2235         if (r < 0)
2236                 return r;
2237
2238         p = le64toh(f->header->field_hash_table_offset);
2239         if (p < offsetof(Object, hash_table.items))
2240                 return -EINVAL;
2241         p -= offsetof(Object, hash_table.items);
2242
2243         r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2244         if (r < 0)
2245                 return r;
2246
2247         p = le64toh(f->header->data_hash_table_offset);
2248         if (p < offsetof(Object, hash_table.items))
2249                 return -EINVAL;
2250         p -= offsetof(Object, hash_table.items);
2251
2252         r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2253         if (r < 0)
2254                 return r;
2255
2256         r = journal_file_append_tag(f);
2257         if (r < 0)
2258                 return r;
2259
2260         return 0;
2261 }
2262
2263 void journal_file_dump(JournalFile *f) {
2264         Object *o;
2265         int r;
2266         uint64_t p;
2267
2268         assert(f);
2269
2270         journal_file_print_header(f);
2271
2272         p = le64toh(f->header->header_size);
2273         while (p != 0) {
2274                 r = journal_file_move_to_object(f, -1, p, &o);
2275                 if (r < 0)
2276                         goto fail;
2277
2278                 switch (o->object.type) {
2279
2280                 case OBJECT_UNUSED:
2281                         printf("Type: OBJECT_UNUSED\n");
2282                         break;
2283
2284                 case OBJECT_DATA:
2285                         printf("Type: OBJECT_DATA\n");
2286                         break;
2287
2288                 case OBJECT_ENTRY:
2289                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2290                                (unsigned long long) le64toh(o->entry.seqnum),
2291                                (unsigned long long) le64toh(o->entry.monotonic),
2292                                (unsigned long long) le64toh(o->entry.realtime));
2293                         break;
2294
2295                 case OBJECT_FIELD_HASH_TABLE:
2296                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2297                         break;
2298
2299                 case OBJECT_DATA_HASH_TABLE:
2300                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2301                         break;
2302
2303                 case OBJECT_ENTRY_ARRAY:
2304                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2305                         break;
2306
2307                 case OBJECT_TAG:
2308                         printf("Type: OBJECT_TAG\n");
2309                         break;
2310                 }
2311
2312                 if (o->object.flags & OBJECT_COMPRESSED)
2313                         printf("Flags: COMPRESSED\n");
2314
2315                 if (p == le64toh(f->header->tail_object_offset))
2316                         p = 0;
2317                 else
2318                         p = p + ALIGN64(le64toh(o->object.size));
2319         }
2320
2321         return;
2322 fail:
2323         log_error("File corrupt");
2324 }
2325
2326 void journal_file_print_header(JournalFile *f) {
2327         char a[33], b[33], c[33];
2328         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2329
2330         assert(f);
2331
2332         printf("File Path: %s\n"
2333                "File ID: %s\n"
2334                "Machine ID: %s\n"
2335                "Boot ID: %s\n"
2336                "Sequential Number ID: %s\n"
2337                "State: %s\n"
2338                "Compatible Flags:%s%s\n"
2339                "Incompatible Flags:%s%s\n"
2340                "Header size: %llu\n"
2341                "Arena size: %llu\n"
2342                "Data Hash Table Size: %llu\n"
2343                "Field Hash Table Size: %llu\n"
2344                "Objects: %llu\n"
2345                "Entry Objects: %llu\n"
2346                "Rotate Suggested: %s\n"
2347                "Head Sequential Number: %llu\n"
2348                "Tail Sequential Number: %llu\n"
2349                "Head Realtime Timestamp: %s\n"
2350                "Tail Realtime Timestamp: %s\n",
2351                f->path,
2352                sd_id128_to_string(f->header->file_id, a),
2353                sd_id128_to_string(f->header->machine_id, b),
2354                sd_id128_to_string(f->header->boot_id, c),
2355                sd_id128_to_string(f->header->seqnum_id, c),
2356                f->header->state == STATE_OFFLINE ? "offline" :
2357                f->header->state == STATE_ONLINE ? "online" :
2358                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
2359                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2360                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
2361                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2362                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
2363                (unsigned long long) le64toh(f->header->header_size),
2364                (unsigned long long) le64toh(f->header->arena_size),
2365                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2366                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2367                (unsigned long long) le64toh(f->header->n_objects),
2368                (unsigned long long) le64toh(f->header->n_entries),
2369                yes_no(journal_file_rotate_suggested(f)),
2370                (unsigned long long) le64toh(f->header->head_seqnum),
2371                (unsigned long long) le64toh(f->header->tail_seqnum),
2372                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2373                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2374
2375         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2376                 printf("Data Objects: %llu\n"
2377                        "Data Hash Table Fill: %.1f%%\n",
2378                        (unsigned long long) le64toh(f->header->n_data),
2379                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2380
2381         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2382                 printf("Field Objects: %llu\n"
2383                        "Field Hash Table Fill: %.1f%%\n",
2384                        (unsigned long long) le64toh(f->header->n_fields),
2385                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2386 }
2387
2388 int journal_file_open(
2389                 const char *fname,
2390                 int flags,
2391                 mode_t mode,
2392                 bool compress,
2393                 bool authenticate,
2394                 JournalMetrics *metrics,
2395                 JournalFile *template,
2396                 JournalFile **ret) {
2397
2398         JournalFile *f;
2399         int r;
2400         bool newly_created = false;
2401
2402         assert(fname);
2403
2404         if ((flags & O_ACCMODE) != O_RDONLY &&
2405             (flags & O_ACCMODE) != O_RDWR)
2406                 return -EINVAL;
2407
2408         if (!endswith(fname, ".journal"))
2409                 return -EINVAL;
2410
2411         f = new0(JournalFile, 1);
2412         if (!f)
2413                 return -ENOMEM;
2414
2415         f->fd = -1;
2416         f->mode = mode;
2417
2418         f->flags = flags;
2419         f->prot = prot_from_flags(flags);
2420         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2421         f->compress = compress;
2422         f->authenticate = authenticate;
2423
2424         f->path = strdup(fname);
2425         if (!f->path) {
2426                 r = -ENOMEM;
2427                 goto fail;
2428         }
2429
2430         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2431         if (f->fd < 0) {
2432                 r = -errno;
2433                 goto fail;
2434         }
2435
2436         if (fstat(f->fd, &f->last_stat) < 0) {
2437                 r = -errno;
2438                 goto fail;
2439         }
2440
2441         if (f->last_stat.st_size == 0 && f->writable) {
2442                 newly_created = true;
2443
2444                 /* Try to load the FSPRG state, and if we can't, then
2445                  * just don't do authentication */
2446                 r = journal_file_load_fsprg(f);
2447                 if (r < 0)
2448                         f->authenticate = false;
2449
2450                 r = journal_file_init_header(f, template);
2451                 if (r < 0)
2452                         goto fail;
2453
2454                 if (fstat(f->fd, &f->last_stat) < 0) {
2455                         r = -errno;
2456                         goto fail;
2457                 }
2458         }
2459
2460         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2461                 r = -EIO;
2462                 goto fail;
2463         }
2464
2465         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2466         if (f->header == MAP_FAILED) {
2467                 f->header = NULL;
2468                 r = -errno;
2469                 goto fail;
2470         }
2471
2472         if (!newly_created) {
2473                 r = journal_file_verify_header(f);
2474                 if (r < 0)
2475                         goto fail;
2476
2477                 r = journal_file_load_fsprg(f);
2478                 if (r < 0)
2479                         goto fail;
2480         }
2481
2482         if (f->writable) {
2483                 if (metrics) {
2484                         journal_default_metrics(metrics, f->fd);
2485                         f->metrics = *metrics;
2486                 } else if (template)
2487                         f->metrics = template->metrics;
2488
2489                 r = journal_file_refresh_header(f);
2490                 if (r < 0)
2491                         goto fail;
2492
2493                 r = journal_file_setup_hmac(f);
2494                 if (r < 0)
2495                         goto fail;
2496         }
2497
2498         if (newly_created) {
2499                 r = journal_file_setup_field_hash_table(f);
2500                 if (r < 0)
2501                         goto fail;
2502
2503                 r = journal_file_setup_data_hash_table(f);
2504                 if (r < 0)
2505                         goto fail;
2506
2507                 r = journal_file_append_first_tag(f);
2508                 if (r < 0)
2509                         goto fail;
2510         }
2511
2512         r = journal_file_map_field_hash_table(f);
2513         if (r < 0)
2514                 goto fail;
2515
2516         r = journal_file_map_data_hash_table(f);
2517         if (r < 0)
2518                 goto fail;
2519
2520         if (ret)
2521                 *ret = f;
2522
2523         return 0;
2524
2525 fail:
2526         journal_file_close(f);
2527
2528         return r;
2529 }
2530
2531 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2532         char *p;
2533         size_t l;
2534         JournalFile *old_file, *new_file = NULL;
2535         int r;
2536
2537         assert(f);
2538         assert(*f);
2539
2540         old_file = *f;
2541
2542         if (!old_file->writable)
2543                 return -EINVAL;
2544
2545         if (!endswith(old_file->path, ".journal"))
2546                 return -EINVAL;
2547
2548         l = strlen(old_file->path);
2549
2550         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2551         if (!p)
2552                 return -ENOMEM;
2553
2554         memcpy(p, old_file->path, l - 8);
2555         p[l-8] = '@';
2556         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2557         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2558                  "-%016llx-%016llx.journal",
2559                  (unsigned long long) le64toh((*f)->header->tail_seqnum),
2560                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2561
2562         r = rename(old_file->path, p);
2563         free(p);
2564
2565         if (r < 0)
2566                 return -errno;
2567
2568         old_file->header->state = STATE_ARCHIVED;
2569
2570         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file, &new_file);
2571         journal_file_close(old_file);
2572
2573         *f = new_file;
2574         return r;
2575 }
2576
2577 int journal_file_open_reliably(
2578                 const char *fname,
2579                 int flags,
2580                 mode_t mode,
2581                 bool compress,
2582                 bool authenticate,
2583                 JournalMetrics *metrics,
2584                 JournalFile *template,
2585                 JournalFile **ret) {
2586
2587         int r;
2588         size_t l;
2589         char *p;
2590
2591         r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
2592         if (r != -EBADMSG && /* corrupted */
2593             r != -ENODATA && /* truncated */
2594             r != -EHOSTDOWN && /* other machine */
2595             r != -EPROTONOSUPPORT && /* incompatible feature */
2596             r != -EBUSY && /* unclean shutdown */
2597             r != -ESHUTDOWN /* already archived */)
2598                 return r;
2599
2600         if ((flags & O_ACCMODE) == O_RDONLY)
2601                 return r;
2602
2603         if (!(flags & O_CREAT))
2604                 return r;
2605
2606         if (!endswith(fname, ".journal"))
2607                 return r;
2608
2609         /* The file is corrupted. Rotate it away and try it again (but only once) */
2610
2611         l = strlen(fname);
2612         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2613                      (int) (l-8), fname,
2614                      (unsigned long long) now(CLOCK_REALTIME),
2615                      random_ull()) < 0)
2616                 return -ENOMEM;
2617
2618         r = rename(fname, p);
2619         free(p);
2620         if (r < 0)
2621                 return -errno;
2622
2623         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2624
2625         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
2626 }
2627
2628 struct vacuum_info {
2629         off_t usage;
2630         char *filename;
2631
2632         uint64_t realtime;
2633         sd_id128_t seqnum_id;
2634         uint64_t seqnum;
2635
2636         bool have_seqnum;
2637 };
2638
2639 static int vacuum_compare(const void *_a, const void *_b) {
2640         const struct vacuum_info *a, *b;
2641
2642         a = _a;
2643         b = _b;
2644
2645         if (a->have_seqnum && b->have_seqnum &&
2646             sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
2647                 if (a->seqnum < b->seqnum)
2648                         return -1;
2649                 else if (a->seqnum > b->seqnum)
2650                         return 1;
2651                 else
2652                         return 0;
2653         }
2654
2655         if (a->realtime < b->realtime)
2656                 return -1;
2657         else if (a->realtime > b->realtime)
2658                 return 1;
2659         else if (a->have_seqnum && b->have_seqnum)
2660                 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
2661         else
2662                 return strcmp(a->filename, b->filename);
2663 }
2664
2665 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2666         DIR *d;
2667         int r = 0;
2668         struct vacuum_info *list = NULL;
2669         unsigned n_list = 0, n_allocated = 0, i;
2670         uint64_t sum = 0;
2671
2672         assert(directory);
2673
2674         if (max_use <= 0)
2675                 return 0;
2676
2677         d = opendir(directory);
2678         if (!d)
2679                 return -errno;
2680
2681         for (;;) {
2682                 int k;
2683                 struct dirent buf, *de;
2684                 size_t q;
2685                 struct stat st;
2686                 char *p;
2687                 unsigned long long seqnum = 0, realtime;
2688                 sd_id128_t seqnum_id;
2689                 bool have_seqnum;
2690
2691                 k = readdir_r(d, &buf, &de);
2692                 if (k != 0) {
2693                         r = -k;
2694                         goto finish;
2695                 }
2696
2697                 if (!de)
2698                         break;
2699
2700                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2701                         continue;
2702
2703                 if (!S_ISREG(st.st_mode))
2704                         continue;
2705
2706                 q = strlen(de->d_name);
2707
2708                 if (endswith(de->d_name, ".journal")) {
2709
2710                         /* Vacuum archived files */
2711
2712                         if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2713                                 continue;
2714
2715                         if (de->d_name[q-8-16-1] != '-' ||
2716                             de->d_name[q-8-16-1-16-1] != '-' ||
2717                             de->d_name[q-8-16-1-16-1-32-1] != '@')
2718                                 continue;
2719
2720                         p = strdup(de->d_name);
2721                         if (!p) {
2722                                 r = -ENOMEM;
2723                                 goto finish;
2724                         }
2725
2726                         de->d_name[q-8-16-1-16-1] = 0;
2727                         if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2728                                 free(p);
2729                                 continue;
2730                         }
2731
2732                         if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2733                                 free(p);
2734                                 continue;
2735                         }
2736
2737                         have_seqnum = true;
2738
2739                 } else if (endswith(de->d_name, ".journal~")) {
2740                         unsigned long long tmp;
2741
2742                         /* Vacuum corrupted files */
2743
2744                         if (q < 1 + 16 + 1 + 16 + 8 + 1)
2745                                 continue;
2746
2747                         if (de->d_name[q-1-8-16-1] != '-' ||
2748                             de->d_name[q-1-8-16-1-16-1] != '@')
2749                                 continue;
2750
2751                         p = strdup(de->d_name);
2752                         if (!p) {
2753                                 r = -ENOMEM;
2754                                 goto finish;
2755                         }
2756
2757                         if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2758                                 free(p);
2759                                 continue;
2760                         }
2761
2762                         have_seqnum = false;
2763                 } else
2764                         continue;
2765
2766                 if (n_list >= n_allocated) {
2767                         struct vacuum_info *j;
2768
2769                         n_allocated = MAX(n_allocated * 2U, 8U);
2770                         j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2771                         if (!j) {
2772                                 free(p);
2773                                 r = -ENOMEM;
2774                                 goto finish;
2775                         }
2776
2777                         list = j;
2778                 }
2779
2780                 list[n_list].filename = p;
2781                 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2782                 list[n_list].seqnum = seqnum;
2783                 list[n_list].realtime = realtime;
2784                 list[n_list].seqnum_id = seqnum_id;
2785                 list[n_list].have_seqnum = have_seqnum;
2786
2787                 sum += list[n_list].usage;
2788
2789                 n_list ++;
2790         }
2791
2792         if (n_list > 0)
2793                 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2794
2795         for(i = 0; i < n_list; i++) {
2796                 struct statvfs ss;
2797
2798                 if (fstatvfs(dirfd(d), &ss) < 0) {
2799                         r = -errno;
2800                         goto finish;
2801                 }
2802
2803                 if (sum <= max_use &&
2804                     (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2805                         break;
2806
2807                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2808                         log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2809                         sum -= list[i].usage;
2810                 } else if (errno != ENOENT)
2811                         log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2812         }
2813
2814 finish:
2815         for (i = 0; i < n_list; i++)
2816                 free(list[i].filename);
2817
2818         free(list);
2819
2820         if (d)
2821                 closedir(d);
2822
2823         return r;
2824 }
2825
2826 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2827         uint64_t i, n;
2828         uint64_t q, xor_hash = 0;
2829         int r;
2830         EntryItem *items;
2831         dual_timestamp ts;
2832
2833         assert(from);
2834         assert(to);
2835         assert(o);
2836         assert(p);
2837
2838         if (!to->writable)
2839                 return -EPERM;
2840
2841         ts.monotonic = le64toh(o->entry.monotonic);
2842         ts.realtime = le64toh(o->entry.realtime);
2843
2844         if (to->tail_entry_monotonic_valid &&
2845             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2846                 return -EINVAL;
2847
2848         n = journal_file_entry_n_items(o);
2849         items = alloca(sizeof(EntryItem) * n);
2850
2851         for (i = 0; i < n; i++) {
2852                 uint64_t l, h;
2853                 le64_t le_hash;
2854                 size_t t;
2855                 void *data;
2856                 Object *u;
2857
2858                 q = le64toh(o->entry.items[i].object_offset);
2859                 le_hash = o->entry.items[i].hash;
2860
2861                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2862                 if (r < 0)
2863                         return r;
2864
2865                 if (le_hash != o->data.hash)
2866                         return -EBADMSG;
2867
2868                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2869                 t = (size_t) l;
2870
2871                 /* We hit the limit on 32bit machines */
2872                 if ((uint64_t) t != l)
2873                         return -E2BIG;
2874
2875                 if (o->object.flags & OBJECT_COMPRESSED) {
2876 #ifdef HAVE_XZ
2877                         uint64_t rsize;
2878
2879                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2880                                 return -EBADMSG;
2881
2882                         data = from->compress_buffer;
2883                         l = rsize;
2884 #else
2885                         return -EPROTONOSUPPORT;
2886 #endif
2887                 } else
2888                         data = o->data.payload;
2889
2890                 r = journal_file_append_data(to, data, l, &u, &h);
2891                 if (r < 0)
2892                         return r;
2893
2894                 xor_hash ^= le64toh(u->data.hash);
2895                 items[i].object_offset = htole64(h);
2896                 items[i].hash = u->data.hash;
2897
2898                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2899                 if (r < 0)
2900                         return r;
2901         }
2902
2903         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2904 }
2905
2906 void journal_default_metrics(JournalMetrics *m, int fd) {
2907         uint64_t fs_size = 0;
2908         struct statvfs ss;
2909         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2910
2911         assert(m);
2912         assert(fd >= 0);
2913
2914         if (fstatvfs(fd, &ss) >= 0)
2915                 fs_size = ss.f_frsize * ss.f_blocks;
2916
2917         if (m->max_use == (uint64_t) -1) {
2918
2919                 if (fs_size > 0) {
2920                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2921
2922                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2923                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2924
2925                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2926                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2927                 } else
2928                         m->max_use = DEFAULT_MAX_USE_LOWER;
2929         } else {
2930                 m->max_use = PAGE_ALIGN(m->max_use);
2931
2932                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2933                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2934         }
2935
2936         if (m->max_size == (uint64_t) -1) {
2937                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2938
2939                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2940                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2941         } else
2942                 m->max_size = PAGE_ALIGN(m->max_size);
2943
2944         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2945                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2946
2947         if (m->max_size*2 > m->max_use)
2948                 m->max_use = m->max_size*2;
2949
2950         if (m->min_size == (uint64_t) -1)
2951                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2952         else {
2953                 m->min_size = PAGE_ALIGN(m->min_size);
2954
2955                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2956                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2957
2958                 if (m->min_size > m->max_size)
2959                         m->max_size = m->min_size;
2960         }
2961
2962         if (m->keep_free == (uint64_t) -1) {
2963
2964                 if (fs_size > 0) {
2965                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2966
2967                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2968                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2969
2970                 } else
2971                         m->keep_free = DEFAULT_KEEP_FREE;
2972         }
2973
2974         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2975                  format_bytes(a, sizeof(a), m->max_use),
2976                  format_bytes(b, sizeof(b), m->max_size),
2977                  format_bytes(c, sizeof(c), m->min_size),
2978                  format_bytes(d, sizeof(d), m->keep_free));
2979 }
2980
2981 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2982         assert(f);
2983         assert(from || to);
2984
2985         if (from) {
2986                 if (f->header->head_entry_realtime == 0)
2987                         return -ENOENT;
2988
2989                 *from = le64toh(f->header->head_entry_realtime);
2990         }
2991
2992         if (to) {
2993                 if (f->header->tail_entry_realtime == 0)
2994                         return -ENOENT;
2995
2996                 *to = le64toh(f->header->tail_entry_realtime);
2997         }
2998
2999         return 1;
3000 }
3001
3002 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
3003         char t[9+32+1] = "_BOOT_ID=";
3004         Object *o;
3005         uint64_t p;
3006         int r;
3007
3008         assert(f);
3009         assert(from || to);
3010
3011         sd_id128_to_string(boot_id, t + 9);
3012
3013         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
3014         if (r <= 0)
3015                 return r;
3016
3017         if (le64toh(o->data.n_entries) <= 0)
3018                 return 0;
3019
3020         if (from) {
3021                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3022                 if (r < 0)
3023                         return r;
3024
3025                 *from = le64toh(o->entry.monotonic);
3026         }
3027
3028         if (to) {
3029                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3030                 if (r < 0)
3031                         return r;
3032
3033                 r = generic_array_get_plus_one(f,
3034                                                le64toh(o->data.entry_offset),
3035                                                le64toh(o->data.entry_array_offset),
3036                                                le64toh(o->data.n_entries)-1,
3037                                                &o, NULL);
3038                 if (r <= 0)
3039                         return r;
3040
3041                 *to = le64toh(o->entry.monotonic);
3042         }
3043
3044         return 1;
3045 }
3046
3047 bool journal_file_rotate_suggested(JournalFile *f) {
3048         assert(f);
3049
3050         /* If we gained new header fields we gained new features,
3051          * hence suggest a rotation */
3052         if (le64toh(f->header->header_size) < sizeof(Header)) {
3053                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
3054                 return true;
3055         }
3056
3057         /* Let's check if the hash tables grew over a certain fill
3058          * level (75%, borrowing this value from Java's hash table
3059          * implementation), and if so suggest a rotation. To calculate
3060          * the fill level we need the n_data field, which only exists
3061          * in newer versions. */
3062
3063         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
3064                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3065                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3066                                   f->path,
3067                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3068                                   (unsigned long long) le64toh(f->header->n_data),
3069                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3070                                   (unsigned long long) (f->last_stat.st_size),
3071                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
3072                         return true;
3073                 }
3074
3075         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
3076                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3077                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3078                                   f->path,
3079                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3080                                   (unsigned long long) le64toh(f->header->n_fields),
3081                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
3082                         return true;
3083                 }
3084
3085         return false;
3086 }