chiark / gitweb /
journal: add all objects we add to HMAC
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "lookup3.h"
33 #include "compress.h"
34 #include "fsprg.h"
35
/* Default sizes, in bytes, of the data and field hash tables
 * (2047 respectively 333 HashItem slots) */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Size a mapping window is extended to when less than this is requested */
#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)             /* 8 MiB */

/* Data payloads of at least this many bytes are candidates for compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format design,
 * hence a header must at least reach up to (but not include) that field */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* Rounds x up to the next multiple of 8 */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)

/* Evaluates to true if the header size recorded in the header h is large
 * enough to fully contain the specified field */
#define JOURNAL_HEADER_CONTAINS(h, field) \
        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
70 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
71 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
72
73 void journal_file_close(JournalFile *f) {
74         int t;
75
76         assert(f);
77
78         /* Write the final tag */
79         if (f->authenticate)
80                 journal_file_append_tag(f);
81
82         /* Sync everything to disk, before we mark the file offline */
83         for (t = 0; t < _WINDOW_MAX; t++)
84                 if (f->windows[t].ptr)
85                         munmap(f->windows[t].ptr, f->windows[t].size);
86
87         if (f->writable && f->fd >= 0)
88                 fdatasync(f->fd);
89
90         if (f->header) {
91                 /* Mark the file offline. Don't override the archived state if it already is set */
92                 if (f->writable && f->header->state == STATE_ONLINE)
93                         f->header->state = STATE_OFFLINE;
94
95                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
96         }
97
98         if (f->fd >= 0)
99                 close_nointr_nofail(f->fd);
100
101         free(f->path);
102
103 #ifdef HAVE_XZ
104         free(f->compress_buffer);
105 #endif
106
107 #ifdef HAVE_GCRYPT
108         if (f->fsprg_header)
109                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
110
111         if (f->hmac)
112                 gcry_md_close(f->hmac);
113 #endif
114
115         free(f);
116 }
117
118 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
119         Header h;
120         ssize_t k;
121         int r;
122
123         assert(f);
124
125         zero(h);
126         memcpy(h.signature, HEADER_SIGNATURE, 8);
127         h.header_size = htole64(ALIGN64(sizeof(h)));
128
129         h.incompatible_flags =
130                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
131
132         h.compatible_flags =
133                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
134
135         r = sd_id128_randomize(&h.file_id);
136         if (r < 0)
137                 return r;
138
139         if (template) {
140                 h.seqnum_id = template->header->seqnum_id;
141                 h.tail_seqnum = template->header->tail_seqnum;
142         } else
143                 h.seqnum_id = h.file_id;
144
145         k = pwrite(f->fd, &h, sizeof(h), 0);
146         if (k < 0)
147                 return -errno;
148
149         if (k != sizeof(h))
150                 return -EIO;
151
152         return 0;
153 }
154
155 static int journal_file_refresh_header(JournalFile *f) {
156         int r;
157         sd_id128_t boot_id;
158
159         assert(f);
160
161         r = sd_id128_get_machine(&f->header->machine_id);
162         if (r < 0)
163                 return r;
164
165         r = sd_id128_get_boot(&boot_id);
166         if (r < 0)
167                 return r;
168
169         if (sd_id128_equal(boot_id, f->header->boot_id))
170                 f->tail_entry_monotonic_valid = true;
171
172         f->header->boot_id = boot_id;
173
174         f->header->state = STATE_ONLINE;
175
176         /* Sync the online state to disk */
177         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
178         fdatasync(f->fd);
179
180         return 0;
181 }
182
183 static int journal_file_verify_header(JournalFile *f) {
184         assert(f);
185
186         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
187                 return -EBADMSG;
188
189         /* In both read and write mode we refuse to open files with
190          * incompatible flags we don't know */
191 #ifdef HAVE_XZ
192         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
193                 return -EPROTONOSUPPORT;
194 #else
195         if (f->header->incompatible_flags != 0)
196                 return -EPROTONOSUPPORT;
197 #endif
198
199         /* When open for writing we refuse to open files with
200          * compatible flags, too */
201         if (f->writable) {
202 #ifdef HAVE_GCRYPT
203                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
204                         return -EPROTONOSUPPORT;
205 #else
206                 if (f->header->compatible_flags != 0)
207                         return -EPROTONOSUPPORT;
208 #endif
209         }
210
211         /* The first addition was n_data, so check that we are at least this large */
212         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
213                 return -EBADMSG;
214
215         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
216                 return -ENODATA;
217
218         if (f->writable) {
219                 uint8_t state;
220                 sd_id128_t machine_id;
221                 int r;
222
223                 r = sd_id128_get_machine(&machine_id);
224                 if (r < 0)
225                         return r;
226
227                 if (!sd_id128_equal(machine_id, f->header->machine_id))
228                         return -EHOSTDOWN;
229
230                 state = f->header->state;
231
232                 if (state == STATE_ONLINE) {
233                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
234                         return -EBUSY;
235                 } else if (state == STATE_ARCHIVED)
236                         return -ESHUTDOWN;
237                 else if (state != STATE_OFFLINE) {
238                         log_debug("Journal file %s has unknown state %u.", f->path, state);
239                         return -EBUSY;
240                 }
241         }
242
243         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
244         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
245
246         return 0;
247 }
248
249 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
250         uint64_t old_size, new_size;
251         int r;
252
253         assert(f);
254
255         /* We assume that this file is not sparse, and we know that
256          * for sure, since we always call posix_fallocate()
257          * ourselves */
258
259         old_size =
260                 le64toh(f->header->header_size) +
261                 le64toh(f->header->arena_size);
262
263         new_size = PAGE_ALIGN(offset + size);
264         if (new_size < le64toh(f->header->header_size))
265                 new_size = le64toh(f->header->header_size);
266
267         if (new_size <= old_size)
268                 return 0;
269
270         if (f->metrics.max_size > 0 &&
271             new_size > f->metrics.max_size)
272                 return -E2BIG;
273
274         if (new_size > f->metrics.min_size &&
275             f->metrics.keep_free > 0) {
276                 struct statvfs svfs;
277
278                 if (fstatvfs(f->fd, &svfs) >= 0) {
279                         uint64_t available;
280
281                         available = svfs.f_bfree * svfs.f_bsize;
282
283                         if (available >= f->metrics.keep_free)
284                                 available -= f->metrics.keep_free;
285                         else
286                                 available = 0;
287
288                         if (new_size - old_size > available)
289                                 return -E2BIG;
290                 }
291         }
292
293         /* Note that the glibc fallocate() fallback is very
294            inefficient, hence we try to minimize the allocation area
295            as we can. */
296         r = posix_fallocate(f->fd, old_size, new_size - old_size);
297         if (r != 0)
298                 return -r;
299
300         if (fstat(f->fd, &f->last_stat) < 0)
301                 return -errno;
302
303         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
304
305         return 0;
306 }
307
308 static int journal_file_map(
309                 JournalFile *f,
310                 uint64_t offset,
311                 uint64_t size,
312                 void **_window,
313                 uint64_t *_woffset,
314                 uint64_t *_wsize,
315                 void **ret) {
316
317         uint64_t woffset, wsize;
318         void *window;
319
320         assert(f);
321         assert(size > 0);
322         assert(ret);
323
324         woffset = offset & ~((uint64_t) page_size() - 1ULL);
325         wsize = size + (offset - woffset);
326         wsize = PAGE_ALIGN(wsize);
327
328         /* Avoid SIGBUS on invalid accesses */
329         if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
330                 return -EADDRNOTAVAIL;
331
332         window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
333         if (window == MAP_FAILED)
334                 return -errno;
335
336         if (_window)
337                 *_window = window;
338
339         if (_woffset)
340                 *_woffset = woffset;
341
342         if (_wsize)
343                 *_wsize = wsize;
344
345         *ret = (uint8_t*) window + (offset - woffset);
346
347         return 0;
348 }
349
350 static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
351         void *p = NULL;
352         uint64_t delta;
353         int r;
354         Window *w;
355
356         assert(f);
357         assert(ret);
358         assert(wt >= 0);
359         assert(wt < _WINDOW_MAX);
360
361         if (offset + size > (uint64_t) f->last_stat.st_size) {
362                 /* Hmm, out of range? Let's refresh the fstat() data
363                  * first, before we trust that check. */
364
365                 if (fstat(f->fd, &f->last_stat) < 0 ||
366                     offset + size > (uint64_t) f->last_stat.st_size)
367                         return -EADDRNOTAVAIL;
368         }
369
370         w = f->windows + wt;
371
372         if (_likely_(w->ptr &&
373                      w->offset <= offset &&
374                      w->offset + w->size >= offset + size)) {
375
376                 *ret = (uint8_t*) w->ptr + (offset - w->offset);
377                 return 0;
378         }
379
380         if (w->ptr) {
381                 if (munmap(w->ptr, w->size) < 0)
382                         return -errno;
383
384                 w->ptr = NULL;
385                 w->size = w->offset = 0;
386         }
387
388         if (size < DEFAULT_WINDOW_SIZE) {
389                 /* If the default window size is larger then what was
390                  * asked for extend the mapping a bit in the hope to
391                  * minimize needed remappings later on. We add half
392                  * the window space before and half behind the
393                  * requested mapping */
394
395                 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
396
397                 if (delta > offset)
398                         delta = offset;
399
400                 offset -= delta;
401                 size = DEFAULT_WINDOW_SIZE;
402         } else
403                 delta = 0;
404
405         if (offset + size > (uint64_t) f->last_stat.st_size)
406                 size = (uint64_t) f->last_stat.st_size - offset;
407
408         if (size <= 0)
409                 return -EADDRNOTAVAIL;
410
411         r = journal_file_map(f,
412                              offset, size,
413                              &w->ptr, &w->offset, &w->size,
414                              &p);
415
416         if (r < 0)
417                 return r;
418
419         *ret = (uint8_t*) p + delta;
420         return 0;
421 }
422
423 static bool verify_hash(Object *o) {
424         uint64_t h1, h2;
425
426         assert(o);
427
428         if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
429                 h1 = le64toh(o->data.hash);
430                 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
431         } else if (o->object.type == OBJECT_FIELD) {
432                 h1 = le64toh(o->field.hash);
433                 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
434         } else
435                 return true;
436
437         return h1 == h2;
438 }
439
440 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
441         int r;
442         void *t;
443         Object *o;
444         uint64_t s;
445
446         assert(f);
447         assert(ret);
448         assert(type < _OBJECT_TYPE_MAX);
449
450         r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
451         if (r < 0)
452                 return r;
453
454         o = (Object*) t;
455         s = le64toh(o->object.size);
456
457         if (s < sizeof(ObjectHeader))
458                 return -EBADMSG;
459
460         if (type >= 0 && o->object.type != type)
461                 return -EBADMSG;
462
463         if (s > sizeof(ObjectHeader)) {
464                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
465                 if (r < 0)
466                         return r;
467
468                 o = (Object*) t;
469         }
470
471         if (!verify_hash(o))
472                 return -EBADMSG;
473
474         *ret = o;
475         return 0;
476 }
477
478 static uint64_t journal_file_seqnum(JournalFile *f, uint64_t *seqnum) {
479         uint64_t r;
480
481         assert(f);
482
483         r = le64toh(f->header->tail_seqnum) + 1;
484
485         if (seqnum) {
486                 /* If an external seqnum counter was passed, we update
487                  * both the local and the external one, and set it to
488                  * the maximum of both */
489
490                 if (*seqnum + 1 > r)
491                         r = *seqnum + 1;
492
493                 *seqnum = r;
494         }
495
496         f->header->tail_seqnum = htole64(r);
497
498         if (f->header->head_seqnum == 0)
499                 f->header->head_seqnum = htole64(r);
500
501         return r;
502 }
503
504 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
505         int r;
506         uint64_t p;
507         Object *tail, *o;
508         void *t;
509
510         assert(f);
511         assert(size >= sizeof(ObjectHeader));
512         assert(offset);
513         assert(ret);
514
515         p = le64toh(f->header->tail_object_offset);
516         if (p == 0)
517                 p = le64toh(f->header->header_size);
518         else {
519                 r = journal_file_move_to_object(f, -1, p, &tail);
520                 if (r < 0)
521                         return r;
522
523                 p += ALIGN64(le64toh(tail->object.size));
524         }
525
526         r = journal_file_allocate(f, p, size);
527         if (r < 0)
528                 return r;
529
530         r = journal_file_move_to(f, type, p, size, &t);
531         if (r < 0)
532                 return r;
533
534         o = (Object*) t;
535
536         zero(o->object);
537         o->object.type = type;
538         o->object.size = htole64(size);
539
540         f->header->tail_object_offset = htole64(p);
541         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
542
543         *ret = o;
544         *offset = p;
545
546         return 0;
547 }
548
549 static int journal_file_setup_data_hash_table(JournalFile *f) {
550         uint64_t s, p;
551         Object *o;
552         int r;
553
554         assert(f);
555
556         /* We estimate that we need 1 hash table entry per 768 of
557            journal file and we want to make sure we never get beyond
558            75% fill level. Calculate the hash table size for the
559            maximum file size based on these metrics. */
560
561         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
562         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
563                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
564
565         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
566
567         r = journal_file_append_object(f,
568                                        OBJECT_DATA_HASH_TABLE,
569                                        offsetof(Object, hash_table.items) + s,
570                                        &o, &p);
571         if (r < 0)
572                 return r;
573
574         memset(o->hash_table.items, 0, s);
575
576         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
577         f->header->data_hash_table_size = htole64(s);
578
579         return 0;
580 }
581
582 static int journal_file_setup_field_hash_table(JournalFile *f) {
583         uint64_t s, p;
584         Object *o;
585         int r;
586
587         assert(f);
588
589         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
590         r = journal_file_append_object(f,
591                                        OBJECT_FIELD_HASH_TABLE,
592                                        offsetof(Object, hash_table.items) + s,
593                                        &o, &p);
594         if (r < 0)
595                 return r;
596
597         memset(o->hash_table.items, 0, s);
598
599         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
600         f->header->field_hash_table_size = htole64(s);
601
602         return 0;
603 }
604
605 static int journal_file_map_data_hash_table(JournalFile *f) {
606         uint64_t s, p;
607         void *t;
608         int r;
609
610         assert(f);
611
612         p = le64toh(f->header->data_hash_table_offset);
613         s = le64toh(f->header->data_hash_table_size);
614
615         r = journal_file_move_to(f,
616                                  WINDOW_DATA_HASH_TABLE,
617                                  p, s,
618                                  &t);
619         if (r < 0)
620                 return r;
621
622         f->data_hash_table = t;
623         return 0;
624 }
625
626 static int journal_file_map_field_hash_table(JournalFile *f) {
627         uint64_t s, p;
628         void *t;
629         int r;
630
631         assert(f);
632
633         p = le64toh(f->header->field_hash_table_offset);
634         s = le64toh(f->header->field_hash_table_size);
635
636         r = journal_file_move_to(f,
637                                  WINDOW_FIELD_HASH_TABLE,
638                                  p, s,
639                                  &t);
640         if (r < 0)
641                 return r;
642
643         f->field_hash_table = t;
644         return 0;
645 }
646
647 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
648         uint64_t p, h;
649         int r;
650
651         assert(f);
652         assert(o);
653         assert(offset > 0);
654         assert(o->object.type == OBJECT_DATA);
655
656         /* This might alter the window we are looking at */
657
658         o->data.next_hash_offset = o->data.next_field_offset = 0;
659         o->data.entry_offset = o->data.entry_array_offset = 0;
660         o->data.n_entries = 0;
661
662         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
663         p = le64toh(f->data_hash_table[h].tail_hash_offset);
664         if (p == 0) {
665                 /* Only entry in the hash table is easy */
666                 f->data_hash_table[h].head_hash_offset = htole64(offset);
667         } else {
668                 /* Move back to the previous data object, to patch in
669                  * pointer */
670
671                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
672                 if (r < 0)
673                         return r;
674
675                 o->data.next_hash_offset = htole64(offset);
676         }
677
678         f->data_hash_table[h].tail_hash_offset = htole64(offset);
679
680         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
681                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
682
683         return 0;
684 }
685
686 int journal_file_find_data_object_with_hash(
687                 JournalFile *f,
688                 const void *data, uint64_t size, uint64_t hash,
689                 Object **ret, uint64_t *offset) {
690
691         uint64_t p, osize, h;
692         int r;
693
694         assert(f);
695         assert(data || size == 0);
696
697         osize = offsetof(Object, data.payload) + size;
698
699         if (f->header->data_hash_table_size == 0)
700                 return -EBADMSG;
701
702         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
703         p = le64toh(f->data_hash_table[h].head_hash_offset);
704
705         while (p > 0) {
706                 Object *o;
707
708                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
709                 if (r < 0)
710                         return r;
711
712                 if (le64toh(o->data.hash) != hash)
713                         goto next;
714
715                 if (o->object.flags & OBJECT_COMPRESSED) {
716 #ifdef HAVE_XZ
717                         uint64_t l, rsize;
718
719                         l = le64toh(o->object.size);
720                         if (l <= offsetof(Object, data.payload))
721                                 return -EBADMSG;
722
723                         l -= offsetof(Object, data.payload);
724
725                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
726                                 return -EBADMSG;
727
728                         if (rsize == size &&
729                             memcmp(f->compress_buffer, data, size) == 0) {
730
731                                 if (ret)
732                                         *ret = o;
733
734                                 if (offset)
735                                         *offset = p;
736
737                                 return 1;
738                         }
739 #else
740                         return -EPROTONOSUPPORT;
741 #endif
742
743                 } else if (le64toh(o->object.size) == osize &&
744                            memcmp(o->data.payload, data, size) == 0) {
745
746                         if (ret)
747                                 *ret = o;
748
749                         if (offset)
750                                 *offset = p;
751
752                         return 1;
753                 }
754
755         next:
756                 p = le64toh(o->data.next_hash_offset);
757         }
758
759         return 0;
760 }
761
762 int journal_file_find_data_object(
763                 JournalFile *f,
764                 const void *data, uint64_t size,
765                 Object **ret, uint64_t *offset) {
766
767         uint64_t hash;
768
769         assert(f);
770         assert(data || size == 0);
771
772         hash = hash64(data, size);
773
774         return journal_file_find_data_object_with_hash(f,
775                                                        data, size, hash,
776                                                        ret, offset);
777 }
778
779 static int journal_file_append_data(
780                 JournalFile *f,
781                 const void *data, uint64_t size,
782                 Object **ret, uint64_t *offset) {
783
784         uint64_t hash, p;
785         uint64_t osize;
786         Object *o;
787         int r;
788         bool compressed = false;
789
790         assert(f);
791         assert(data || size == 0);
792
793         hash = hash64(data, size);
794
795         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
796         if (r < 0)
797                 return r;
798         else if (r > 0) {
799
800                 if (ret)
801                         *ret = o;
802
803                 if (offset)
804                         *offset = p;
805
806                 return 0;
807         }
808
809         osize = offsetof(Object, data.payload) + size;
810         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
811         if (r < 0)
812                 return r;
813
814         o->data.hash = htole64(hash);
815
816 #ifdef HAVE_XZ
817         if (f->compress &&
818             size >= COMPRESSION_SIZE_THRESHOLD) {
819                 uint64_t rsize;
820
821                 compressed = compress_blob(data, size, o->data.payload, &rsize);
822
823                 if (compressed) {
824                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
825                         o->object.flags |= OBJECT_COMPRESSED;
826
827                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
828                 }
829         }
830 #endif
831
832         if (!compressed && size > 0)
833                 memcpy(o->data.payload, data, size);
834
835         r = journal_file_link_data(f, o, p, hash);
836         if (r < 0)
837                 return r;
838
839         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
840         if (r < 0)
841                 return r;
842
843         /* The linking might have altered the window, so let's
844          * refresh our pointer */
845         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
846         if (r < 0)
847                 return r;
848
849         if (ret)
850                 *ret = o;
851
852         if (offset)
853                 *offset = p;
854
855         return 0;
856 }
857
858 uint64_t journal_file_entry_n_items(Object *o) {
859         assert(o);
860         assert(o->object.type == OBJECT_ENTRY);
861
862         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
863 }
864
865 static uint64_t journal_file_entry_array_n_items(Object *o) {
866         assert(o);
867         assert(o->object.type == OBJECT_ENTRY_ARRAY);
868
869         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
870 }
871
872 static int link_entry_into_array(JournalFile *f,
873                                  le64_t *first,
874                                  le64_t *idx,
875                                  uint64_t p) {
876         int r;
877         uint64_t n = 0, ap = 0, q, i, a, hidx;
878         Object *o;
879
880         assert(f);
881         assert(first);
882         assert(idx);
883         assert(p > 0);
884
885         a = le64toh(*first);
886         i = hidx = le64toh(*idx);
887         while (a > 0) {
888
889                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
890                 if (r < 0)
891                         return r;
892
893                 n = journal_file_entry_array_n_items(o);
894                 if (i < n) {
895                         o->entry_array.items[i] = htole64(p);
896                         *idx = htole64(hidx + 1);
897                         return 0;
898                 }
899
900                 i -= n;
901                 ap = a;
902                 a = le64toh(o->entry_array.next_entry_array_offset);
903         }
904
905         if (hidx > n)
906                 n = (hidx+1) * 2;
907         else
908                 n = n * 2;
909
910         if (n < 4)
911                 n = 4;
912
913         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
914                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
915                                        &o, &q);
916         if (r < 0)
917                 return r;
918
919         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
920         if (r < 0)
921                 return r;
922
923         o->entry_array.items[i] = htole64(p);
924
925         if (ap == 0)
926                 *first = htole64(q);
927         else {
928                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
929                 if (r < 0)
930                         return r;
931
932                 o->entry_array.next_entry_array_offset = htole64(q);
933         }
934
935         *idx = htole64(hidx + 1);
936
937         return 0;
938 }
939
940 static int link_entry_into_array_plus_one(JournalFile *f,
941                                           le64_t *extra,
942                                           le64_t *first,
943                                           le64_t *idx,
944                                           uint64_t p) {
945
946         int r;
947
948         assert(f);
949         assert(extra);
950         assert(first);
951         assert(idx);
952         assert(p > 0);
953
954         if (*idx == 0)
955                 *extra = htole64(p);
956         else {
957                 le64_t i;
958
959                 i = htole64(le64toh(*idx) - 1);
960                 r = link_entry_into_array(f, first, &i, p);
961                 if (r < 0)
962                         return r;
963         }
964
965         *idx = htole64(le64toh(*idx) + 1);
966         return 0;
967 }
968
969 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
970         uint64_t p;
971         int r;
972         assert(f);
973         assert(o);
974         assert(offset > 0);
975
976         p = le64toh(o->entry.items[i].object_offset);
977         if (p == 0)
978                 return -EINVAL;
979
980         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
981         if (r < 0)
982                 return r;
983
984         return link_entry_into_array_plus_one(f,
985                                               &o->data.entry_offset,
986                                               &o->data.entry_array_offset,
987                                               &o->data.n_entries,
988                                               offset);
989 }
990
991 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
992         uint64_t n, i;
993         int r;
994
995         assert(f);
996         assert(o);
997         assert(offset > 0);
998         assert(o->object.type == OBJECT_ENTRY);
999
1000         __sync_synchronize();
1001
1002         /* Link up the entry itself */
1003         r = link_entry_into_array(f,
1004                                   &f->header->entry_array_offset,
1005                                   &f->header->n_entries,
1006                                   offset);
1007         if (r < 0)
1008                 return r;
1009
1010         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
1011
1012         if (f->header->head_entry_realtime == 0)
1013                 f->header->head_entry_realtime = o->entry.realtime;
1014
1015         f->header->tail_entry_realtime = o->entry.realtime;
1016         f->header->tail_entry_monotonic = o->entry.monotonic;
1017
1018         f->tail_entry_monotonic_valid = true;
1019
1020         /* Link up the items */
1021         n = journal_file_entry_n_items(o);
1022         for (i = 0; i < n; i++) {
1023                 r = journal_file_link_entry_item(f, o, offset, i);
1024                 if (r < 0)
1025                         return r;
1026         }
1027
1028         return 0;
1029 }
1030
1031 static int journal_file_append_entry_internal(
1032                 JournalFile *f,
1033                 const dual_timestamp *ts,
1034                 uint64_t xor_hash,
1035                 const EntryItem items[], unsigned n_items,
1036                 uint64_t *seqnum,
1037                 Object **ret, uint64_t *offset) {
1038         uint64_t np;
1039         uint64_t osize;
1040         Object *o;
1041         int r;
1042
1043         assert(f);
1044         assert(items || n_items == 0);
1045         assert(ts);
1046
1047         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1048
1049         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1050         if (r < 0)
1051                 return r;
1052
1053         o->entry.seqnum = htole64(journal_file_seqnum(f, seqnum));
1054         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1055         o->entry.realtime = htole64(ts->realtime);
1056         o->entry.monotonic = htole64(ts->monotonic);
1057         o->entry.xor_hash = htole64(xor_hash);
1058         o->entry.boot_id = f->header->boot_id;
1059
1060         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1061         if (r < 0)
1062                 return r;
1063
1064         r = journal_file_link_entry(f, o, np);
1065         if (r < 0)
1066                 return r;
1067
1068         if (ret)
1069                 *ret = o;
1070
1071         if (offset)
1072                 *offset = np;
1073
1074         return 0;
1075 }
1076
1077 void journal_file_post_change(JournalFile *f) {
1078         assert(f);
1079
1080         /* inotify() does not receive IN_MODIFY events from file
1081          * accesses done via mmap(). After each access we hence
1082          * trigger IN_MODIFY by truncating the journal file to its
1083          * current size which triggers IN_MODIFY. */
1084
1085         __sync_synchronize();
1086
1087         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1088                 log_error("Failed to to truncate file to its own size: %m");
1089 }
1090
1091 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1092         unsigned i;
1093         EntryItem *items;
1094         int r;
1095         uint64_t xor_hash = 0;
1096         struct dual_timestamp _ts;
1097
1098         assert(f);
1099         assert(iovec || n_iovec == 0);
1100
1101         if (!f->writable)
1102                 return -EPERM;
1103
1104         if (!ts) {
1105                 dual_timestamp_get(&_ts);
1106                 ts = &_ts;
1107         }
1108
1109         if (f->tail_entry_monotonic_valid &&
1110             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1111                 return -EINVAL;
1112
1113         r = journal_file_maybe_append_tag(f, ts->realtime);
1114         if (r < 0)
1115                 return r;
1116
1117         /* alloca() can't take 0, hence let's allocate at least one */
1118         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1119
1120         for (i = 0; i < n_iovec; i++) {
1121                 uint64_t p;
1122                 Object *o;
1123
1124                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1125                 if (r < 0)
1126                         return r;
1127
1128                 xor_hash ^= le64toh(o->data.hash);
1129                 items[i].object_offset = htole64(p);
1130                 items[i].hash = o->data.hash;
1131         }
1132
1133         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1134
1135         journal_file_post_change(f);
1136
1137         return r;
1138 }
1139
1140 static int generic_array_get(JournalFile *f,
1141                              uint64_t first,
1142                              uint64_t i,
1143                              Object **ret, uint64_t *offset) {
1144
1145         Object *o;
1146         uint64_t p = 0, a;
1147         int r;
1148
1149         assert(f);
1150
1151         a = first;
1152         while (a > 0) {
1153                 uint64_t n;
1154
1155                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1156                 if (r < 0)
1157                         return r;
1158
1159                 n = journal_file_entry_array_n_items(o);
1160                 if (i < n) {
1161                         p = le64toh(o->entry_array.items[i]);
1162                         break;
1163                 }
1164
1165                 i -= n;
1166                 a = le64toh(o->entry_array.next_entry_array_offset);
1167         }
1168
1169         if (a <= 0 || p <= 0)
1170                 return 0;
1171
1172         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1173         if (r < 0)
1174                 return r;
1175
1176         if (ret)
1177                 *ret = o;
1178
1179         if (offset)
1180                 *offset = p;
1181
1182         return 1;
1183 }
1184
1185 static int generic_array_get_plus_one(JournalFile *f,
1186                                       uint64_t extra,
1187                                       uint64_t first,
1188                                       uint64_t i,
1189                                       Object **ret, uint64_t *offset) {
1190
1191         Object *o;
1192
1193         assert(f);
1194
1195         if (i == 0) {
1196                 int r;
1197
1198                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1199                 if (r < 0)
1200                         return r;
1201
1202                 if (ret)
1203                         *ret = o;
1204
1205                 if (offset)
1206                         *offset = extra;
1207
1208                 return 1;
1209         }
1210
1211         return generic_array_get(f, first, i-1, ret, offset);
1212 }
1213
/* Results of the test_object() callbacks used by the bisection code */
enum {
        TEST_FOUND = 0,  /* the tested object matches the needle */
        TEST_LEFT  = 1,  /* the tested object sorts before the needle */
        TEST_RIGHT = 2   /* the tested object sorts after the needle */
};
1219
1220 static int generic_array_bisect(JournalFile *f,
1221                                 uint64_t first,
1222                                 uint64_t n,
1223                                 uint64_t needle,
1224                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1225                                 direction_t direction,
1226                                 Object **ret,
1227                                 uint64_t *offset,
1228                                 uint64_t *idx) {
1229
1230         uint64_t a, p, t = 0, i = 0, last_p = 0;
1231         bool subtract_one = false;
1232         Object *o, *array = NULL;
1233         int r;
1234
1235         assert(f);
1236         assert(test_object);
1237
1238         a = first;
1239         while (a > 0) {
1240                 uint64_t left, right, k, lp;
1241
1242                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1243                 if (r < 0)
1244                         return r;
1245
1246                 k = journal_file_entry_array_n_items(array);
1247                 right = MIN(k, n);
1248                 if (right <= 0)
1249                         return 0;
1250
1251                 i = right - 1;
1252                 lp = p = le64toh(array->entry_array.items[i]);
1253                 if (p <= 0)
1254                         return -EBADMSG;
1255
1256                 r = test_object(f, p, needle);
1257                 if (r < 0)
1258                         return r;
1259
1260                 if (r == TEST_FOUND)
1261                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1262
1263                 if (r == TEST_RIGHT) {
1264                         left = 0;
1265                         right -= 1;
1266                         for (;;) {
1267                                 if (left == right) {
1268                                         if (direction == DIRECTION_UP)
1269                                                 subtract_one = true;
1270
1271                                         i = left;
1272                                         goto found;
1273                                 }
1274
1275                                 assert(left < right);
1276
1277                                 i = (left + right) / 2;
1278                                 p = le64toh(array->entry_array.items[i]);
1279                                 if (p <= 0)
1280                                         return -EBADMSG;
1281
1282                                 r = test_object(f, p, needle);
1283                                 if (r < 0)
1284                                         return r;
1285
1286                                 if (r == TEST_FOUND)
1287                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1288
1289                                 if (r == TEST_RIGHT)
1290                                         right = i;
1291                                 else
1292                                         left = i + 1;
1293                         }
1294                 }
1295
1296                 if (k > n) {
1297                         if (direction == DIRECTION_UP) {
1298                                 i = n;
1299                                 subtract_one = true;
1300                                 goto found;
1301                         }
1302
1303                         return 0;
1304                 }
1305
1306                 last_p = lp;
1307
1308                 n -= k;
1309                 t += k;
1310                 a = le64toh(array->entry_array.next_entry_array_offset);
1311         }
1312
1313         return 0;
1314
1315 found:
1316         if (subtract_one && t == 0 && i == 0)
1317                 return 0;
1318
1319         if (subtract_one && i == 0)
1320                 p = last_p;
1321         else if (subtract_one)
1322                 p = le64toh(array->entry_array.items[i-1]);
1323         else
1324                 p = le64toh(array->entry_array.items[i]);
1325
1326         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1327         if (r < 0)
1328                 return r;
1329
1330         if (ret)
1331                 *ret = o;
1332
1333         if (offset)
1334                 *offset = p;
1335
1336         if (idx)
1337                 *idx = t + i + (subtract_one ? -1 : 0);
1338
1339         return 1;
1340 }
1341
1342 static int generic_array_bisect_plus_one(JournalFile *f,
1343                                          uint64_t extra,
1344                                          uint64_t first,
1345                                          uint64_t n,
1346                                          uint64_t needle,
1347                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1348                                          direction_t direction,
1349                                          Object **ret,
1350                                          uint64_t *offset,
1351                                          uint64_t *idx) {
1352
1353         int r;
1354         bool step_back = false;
1355         Object *o;
1356
1357         assert(f);
1358         assert(test_object);
1359
1360         if (n <= 0)
1361                 return 0;
1362
1363         /* This bisects the array in object 'first', but first checks
1364          * an extra  */
1365         r = test_object(f, extra, needle);
1366         if (r < 0)
1367                 return r;
1368
1369         if (r == TEST_FOUND)
1370                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1371
1372         /* if we are looking with DIRECTION_UP then we need to first
1373            see if in the actual array there is a matching entry, and
1374            return the last one of that. But if there isn't any we need
1375            to return this one. Hence remember this, and return it
1376            below. */
1377         if (r == TEST_LEFT)
1378                 step_back = direction == DIRECTION_UP;
1379
1380         if (r == TEST_RIGHT) {
1381                 if (direction == DIRECTION_DOWN)
1382                         goto found;
1383                 else
1384                         return 0;
1385         }
1386
1387         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1388
1389         if (r == 0 && step_back)
1390                 goto found;
1391
1392         if (r > 0 && idx)
1393                 (*idx) ++;
1394
1395         return r;
1396
1397 found:
1398         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1399         if (r < 0)
1400                 return r;
1401
1402         if (ret)
1403                 *ret = o;
1404
1405         if (offset)
1406                 *offset = extra;
1407
1408         if (idx)
1409                 *idx = 0;
1410
1411         return 1;
1412 }
1413
1414 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1415         assert(f);
1416         assert(p > 0);
1417
1418         if (p == needle)
1419                 return TEST_FOUND;
1420         else if (p < needle)
1421                 return TEST_LEFT;
1422         else
1423                 return TEST_RIGHT;
1424 }
1425
1426 int journal_file_move_to_entry_by_offset(
1427                 JournalFile *f,
1428                 uint64_t p,
1429                 direction_t direction,
1430                 Object **ret,
1431                 uint64_t *offset) {
1432
1433         return generic_array_bisect(f,
1434                                     le64toh(f->header->entry_array_offset),
1435                                     le64toh(f->header->n_entries),
1436                                     p,
1437                                     test_object_offset,
1438                                     direction,
1439                                     ret, offset, NULL);
1440 }
1441
1442
1443 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1444         Object *o;
1445         int r;
1446
1447         assert(f);
1448         assert(p > 0);
1449
1450         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1451         if (r < 0)
1452                 return r;
1453
1454         if (le64toh(o->entry.seqnum) == needle)
1455                 return TEST_FOUND;
1456         else if (le64toh(o->entry.seqnum) < needle)
1457                 return TEST_LEFT;
1458         else
1459                 return TEST_RIGHT;
1460 }
1461
1462 int journal_file_move_to_entry_by_seqnum(
1463                 JournalFile *f,
1464                 uint64_t seqnum,
1465                 direction_t direction,
1466                 Object **ret,
1467                 uint64_t *offset) {
1468
1469         return generic_array_bisect(f,
1470                                     le64toh(f->header->entry_array_offset),
1471                                     le64toh(f->header->n_entries),
1472                                     seqnum,
1473                                     test_object_seqnum,
1474                                     direction,
1475                                     ret, offset, NULL);
1476 }
1477
1478 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1479         Object *o;
1480         int r;
1481
1482         assert(f);
1483         assert(p > 0);
1484
1485         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1486         if (r < 0)
1487                 return r;
1488
1489         if (le64toh(o->entry.realtime) == needle)
1490                 return TEST_FOUND;
1491         else if (le64toh(o->entry.realtime) < needle)
1492                 return TEST_LEFT;
1493         else
1494                 return TEST_RIGHT;
1495 }
1496
1497 int journal_file_move_to_entry_by_realtime(
1498                 JournalFile *f,
1499                 uint64_t realtime,
1500                 direction_t direction,
1501                 Object **ret,
1502                 uint64_t *offset) {
1503
1504         return generic_array_bisect(f,
1505                                     le64toh(f->header->entry_array_offset),
1506                                     le64toh(f->header->n_entries),
1507                                     realtime,
1508                                     test_object_realtime,
1509                                     direction,
1510                                     ret, offset, NULL);
1511 }
1512
1513 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1514         Object *o;
1515         int r;
1516
1517         assert(f);
1518         assert(p > 0);
1519
1520         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1521         if (r < 0)
1522                 return r;
1523
1524         if (le64toh(o->entry.monotonic) == needle)
1525                 return TEST_FOUND;
1526         else if (le64toh(o->entry.monotonic) < needle)
1527                 return TEST_LEFT;
1528         else
1529                 return TEST_RIGHT;
1530 }
1531
1532 int journal_file_move_to_entry_by_monotonic(
1533                 JournalFile *f,
1534                 sd_id128_t boot_id,
1535                 uint64_t monotonic,
1536                 direction_t direction,
1537                 Object **ret,
1538                 uint64_t *offset) {
1539
1540         char t[9+32+1] = "_BOOT_ID=";
1541         Object *o;
1542         int r;
1543
1544         assert(f);
1545
1546         sd_id128_to_string(boot_id, t + 9);
1547         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1548         if (r < 0)
1549                 return r;
1550         if (r == 0)
1551                 return -ENOENT;
1552
1553         return generic_array_bisect_plus_one(f,
1554                                              le64toh(o->data.entry_offset),
1555                                              le64toh(o->data.entry_array_offset),
1556                                              le64toh(o->data.n_entries),
1557                                              monotonic,
1558                                              test_object_monotonic,
1559                                              direction,
1560                                              ret, offset, NULL);
1561 }
1562
1563 int journal_file_next_entry(
1564                 JournalFile *f,
1565                 Object *o, uint64_t p,
1566                 direction_t direction,
1567                 Object **ret, uint64_t *offset) {
1568
1569         uint64_t i, n;
1570         int r;
1571
1572         assert(f);
1573         assert(p > 0 || !o);
1574
1575         n = le64toh(f->header->n_entries);
1576         if (n <= 0)
1577                 return 0;
1578
1579         if (!o)
1580                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1581         else {
1582                 if (o->object.type != OBJECT_ENTRY)
1583                         return -EINVAL;
1584
1585                 r = generic_array_bisect(f,
1586                                          le64toh(f->header->entry_array_offset),
1587                                          le64toh(f->header->n_entries),
1588                                          p,
1589                                          test_object_offset,
1590                                          DIRECTION_DOWN,
1591                                          NULL, NULL,
1592                                          &i);
1593                 if (r <= 0)
1594                         return r;
1595
1596                 if (direction == DIRECTION_DOWN) {
1597                         if (i >= n - 1)
1598                                 return 0;
1599
1600                         i++;
1601                 } else {
1602                         if (i <= 0)
1603                                 return 0;
1604
1605                         i--;
1606                 }
1607         }
1608
1609         /* And jump to it */
1610         return generic_array_get(f,
1611                                  le64toh(f->header->entry_array_offset),
1612                                  i,
1613                                  ret, offset);
1614 }
1615
1616 int journal_file_skip_entry(
1617                 JournalFile *f,
1618                 Object *o, uint64_t p,
1619                 int64_t skip,
1620                 Object **ret, uint64_t *offset) {
1621
1622         uint64_t i, n;
1623         int r;
1624
1625         assert(f);
1626         assert(o);
1627         assert(p > 0);
1628
1629         if (o->object.type != OBJECT_ENTRY)
1630                 return -EINVAL;
1631
1632         r = generic_array_bisect(f,
1633                                  le64toh(f->header->entry_array_offset),
1634                                  le64toh(f->header->n_entries),
1635                                  p,
1636                                  test_object_offset,
1637                                  DIRECTION_DOWN,
1638                                  NULL, NULL,
1639                                  &i);
1640         if (r <= 0)
1641                 return r;
1642
1643         /* Calculate new index */
1644         if (skip < 0) {
1645                 if ((uint64_t) -skip >= i)
1646                         i = 0;
1647                 else
1648                         i = i - (uint64_t) -skip;
1649         } else
1650                 i  += (uint64_t) skip;
1651
1652         n = le64toh(f->header->n_entries);
1653         if (n <= 0)
1654                 return -EBADMSG;
1655
1656         if (i >= n)
1657                 i = n-1;
1658
1659         return generic_array_get(f,
1660                                  le64toh(f->header->entry_array_offset),
1661                                  i,
1662                                  ret, offset);
1663 }
1664
1665 int journal_file_next_entry_for_data(
1666                 JournalFile *f,
1667                 Object *o, uint64_t p,
1668                 uint64_t data_offset,
1669                 direction_t direction,
1670                 Object **ret, uint64_t *offset) {
1671
1672         uint64_t n, i;
1673         int r;
1674         Object *d;
1675
1676         assert(f);
1677         assert(p > 0 || !o);
1678
1679         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1680         if (r < 0)
1681                 return r;
1682
1683         n = le64toh(d->data.n_entries);
1684         if (n <= 0)
1685                 return n;
1686
1687         if (!o)
1688                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1689         else {
1690                 if (o->object.type != OBJECT_ENTRY)
1691                         return -EINVAL;
1692
1693                 r = generic_array_bisect_plus_one(f,
1694                                                   le64toh(d->data.entry_offset),
1695                                                   le64toh(d->data.entry_array_offset),
1696                                                   le64toh(d->data.n_entries),
1697                                                   p,
1698                                                   test_object_offset,
1699                                                   DIRECTION_DOWN,
1700                                                   NULL, NULL,
1701                                                   &i);
1702
1703                 if (r <= 0)
1704                         return r;
1705
1706                 if (direction == DIRECTION_DOWN) {
1707                         if (i >= n - 1)
1708                                 return 0;
1709
1710                         i++;
1711                 } else {
1712                         if (i <= 0)
1713                                 return 0;
1714
1715                         i--;
1716                 }
1717
1718         }
1719
1720         return generic_array_get_plus_one(f,
1721                                           le64toh(d->data.entry_offset),
1722                                           le64toh(d->data.entry_array_offset),
1723                                           i,
1724                                           ret, offset);
1725 }
1726
1727 int journal_file_move_to_entry_by_offset_for_data(
1728                 JournalFile *f,
1729                 uint64_t data_offset,
1730                 uint64_t p,
1731                 direction_t direction,
1732                 Object **ret, uint64_t *offset) {
1733
1734         int r;
1735         Object *d;
1736
1737         assert(f);
1738
1739         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1740         if (r < 0)
1741                 return r;
1742
1743         return generic_array_bisect_plus_one(f,
1744                                              le64toh(d->data.entry_offset),
1745                                              le64toh(d->data.entry_array_offset),
1746                                              le64toh(d->data.n_entries),
1747                                              p,
1748                                              test_object_offset,
1749                                              direction,
1750                                              ret, offset, NULL);
1751 }
1752
1753 int journal_file_move_to_entry_by_monotonic_for_data(
1754                 JournalFile *f,
1755                 uint64_t data_offset,
1756                 sd_id128_t boot_id,
1757                 uint64_t monotonic,
1758                 direction_t direction,
1759                 Object **ret, uint64_t *offset) {
1760
1761         char t[9+32+1] = "_BOOT_ID=";
1762         Object *o, *d;
1763         int r;
1764         uint64_t b, z;
1765
1766         assert(f);
1767
1768         /* First, seek by time */
1769         sd_id128_to_string(boot_id, t + 9);
1770         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1771         if (r < 0)
1772                 return r;
1773         if (r == 0)
1774                 return -ENOENT;
1775
1776         r = generic_array_bisect_plus_one(f,
1777                                           le64toh(o->data.entry_offset),
1778                                           le64toh(o->data.entry_array_offset),
1779                                           le64toh(o->data.n_entries),
1780                                           monotonic,
1781                                           test_object_monotonic,
1782                                           direction,
1783                                           NULL, &z, NULL);
1784         if (r <= 0)
1785                 return r;
1786
1787         /* And now, continue seeking until we find an entry that
1788          * exists in both bisection arrays */
1789
1790         for (;;) {
1791                 Object *qo;
1792                 uint64_t p, q;
1793
1794                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1795                 if (r < 0)
1796                         return r;
1797
1798                 r = generic_array_bisect_plus_one(f,
1799                                                   le64toh(d->data.entry_offset),
1800                                                   le64toh(d->data.entry_array_offset),
1801                                                   le64toh(d->data.n_entries),
1802                                                   z,
1803                                                   test_object_offset,
1804                                                   direction,
1805                                                   NULL, &p, NULL);
1806                 if (r <= 0)
1807                         return r;
1808
1809                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1810                 if (r < 0)
1811                         return r;
1812
1813                 r = generic_array_bisect_plus_one(f,
1814                                                   le64toh(o->data.entry_offset),
1815                                                   le64toh(o->data.entry_array_offset),
1816                                                   le64toh(o->data.n_entries),
1817                                                   p,
1818                                                   test_object_offset,
1819                                                   direction,
1820                                                   &qo, &q, NULL);
1821
1822                 if (r <= 0)
1823                         return r;
1824
1825                 if (p == q) {
1826                         if (ret)
1827                                 *ret = qo;
1828                         if (offset)
1829                                 *offset = q;
1830
1831                         return 1;
1832                 }
1833
1834                 z = q;
1835         }
1836
1837         return 0;
1838 }
1839
1840 int journal_file_move_to_entry_by_seqnum_for_data(
1841                 JournalFile *f,
1842                 uint64_t data_offset,
1843                 uint64_t seqnum,
1844                 direction_t direction,
1845                 Object **ret, uint64_t *offset) {
1846
1847         Object *d;
1848         int r;
1849
1850         assert(f);
1851
1852         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1853         if (r < 0)
1854                 return r;
1855
1856         return generic_array_bisect_plus_one(f,
1857                                              le64toh(d->data.entry_offset),
1858                                              le64toh(d->data.entry_array_offset),
1859                                              le64toh(d->data.n_entries),
1860                                              seqnum,
1861                                              test_object_seqnum,
1862                                              direction,
1863                                              ret, offset, NULL);
1864 }
1865
1866 int journal_file_move_to_entry_by_realtime_for_data(
1867                 JournalFile *f,
1868                 uint64_t data_offset,
1869                 uint64_t realtime,
1870                 direction_t direction,
1871                 Object **ret, uint64_t *offset) {
1872
1873         Object *d;
1874         int r;
1875
1876         assert(f);
1877
1878         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1879         if (r < 0)
1880                 return r;
1881
1882         return generic_array_bisect_plus_one(f,
1883                                              le64toh(d->data.entry_offset),
1884                                              le64toh(d->data.entry_array_offset),
1885                                              le64toh(d->data.n_entries),
1886                                              realtime,
1887                                              test_object_realtime,
1888                                              direction,
1889                                              ret, offset, NULL);
1890 }
1891
1892 static void *fsprg_state(JournalFile *f) {
1893         uint64_t a, b;
1894         assert(f);
1895
1896         if (!f->authenticate)
1897                 return NULL;
1898
1899         a = le64toh(f->fsprg_header->header_size);
1900         b = le64toh(f->fsprg_header->state_size);
1901
1902         if (a + b > f->fsprg_size)
1903                 return NULL;
1904
1905         return (uint8_t*) f->fsprg_header + a;
1906 }
1907
1908 int journal_file_append_tag(JournalFile *f) {
1909         Object *o;
1910         uint64_t p;
1911         int r;
1912
1913         assert(f);
1914
1915         if (!f->authenticate)
1916                 return 0;
1917
1918         if (!f->hmac_running)
1919                 return 0;
1920
1921         log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1922
1923         assert(f->hmac);
1924
1925         r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1926         if (r < 0)
1927                 return r;
1928
1929         /* Get the HMAC tag and store it in the object */
1930         memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1931         f->hmac_running = false;
1932
1933         return 0;
1934 }
1935
1936 static int journal_file_hmac_start(JournalFile *f) {
1937         uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1938
1939         assert(f);
1940
1941         if (!f->authenticate)
1942                 return 0;
1943
1944         if (f->hmac_running)
1945                 return 0;
1946
1947         /* Prepare HMAC for next cycle */
1948         gcry_md_reset(f->hmac);
1949         FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1950         gcry_md_setkey(f->hmac, key, sizeof(key));
1951
1952         f->hmac_running = true;
1953
1954         return 0;
1955 }
1956
1957 static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1958         uint64_t t;
1959
1960         assert(f);
1961         assert(epoch);
1962         assert(f->authenticate);
1963
1964         if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1965             le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1966                 return -ENOTSUP;
1967
1968         if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1969                 return -ESTALE;
1970
1971         t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1972         t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1973
1974         *epoch = t;
1975         return 0;
1976 }
1977
1978 static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1979         uint64_t goal, epoch;
1980         int r;
1981         assert(f);
1982
1983         if (!f->authenticate)
1984                 return 0;
1985
1986         r = journal_file_get_epoch(f, realtime, &goal);
1987         if (r < 0)
1988                 return r;
1989
1990         epoch = FSPRG_GetEpoch(fsprg_state(f));
1991         if (epoch > goal)
1992                 return -ESTALE;
1993
1994         return epoch != goal;
1995 }
1996
1997 static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1998         uint64_t goal, epoch;
1999         int r;
2000
2001         assert(f);
2002
2003         if (!f->authenticate)
2004                 return 0;
2005
2006         r = journal_file_get_epoch(f, realtime, &goal);
2007         if (r < 0)
2008                 return r;
2009
2010         epoch = FSPRG_GetEpoch(fsprg_state(f));
2011         if (epoch < goal)
2012                 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
2013
2014         for (;;) {
2015                 if (epoch > goal)
2016                         return -ESTALE;
2017                 if (epoch == goal)
2018                         return 0;
2019
2020                 FSPRG_Evolve(fsprg_state(f));
2021                 epoch = FSPRG_GetEpoch(fsprg_state(f));
2022         }
2023 }
2024
2025 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
2026         int r;
2027
2028         assert(f);
2029
2030         if (!f->authenticate)
2031                 return 0;
2032
2033         r = journal_file_need_evolve(f, realtime);
2034         if (r <= 0)
2035                 return 0;
2036
2037         r = journal_file_append_tag(f);
2038         if (r < 0)
2039                 return r;
2040
2041         r = journal_file_evolve(f, realtime);
2042         if (r < 0)
2043                 return r;
2044
2045         r = journal_file_hmac_start(f);
2046         if (r < 0)
2047                 return r;
2048
2049         return 0;
2050 }
2051
2052 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2053         int r;
2054         Object *o;
2055
2056         assert(f);
2057
2058         if (!f->authenticate)
2059                 return 0;
2060
2061         r = journal_file_hmac_start(f);
2062         if (r < 0)
2063                 return r;
2064
2065         r = journal_file_move_to_object(f, type, p, &o);
2066         if (r < 0)
2067                 return r;
2068
2069         gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2070
2071         switch (o->object.type) {
2072
2073         case OBJECT_DATA:
2074                 /* All but: entry_array_offset, n_entries are mutable */
2075                 gcry_md_write(f->hmac, &o->data.hash, offsetof(DataObject, entry_array_offset) - offsetof(DataObject, hash));
2076                 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2077                 break;
2078
2079         case OBJECT_ENTRY:
2080                 /* All */
2081                 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2082                 break;
2083
2084         case OBJECT_FIELD_HASH_TABLE:
2085         case OBJECT_DATA_HASH_TABLE:
2086         case OBJECT_ENTRY_ARRAY:
2087                 /* Nothing: everything is mutable */
2088                 break;
2089
2090         case OBJECT_TAG:
2091                 /* All */
2092                 gcry_md_write(f->hmac, o->tag.tag, le64toh(o->object.size) - offsetof(TagObject, tag));
2093                 break;
2094
2095         default:
2096                 return -EINVAL;
2097         }
2098
2099         return 0;
2100 }
2101
2102 static int journal_file_hmac_put_header(JournalFile *f) {
2103         int r;
2104
2105         assert(f);
2106
2107         if (!f->authenticate)
2108                 return 0;
2109
2110         r = journal_file_hmac_start(f);
2111         if (r < 0)
2112                 return r;
2113
2114         /* All but state+reserved, boot_id, arena_size,
2115          * tail_object_offset, n_objects, n_entries, tail_seqnum,
2116          * head_entry_realtime, tail_entry_realtime,
2117          * tail_entry_monotonic, n_data, n_fields, header_tag */
2118
2119         gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2120         gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2121         gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2122         gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2123         gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2124
2125         return 0;
2126 }
2127
2128 static int journal_file_load_fsprg(JournalFile *f) {
2129         int r, fd = -1;
2130         char *p = NULL;
2131         struct stat st;
2132         FSPRGHeader *m = NULL;
2133         sd_id128_t machine;
2134
2135         assert(f);
2136
2137         if (!f->authenticate)
2138                 return 0;
2139
2140         r = sd_id128_get_machine(&machine);
2141         if (r < 0)
2142                 return r;
2143
2144         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2145                      SD_ID128_FORMAT_VAL(machine)) < 0)
2146                 return -ENOMEM;
2147
2148         fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2149         if (fd < 0) {
2150                 log_error("Failed to open %s: %m", p);
2151                 r = -errno;
2152                 goto finish;
2153         }
2154
2155         if (fstat(fd, &st) < 0) {
2156                 r = -errno;
2157                 goto finish;
2158         }
2159
2160         if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2161                 r = -ENODATA;
2162                 goto finish;
2163         }
2164
2165         m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2166         if (m == MAP_FAILED) {
2167                 m = NULL;
2168                 r = -errno;
2169                 goto finish;
2170         }
2171
2172         if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2173                 r = -EBADMSG;
2174                 goto finish;
2175         }
2176
2177         if (m->incompatible_flags != 0) {
2178                 r = -EPROTONOSUPPORT;
2179                 goto finish;
2180         }
2181
2182         if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2183                 r = -EBADMSG;
2184                 goto finish;
2185         }
2186
2187         if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2188                 r = -EBADMSG;
2189                 goto finish;
2190         }
2191
2192         f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2193         if ((uint64_t) st.st_size < f->fsprg_size) {
2194                 r = -ENODATA;
2195                 goto finish;
2196         }
2197
2198         if (!sd_id128_equal(machine, m->machine_id)) {
2199                 r = -EHOSTDOWN;
2200                 goto finish;
2201         }
2202
2203         if (le64toh(m->fsprg_start_usec) <= 0 ||
2204             le64toh(m->fsprg_interval_usec) <= 0) {
2205                 r = -EBADMSG;
2206                 goto finish;
2207         }
2208
2209         f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2210         if (f->fsprg_header == MAP_FAILED) {
2211                 f->fsprg_header = NULL;
2212                 r = -errno;
2213                 goto finish;
2214         }
2215
2216         r = 0;
2217
2218 finish:
2219         if (m)
2220                 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2221
2222         if (fd >= 0)
2223                 close_nointr_nofail(fd);
2224
2225         free(p);
2226         return r;
2227 }
2228
2229 static int journal_file_setup_hmac(JournalFile *f) {
2230         gcry_error_t e;
2231
2232         if (!f->authenticate)
2233                 return 0;
2234
2235         e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2236         if (e != 0)
2237                 return -ENOTSUP;
2238
2239         return 0;
2240 }
2241
2242 static int journal_file_append_first_tag(JournalFile *f) {
2243         int r;
2244         uint64_t p;
2245
2246         if (!f->authenticate)
2247                 return 0;
2248
2249         log_debug("Calculating first tag...");
2250
2251         r = journal_file_hmac_put_header(f);
2252         if (r < 0)
2253                 return r;
2254
2255         p = le64toh(f->header->field_hash_table_offset);
2256         if (p < offsetof(Object, hash_table.items))
2257                 return -EINVAL;
2258         p -= offsetof(Object, hash_table.items);
2259
2260         r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2261         if (r < 0)
2262                 return r;
2263
2264         p = le64toh(f->header->data_hash_table_offset);
2265         if (p < offsetof(Object, hash_table.items))
2266                 return -EINVAL;
2267         p -= offsetof(Object, hash_table.items);
2268
2269         r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2270         if (r < 0)
2271                 return r;
2272
2273         r = journal_file_append_tag(f);
2274         if (r < 0)
2275                 return r;
2276
2277         return 0;
2278 }
2279
2280 void journal_file_dump(JournalFile *f) {
2281         Object *o;
2282         int r;
2283         uint64_t p;
2284
2285         assert(f);
2286
2287         journal_file_print_header(f);
2288
2289         p = le64toh(f->header->header_size);
2290         while (p != 0) {
2291                 r = journal_file_move_to_object(f, -1, p, &o);
2292                 if (r < 0)
2293                         goto fail;
2294
2295                 switch (o->object.type) {
2296
2297                 case OBJECT_UNUSED:
2298                         printf("Type: OBJECT_UNUSED\n");
2299                         break;
2300
2301                 case OBJECT_DATA:
2302                         printf("Type: OBJECT_DATA\n");
2303                         break;
2304
2305                 case OBJECT_ENTRY:
2306                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2307                                (unsigned long long) le64toh(o->entry.seqnum),
2308                                (unsigned long long) le64toh(o->entry.monotonic),
2309                                (unsigned long long) le64toh(o->entry.realtime));
2310                         break;
2311
2312                 case OBJECT_FIELD_HASH_TABLE:
2313                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2314                         break;
2315
2316                 case OBJECT_DATA_HASH_TABLE:
2317                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2318                         break;
2319
2320                 case OBJECT_ENTRY_ARRAY:
2321                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2322                         break;
2323
2324                 case OBJECT_TAG:
2325                         printf("Type: OBJECT_TAG\n");
2326                         break;
2327                 }
2328
2329                 if (o->object.flags & OBJECT_COMPRESSED)
2330                         printf("Flags: COMPRESSED\n");
2331
2332                 if (p == le64toh(f->header->tail_object_offset))
2333                         p = 0;
2334                 else
2335                         p = p + ALIGN64(le64toh(o->object.size));
2336         }
2337
2338         return;
2339 fail:
2340         log_error("File corrupt");
2341 }
2342
2343 void journal_file_print_header(JournalFile *f) {
2344         char a[33], b[33], c[33];
2345         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2346
2347         assert(f);
2348
2349         printf("File Path: %s\n"
2350                "File ID: %s\n"
2351                "Machine ID: %s\n"
2352                "Boot ID: %s\n"
2353                "Sequential Number ID: %s\n"
2354                "State: %s\n"
2355                "Compatible Flags:%s%s\n"
2356                "Incompatible Flags:%s%s\n"
2357                "Header size: %llu\n"
2358                "Arena size: %llu\n"
2359                "Data Hash Table Size: %llu\n"
2360                "Field Hash Table Size: %llu\n"
2361                "Objects: %llu\n"
2362                "Entry Objects: %llu\n"
2363                "Rotate Suggested: %s\n"
2364                "Head Sequential Number: %llu\n"
2365                "Tail Sequential Number: %llu\n"
2366                "Head Realtime Timestamp: %s\n"
2367                "Tail Realtime Timestamp: %s\n",
2368                f->path,
2369                sd_id128_to_string(f->header->file_id, a),
2370                sd_id128_to_string(f->header->machine_id, b),
2371                sd_id128_to_string(f->header->boot_id, c),
2372                sd_id128_to_string(f->header->seqnum_id, c),
2373                f->header->state == STATE_OFFLINE ? "offline" :
2374                f->header->state == STATE_ONLINE ? "online" :
2375                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
2376                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2377                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
2378                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2379                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
2380                (unsigned long long) le64toh(f->header->header_size),
2381                (unsigned long long) le64toh(f->header->arena_size),
2382                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2383                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2384                (unsigned long long) le64toh(f->header->n_objects),
2385                (unsigned long long) le64toh(f->header->n_entries),
2386                yes_no(journal_file_rotate_suggested(f)),
2387                (unsigned long long) le64toh(f->header->head_seqnum),
2388                (unsigned long long) le64toh(f->header->tail_seqnum),
2389                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2390                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2391
2392         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2393                 printf("Data Objects: %llu\n"
2394                        "Data Hash Table Fill: %.1f%%\n",
2395                        (unsigned long long) le64toh(f->header->n_data),
2396                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2397
2398         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2399                 printf("Field Objects: %llu\n"
2400                        "Field Hash Table Fill: %.1f%%\n",
2401                        (unsigned long long) le64toh(f->header->n_fields),
2402                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2403 }
2404
2405 int journal_file_open(
2406                 const char *fname,
2407                 int flags,
2408                 mode_t mode,
2409                 bool compress,
2410                 bool authenticate,
2411                 JournalMetrics *metrics,
2412                 JournalFile *template,
2413                 JournalFile **ret) {
2414
2415         JournalFile *f;
2416         int r;
2417         bool newly_created = false;
2418
2419         assert(fname);
2420
2421         if ((flags & O_ACCMODE) != O_RDONLY &&
2422             (flags & O_ACCMODE) != O_RDWR)
2423                 return -EINVAL;
2424
2425         if (!endswith(fname, ".journal"))
2426                 return -EINVAL;
2427
2428         f = new0(JournalFile, 1);
2429         if (!f)
2430                 return -ENOMEM;
2431
2432         f->fd = -1;
2433         f->mode = mode;
2434
2435         f->flags = flags;
2436         f->prot = prot_from_flags(flags);
2437         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2438         f->compress = compress;
2439         f->authenticate = authenticate;
2440
2441         f->path = strdup(fname);
2442         if (!f->path) {
2443                 r = -ENOMEM;
2444                 goto fail;
2445         }
2446
2447         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2448         if (f->fd < 0) {
2449                 r = -errno;
2450                 goto fail;
2451         }
2452
2453         if (fstat(f->fd, &f->last_stat) < 0) {
2454                 r = -errno;
2455                 goto fail;
2456         }
2457
2458         if (f->last_stat.st_size == 0 && f->writable) {
2459                 newly_created = true;
2460
2461                 /* Try to load the FSPRG state, and if we can't, then
2462                  * just don't do authentication */
2463                 r = journal_file_load_fsprg(f);
2464                 if (r < 0)
2465                         f->authenticate = false;
2466
2467                 r = journal_file_init_header(f, template);
2468                 if (r < 0)
2469                         goto fail;
2470
2471                 if (fstat(f->fd, &f->last_stat) < 0) {
2472                         r = -errno;
2473                         goto fail;
2474                 }
2475         }
2476
2477         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2478                 r = -EIO;
2479                 goto fail;
2480         }
2481
2482         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2483         if (f->header == MAP_FAILED) {
2484                 f->header = NULL;
2485                 r = -errno;
2486                 goto fail;
2487         }
2488
2489         if (!newly_created) {
2490                 r = journal_file_verify_header(f);
2491                 if (r < 0)
2492                         goto fail;
2493         }
2494
2495         if (!newly_created && f->writable) {
2496                 r = journal_file_load_fsprg(f);
2497                 if (r < 0)
2498                         goto fail;
2499         }
2500
2501         if (f->writable) {
2502                 if (metrics) {
2503                         journal_default_metrics(metrics, f->fd);
2504                         f->metrics = *metrics;
2505                 } else if (template)
2506                         f->metrics = template->metrics;
2507
2508                 r = journal_file_refresh_header(f);
2509                 if (r < 0)
2510                         goto fail;
2511
2512                 r = journal_file_setup_hmac(f);
2513                 if (r < 0)
2514                         goto fail;
2515         }
2516
2517         if (newly_created) {
2518                 r = journal_file_setup_field_hash_table(f);
2519                 if (r < 0)
2520                         goto fail;
2521
2522                 r = journal_file_setup_data_hash_table(f);
2523                 if (r < 0)
2524                         goto fail;
2525
2526                 r = journal_file_append_first_tag(f);
2527                 if (r < 0)
2528                         goto fail;
2529         }
2530
2531         r = journal_file_map_field_hash_table(f);
2532         if (r < 0)
2533                 goto fail;
2534
2535         r = journal_file_map_data_hash_table(f);
2536         if (r < 0)
2537                 goto fail;
2538
2539         if (ret)
2540                 *ret = f;
2541
2542         return 0;
2543
2544 fail:
2545         journal_file_close(f);
2546
2547         return r;
2548 }
2549
2550 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2551         char *p;
2552         size_t l;
2553         JournalFile *old_file, *new_file = NULL;
2554         int r;
2555
2556         assert(f);
2557         assert(*f);
2558
2559         old_file = *f;
2560
2561         if (!old_file->writable)
2562                 return -EINVAL;
2563
2564         if (!endswith(old_file->path, ".journal"))
2565                 return -EINVAL;
2566
2567         l = strlen(old_file->path);
2568
2569         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2570         if (!p)
2571                 return -ENOMEM;
2572
2573         memcpy(p, old_file->path, l - 8);
2574         p[l-8] = '@';
2575         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2576         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2577                  "-%016llx-%016llx.journal",
2578                  (unsigned long long) le64toh((*f)->header->tail_seqnum),
2579                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2580
2581         r = rename(old_file->path, p);
2582         free(p);
2583
2584         if (r < 0)
2585                 return -errno;
2586
2587         old_file->header->state = STATE_ARCHIVED;
2588
2589         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file, &new_file);
2590         journal_file_close(old_file);
2591
2592         *f = new_file;
2593         return r;
2594 }
2595
2596 int journal_file_open_reliably(
2597                 const char *fname,
2598                 int flags,
2599                 mode_t mode,
2600                 bool compress,
2601                 bool authenticate,
2602                 JournalMetrics *metrics,
2603                 JournalFile *template,
2604                 JournalFile **ret) {
2605
2606         int r;
2607         size_t l;
2608         char *p;
2609
2610         r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
2611         if (r != -EBADMSG && /* corrupted */
2612             r != -ENODATA && /* truncated */
2613             r != -EHOSTDOWN && /* other machine */
2614             r != -EPROTONOSUPPORT && /* incompatible feature */
2615             r != -EBUSY && /* unclean shutdown */
2616             r != -ESHUTDOWN /* already archived */)
2617                 return r;
2618
2619         if ((flags & O_ACCMODE) == O_RDONLY)
2620                 return r;
2621
2622         if (!(flags & O_CREAT))
2623                 return r;
2624
2625         if (!endswith(fname, ".journal"))
2626                 return r;
2627
2628         /* The file is corrupted. Rotate it away and try it again (but only once) */
2629
2630         l = strlen(fname);
2631         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2632                      (int) (l-8), fname,
2633                      (unsigned long long) now(CLOCK_REALTIME),
2634                      random_ull()) < 0)
2635                 return -ENOMEM;
2636
2637         r = rename(fname, p);
2638         free(p);
2639         if (r < 0)
2640                 return -errno;
2641
2642         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2643
2644         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
2645 }
2646
2647 struct vacuum_info {
2648         off_t usage;
2649         char *filename;
2650
2651         uint64_t realtime;
2652         sd_id128_t seqnum_id;
2653         uint64_t seqnum;
2654
2655         bool have_seqnum;
2656 };
2657
2658 static int vacuum_compare(const void *_a, const void *_b) {
2659         const struct vacuum_info *a, *b;
2660
2661         a = _a;
2662         b = _b;
2663
2664         if (a->have_seqnum && b->have_seqnum &&
2665             sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
2666                 if (a->seqnum < b->seqnum)
2667                         return -1;
2668                 else if (a->seqnum > b->seqnum)
2669                         return 1;
2670                 else
2671                         return 0;
2672         }
2673
2674         if (a->realtime < b->realtime)
2675                 return -1;
2676         else if (a->realtime > b->realtime)
2677                 return 1;
2678         else if (a->have_seqnum && b->have_seqnum)
2679                 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
2680         else
2681                 return strcmp(a->filename, b->filename);
2682 }
2683
2684 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2685         DIR *d;
2686         int r = 0;
2687         struct vacuum_info *list = NULL;
2688         unsigned n_list = 0, n_allocated = 0, i;
2689         uint64_t sum = 0;
2690
2691         assert(directory);
2692
2693         if (max_use <= 0)
2694                 return 0;
2695
2696         d = opendir(directory);
2697         if (!d)
2698                 return -errno;
2699
2700         for (;;) {
2701                 int k;
2702                 struct dirent buf, *de;
2703                 size_t q;
2704                 struct stat st;
2705                 char *p;
2706                 unsigned long long seqnum = 0, realtime;
2707                 sd_id128_t seqnum_id;
2708                 bool have_seqnum;
2709
2710                 k = readdir_r(d, &buf, &de);
2711                 if (k != 0) {
2712                         r = -k;
2713                         goto finish;
2714                 }
2715
2716                 if (!de)
2717                         break;
2718
2719                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2720                         continue;
2721
2722                 if (!S_ISREG(st.st_mode))
2723                         continue;
2724
2725                 q = strlen(de->d_name);
2726
2727                 if (endswith(de->d_name, ".journal")) {
2728
2729                         /* Vacuum archived files */
2730
2731                         if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2732                                 continue;
2733
2734                         if (de->d_name[q-8-16-1] != '-' ||
2735                             de->d_name[q-8-16-1-16-1] != '-' ||
2736                             de->d_name[q-8-16-1-16-1-32-1] != '@')
2737                                 continue;
2738
2739                         p = strdup(de->d_name);
2740                         if (!p) {
2741                                 r = -ENOMEM;
2742                                 goto finish;
2743                         }
2744
2745                         de->d_name[q-8-16-1-16-1] = 0;
2746                         if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2747                                 free(p);
2748                                 continue;
2749                         }
2750
2751                         if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2752                                 free(p);
2753                                 continue;
2754                         }
2755
2756                         have_seqnum = true;
2757
2758                 } else if (endswith(de->d_name, ".journal~")) {
2759                         unsigned long long tmp;
2760
2761                         /* Vacuum corrupted files */
2762
2763                         if (q < 1 + 16 + 1 + 16 + 8 + 1)
2764                                 continue;
2765
2766                         if (de->d_name[q-1-8-16-1] != '-' ||
2767                             de->d_name[q-1-8-16-1-16-1] != '@')
2768                                 continue;
2769
2770                         p = strdup(de->d_name);
2771                         if (!p) {
2772                                 r = -ENOMEM;
2773                                 goto finish;
2774                         }
2775
2776                         if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2777                                 free(p);
2778                                 continue;
2779                         }
2780
2781                         have_seqnum = false;
2782                 } else
2783                         continue;
2784
2785                 if (n_list >= n_allocated) {
2786                         struct vacuum_info *j;
2787
2788                         n_allocated = MAX(n_allocated * 2U, 8U);
2789                         j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2790                         if (!j) {
2791                                 free(p);
2792                                 r = -ENOMEM;
2793                                 goto finish;
2794                         }
2795
2796                         list = j;
2797                 }
2798
2799                 list[n_list].filename = p;
2800                 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2801                 list[n_list].seqnum = seqnum;
2802                 list[n_list].realtime = realtime;
2803                 list[n_list].seqnum_id = seqnum_id;
2804                 list[n_list].have_seqnum = have_seqnum;
2805
2806                 sum += list[n_list].usage;
2807
2808                 n_list ++;
2809         }
2810
2811         if (n_list > 0)
2812                 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2813
2814         for(i = 0; i < n_list; i++) {
2815                 struct statvfs ss;
2816
2817                 if (fstatvfs(dirfd(d), &ss) < 0) {
2818                         r = -errno;
2819                         goto finish;
2820                 }
2821
2822                 if (sum <= max_use &&
2823                     (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2824                         break;
2825
2826                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2827                         log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2828                         sum -= list[i].usage;
2829                 } else if (errno != ENOENT)
2830                         log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2831         }
2832
2833 finish:
2834         for (i = 0; i < n_list; i++)
2835                 free(list[i].filename);
2836
2837         free(list);
2838
2839         if (d)
2840                 closedir(d);
2841
2842         return r;
2843 }
2844
2845 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2846         uint64_t i, n;
2847         uint64_t q, xor_hash = 0;
2848         int r;
2849         EntryItem *items;
2850         dual_timestamp ts;
2851
2852         assert(from);
2853         assert(to);
2854         assert(o);
2855         assert(p);
2856
2857         if (!to->writable)
2858                 return -EPERM;
2859
2860         ts.monotonic = le64toh(o->entry.monotonic);
2861         ts.realtime = le64toh(o->entry.realtime);
2862
2863         if (to->tail_entry_monotonic_valid &&
2864             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2865                 return -EINVAL;
2866
2867         n = journal_file_entry_n_items(o);
2868         items = alloca(sizeof(EntryItem) * n);
2869
2870         for (i = 0; i < n; i++) {
2871                 uint64_t l, h;
2872                 le64_t le_hash;
2873                 size_t t;
2874                 void *data;
2875                 Object *u;
2876
2877                 q = le64toh(o->entry.items[i].object_offset);
2878                 le_hash = o->entry.items[i].hash;
2879
2880                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2881                 if (r < 0)
2882                         return r;
2883
2884                 if (le_hash != o->data.hash)
2885                         return -EBADMSG;
2886
2887                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2888                 t = (size_t) l;
2889
2890                 /* We hit the limit on 32bit machines */
2891                 if ((uint64_t) t != l)
2892                         return -E2BIG;
2893
2894                 if (o->object.flags & OBJECT_COMPRESSED) {
2895 #ifdef HAVE_XZ
2896                         uint64_t rsize;
2897
2898                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2899                                 return -EBADMSG;
2900
2901                         data = from->compress_buffer;
2902                         l = rsize;
2903 #else
2904                         return -EPROTONOSUPPORT;
2905 #endif
2906                 } else
2907                         data = o->data.payload;
2908
2909                 r = journal_file_append_data(to, data, l, &u, &h);
2910                 if (r < 0)
2911                         return r;
2912
2913                 xor_hash ^= le64toh(u->data.hash);
2914                 items[i].object_offset = htole64(h);
2915                 items[i].hash = u->data.hash;
2916
2917                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2918                 if (r < 0)
2919                         return r;
2920         }
2921
2922         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2923 }
2924
2925 void journal_default_metrics(JournalMetrics *m, int fd) {
2926         uint64_t fs_size = 0;
2927         struct statvfs ss;
2928         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2929
2930         assert(m);
2931         assert(fd >= 0);
2932
2933         if (fstatvfs(fd, &ss) >= 0)
2934                 fs_size = ss.f_frsize * ss.f_blocks;
2935
2936         if (m->max_use == (uint64_t) -1) {
2937
2938                 if (fs_size > 0) {
2939                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2940
2941                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2942                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2943
2944                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2945                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2946                 } else
2947                         m->max_use = DEFAULT_MAX_USE_LOWER;
2948         } else {
2949                 m->max_use = PAGE_ALIGN(m->max_use);
2950
2951                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2952                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2953         }
2954
2955         if (m->max_size == (uint64_t) -1) {
2956                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2957
2958                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2959                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2960         } else
2961                 m->max_size = PAGE_ALIGN(m->max_size);
2962
2963         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2964                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2965
2966         if (m->max_size*2 > m->max_use)
2967                 m->max_use = m->max_size*2;
2968
2969         if (m->min_size == (uint64_t) -1)
2970                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2971         else {
2972                 m->min_size = PAGE_ALIGN(m->min_size);
2973
2974                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2975                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2976
2977                 if (m->min_size > m->max_size)
2978                         m->max_size = m->min_size;
2979         }
2980
2981         if (m->keep_free == (uint64_t) -1) {
2982
2983                 if (fs_size > 0) {
2984                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2985
2986                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2987                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2988
2989                 } else
2990                         m->keep_free = DEFAULT_KEEP_FREE;
2991         }
2992
2993         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2994                  format_bytes(a, sizeof(a), m->max_use),
2995                  format_bytes(b, sizeof(b), m->max_size),
2996                  format_bytes(c, sizeof(c), m->min_size),
2997                  format_bytes(d, sizeof(d), m->keep_free));
2998 }
2999
3000 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
3001         assert(f);
3002         assert(from || to);
3003
3004         if (from) {
3005                 if (f->header->head_entry_realtime == 0)
3006                         return -ENOENT;
3007
3008                 *from = le64toh(f->header->head_entry_realtime);
3009         }
3010
3011         if (to) {
3012                 if (f->header->tail_entry_realtime == 0)
3013                         return -ENOENT;
3014
3015                 *to = le64toh(f->header->tail_entry_realtime);
3016         }
3017
3018         return 1;
3019 }
3020
3021 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
3022         char t[9+32+1] = "_BOOT_ID=";
3023         Object *o;
3024         uint64_t p;
3025         int r;
3026
3027         assert(f);
3028         assert(from || to);
3029
3030         sd_id128_to_string(boot_id, t + 9);
3031
3032         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
3033         if (r <= 0)
3034                 return r;
3035
3036         if (le64toh(o->data.n_entries) <= 0)
3037                 return 0;
3038
3039         if (from) {
3040                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3041                 if (r < 0)
3042                         return r;
3043
3044                 *from = le64toh(o->entry.monotonic);
3045         }
3046
3047         if (to) {
3048                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3049                 if (r < 0)
3050                         return r;
3051
3052                 r = generic_array_get_plus_one(f,
3053                                                le64toh(o->data.entry_offset),
3054                                                le64toh(o->data.entry_array_offset),
3055                                                le64toh(o->data.n_entries)-1,
3056                                                &o, NULL);
3057                 if (r <= 0)
3058                         return r;
3059
3060                 *to = le64toh(o->entry.monotonic);
3061         }
3062
3063         return 1;
3064 }
3065
3066 bool journal_file_rotate_suggested(JournalFile *f) {
3067         assert(f);
3068
3069         /* If we gained new header fields we gained new features,
3070          * hence suggest a rotation */
3071         if (le64toh(f->header->header_size) < sizeof(Header)) {
3072                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
3073                 return true;
3074         }
3075
3076         /* Let's check if the hash tables grew over a certain fill
3077          * level (75%, borrowing this value from Java's hash table
3078          * implementation), and if so suggest a rotation. To calculate
3079          * the fill level we need the n_data field, which only exists
3080          * in newer versions. */
3081
3082         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
3083                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3084                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3085                                   f->path,
3086                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3087                                   (unsigned long long) le64toh(f->header->n_data),
3088                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3089                                   (unsigned long long) (f->last_stat.st_size),
3090                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
3091                         return true;
3092                 }
3093
3094         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
3095                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3096                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3097                                   f->path,
3098                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3099                                   (unsigned long long) le64toh(f->header->n_fields),
3100                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
3101                         return true;
3102                 }
3103
3104         return false;
3105 }