chiark / gitweb /
journal: implement generic sharable mmap caching logic
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "lookup3.h"
33 #include "compress.h"
34 #include "fsprg.h"
35
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Payloads smaller than this are never handed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* Rounds x up to the next multiple of 8. Defined ahead of its first
 * user below so the file reads top-down. */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)

/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* True if the header of this particular file is large enough to
 * contain the specified field, going by the header_size it declares */
#define JOURNAL_HEADER_CONTAINS(h, field) \
        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
67
68 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
69 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
70
71 void journal_file_close(JournalFile *f) {
72         assert(f);
73
74         /* Write the final tag */
75         if (f->authenticate)
76                 journal_file_append_tag(f);
77
78         /* Sync everything to disk, before we mark the file offline */
79         if (f->mmap && f->fd >= 0)
80                 mmap_cache_close_fd(f->mmap, f->fd);
81
82         if (f->writable && f->fd >= 0)
83                 fdatasync(f->fd);
84
85         if (f->header) {
86                 /* Mark the file offline. Don't override the archived state if it already is set */
87                 if (f->writable && f->header->state == STATE_ONLINE)
88                         f->header->state = STATE_OFFLINE;
89
90                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
91         }
92
93         if (f->fd >= 0)
94                 close_nointr_nofail(f->fd);
95
96         free(f->path);
97
98         if (f->mmap)
99                 mmap_cache_unref(f->mmap);
100
101 #ifdef HAVE_XZ
102         free(f->compress_buffer);
103 #endif
104
105 #ifdef HAVE_GCRYPT
106         if (f->fsprg_header)
107                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
108
109         if (f->hmac)
110                 gcry_md_close(f->hmac);
111 #endif
112
113         free(f);
114 }
115
116 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
117         Header h;
118         ssize_t k;
119         int r;
120
121         assert(f);
122
123         zero(h);
124         memcpy(h.signature, HEADER_SIGNATURE, 8);
125         h.header_size = htole64(ALIGN64(sizeof(h)));
126
127         h.incompatible_flags =
128                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
129
130         h.compatible_flags =
131                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
132
133         r = sd_id128_randomize(&h.file_id);
134         if (r < 0)
135                 return r;
136
137         if (template) {
138                 h.seqnum_id = template->header->seqnum_id;
139                 h.tail_seqnum = template->header->tail_seqnum;
140         } else
141                 h.seqnum_id = h.file_id;
142
143         k = pwrite(f->fd, &h, sizeof(h), 0);
144         if (k < 0)
145                 return -errno;
146
147         if (k != sizeof(h))
148                 return -EIO;
149
150         return 0;
151 }
152
153 static int journal_file_refresh_header(JournalFile *f) {
154         int r;
155         sd_id128_t boot_id;
156
157         assert(f);
158
159         r = sd_id128_get_machine(&f->header->machine_id);
160         if (r < 0)
161                 return r;
162
163         r = sd_id128_get_boot(&boot_id);
164         if (r < 0)
165                 return r;
166
167         if (sd_id128_equal(boot_id, f->header->boot_id))
168                 f->tail_entry_monotonic_valid = true;
169
170         f->header->boot_id = boot_id;
171
172         f->header->state = STATE_ONLINE;
173
174         /* Sync the online state to disk */
175         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
176         fdatasync(f->fd);
177
178         return 0;
179 }
180
181 static int journal_file_verify_header(JournalFile *f) {
182         assert(f);
183
184         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
185                 return -EBADMSG;
186
187         /* In both read and write mode we refuse to open files with
188          * incompatible flags we don't know */
189 #ifdef HAVE_XZ
190         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
191                 return -EPROTONOSUPPORT;
192 #else
193         if (f->header->incompatible_flags != 0)
194                 return -EPROTONOSUPPORT;
195 #endif
196
197         /* When open for writing we refuse to open files with
198          * compatible flags, too */
199         if (f->writable) {
200 #ifdef HAVE_GCRYPT
201                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
202                         return -EPROTONOSUPPORT;
203 #else
204                 if (f->header->compatible_flags != 0)
205                         return -EPROTONOSUPPORT;
206 #endif
207         }
208
209         /* The first addition was n_data, so check that we are at least this large */
210         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
211                 return -EBADMSG;
212
213         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
214                 return -ENODATA;
215
216         if (f->writable) {
217                 uint8_t state;
218                 sd_id128_t machine_id;
219                 int r;
220
221                 r = sd_id128_get_machine(&machine_id);
222                 if (r < 0)
223                         return r;
224
225                 if (!sd_id128_equal(machine_id, f->header->machine_id))
226                         return -EHOSTDOWN;
227
228                 state = f->header->state;
229
230                 if (state == STATE_ONLINE) {
231                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
232                         return -EBUSY;
233                 } else if (state == STATE_ARCHIVED)
234                         return -ESHUTDOWN;
235                 else if (state != STATE_OFFLINE) {
236                         log_debug("Journal file %s has unknown state %u.", f->path, state);
237                         return -EBUSY;
238                 }
239         }
240
241         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
242         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
243
244         return 0;
245 }
246
247 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
248         uint64_t old_size, new_size;
249         int r;
250
251         assert(f);
252
253         /* We assume that this file is not sparse, and we know that
254          * for sure, since we always call posix_fallocate()
255          * ourselves */
256
257         old_size =
258                 le64toh(f->header->header_size) +
259                 le64toh(f->header->arena_size);
260
261         new_size = PAGE_ALIGN(offset + size);
262         if (new_size < le64toh(f->header->header_size))
263                 new_size = le64toh(f->header->header_size);
264
265         if (new_size <= old_size)
266                 return 0;
267
268         if (f->metrics.max_size > 0 &&
269             new_size > f->metrics.max_size)
270                 return -E2BIG;
271
272         if (new_size > f->metrics.min_size &&
273             f->metrics.keep_free > 0) {
274                 struct statvfs svfs;
275
276                 if (fstatvfs(f->fd, &svfs) >= 0) {
277                         uint64_t available;
278
279                         available = svfs.f_bfree * svfs.f_bsize;
280
281                         if (available >= f->metrics.keep_free)
282                                 available -= f->metrics.keep_free;
283                         else
284                                 available = 0;
285
286                         if (new_size - old_size > available)
287                                 return -E2BIG;
288                 }
289         }
290
291         /* Note that the glibc fallocate() fallback is very
292            inefficient, hence we try to minimize the allocation area
293            as we can. */
294         r = posix_fallocate(f->fd, old_size, new_size - old_size);
295         if (r != 0)
296                 return -r;
297
298         if (fstat(f->fd, &f->last_stat) < 0)
299                 return -errno;
300
301         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
302
303         return 0;
304 }
305
306 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
307         assert(f);
308         assert(ret);
309
310         /* Avoid SIGBUS on invalid accesses */
311         if (offset + size > (uint64_t) f->last_stat.st_size) {
312                 /* Hmm, out of range? Let's refresh the fstat() data
313                  * first, before we trust that check. */
314
315                 if (fstat(f->fd, &f->last_stat) < 0 ||
316                     offset + size > (uint64_t) f->last_stat.st_size)
317                         return -EADDRNOTAVAIL;
318         }
319
320         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
321 }
322
323 static bool verify_hash(Object *o) {
324         uint64_t h1, h2;
325
326         assert(o);
327
328         if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
329                 h1 = le64toh(o->data.hash);
330                 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
331         } else if (o->object.type == OBJECT_FIELD) {
332                 h1 = le64toh(o->field.hash);
333                 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
334         } else
335                 return true;
336
337         return h1 == h2;
338 }
339
340 static uint64_t minimum_header_size(Object *o) {
341
342         static uint64_t table[] = {
343                 [OBJECT_DATA] = sizeof(DataObject),
344                 [OBJECT_FIELD] = sizeof(FieldObject),
345                 [OBJECT_ENTRY] = sizeof(EntryObject),
346                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
347                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
348                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
349                 [OBJECT_TAG] = sizeof(TagObject),
350         };
351
352         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
353                 return sizeof(ObjectHeader);
354
355         return table[o->object.type];
356 }
357
358 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
359         int r;
360         void *t;
361         Object *o;
362         uint64_t s;
363         unsigned context;
364
365         assert(f);
366         assert(ret);
367
368         /* One context for each type, plus one catch-all for the rest */
369         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
370
371         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
372         if (r < 0)
373                 return r;
374
375         o = (Object*) t;
376         s = le64toh(o->object.size);
377
378         if (s < sizeof(ObjectHeader))
379                 return -EBADMSG;
380
381         if (o->object.type <= OBJECT_UNUSED)
382                 return -EBADMSG;
383
384         if (s < minimum_header_size(o))
385                 return -EBADMSG;
386
387         if (type >= 0 && o->object.type != type)
388                 return -EBADMSG;
389
390         if (s > sizeof(ObjectHeader)) {
391                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
392                 if (r < 0)
393                         return r;
394
395                 o = (Object*) t;
396         }
397
398         if (!verify_hash(o))
399                 return -EBADMSG;
400
401         *ret = o;
402         return 0;
403 }
404
405 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
406         uint64_t r;
407
408         assert(f);
409
410         r = le64toh(f->header->tail_seqnum) + 1;
411
412         if (seqnum) {
413                 /* If an external seqnum counter was passed, we update
414                  * both the local and the external one, and set it to
415                  * the maximum of both */
416
417                 if (*seqnum + 1 > r)
418                         r = *seqnum + 1;
419
420                 *seqnum = r;
421         }
422
423         f->header->tail_seqnum = htole64(r);
424
425         if (f->header->head_seqnum == 0)
426                 f->header->head_seqnum = htole64(r);
427
428         return r;
429 }
430
431 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
432         int r;
433         uint64_t p;
434         Object *tail, *o;
435         void *t;
436
437         assert(f);
438         assert(type > 0 && type < _OBJECT_TYPE_MAX);
439         assert(size >= sizeof(ObjectHeader));
440         assert(offset);
441         assert(ret);
442
443         p = le64toh(f->header->tail_object_offset);
444         if (p == 0)
445                 p = le64toh(f->header->header_size);
446         else {
447                 r = journal_file_move_to_object(f, -1, p, &tail);
448                 if (r < 0)
449                         return r;
450
451                 p += ALIGN64(le64toh(tail->object.size));
452         }
453
454         r = journal_file_allocate(f, p, size);
455         if (r < 0)
456                 return r;
457
458         r = journal_file_move_to(f, type, p, size, &t);
459         if (r < 0)
460                 return r;
461
462         o = (Object*) t;
463
464         zero(o->object);
465         o->object.type = type;
466         o->object.size = htole64(size);
467
468         f->header->tail_object_offset = htole64(p);
469         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
470
471         *ret = o;
472         *offset = p;
473
474         return 0;
475 }
476
477 static int journal_file_setup_data_hash_table(JournalFile *f) {
478         uint64_t s, p;
479         Object *o;
480         int r;
481
482         assert(f);
483
484         /* We estimate that we need 1 hash table entry per 768 of
485            journal file and we want to make sure we never get beyond
486            75% fill level. Calculate the hash table size for the
487            maximum file size based on these metrics. */
488
489         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
490         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
491                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
492
493         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
494
495         r = journal_file_append_object(f,
496                                        OBJECT_DATA_HASH_TABLE,
497                                        offsetof(Object, hash_table.items) + s,
498                                        &o, &p);
499         if (r < 0)
500                 return r;
501
502         memset(o->hash_table.items, 0, s);
503
504         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
505         f->header->data_hash_table_size = htole64(s);
506
507         return 0;
508 }
509
510 static int journal_file_setup_field_hash_table(JournalFile *f) {
511         uint64_t s, p;
512         Object *o;
513         int r;
514
515         assert(f);
516
517         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
518         r = journal_file_append_object(f,
519                                        OBJECT_FIELD_HASH_TABLE,
520                                        offsetof(Object, hash_table.items) + s,
521                                        &o, &p);
522         if (r < 0)
523                 return r;
524
525         memset(o->hash_table.items, 0, s);
526
527         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
528         f->header->field_hash_table_size = htole64(s);
529
530         return 0;
531 }
532
533 static int journal_file_map_data_hash_table(JournalFile *f) {
534         uint64_t s, p;
535         void *t;
536         int r;
537
538         assert(f);
539
540         p = le64toh(f->header->data_hash_table_offset);
541         s = le64toh(f->header->data_hash_table_size);
542
543         r = journal_file_move_to(f,
544                                  OBJECT_DATA_HASH_TABLE,
545                                  p, s,
546                                  &t);
547         if (r < 0)
548                 return r;
549
550         f->data_hash_table = t;
551         return 0;
552 }
553
554 static int journal_file_map_field_hash_table(JournalFile *f) {
555         uint64_t s, p;
556         void *t;
557         int r;
558
559         assert(f);
560
561         p = le64toh(f->header->field_hash_table_offset);
562         s = le64toh(f->header->field_hash_table_size);
563
564         r = journal_file_move_to(f,
565                                  OBJECT_FIELD_HASH_TABLE,
566                                  p, s,
567                                  &t);
568         if (r < 0)
569                 return r;
570
571         f->field_hash_table = t;
572         return 0;
573 }
574
575 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
576         uint64_t p, h;
577         int r;
578
579         assert(f);
580         assert(o);
581         assert(offset > 0);
582         assert(o->object.type == OBJECT_DATA);
583
584         /* This might alter the window we are looking at */
585
586         o->data.next_hash_offset = o->data.next_field_offset = 0;
587         o->data.entry_offset = o->data.entry_array_offset = 0;
588         o->data.n_entries = 0;
589
590         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
591         p = le64toh(f->data_hash_table[h].tail_hash_offset);
592         if (p == 0) {
593                 /* Only entry in the hash table is easy */
594                 f->data_hash_table[h].head_hash_offset = htole64(offset);
595         } else {
596                 /* Move back to the previous data object, to patch in
597                  * pointer */
598
599                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
600                 if (r < 0)
601                         return r;
602
603                 o->data.next_hash_offset = htole64(offset);
604         }
605
606         f->data_hash_table[h].tail_hash_offset = htole64(offset);
607
608         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
609                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
610
611         return 0;
612 }
613
614 int journal_file_find_data_object_with_hash(
615                 JournalFile *f,
616                 const void *data, uint64_t size, uint64_t hash,
617                 Object **ret, uint64_t *offset) {
618
619         uint64_t p, osize, h;
620         int r;
621
622         assert(f);
623         assert(data || size == 0);
624
625         osize = offsetof(Object, data.payload) + size;
626
627         if (f->header->data_hash_table_size == 0)
628                 return -EBADMSG;
629
630         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
631         p = le64toh(f->data_hash_table[h].head_hash_offset);
632
633         while (p > 0) {
634                 Object *o;
635
636                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
637                 if (r < 0)
638                         return r;
639
640                 if (le64toh(o->data.hash) != hash)
641                         goto next;
642
643                 if (o->object.flags & OBJECT_COMPRESSED) {
644 #ifdef HAVE_XZ
645                         uint64_t l, rsize;
646
647                         l = le64toh(o->object.size);
648                         if (l <= offsetof(Object, data.payload))
649                                 return -EBADMSG;
650
651                         l -= offsetof(Object, data.payload);
652
653                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
654                                 return -EBADMSG;
655
656                         if (rsize == size &&
657                             memcmp(f->compress_buffer, data, size) == 0) {
658
659                                 if (ret)
660                                         *ret = o;
661
662                                 if (offset)
663                                         *offset = p;
664
665                                 return 1;
666                         }
667 #else
668                         return -EPROTONOSUPPORT;
669 #endif
670
671                 } else if (le64toh(o->object.size) == osize &&
672                            memcmp(o->data.payload, data, size) == 0) {
673
674                         if (ret)
675                                 *ret = o;
676
677                         if (offset)
678                                 *offset = p;
679
680                         return 1;
681                 }
682
683         next:
684                 p = le64toh(o->data.next_hash_offset);
685         }
686
687         return 0;
688 }
689
690 int journal_file_find_data_object(
691                 JournalFile *f,
692                 const void *data, uint64_t size,
693                 Object **ret, uint64_t *offset) {
694
695         uint64_t hash;
696
697         assert(f);
698         assert(data || size == 0);
699
700         hash = hash64(data, size);
701
702         return journal_file_find_data_object_with_hash(f,
703                                                        data, size, hash,
704                                                        ret, offset);
705 }
706
707 static int journal_file_append_data(
708                 JournalFile *f,
709                 const void *data, uint64_t size,
710                 Object **ret, uint64_t *offset) {
711
712         uint64_t hash, p;
713         uint64_t osize;
714         Object *o;
715         int r;
716         bool compressed = false;
717
718         assert(f);
719         assert(data || size == 0);
720
721         hash = hash64(data, size);
722
723         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
724         if (r < 0)
725                 return r;
726         else if (r > 0) {
727
728                 if (ret)
729                         *ret = o;
730
731                 if (offset)
732                         *offset = p;
733
734                 return 0;
735         }
736
737         osize = offsetof(Object, data.payload) + size;
738         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
739         if (r < 0)
740                 return r;
741
742         o->data.hash = htole64(hash);
743
744 #ifdef HAVE_XZ
745         if (f->compress &&
746             size >= COMPRESSION_SIZE_THRESHOLD) {
747                 uint64_t rsize;
748
749                 compressed = compress_blob(data, size, o->data.payload, &rsize);
750
751                 if (compressed) {
752                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
753                         o->object.flags |= OBJECT_COMPRESSED;
754
755                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
756                 }
757         }
758 #endif
759
760         if (!compressed && size > 0)
761                 memcpy(o->data.payload, data, size);
762
763         r = journal_file_link_data(f, o, p, hash);
764         if (r < 0)
765                 return r;
766
767         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
768         if (r < 0)
769                 return r;
770
771         /* The linking might have altered the window, so let's
772          * refresh our pointer */
773         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
774         if (r < 0)
775                 return r;
776
777         if (ret)
778                 *ret = o;
779
780         if (offset)
781                 *offset = p;
782
783         return 0;
784 }
785
786 uint64_t journal_file_entry_n_items(Object *o) {
787         assert(o);
788         assert(o->object.type == OBJECT_ENTRY);
789
790         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
791 }
792
793 static uint64_t journal_file_entry_array_n_items(Object *o) {
794         assert(o);
795         assert(o->object.type == OBJECT_ENTRY_ARRAY);
796
797         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
798 }
799
800 static int link_entry_into_array(JournalFile *f,
801                                  le64_t *first,
802                                  le64_t *idx,
803                                  uint64_t p) {
804         int r;
805         uint64_t n = 0, ap = 0, q, i, a, hidx;
806         Object *o;
807
808         assert(f);
809         assert(first);
810         assert(idx);
811         assert(p > 0);
812
813         a = le64toh(*first);
814         i = hidx = le64toh(*idx);
815         while (a > 0) {
816
817                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
818                 if (r < 0)
819                         return r;
820
821                 n = journal_file_entry_array_n_items(o);
822                 if (i < n) {
823                         o->entry_array.items[i] = htole64(p);
824                         *idx = htole64(hidx + 1);
825                         return 0;
826                 }
827
828                 i -= n;
829                 ap = a;
830                 a = le64toh(o->entry_array.next_entry_array_offset);
831         }
832
833         if (hidx > n)
834                 n = (hidx+1) * 2;
835         else
836                 n = n * 2;
837
838         if (n < 4)
839                 n = 4;
840
841         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
842                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
843                                        &o, &q);
844         if (r < 0)
845                 return r;
846
847         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
848         if (r < 0)
849                 return r;
850
851         o->entry_array.items[i] = htole64(p);
852
853         if (ap == 0)
854                 *first = htole64(q);
855         else {
856                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
857                 if (r < 0)
858                         return r;
859
860                 o->entry_array.next_entry_array_offset = htole64(q);
861         }
862
863         *idx = htole64(hidx + 1);
864
865         return 0;
866 }
867
868 static int link_entry_into_array_plus_one(JournalFile *f,
869                                           le64_t *extra,
870                                           le64_t *first,
871                                           le64_t *idx,
872                                           uint64_t p) {
873
874         int r;
875
876         assert(f);
877         assert(extra);
878         assert(first);
879         assert(idx);
880         assert(p > 0);
881
882         if (*idx == 0)
883                 *extra = htole64(p);
884         else {
885                 le64_t i;
886
887                 i = htole64(le64toh(*idx) - 1);
888                 r = link_entry_into_array(f, first, &i, p);
889                 if (r < 0)
890                         return r;
891         }
892
893         *idx = htole64(le64toh(*idx) + 1);
894         return 0;
895 }
896
897 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
898         uint64_t p;
899         int r;
900         assert(f);
901         assert(o);
902         assert(offset > 0);
903
904         p = le64toh(o->entry.items[i].object_offset);
905         if (p == 0)
906                 return -EINVAL;
907
908         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
909         if (r < 0)
910                 return r;
911
912         return link_entry_into_array_plus_one(f,
913                                               &o->data.entry_offset,
914                                               &o->data.entry_array_offset,
915                                               &o->data.n_entries,
916                                               offset);
917 }
918
919 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
920         uint64_t n, i;
921         int r;
922
923         assert(f);
924         assert(o);
925         assert(offset > 0);
926         assert(o->object.type == OBJECT_ENTRY);
927
928         __sync_synchronize();
929
930         /* Link up the entry itself */
931         r = link_entry_into_array(f,
932                                   &f->header->entry_array_offset,
933                                   &f->header->n_entries,
934                                   offset);
935         if (r < 0)
936                 return r;
937
938         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
939
940         if (f->header->head_entry_realtime == 0)
941                 f->header->head_entry_realtime = o->entry.realtime;
942
943         f->header->tail_entry_realtime = o->entry.realtime;
944         f->header->tail_entry_monotonic = o->entry.monotonic;
945
946         f->tail_entry_monotonic_valid = true;
947
948         /* Link up the items */
949         n = journal_file_entry_n_items(o);
950         for (i = 0; i < n; i++) {
951                 r = journal_file_link_entry_item(f, o, offset, i);
952                 if (r < 0)
953                         return r;
954         }
955
956         return 0;
957 }
958
959 static int journal_file_append_entry_internal(
960                 JournalFile *f,
961                 const dual_timestamp *ts,
962                 uint64_t xor_hash,
963                 const EntryItem items[], unsigned n_items,
964                 uint64_t *seqnum,
965                 Object **ret, uint64_t *offset) {
966         uint64_t np;
967         uint64_t osize;
968         Object *o;
969         int r;
970
971         assert(f);
972         assert(items || n_items == 0);
973         assert(ts);
974
975         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
976
977         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
978         if (r < 0)
979                 return r;
980
981         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
982         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
983         o->entry.realtime = htole64(ts->realtime);
984         o->entry.monotonic = htole64(ts->monotonic);
985         o->entry.xor_hash = htole64(xor_hash);
986         o->entry.boot_id = f->header->boot_id;
987
988         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
989         if (r < 0)
990                 return r;
991
992         r = journal_file_link_entry(f, o, np);
993         if (r < 0)
994                 return r;
995
996         if (ret)
997                 *ret = o;
998
999         if (offset)
1000                 *offset = np;
1001
1002         return 0;
1003 }
1004
1005 void journal_file_post_change(JournalFile *f) {
1006         assert(f);
1007
1008         /* inotify() does not receive IN_MODIFY events from file
1009          * accesses done via mmap(). After each access we hence
1010          * trigger IN_MODIFY by truncating the journal file to its
1011          * current size which triggers IN_MODIFY. */
1012
1013         __sync_synchronize();
1014
1015         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1016                 log_error("Failed to to truncate file to its own size: %m");
1017 }
1018
1019 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1020         unsigned i;
1021         EntryItem *items;
1022         int r;
1023         uint64_t xor_hash = 0;
1024         struct dual_timestamp _ts;
1025
1026         assert(f);
1027         assert(iovec || n_iovec == 0);
1028
1029         if (!f->writable)
1030                 return -EPERM;
1031
1032         if (!ts) {
1033                 dual_timestamp_get(&_ts);
1034                 ts = &_ts;
1035         }
1036
1037         if (f->tail_entry_monotonic_valid &&
1038             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1039                 return -EINVAL;
1040
1041         r = journal_file_maybe_append_tag(f, ts->realtime);
1042         if (r < 0)
1043                 return r;
1044
1045         /* alloca() can't take 0, hence let's allocate at least one */
1046         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1047
1048         for (i = 0; i < n_iovec; i++) {
1049                 uint64_t p;
1050                 Object *o;
1051
1052                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1053                 if (r < 0)
1054                         return r;
1055
1056                 xor_hash ^= le64toh(o->data.hash);
1057                 items[i].object_offset = htole64(p);
1058                 items[i].hash = o->data.hash;
1059         }
1060
1061         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1062
1063         journal_file_post_change(f);
1064
1065         return r;
1066 }
1067
1068 static int generic_array_get(JournalFile *f,
1069                              uint64_t first,
1070                              uint64_t i,
1071                              Object **ret, uint64_t *offset) {
1072
1073         Object *o;
1074         uint64_t p = 0, a;
1075         int r;
1076
1077         assert(f);
1078
1079         a = first;
1080         while (a > 0) {
1081                 uint64_t n;
1082
1083                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1084                 if (r < 0)
1085                         return r;
1086
1087                 n = journal_file_entry_array_n_items(o);
1088                 if (i < n) {
1089                         p = le64toh(o->entry_array.items[i]);
1090                         break;
1091                 }
1092
1093                 i -= n;
1094                 a = le64toh(o->entry_array.next_entry_array_offset);
1095         }
1096
1097         if (a <= 0 || p <= 0)
1098                 return 0;
1099
1100         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1101         if (r < 0)
1102                 return r;
1103
1104         if (ret)
1105                 *ret = o;
1106
1107         if (offset)
1108                 *offset = p;
1109
1110         return 1;
1111 }
1112
1113 static int generic_array_get_plus_one(JournalFile *f,
1114                                       uint64_t extra,
1115                                       uint64_t first,
1116                                       uint64_t i,
1117                                       Object **ret, uint64_t *offset) {
1118
1119         Object *o;
1120
1121         assert(f);
1122
1123         if (i == 0) {
1124                 int r;
1125
1126                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1127                 if (r < 0)
1128                         return r;
1129
1130                 if (ret)
1131                         *ret = o;
1132
1133                 if (offset)
1134                         *offset = extra;
1135
1136                 return 1;
1137         }
1138
1139         return generic_array_get(f, first, i-1, ret, offset);
1140 }
1141
/* Verdicts returned by the test_object callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested entry matches the needle */
        TEST_LEFT  = 1, /* the tested entry sorts before the needle */
        TEST_RIGHT = 2  /* the tested entry sorts after the needle */
};
1147
1148 static int generic_array_bisect(JournalFile *f,
1149                                 uint64_t first,
1150                                 uint64_t n,
1151                                 uint64_t needle,
1152                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1153                                 direction_t direction,
1154                                 Object **ret,
1155                                 uint64_t *offset,
1156                                 uint64_t *idx) {
1157
1158         uint64_t a, p, t = 0, i = 0, last_p = 0;
1159         bool subtract_one = false;
1160         Object *o, *array = NULL;
1161         int r;
1162
1163         assert(f);
1164         assert(test_object);
1165
1166         a = first;
1167         while (a > 0) {
1168                 uint64_t left, right, k, lp;
1169
1170                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1171                 if (r < 0)
1172                         return r;
1173
1174                 k = journal_file_entry_array_n_items(array);
1175                 right = MIN(k, n);
1176                 if (right <= 0)
1177                         return 0;
1178
1179                 i = right - 1;
1180                 lp = p = le64toh(array->entry_array.items[i]);
1181                 if (p <= 0)
1182                         return -EBADMSG;
1183
1184                 r = test_object(f, p, needle);
1185                 if (r < 0)
1186                         return r;
1187
1188                 if (r == TEST_FOUND)
1189                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1190
1191                 if (r == TEST_RIGHT) {
1192                         left = 0;
1193                         right -= 1;
1194                         for (;;) {
1195                                 if (left == right) {
1196                                         if (direction == DIRECTION_UP)
1197                                                 subtract_one = true;
1198
1199                                         i = left;
1200                                         goto found;
1201                                 }
1202
1203                                 assert(left < right);
1204
1205                                 i = (left + right) / 2;
1206                                 p = le64toh(array->entry_array.items[i]);
1207                                 if (p <= 0)
1208                                         return -EBADMSG;
1209
1210                                 r = test_object(f, p, needle);
1211                                 if (r < 0)
1212                                         return r;
1213
1214                                 if (r == TEST_FOUND)
1215                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1216
1217                                 if (r == TEST_RIGHT)
1218                                         right = i;
1219                                 else
1220                                         left = i + 1;
1221                         }
1222                 }
1223
1224                 if (k > n) {
1225                         if (direction == DIRECTION_UP) {
1226                                 i = n;
1227                                 subtract_one = true;
1228                                 goto found;
1229                         }
1230
1231                         return 0;
1232                 }
1233
1234                 last_p = lp;
1235
1236                 n -= k;
1237                 t += k;
1238                 a = le64toh(array->entry_array.next_entry_array_offset);
1239         }
1240
1241         return 0;
1242
1243 found:
1244         if (subtract_one && t == 0 && i == 0)
1245                 return 0;
1246
1247         if (subtract_one && i == 0)
1248                 p = last_p;
1249         else if (subtract_one)
1250                 p = le64toh(array->entry_array.items[i-1]);
1251         else
1252                 p = le64toh(array->entry_array.items[i]);
1253
1254         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1255         if (r < 0)
1256                 return r;
1257
1258         if (ret)
1259                 *ret = o;
1260
1261         if (offset)
1262                 *offset = p;
1263
1264         if (idx)
1265                 *idx = t + i + (subtract_one ? -1 : 0);
1266
1267         return 1;
1268 }
1269
1270 static int generic_array_bisect_plus_one(JournalFile *f,
1271                                          uint64_t extra,
1272                                          uint64_t first,
1273                                          uint64_t n,
1274                                          uint64_t needle,
1275                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1276                                          direction_t direction,
1277                                          Object **ret,
1278                                          uint64_t *offset,
1279                                          uint64_t *idx) {
1280
1281         int r;
1282         bool step_back = false;
1283         Object *o;
1284
1285         assert(f);
1286         assert(test_object);
1287
1288         if (n <= 0)
1289                 return 0;
1290
1291         /* This bisects the array in object 'first', but first checks
1292          * an extra  */
1293         r = test_object(f, extra, needle);
1294         if (r < 0)
1295                 return r;
1296
1297         if (r == TEST_FOUND)
1298                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1299
1300         /* if we are looking with DIRECTION_UP then we need to first
1301            see if in the actual array there is a matching entry, and
1302            return the last one of that. But if there isn't any we need
1303            to return this one. Hence remember this, and return it
1304            below. */
1305         if (r == TEST_LEFT)
1306                 step_back = direction == DIRECTION_UP;
1307
1308         if (r == TEST_RIGHT) {
1309                 if (direction == DIRECTION_DOWN)
1310                         goto found;
1311                 else
1312                         return 0;
1313         }
1314
1315         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1316
1317         if (r == 0 && step_back)
1318                 goto found;
1319
1320         if (r > 0 && idx)
1321                 (*idx) ++;
1322
1323         return r;
1324
1325 found:
1326         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1327         if (r < 0)
1328                 return r;
1329
1330         if (ret)
1331                 *ret = o;
1332
1333         if (offset)
1334                 *offset = extra;
1335
1336         if (idx)
1337                 *idx = 0;
1338
1339         return 1;
1340 }
1341
1342 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1343         assert(f);
1344         assert(p > 0);
1345
1346         if (p == needle)
1347                 return TEST_FOUND;
1348         else if (p < needle)
1349                 return TEST_LEFT;
1350         else
1351                 return TEST_RIGHT;
1352 }
1353
1354 int journal_file_move_to_entry_by_offset(
1355                 JournalFile *f,
1356                 uint64_t p,
1357                 direction_t direction,
1358                 Object **ret,
1359                 uint64_t *offset) {
1360
1361         return generic_array_bisect(f,
1362                                     le64toh(f->header->entry_array_offset),
1363                                     le64toh(f->header->n_entries),
1364                                     p,
1365                                     test_object_offset,
1366                                     direction,
1367                                     ret, offset, NULL);
1368 }
1369
1370
1371 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1372         Object *o;
1373         int r;
1374
1375         assert(f);
1376         assert(p > 0);
1377
1378         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1379         if (r < 0)
1380                 return r;
1381
1382         if (le64toh(o->entry.seqnum) == needle)
1383                 return TEST_FOUND;
1384         else if (le64toh(o->entry.seqnum) < needle)
1385                 return TEST_LEFT;
1386         else
1387                 return TEST_RIGHT;
1388 }
1389
1390 int journal_file_move_to_entry_by_seqnum(
1391                 JournalFile *f,
1392                 uint64_t seqnum,
1393                 direction_t direction,
1394                 Object **ret,
1395                 uint64_t *offset) {
1396
1397         return generic_array_bisect(f,
1398                                     le64toh(f->header->entry_array_offset),
1399                                     le64toh(f->header->n_entries),
1400                                     seqnum,
1401                                     test_object_seqnum,
1402                                     direction,
1403                                     ret, offset, NULL);
1404 }
1405
1406 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1407         Object *o;
1408         int r;
1409
1410         assert(f);
1411         assert(p > 0);
1412
1413         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1414         if (r < 0)
1415                 return r;
1416
1417         if (le64toh(o->entry.realtime) == needle)
1418                 return TEST_FOUND;
1419         else if (le64toh(o->entry.realtime) < needle)
1420                 return TEST_LEFT;
1421         else
1422                 return TEST_RIGHT;
1423 }
1424
1425 int journal_file_move_to_entry_by_realtime(
1426                 JournalFile *f,
1427                 uint64_t realtime,
1428                 direction_t direction,
1429                 Object **ret,
1430                 uint64_t *offset) {
1431
1432         return generic_array_bisect(f,
1433                                     le64toh(f->header->entry_array_offset),
1434                                     le64toh(f->header->n_entries),
1435                                     realtime,
1436                                     test_object_realtime,
1437                                     direction,
1438                                     ret, offset, NULL);
1439 }
1440
1441 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1442         Object *o;
1443         int r;
1444
1445         assert(f);
1446         assert(p > 0);
1447
1448         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1449         if (r < 0)
1450                 return r;
1451
1452         if (le64toh(o->entry.monotonic) == needle)
1453                 return TEST_FOUND;
1454         else if (le64toh(o->entry.monotonic) < needle)
1455                 return TEST_LEFT;
1456         else
1457                 return TEST_RIGHT;
1458 }
1459
1460 int journal_file_move_to_entry_by_monotonic(
1461                 JournalFile *f,
1462                 sd_id128_t boot_id,
1463                 uint64_t monotonic,
1464                 direction_t direction,
1465                 Object **ret,
1466                 uint64_t *offset) {
1467
1468         char t[9+32+1] = "_BOOT_ID=";
1469         Object *o;
1470         int r;
1471
1472         assert(f);
1473
1474         sd_id128_to_string(boot_id, t + 9);
1475         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1476         if (r < 0)
1477                 return r;
1478         if (r == 0)
1479                 return -ENOENT;
1480
1481         return generic_array_bisect_plus_one(f,
1482                                              le64toh(o->data.entry_offset),
1483                                              le64toh(o->data.entry_array_offset),
1484                                              le64toh(o->data.n_entries),
1485                                              monotonic,
1486                                              test_object_monotonic,
1487                                              direction,
1488                                              ret, offset, NULL);
1489 }
1490
1491 int journal_file_next_entry(
1492                 JournalFile *f,
1493                 Object *o, uint64_t p,
1494                 direction_t direction,
1495                 Object **ret, uint64_t *offset) {
1496
1497         uint64_t i, n;
1498         int r;
1499
1500         assert(f);
1501         assert(p > 0 || !o);
1502
1503         n = le64toh(f->header->n_entries);
1504         if (n <= 0)
1505                 return 0;
1506
1507         if (!o)
1508                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1509         else {
1510                 if (o->object.type != OBJECT_ENTRY)
1511                         return -EINVAL;
1512
1513                 r = generic_array_bisect(f,
1514                                          le64toh(f->header->entry_array_offset),
1515                                          le64toh(f->header->n_entries),
1516                                          p,
1517                                          test_object_offset,
1518                                          DIRECTION_DOWN,
1519                                          NULL, NULL,
1520                                          &i);
1521                 if (r <= 0)
1522                         return r;
1523
1524                 if (direction == DIRECTION_DOWN) {
1525                         if (i >= n - 1)
1526                                 return 0;
1527
1528                         i++;
1529                 } else {
1530                         if (i <= 0)
1531                                 return 0;
1532
1533                         i--;
1534                 }
1535         }
1536
1537         /* And jump to it */
1538         return generic_array_get(f,
1539                                  le64toh(f->header->entry_array_offset),
1540                                  i,
1541                                  ret, offset);
1542 }
1543
1544 int journal_file_skip_entry(
1545                 JournalFile *f,
1546                 Object *o, uint64_t p,
1547                 int64_t skip,
1548                 Object **ret, uint64_t *offset) {
1549
1550         uint64_t i, n;
1551         int r;
1552
1553         assert(f);
1554         assert(o);
1555         assert(p > 0);
1556
1557         if (o->object.type != OBJECT_ENTRY)
1558                 return -EINVAL;
1559
1560         r = generic_array_bisect(f,
1561                                  le64toh(f->header->entry_array_offset),
1562                                  le64toh(f->header->n_entries),
1563                                  p,
1564                                  test_object_offset,
1565                                  DIRECTION_DOWN,
1566                                  NULL, NULL,
1567                                  &i);
1568         if (r <= 0)
1569                 return r;
1570
1571         /* Calculate new index */
1572         if (skip < 0) {
1573                 if ((uint64_t) -skip >= i)
1574                         i = 0;
1575                 else
1576                         i = i - (uint64_t) -skip;
1577         } else
1578                 i  += (uint64_t) skip;
1579
1580         n = le64toh(f->header->n_entries);
1581         if (n <= 0)
1582                 return -EBADMSG;
1583
1584         if (i >= n)
1585                 i = n-1;
1586
1587         return generic_array_get(f,
1588                                  le64toh(f->header->entry_array_offset),
1589                                  i,
1590                                  ret, offset);
1591 }
1592
1593 int journal_file_next_entry_for_data(
1594                 JournalFile *f,
1595                 Object *o, uint64_t p,
1596                 uint64_t data_offset,
1597                 direction_t direction,
1598                 Object **ret, uint64_t *offset) {
1599
1600         uint64_t n, i;
1601         int r;
1602         Object *d;
1603
1604         assert(f);
1605         assert(p > 0 || !o);
1606
1607         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1608         if (r < 0)
1609                 return r;
1610
1611         n = le64toh(d->data.n_entries);
1612         if (n <= 0)
1613                 return n;
1614
1615         if (!o)
1616                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1617         else {
1618                 if (o->object.type != OBJECT_ENTRY)
1619                         return -EINVAL;
1620
1621                 r = generic_array_bisect_plus_one(f,
1622                                                   le64toh(d->data.entry_offset),
1623                                                   le64toh(d->data.entry_array_offset),
1624                                                   le64toh(d->data.n_entries),
1625                                                   p,
1626                                                   test_object_offset,
1627                                                   DIRECTION_DOWN,
1628                                                   NULL, NULL,
1629                                                   &i);
1630
1631                 if (r <= 0)
1632                         return r;
1633
1634                 if (direction == DIRECTION_DOWN) {
1635                         if (i >= n - 1)
1636                                 return 0;
1637
1638                         i++;
1639                 } else {
1640                         if (i <= 0)
1641                                 return 0;
1642
1643                         i--;
1644                 }
1645
1646         }
1647
1648         return generic_array_get_plus_one(f,
1649                                           le64toh(d->data.entry_offset),
1650                                           le64toh(d->data.entry_array_offset),
1651                                           i,
1652                                           ret, offset);
1653 }
1654
1655 int journal_file_move_to_entry_by_offset_for_data(
1656                 JournalFile *f,
1657                 uint64_t data_offset,
1658                 uint64_t p,
1659                 direction_t direction,
1660                 Object **ret, uint64_t *offset) {
1661
1662         int r;
1663         Object *d;
1664
1665         assert(f);
1666
1667         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1668         if (r < 0)
1669                 return r;
1670
1671         return generic_array_bisect_plus_one(f,
1672                                              le64toh(d->data.entry_offset),
1673                                              le64toh(d->data.entry_array_offset),
1674                                              le64toh(d->data.n_entries),
1675                                              p,
1676                                              test_object_offset,
1677                                              direction,
1678                                              ret, offset, NULL);
1679 }
1680
1681 int journal_file_move_to_entry_by_monotonic_for_data(
1682                 JournalFile *f,
1683                 uint64_t data_offset,
1684                 sd_id128_t boot_id,
1685                 uint64_t monotonic,
1686                 direction_t direction,
1687                 Object **ret, uint64_t *offset) {
1688
1689         char t[9+32+1] = "_BOOT_ID=";
1690         Object *o, *d;
1691         int r;
1692         uint64_t b, z;
1693
1694         assert(f);
1695
1696         /* First, seek by time */
1697         sd_id128_to_string(boot_id, t + 9);
1698         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1699         if (r < 0)
1700                 return r;
1701         if (r == 0)
1702                 return -ENOENT;
1703
1704         r = generic_array_bisect_plus_one(f,
1705                                           le64toh(o->data.entry_offset),
1706                                           le64toh(o->data.entry_array_offset),
1707                                           le64toh(o->data.n_entries),
1708                                           monotonic,
1709                                           test_object_monotonic,
1710                                           direction,
1711                                           NULL, &z, NULL);
1712         if (r <= 0)
1713                 return r;
1714
1715         /* And now, continue seeking until we find an entry that
1716          * exists in both bisection arrays */
1717
1718         for (;;) {
1719                 Object *qo;
1720                 uint64_t p, q;
1721
1722                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1723                 if (r < 0)
1724                         return r;
1725
1726                 r = generic_array_bisect_plus_one(f,
1727                                                   le64toh(d->data.entry_offset),
1728                                                   le64toh(d->data.entry_array_offset),
1729                                                   le64toh(d->data.n_entries),
1730                                                   z,
1731                                                   test_object_offset,
1732                                                   direction,
1733                                                   NULL, &p, NULL);
1734                 if (r <= 0)
1735                         return r;
1736
1737                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1738                 if (r < 0)
1739                         return r;
1740
1741                 r = generic_array_bisect_plus_one(f,
1742                                                   le64toh(o->data.entry_offset),
1743                                                   le64toh(o->data.entry_array_offset),
1744                                                   le64toh(o->data.n_entries),
1745                                                   p,
1746                                                   test_object_offset,
1747                                                   direction,
1748                                                   &qo, &q, NULL);
1749
1750                 if (r <= 0)
1751                         return r;
1752
1753                 if (p == q) {
1754                         if (ret)
1755                                 *ret = qo;
1756                         if (offset)
1757                                 *offset = q;
1758
1759                         return 1;
1760                 }
1761
1762                 z = q;
1763         }
1764
1765         return 0;
1766 }
1767
1768 int journal_file_move_to_entry_by_seqnum_for_data(
1769                 JournalFile *f,
1770                 uint64_t data_offset,
1771                 uint64_t seqnum,
1772                 direction_t direction,
1773                 Object **ret, uint64_t *offset) {
1774
1775         Object *d;
1776         int r;
1777
1778         assert(f);
1779
1780         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1781         if (r < 0)
1782                 return r;
1783
1784         return generic_array_bisect_plus_one(f,
1785                                              le64toh(d->data.entry_offset),
1786                                              le64toh(d->data.entry_array_offset),
1787                                              le64toh(d->data.n_entries),
1788                                              seqnum,
1789                                              test_object_seqnum,
1790                                              direction,
1791                                              ret, offset, NULL);
1792 }
1793
1794 int journal_file_move_to_entry_by_realtime_for_data(
1795                 JournalFile *f,
1796                 uint64_t data_offset,
1797                 uint64_t realtime,
1798                 direction_t direction,
1799                 Object **ret, uint64_t *offset) {
1800
1801         Object *d;
1802         int r;
1803
1804         assert(f);
1805
1806         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1807         if (r < 0)
1808                 return r;
1809
1810         return generic_array_bisect_plus_one(f,
1811                                              le64toh(d->data.entry_offset),
1812                                              le64toh(d->data.entry_array_offset),
1813                                              le64toh(d->data.n_entries),
1814                                              realtime,
1815                                              test_object_realtime,
1816                                              direction,
1817                                              ret, offset, NULL);
1818 }
1819
1820 static void *fsprg_state(JournalFile *f) {
1821         uint64_t a, b;
1822         assert(f);
1823
1824         if (!f->authenticate)
1825                 return NULL;
1826
1827         a = le64toh(f->fsprg_header->header_size);
1828         b = le64toh(f->fsprg_header->state_size);
1829
1830         if (a + b > f->fsprg_size)
1831                 return NULL;
1832
1833         return (uint8_t*) f->fsprg_header + a;
1834 }
1835
1836 static uint64_t journal_file_tag_seqnum(JournalFile *f) {
1837         uint64_t r;
1838
1839         assert(f);
1840
1841         r = le64toh(f->header->n_tags) + 1;
1842         f->header->n_tags = htole64(r);
1843
1844         return r;
1845 }
1846
1847 int journal_file_append_tag(JournalFile *f) {
1848         Object *o;
1849         uint64_t p;
1850         int r;
1851
1852         assert(f);
1853
1854         if (!f->authenticate)
1855                 return 0;
1856
1857         if (!f->hmac_running)
1858                 return 0;
1859
1860         log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1861
1862         assert(f->hmac);
1863
1864         r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1865         if (r < 0)
1866                 return r;
1867
1868         o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
1869
1870         /* Add the tag object itself, so that we can protect its
1871          * header. This will exclude the actual hash value in it */
1872         r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
1873         if (r < 0)
1874                 return r;
1875
1876         /* Get the HMAC tag and store it in the object */
1877         memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1878         f->hmac_running = false;
1879
1880         return 0;
1881 }
1882
1883 static int journal_file_hmac_start(JournalFile *f) {
1884         uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1885
1886         assert(f);
1887
1888         if (!f->authenticate)
1889                 return 0;
1890
1891         if (f->hmac_running)
1892                 return 0;
1893
1894         /* Prepare HMAC for next cycle */
1895         gcry_md_reset(f->hmac);
1896         FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1897         gcry_md_setkey(f->hmac, key, sizeof(key));
1898
1899         f->hmac_running = true;
1900
1901         return 0;
1902 }
1903
1904 static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1905         uint64_t t;
1906
1907         assert(f);
1908         assert(epoch);
1909         assert(f->authenticate);
1910
1911         if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1912             le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1913                 return -ENOTSUP;
1914
1915         if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1916                 return -ESTALE;
1917
1918         t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1919         t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1920
1921         *epoch = t;
1922         return 0;
1923 }
1924
1925 static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1926         uint64_t goal, epoch;
1927         int r;
1928         assert(f);
1929
1930         if (!f->authenticate)
1931                 return 0;
1932
1933         r = journal_file_get_epoch(f, realtime, &goal);
1934         if (r < 0)
1935                 return r;
1936
1937         epoch = FSPRG_GetEpoch(fsprg_state(f));
1938         if (epoch > goal)
1939                 return -ESTALE;
1940
1941         return epoch != goal;
1942 }
1943
1944 static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
1945         uint64_t goal, epoch;
1946         int r;
1947
1948         assert(f);
1949
1950         if (!f->authenticate)
1951                 return 0;
1952
1953         r = journal_file_get_epoch(f, realtime, &goal);
1954         if (r < 0)
1955                 return r;
1956
1957         epoch = FSPRG_GetEpoch(fsprg_state(f));
1958         if (epoch < goal)
1959                 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
1960
1961         for (;;) {
1962                 if (epoch > goal)
1963                         return -ESTALE;
1964                 if (epoch == goal)
1965                         return 0;
1966
1967                 FSPRG_Evolve(fsprg_state(f));
1968                 epoch = FSPRG_GetEpoch(fsprg_state(f));
1969         }
1970 }
1971
1972 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
1973         int r;
1974
1975         assert(f);
1976
1977         if (!f->authenticate)
1978                 return 0;
1979
1980         r = journal_file_need_evolve(f, realtime);
1981         if (r <= 0)
1982                 return 0;
1983
1984         r = journal_file_append_tag(f);
1985         if (r < 0)
1986                 return r;
1987
1988         r = journal_file_evolve(f, realtime);
1989         if (r < 0)
1990                 return r;
1991
1992         r = journal_file_hmac_start(f);
1993         if (r < 0)
1994                 return r;
1995
1996         return 0;
1997 }
1998
1999 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2000         int r;
2001         Object *o;
2002
2003         assert(f);
2004
2005         if (!f->authenticate)
2006                 return 0;
2007
2008         r = journal_file_hmac_start(f);
2009         if (r < 0)
2010                 return r;
2011
2012         r = journal_file_move_to_object(f, type, p, &o);
2013         if (r < 0)
2014                 return r;
2015
2016         gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2017
2018         switch (o->object.type) {
2019
2020         case OBJECT_DATA:
2021                 /* All but: hash and payload are mutable */
2022                 gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
2023                 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2024                 break;
2025
2026         case OBJECT_ENTRY:
2027                 /* All */
2028                 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2029                 break;
2030
2031         case OBJECT_FIELD_HASH_TABLE:
2032         case OBJECT_DATA_HASH_TABLE:
2033         case OBJECT_ENTRY_ARRAY:
2034                 /* Nothing: everything is mutable */
2035                 break;
2036
2037         case OBJECT_TAG:
2038                 /* All but the tag itself */
2039                 gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
2040                 break;
2041         default:
2042                 return -EINVAL;
2043         }
2044
2045         return 0;
2046 }
2047
2048 static int journal_file_hmac_put_header(JournalFile *f) {
2049         int r;
2050
2051         assert(f);
2052
2053         if (!f->authenticate)
2054                 return 0;
2055
2056         r = journal_file_hmac_start(f);
2057         if (r < 0)
2058                 return r;
2059
2060         /* All but state+reserved, boot_id, arena_size,
2061          * tail_object_offset, n_objects, n_entries, tail_seqnum,
2062          * head_entry_realtime, tail_entry_realtime,
2063          * tail_entry_monotonic, n_data, n_fields, header_tag */
2064
2065         gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2066         gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2067         gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2068         gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2069         gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2070
2071         return 0;
2072 }
2073
2074 static int journal_file_load_fsprg(JournalFile *f) {
2075         int r, fd = -1;
2076         char *p = NULL;
2077         struct stat st;
2078         FSPRGHeader *m = NULL;
2079         sd_id128_t machine;
2080
2081         assert(f);
2082
2083         if (!f->authenticate)
2084                 return 0;
2085
2086         r = sd_id128_get_machine(&machine);
2087         if (r < 0)
2088                 return r;
2089
2090         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2091                      SD_ID128_FORMAT_VAL(machine)) < 0)
2092                 return -ENOMEM;
2093
2094         fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2095         if (fd < 0) {
2096                 log_error("Failed to open %s: %m", p);
2097                 r = -errno;
2098                 goto finish;
2099         }
2100
2101         if (fstat(fd, &st) < 0) {
2102                 r = -errno;
2103                 goto finish;
2104         }
2105
2106         if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2107                 r = -ENODATA;
2108                 goto finish;
2109         }
2110
2111         m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2112         if (m == MAP_FAILED) {
2113                 m = NULL;
2114                 r = -errno;
2115                 goto finish;
2116         }
2117
2118         if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2119                 r = -EBADMSG;
2120                 goto finish;
2121         }
2122
2123         if (m->incompatible_flags != 0) {
2124                 r = -EPROTONOSUPPORT;
2125                 goto finish;
2126         }
2127
2128         if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2129                 r = -EBADMSG;
2130                 goto finish;
2131         }
2132
2133         if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2134                 r = -EBADMSG;
2135                 goto finish;
2136         }
2137
2138         f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2139         if ((uint64_t) st.st_size < f->fsprg_size) {
2140                 r = -ENODATA;
2141                 goto finish;
2142         }
2143
2144         if (!sd_id128_equal(machine, m->machine_id)) {
2145                 r = -EHOSTDOWN;
2146                 goto finish;
2147         }
2148
2149         if (le64toh(m->fsprg_start_usec) <= 0 ||
2150             le64toh(m->fsprg_interval_usec) <= 0) {
2151                 r = -EBADMSG;
2152                 goto finish;
2153         }
2154
2155         f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2156         if (f->fsprg_header == MAP_FAILED) {
2157                 f->fsprg_header = NULL;
2158                 r = -errno;
2159                 goto finish;
2160         }
2161
2162         r = 0;
2163
2164 finish:
2165         if (m)
2166                 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2167
2168         if (fd >= 0)
2169                 close_nointr_nofail(fd);
2170
2171         free(p);
2172         return r;
2173 }
2174
2175 static int journal_file_setup_hmac(JournalFile *f) {
2176         gcry_error_t e;
2177
2178         if (!f->authenticate)
2179                 return 0;
2180
2181         e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2182         if (e != 0)
2183                 return -ENOTSUP;
2184
2185         return 0;
2186 }
2187
2188 static int journal_file_append_first_tag(JournalFile *f) {
2189         int r;
2190         uint64_t p;
2191
2192         if (!f->authenticate)
2193                 return 0;
2194
2195         log_debug("Calculating first tag...");
2196
2197         r = journal_file_hmac_put_header(f);
2198         if (r < 0)
2199                 return r;
2200
2201         p = le64toh(f->header->field_hash_table_offset);
2202         if (p < offsetof(Object, hash_table.items))
2203                 return -EINVAL;
2204         p -= offsetof(Object, hash_table.items);
2205
2206         r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2207         if (r < 0)
2208                 return r;
2209
2210         p = le64toh(f->header->data_hash_table_offset);
2211         if (p < offsetof(Object, hash_table.items))
2212                 return -EINVAL;
2213         p -= offsetof(Object, hash_table.items);
2214
2215         r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2216         if (r < 0)
2217                 return r;
2218
2219         r = journal_file_append_tag(f);
2220         if (r < 0)
2221                 return r;
2222
2223         return 0;
2224 }
2225
2226 void journal_file_dump(JournalFile *f) {
2227         Object *o;
2228         int r;
2229         uint64_t p;
2230
2231         assert(f);
2232
2233         journal_file_print_header(f);
2234
2235         p = le64toh(f->header->header_size);
2236         while (p != 0) {
2237                 r = journal_file_move_to_object(f, -1, p, &o);
2238                 if (r < 0)
2239                         goto fail;
2240
2241                 switch (o->object.type) {
2242
2243                 case OBJECT_UNUSED:
2244                         printf("Type: OBJECT_UNUSED\n");
2245                         break;
2246
2247                 case OBJECT_DATA:
2248                         printf("Type: OBJECT_DATA\n");
2249                         break;
2250
2251                 case OBJECT_ENTRY:
2252                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2253                                (unsigned long long) le64toh(o->entry.seqnum),
2254                                (unsigned long long) le64toh(o->entry.monotonic),
2255                                (unsigned long long) le64toh(o->entry.realtime));
2256                         break;
2257
2258                 case OBJECT_FIELD_HASH_TABLE:
2259                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2260                         break;
2261
2262                 case OBJECT_DATA_HASH_TABLE:
2263                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2264                         break;
2265
2266                 case OBJECT_ENTRY_ARRAY:
2267                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2268                         break;
2269
2270                 case OBJECT_TAG:
2271                         printf("Type: OBJECT_TAG %llu\n",
2272                                (unsigned long long) le64toh(o->tag.seqnum));
2273                         break;
2274                 }
2275
2276                 if (o->object.flags & OBJECT_COMPRESSED)
2277                         printf("Flags: COMPRESSED\n");
2278
2279                 if (p == le64toh(f->header->tail_object_offset))
2280                         p = 0;
2281                 else
2282                         p = p + ALIGN64(le64toh(o->object.size));
2283         }
2284
2285         return;
2286 fail:
2287         log_error("File corrupt");
2288 }
2289
2290 void journal_file_print_header(JournalFile *f) {
2291         char a[33], b[33], c[33];
2292         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2293
2294         assert(f);
2295
2296         printf("File Path: %s\n"
2297                "File ID: %s\n"
2298                "Machine ID: %s\n"
2299                "Boot ID: %s\n"
2300                "Sequential Number ID: %s\n"
2301                "State: %s\n"
2302                "Compatible Flags:%s%s\n"
2303                "Incompatible Flags:%s%s\n"
2304                "Header size: %llu\n"
2305                "Arena size: %llu\n"
2306                "Data Hash Table Size: %llu\n"
2307                "Field Hash Table Size: %llu\n"
2308                "Objects: %llu\n"
2309                "Entry Objects: %llu\n"
2310                "Rotate Suggested: %s\n"
2311                "Head Sequential Number: %llu\n"
2312                "Tail Sequential Number: %llu\n"
2313                "Head Realtime Timestamp: %s\n"
2314                "Tail Realtime Timestamp: %s\n",
2315                f->path,
2316                sd_id128_to_string(f->header->file_id, a),
2317                sd_id128_to_string(f->header->machine_id, b),
2318                sd_id128_to_string(f->header->boot_id, c),
2319                sd_id128_to_string(f->header->seqnum_id, c),
2320                f->header->state == STATE_OFFLINE ? "offline" :
2321                f->header->state == STATE_ONLINE ? "online" :
2322                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
2323                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2324                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
2325                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2326                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
2327                (unsigned long long) le64toh(f->header->header_size),
2328                (unsigned long long) le64toh(f->header->arena_size),
2329                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2330                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2331                (unsigned long long) le64toh(f->header->n_objects),
2332                (unsigned long long) le64toh(f->header->n_entries),
2333                yes_no(journal_file_rotate_suggested(f)),
2334                (unsigned long long) le64toh(f->header->head_seqnum),
2335                (unsigned long long) le64toh(f->header->tail_seqnum),
2336                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2337                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2338
2339         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2340                 printf("Data Objects: %llu\n"
2341                        "Data Hash Table Fill: %.1f%%\n",
2342                        (unsigned long long) le64toh(f->header->n_data),
2343                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2344
2345         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2346                 printf("Field Objects: %llu\n"
2347                        "Field Hash Table Fill: %.1f%%\n",
2348                        (unsigned long long) le64toh(f->header->n_fields),
2349                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2350 }
2351
2352 int journal_file_open(
2353                 const char *fname,
2354                 int flags,
2355                 mode_t mode,
2356                 bool compress,
2357                 bool authenticate,
2358                 JournalMetrics *metrics,
2359                 MMapCache *mmap_cache,
2360                 JournalFile *template,
2361                 JournalFile **ret) {
2362
2363         JournalFile *f;
2364         int r;
2365         bool newly_created = false;
2366
2367         assert(fname);
2368
2369         if ((flags & O_ACCMODE) != O_RDONLY &&
2370             (flags & O_ACCMODE) != O_RDWR)
2371                 return -EINVAL;
2372
2373         if (!endswith(fname, ".journal"))
2374                 return -EINVAL;
2375
2376         f = new0(JournalFile, 1);
2377         if (!f)
2378                 return -ENOMEM;
2379
2380         f->fd = -1;
2381         f->mode = mode;
2382
2383         f->flags = flags;
2384         f->prot = prot_from_flags(flags);
2385         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2386         f->compress = compress;
2387         f->authenticate = authenticate;
2388
2389         if (mmap_cache)
2390                 f->mmap = mmap_cache_ref(mmap_cache);
2391         else {
2392                 /* One context for each type, plus the zeroth catchall
2393                  * context. One fd for the file plus one for each type
2394                  * (which we need during verification */
2395                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2396                 if (!f->mmap) {
2397                         r = -ENOMEM;
2398                         goto fail;
2399                 }
2400         }
2401
2402         f->path = strdup(fname);
2403         if (!f->path) {
2404                 r = -ENOMEM;
2405                 goto fail;
2406         }
2407
2408         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2409         if (f->fd < 0) {
2410                 r = -errno;
2411                 goto fail;
2412         }
2413
2414         if (fstat(f->fd, &f->last_stat) < 0) {
2415                 r = -errno;
2416                 goto fail;
2417         }
2418
2419         if (f->last_stat.st_size == 0 && f->writable) {
2420                 newly_created = true;
2421
2422                 /* Try to load the FSPRG state, and if we can't, then
2423                  * just don't do authentication */
2424                 r = journal_file_load_fsprg(f);
2425                 if (r < 0)
2426                         f->authenticate = false;
2427
2428                 r = journal_file_init_header(f, template);
2429                 if (r < 0)
2430                         goto fail;
2431
2432                 if (fstat(f->fd, &f->last_stat) < 0) {
2433                         r = -errno;
2434                         goto fail;
2435                 }
2436         }
2437
2438         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2439                 r = -EIO;
2440                 goto fail;
2441         }
2442
2443         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2444         if (f->header == MAP_FAILED) {
2445                 f->header = NULL;
2446                 r = -errno;
2447                 goto fail;
2448         }
2449
2450         if (!newly_created) {
2451                 r = journal_file_verify_header(f);
2452                 if (r < 0)
2453                         goto fail;
2454         }
2455
2456         if (!newly_created && f->writable) {
2457                 r = journal_file_load_fsprg(f);
2458                 if (r < 0)
2459                         goto fail;
2460         }
2461
2462         if (f->writable) {
2463                 if (metrics) {
2464                         journal_default_metrics(metrics, f->fd);
2465                         f->metrics = *metrics;
2466                 } else if (template)
2467                         f->metrics = template->metrics;
2468
2469                 r = journal_file_refresh_header(f);
2470                 if (r < 0)
2471                         goto fail;
2472
2473                 r = journal_file_setup_hmac(f);
2474                 if (r < 0)
2475                         goto fail;
2476         }
2477
2478         if (newly_created) {
2479                 r = journal_file_setup_field_hash_table(f);
2480                 if (r < 0)
2481                         goto fail;
2482
2483                 r = journal_file_setup_data_hash_table(f);
2484                 if (r < 0)
2485                         goto fail;
2486
2487                 r = journal_file_append_first_tag(f);
2488                 if (r < 0)
2489                         goto fail;
2490         }
2491
2492         r = journal_file_map_field_hash_table(f);
2493         if (r < 0)
2494                 goto fail;
2495
2496         r = journal_file_map_data_hash_table(f);
2497         if (r < 0)
2498                 goto fail;
2499
2500         if (ret)
2501                 *ret = f;
2502
2503         return 0;
2504
2505 fail:
2506         journal_file_close(f);
2507
2508         return r;
2509 }
2510
2511 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2512         char *p;
2513         size_t l;
2514         JournalFile *old_file, *new_file = NULL;
2515         int r;
2516
2517         assert(f);
2518         assert(*f);
2519
2520         old_file = *f;
2521
2522         if (!old_file->writable)
2523                 return -EINVAL;
2524
2525         if (!endswith(old_file->path, ".journal"))
2526                 return -EINVAL;
2527
2528         l = strlen(old_file->path);
2529
2530         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2531         if (!p)
2532                 return -ENOMEM;
2533
2534         memcpy(p, old_file->path, l - 8);
2535         p[l-8] = '@';
2536         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2537         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2538                  "-%016llx-%016llx.journal",
2539                  (unsigned long long) le64toh((*f)->header->tail_seqnum),
2540                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2541
2542         r = rename(old_file->path, p);
2543         free(p);
2544
2545         if (r < 0)
2546                 return -errno;
2547
2548         old_file->header->state = STATE_ARCHIVED;
2549
2550         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
2551         journal_file_close(old_file);
2552
2553         *f = new_file;
2554         return r;
2555 }
2556
2557 int journal_file_open_reliably(
2558                 const char *fname,
2559                 int flags,
2560                 mode_t mode,
2561                 bool compress,
2562                 bool authenticate,
2563                 JournalMetrics *metrics,
2564                 MMapCache *mmap,
2565                 JournalFile *template,
2566                 JournalFile **ret) {
2567
2568         int r;
2569         size_t l;
2570         char *p;
2571
2572         r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2573         if (r != -EBADMSG && /* corrupted */
2574             r != -ENODATA && /* truncated */
2575             r != -EHOSTDOWN && /* other machine */
2576             r != -EPROTONOSUPPORT && /* incompatible feature */
2577             r != -EBUSY && /* unclean shutdown */
2578             r != -ESHUTDOWN /* already archived */)
2579                 return r;
2580
2581         if ((flags & O_ACCMODE) == O_RDONLY)
2582                 return r;
2583
2584         if (!(flags & O_CREAT))
2585                 return r;
2586
2587         if (!endswith(fname, ".journal"))
2588                 return r;
2589
2590         /* The file is corrupted. Rotate it away and try it again (but only once) */
2591
2592         l = strlen(fname);
2593         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2594                      (int) (l-8), fname,
2595                      (unsigned long long) now(CLOCK_REALTIME),
2596                      random_ull()) < 0)
2597                 return -ENOMEM;
2598
2599         r = rename(fname, p);
2600         free(p);
2601         if (r < 0)
2602                 return -errno;
2603
2604         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2605
2606         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2607 }
2608
2609 struct vacuum_info {
2610         off_t usage;
2611         char *filename;
2612
2613         uint64_t realtime;
2614         sd_id128_t seqnum_id;
2615         uint64_t seqnum;
2616
2617         bool have_seqnum;
2618 };
2619
2620 static int vacuum_compare(const void *_a, const void *_b) {
2621         const struct vacuum_info *a, *b;
2622
2623         a = _a;
2624         b = _b;
2625
2626         if (a->have_seqnum && b->have_seqnum &&
2627             sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
2628                 if (a->seqnum < b->seqnum)
2629                         return -1;
2630                 else if (a->seqnum > b->seqnum)
2631                         return 1;
2632                 else
2633                         return 0;
2634         }
2635
2636         if (a->realtime < b->realtime)
2637                 return -1;
2638         else if (a->realtime > b->realtime)
2639                 return 1;
2640         else if (a->have_seqnum && b->have_seqnum)
2641                 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
2642         else
2643                 return strcmp(a->filename, b->filename);
2644 }
2645
2646 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2647         DIR *d;
2648         int r = 0;
2649         struct vacuum_info *list = NULL;
2650         unsigned n_list = 0, n_allocated = 0, i;
2651         uint64_t sum = 0;
2652
2653         assert(directory);
2654
2655         if (max_use <= 0)
2656                 return 0;
2657
2658         d = opendir(directory);
2659         if (!d)
2660                 return -errno;
2661
2662         for (;;) {
2663                 int k;
2664                 struct dirent buf, *de;
2665                 size_t q;
2666                 struct stat st;
2667                 char *p;
2668                 unsigned long long seqnum = 0, realtime;
2669                 sd_id128_t seqnum_id;
2670                 bool have_seqnum;
2671
2672                 k = readdir_r(d, &buf, &de);
2673                 if (k != 0) {
2674                         r = -k;
2675                         goto finish;
2676                 }
2677
2678                 if (!de)
2679                         break;
2680
2681                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2682                         continue;
2683
2684                 if (!S_ISREG(st.st_mode))
2685                         continue;
2686
2687                 q = strlen(de->d_name);
2688
2689                 if (endswith(de->d_name, ".journal")) {
2690
2691                         /* Vacuum archived files */
2692
2693                         if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2694                                 continue;
2695
2696                         if (de->d_name[q-8-16-1] != '-' ||
2697                             de->d_name[q-8-16-1-16-1] != '-' ||
2698                             de->d_name[q-8-16-1-16-1-32-1] != '@')
2699                                 continue;
2700
2701                         p = strdup(de->d_name);
2702                         if (!p) {
2703                                 r = -ENOMEM;
2704                                 goto finish;
2705                         }
2706
2707                         de->d_name[q-8-16-1-16-1] = 0;
2708                         if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2709                                 free(p);
2710                                 continue;
2711                         }
2712
2713                         if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2714                                 free(p);
2715                                 continue;
2716                         }
2717
2718                         have_seqnum = true;
2719
2720                 } else if (endswith(de->d_name, ".journal~")) {
2721                         unsigned long long tmp;
2722
2723                         /* Vacuum corrupted files */
2724
2725                         if (q < 1 + 16 + 1 + 16 + 8 + 1)
2726                                 continue;
2727
2728                         if (de->d_name[q-1-8-16-1] != '-' ||
2729                             de->d_name[q-1-8-16-1-16-1] != '@')
2730                                 continue;
2731
2732                         p = strdup(de->d_name);
2733                         if (!p) {
2734                                 r = -ENOMEM;
2735                                 goto finish;
2736                         }
2737
2738                         if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2739                                 free(p);
2740                                 continue;
2741                         }
2742
2743                         have_seqnum = false;
2744                 } else
2745                         continue;
2746
2747                 if (n_list >= n_allocated) {
2748                         struct vacuum_info *j;
2749
2750                         n_allocated = MAX(n_allocated * 2U, 8U);
2751                         j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2752                         if (!j) {
2753                                 free(p);
2754                                 r = -ENOMEM;
2755                                 goto finish;
2756                         }
2757
2758                         list = j;
2759                 }
2760
2761                 list[n_list].filename = p;
2762                 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2763                 list[n_list].seqnum = seqnum;
2764                 list[n_list].realtime = realtime;
2765                 list[n_list].seqnum_id = seqnum_id;
2766                 list[n_list].have_seqnum = have_seqnum;
2767
2768                 sum += list[n_list].usage;
2769
2770                 n_list ++;
2771         }
2772
2773         if (n_list > 0)
2774                 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2775
2776         for(i = 0; i < n_list; i++) {
2777                 struct statvfs ss;
2778
2779                 if (fstatvfs(dirfd(d), &ss) < 0) {
2780                         r = -errno;
2781                         goto finish;
2782                 }
2783
2784                 if (sum <= max_use &&
2785                     (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2786                         break;
2787
2788                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2789                         log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2790                         sum -= list[i].usage;
2791                 } else if (errno != ENOENT)
2792                         log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2793         }
2794
2795 finish:
2796         for (i = 0; i < n_list; i++)
2797                 free(list[i].filename);
2798
2799         free(list);
2800
2801         if (d)
2802                 closedir(d);
2803
2804         return r;
2805 }
2806
2807 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2808         uint64_t i, n;
2809         uint64_t q, xor_hash = 0;
2810         int r;
2811         EntryItem *items;
2812         dual_timestamp ts;
2813
2814         assert(from);
2815         assert(to);
2816         assert(o);
2817         assert(p);
2818
2819         if (!to->writable)
2820                 return -EPERM;
2821
2822         ts.monotonic = le64toh(o->entry.monotonic);
2823         ts.realtime = le64toh(o->entry.realtime);
2824
2825         if (to->tail_entry_monotonic_valid &&
2826             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2827                 return -EINVAL;
2828
2829         n = journal_file_entry_n_items(o);
2830         items = alloca(sizeof(EntryItem) * n);
2831
2832         for (i = 0; i < n; i++) {
2833                 uint64_t l, h;
2834                 le64_t le_hash;
2835                 size_t t;
2836                 void *data;
2837                 Object *u;
2838
2839                 q = le64toh(o->entry.items[i].object_offset);
2840                 le_hash = o->entry.items[i].hash;
2841
2842                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2843                 if (r < 0)
2844                         return r;
2845
2846                 if (le_hash != o->data.hash)
2847                         return -EBADMSG;
2848
2849                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2850                 t = (size_t) l;
2851
2852                 /* We hit the limit on 32bit machines */
2853                 if ((uint64_t) t != l)
2854                         return -E2BIG;
2855
2856                 if (o->object.flags & OBJECT_COMPRESSED) {
2857 #ifdef HAVE_XZ
2858                         uint64_t rsize;
2859
2860                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2861                                 return -EBADMSG;
2862
2863                         data = from->compress_buffer;
2864                         l = rsize;
2865 #else
2866                         return -EPROTONOSUPPORT;
2867 #endif
2868                 } else
2869                         data = o->data.payload;
2870
2871                 r = journal_file_append_data(to, data, l, &u, &h);
2872                 if (r < 0)
2873                         return r;
2874
2875                 xor_hash ^= le64toh(u->data.hash);
2876                 items[i].object_offset = htole64(h);
2877                 items[i].hash = u->data.hash;
2878
2879                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2880                 if (r < 0)
2881                         return r;
2882         }
2883
2884         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2885 }
2886
2887 void journal_default_metrics(JournalMetrics *m, int fd) {
2888         uint64_t fs_size = 0;
2889         struct statvfs ss;
2890         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2891
2892         assert(m);
2893         assert(fd >= 0);
2894
2895         if (fstatvfs(fd, &ss) >= 0)
2896                 fs_size = ss.f_frsize * ss.f_blocks;
2897
2898         if (m->max_use == (uint64_t) -1) {
2899
2900                 if (fs_size > 0) {
2901                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2902
2903                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2904                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2905
2906                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2907                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2908                 } else
2909                         m->max_use = DEFAULT_MAX_USE_LOWER;
2910         } else {
2911                 m->max_use = PAGE_ALIGN(m->max_use);
2912
2913                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2914                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2915         }
2916
2917         if (m->max_size == (uint64_t) -1) {
2918                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2919
2920                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2921                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2922         } else
2923                 m->max_size = PAGE_ALIGN(m->max_size);
2924
2925         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2926                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2927
2928         if (m->max_size*2 > m->max_use)
2929                 m->max_use = m->max_size*2;
2930
2931         if (m->min_size == (uint64_t) -1)
2932                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2933         else {
2934                 m->min_size = PAGE_ALIGN(m->min_size);
2935
2936                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2937                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2938
2939                 if (m->min_size > m->max_size)
2940                         m->max_size = m->min_size;
2941         }
2942
2943         if (m->keep_free == (uint64_t) -1) {
2944
2945                 if (fs_size > 0) {
2946                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2947
2948                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2949                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2950
2951                 } else
2952                         m->keep_free = DEFAULT_KEEP_FREE;
2953         }
2954
2955         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2956                  format_bytes(a, sizeof(a), m->max_use),
2957                  format_bytes(b, sizeof(b), m->max_size),
2958                  format_bytes(c, sizeof(c), m->min_size),
2959                  format_bytes(d, sizeof(d), m->keep_free));
2960 }
2961
2962 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2963         assert(f);
2964         assert(from || to);
2965
2966         if (from) {
2967                 if (f->header->head_entry_realtime == 0)
2968                         return -ENOENT;
2969
2970                 *from = le64toh(f->header->head_entry_realtime);
2971         }
2972
2973         if (to) {
2974                 if (f->header->tail_entry_realtime == 0)
2975                         return -ENOENT;
2976
2977                 *to = le64toh(f->header->tail_entry_realtime);
2978         }
2979
2980         return 1;
2981 }
2982
2983 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2984         char t[9+32+1] = "_BOOT_ID=";
2985         Object *o;
2986         uint64_t p;
2987         int r;
2988
2989         assert(f);
2990         assert(from || to);
2991
2992         sd_id128_to_string(boot_id, t + 9);
2993
2994         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2995         if (r <= 0)
2996                 return r;
2997
2998         if (le64toh(o->data.n_entries) <= 0)
2999                 return 0;
3000
3001         if (from) {
3002                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3003                 if (r < 0)
3004                         return r;
3005
3006                 *from = le64toh(o->entry.monotonic);
3007         }
3008
3009         if (to) {
3010                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3011                 if (r < 0)
3012                         return r;
3013
3014                 r = generic_array_get_plus_one(f,
3015                                                le64toh(o->data.entry_offset),
3016                                                le64toh(o->data.entry_array_offset),
3017                                                le64toh(o->data.n_entries)-1,
3018                                                &o, NULL);
3019                 if (r <= 0)
3020                         return r;
3021
3022                 *to = le64toh(o->entry.monotonic);
3023         }
3024
3025         return 1;
3026 }
3027
3028 bool journal_file_rotate_suggested(JournalFile *f) {
3029         assert(f);
3030
3031         /* If we gained new header fields we gained new features,
3032          * hence suggest a rotation */
3033         if (le64toh(f->header->header_size) < sizeof(Header)) {
3034                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
3035                 return true;
3036         }
3037
3038         /* Let's check if the hash tables grew over a certain fill
3039          * level (75%, borrowing this value from Java's hash table
3040          * implementation), and if so suggest a rotation. To calculate
3041          * the fill level we need the n_data field, which only exists
3042          * in newer versions. */
3043
3044         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
3045                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3046                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3047                                   f->path,
3048                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3049                                   (unsigned long long) le64toh(f->header->n_data),
3050                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3051                                   (unsigned long long) (f->last_stat.st_size),
3052                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
3053                         return true;
3054                 }
3055
3056         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
3057                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3058                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3059                                   f->path,
3060                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3061                                   (unsigned long long) le64toh(f->header->n_fields),
3062                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
3063                         return true;
3064                 }
3065
3066         return false;
3067 }