chiark / gitweb /
journal: consistently use OBJECT_<type> names instead of numbers
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/xattr.h>
30
31 #include "journal-def.h"
32 #include "journal-file.h"
33 #include "journal-authenticate.h"
34 #include "lookup3.h"
35 #include "compress.h"
36 #include "fsprg.h"
37
/* Default sizes, in bytes, of the data and field hash tables
 * (2047 and 333 HashItem slots respectively) */
38 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
39 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
40
/* Data payloads of at least this many bytes are candidates for compression */
41 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
42
43 /* This is the minimum journal file size */
44 #define JOURNAL_FILE_SIZE_MIN (4ULL*1024ULL*1024ULL)           /* 4 MiB */
45
46 /* These are the lower and upper bounds if we deduce the max_use value
47  * from the file system size */
48 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
49 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
50
51 /* This is the upper bound if we deduce max_size from max_use */
52 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
53
54 /* This is the upper bound if we deduce the keep_free value from the
55  * file system size */
56 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
57
58 /* This is the keep_free value when we can't determine the file system
59  * size */
60 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */
61
62 /* n_data was the first entry we added after the initial file format design */
63 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
64
65 /* How many entries to keep in the entry array chain cache at max */
66 #define CHAIN_CACHE_MAX 20
67
68 /* How much to increase the journal file size at once each time we allocate something new. */
69 #define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)              /* 8 MiB */
70
71 static int journal_file_set_online(JournalFile *f) {
72         assert(f);
73
74         if (!f->writable)
75                 return -EPERM;
76
77         if (!(f->fd >= 0 && f->header))
78                 return -EINVAL;
79
80         switch(f->header->state) {
81                 case STATE_ONLINE:
82                         return 0;
83
84                 case STATE_OFFLINE:
85                         f->header->state = STATE_ONLINE;
86                         fsync(f->fd);
87                         return 0;
88
89                 default:
90                         return -EINVAL;
91         }
92 }
93
94 int journal_file_set_offline(JournalFile *f) {
95         assert(f);
96
97         if (!f->writable)
98                 return -EPERM;
99
100         if (!(f->fd >= 0 && f->header))
101                 return -EINVAL;
102
103         if (f->header->state != STATE_ONLINE)
104                 return 0;
105
106         fsync(f->fd);
107
108         f->header->state = STATE_OFFLINE;
109
110         fsync(f->fd);
111
112         return 0;
113 }
114
115 void journal_file_close(JournalFile *f) {
116         assert(f);
117
118 #ifdef HAVE_GCRYPT
119         /* Write the final tag */
120         if (f->seal && f->writable)
121                 journal_file_append_tag(f);
122 #endif
123
124         /* Sync everything to disk, before we mark the file offline */
125         if (f->mmap && f->fd >= 0)
126                 mmap_cache_close_fd(f->mmap, f->fd);
127
128         journal_file_set_offline(f);
129
130         if (f->header)
131                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
132
133         safe_close(f->fd);
134         free(f->path);
135
136         if (f->mmap)
137                 mmap_cache_unref(f->mmap);
138
139         ordered_hashmap_free_free(f->chain_cache);
140
141 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
142         free(f->compress_buffer);
143 #endif
144
145 #ifdef HAVE_GCRYPT
146         if (f->fss_file)
147                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
148         else if (f->fsprg_state)
149                 free(f->fsprg_state);
150
151         free(f->fsprg_seed);
152
153         if (f->hmac)
154                 gcry_md_close(f->hmac);
155 #endif
156
157         free(f);
158 }
159
160 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
161         Header h = {};
162         ssize_t k;
163         int r;
164
165         assert(f);
166
167         memcpy(h.signature, HEADER_SIGNATURE, 8);
168         h.header_size = htole64(ALIGN64(sizeof(h)));
169
170         h.incompatible_flags |= htole32(
171                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
172                 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
173
174         h.compatible_flags = htole32(
175                 f->seal * HEADER_COMPATIBLE_SEALED);
176
177         r = sd_id128_randomize(&h.file_id);
178         if (r < 0)
179                 return r;
180
181         if (template) {
182                 h.seqnum_id = template->header->seqnum_id;
183                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
184         } else
185                 h.seqnum_id = h.file_id;
186
187         k = pwrite(f->fd, &h, sizeof(h), 0);
188         if (k < 0)
189                 return -errno;
190
191         if (k != sizeof(h))
192                 return -EIO;
193
194         return 0;
195 }
196
197 static int journal_file_refresh_header(JournalFile *f) {
198         int r;
199         sd_id128_t boot_id;
200
201         assert(f);
202
203         r = sd_id128_get_machine(&f->header->machine_id);
204         if (r < 0)
205                 return r;
206
207         r = sd_id128_get_boot(&boot_id);
208         if (r < 0)
209                 return r;
210
211         if (sd_id128_equal(boot_id, f->header->boot_id))
212                 f->tail_entry_monotonic_valid = true;
213
214         f->header->boot_id = boot_id;
215
216         journal_file_set_online(f);
217
218         /* Sync the online state to disk */
219         fsync(f->fd);
220
221         return 0;
222 }
223
224 static int journal_file_verify_header(JournalFile *f) {
225         uint32_t flags;
226
227         assert(f);
228
229         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
230                 return -EBADMSG;
231
232         /* In both read and write mode we refuse to open files with
233          * incompatible flags we don't know */
234         flags = le32toh(f->header->incompatible_flags);
235         if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
236                 if (flags & ~HEADER_INCOMPATIBLE_ANY)
237                         log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
238                                   f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
239                 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
240                 if (flags)
241                         log_debug("Journal file %s uses incompatible flags %"PRIx32
242                                   " disabled at compilation time.", f->path, flags);
243                 return -EPROTONOSUPPORT;
244         }
245
246         /* When open for writing we refuse to open files with
247          * compatible flags, too */
248         flags = le32toh(f->header->compatible_flags);
249         if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
250                 if (flags & ~HEADER_COMPATIBLE_ANY)
251                         log_debug("Journal file %s has unknown compatible flags %"PRIx32,
252                                   f->path, flags & ~HEADER_COMPATIBLE_ANY);
253                 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
254                 if (flags)
255                         log_debug("Journal file %s uses compatible flags %"PRIx32
256                                   " disabled at compilation time.", f->path, flags);
257                 return -EPROTONOSUPPORT;
258         }
259
260         if (f->header->state >= _STATE_MAX)
261                 return -EBADMSG;
262
263         /* The first addition was n_data, so check that we are at least this large */
264         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
265                 return -EBADMSG;
266
267         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
268                 return -EBADMSG;
269
270         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
271                 return -ENODATA;
272
273         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
274                 return -ENODATA;
275
276         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
277             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
278             !VALID64(le64toh(f->header->tail_object_offset)) ||
279             !VALID64(le64toh(f->header->entry_array_offset)))
280                 return -ENODATA;
281
282         if (f->writable) {
283                 uint8_t state;
284                 sd_id128_t machine_id;
285                 int r;
286
287                 r = sd_id128_get_machine(&machine_id);
288                 if (r < 0)
289                         return r;
290
291                 if (!sd_id128_equal(machine_id, f->header->machine_id))
292                         return -EHOSTDOWN;
293
294                 state = f->header->state;
295
296                 if (state == STATE_ONLINE) {
297                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
298                         return -EBUSY;
299                 } else if (state == STATE_ARCHIVED)
300                         return -ESHUTDOWN;
301                 else if (state != STATE_OFFLINE) {
302                         log_debug("Journal file %s has unknown state %u.", f->path, state);
303                         return -EBUSY;
304                 }
305         }
306
307         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
308         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
309
310         f->seal = JOURNAL_HEADER_SEALED(f->header);
311
312         return 0;
313 }
314
315 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
316         uint64_t old_size, new_size;
317         int r;
318
319         assert(f);
320
321         /* We assume that this file is not sparse, and we know that
322          * for sure, since we always call posix_fallocate()
323          * ourselves */
324
325         old_size =
326                 le64toh(f->header->header_size) +
327                 le64toh(f->header->arena_size);
328
329         new_size = PAGE_ALIGN(offset + size);
330         if (new_size < le64toh(f->header->header_size))
331                 new_size = le64toh(f->header->header_size);
332
333         if (new_size <= old_size)
334                 return 0;
335
336         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
337                 return -E2BIG;
338
339         if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
340                 struct statvfs svfs;
341
342                 if (fstatvfs(f->fd, &svfs) >= 0) {
343                         uint64_t available;
344
345                         available = svfs.f_bfree * svfs.f_bsize;
346
347                         if (available >= f->metrics.keep_free)
348                                 available -= f->metrics.keep_free;
349                         else
350                                 available = 0;
351
352                         if (new_size - old_size > available)
353                                 return -E2BIG;
354                 }
355         }
356
357         /* Increase by larger blocks at once */
358         new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
359         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
360                 new_size = f->metrics.max_size;
361
362         /* Note that the glibc fallocate() fallback is very
363            inefficient, hence we try to minimize the allocation area
364            as we can. */
365         r = posix_fallocate(f->fd, old_size, new_size - old_size);
366         if (r != 0)
367                 return -r;
368
369         if (fstat(f->fd, &f->last_stat) < 0)
370                 return -errno;
371
372         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
373
374         return 0;
375 }
376
377 static unsigned type_to_context(int type) {
378         /* One context for each type, plus one catch-all for the rest */
379         return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
380 }
381
382 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
383         assert(f);
384         assert(ret);
385
386         if (size <= 0)
387                 return -EINVAL;
388
389         /* Avoid SIGBUS on invalid accesses */
390         if (offset + size > (uint64_t) f->last_stat.st_size) {
391                 /* Hmm, out of range? Let's refresh the fstat() data
392                  * first, before we trust that check. */
393
394                 if (fstat(f->fd, &f->last_stat) < 0 ||
395                     offset + size > (uint64_t) f->last_stat.st_size)
396                         return -EADDRNOTAVAIL;
397         }
398
399         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
400 }
401
402 static uint64_t minimum_header_size(Object *o) {
403
404         static const uint64_t table[] = {
405                 [OBJECT_DATA] = sizeof(DataObject),
406                 [OBJECT_FIELD] = sizeof(FieldObject),
407                 [OBJECT_ENTRY] = sizeof(EntryObject),
408                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
409                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
410                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
411                 [OBJECT_TAG] = sizeof(TagObject),
412         };
413
414         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
415                 return sizeof(ObjectHeader);
416
417         return table[o->object.type];
418 }
419
420 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
421         int r;
422         void *t;
423         Object *o;
424         uint64_t s;
425
426         assert(f);
427         assert(ret);
428
429         /* Objects may only be located at multiple of 64 bit */
430         if (!VALID64(offset))
431                 return -EFAULT;
432
433         r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
434         if (r < 0)
435                 return r;
436
437         o = (Object*) t;
438         s = le64toh(o->object.size);
439
440         if (s < sizeof(ObjectHeader))
441                 return -EBADMSG;
442
443         if (o->object.type <= OBJECT_UNUSED)
444                 return -EBADMSG;
445
446         if (s < minimum_header_size(o))
447                 return -EBADMSG;
448
449         if (type > OBJECT_UNUSED && o->object.type != type)
450                 return -EBADMSG;
451
452         if (s > sizeof(ObjectHeader)) {
453                 r = journal_file_move_to(f, type_to_context(type), false, offset, s, &t);
454                 if (r < 0)
455                         return r;
456
457                 o = (Object*) t;
458         }
459
460         *ret = o;
461         return 0;
462 }
463
464 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
465         uint64_t r;
466
467         assert(f);
468
469         r = le64toh(f->header->tail_entry_seqnum) + 1;
470
471         if (seqnum) {
472                 /* If an external seqnum counter was passed, we update
473                  * both the local and the external one, and set it to
474                  * the maximum of both */
475
476                 if (*seqnum + 1 > r)
477                         r = *seqnum + 1;
478
479                 *seqnum = r;
480         }
481
482         f->header->tail_entry_seqnum = htole64(r);
483
484         if (f->header->head_entry_seqnum == 0)
485                 f->header->head_entry_seqnum = htole64(r);
486
487         return r;
488 }
489
490 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
491         int r;
492         uint64_t p;
493         Object *tail, *o;
494         void *t;
495
496         assert(f);
497         assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
498         assert(size >= sizeof(ObjectHeader));
499         assert(offset);
500         assert(ret);
501
502         r = journal_file_set_online(f);
503         if (r < 0)
504                 return r;
505
506         p = le64toh(f->header->tail_object_offset);
507         if (p == 0)
508                 p = le64toh(f->header->header_size);
509         else {
510                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
511                 if (r < 0)
512                         return r;
513
514                 p += ALIGN64(le64toh(tail->object.size));
515         }
516
517         r = journal_file_allocate(f, p, size);
518         if (r < 0)
519                 return r;
520
521         r = journal_file_move_to(f, type, false, p, size, &t);
522         if (r < 0)
523                 return r;
524
525         o = (Object*) t;
526
527         zero(o->object);
528         o->object.type = type;
529         o->object.size = htole64(size);
530
531         f->header->tail_object_offset = htole64(p);
532         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
533
534         *ret = o;
535         *offset = p;
536
537         return 0;
538 }
539
540 static int journal_file_setup_data_hash_table(JournalFile *f) {
541         uint64_t s, p;
542         Object *o;
543         int r;
544
545         assert(f);
546
547         /* We estimate that we need 1 hash table entry per 768 of
548            journal file and we want to make sure we never get beyond
549            75% fill level. Calculate the hash table size for the
550            maximum file size based on these metrics. */
551
552         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
553         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
554                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
555
556         log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
557
558         r = journal_file_append_object(f,
559                                        OBJECT_DATA_HASH_TABLE,
560                                        offsetof(Object, hash_table.items) + s,
561                                        &o, &p);
562         if (r < 0)
563                 return r;
564
565         memzero(o->hash_table.items, s);
566
567         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
568         f->header->data_hash_table_size = htole64(s);
569
570         return 0;
571 }
572
573 static int journal_file_setup_field_hash_table(JournalFile *f) {
574         uint64_t s, p;
575         Object *o;
576         int r;
577
578         assert(f);
579
580         /* We use a fixed size hash table for the fields as this
581          * number should grow very slowly only */
582
583         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
584         r = journal_file_append_object(f,
585                                        OBJECT_FIELD_HASH_TABLE,
586                                        offsetof(Object, hash_table.items) + s,
587                                        &o, &p);
588         if (r < 0)
589                 return r;
590
591         memzero(o->hash_table.items, s);
592
593         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
594         f->header->field_hash_table_size = htole64(s);
595
596         return 0;
597 }
598
599 static int journal_file_map_data_hash_table(JournalFile *f) {
600         uint64_t s, p;
601         void *t;
602         int r;
603
604         assert(f);
605
606         p = le64toh(f->header->data_hash_table_offset);
607         s = le64toh(f->header->data_hash_table_size);
608
609         r = journal_file_move_to(f,
610                                  OBJECT_DATA_HASH_TABLE,
611                                  true,
612                                  p, s,
613                                  &t);
614         if (r < 0)
615                 return r;
616
617         f->data_hash_table = t;
618         return 0;
619 }
620
621 static int journal_file_map_field_hash_table(JournalFile *f) {
622         uint64_t s, p;
623         void *t;
624         int r;
625
626         assert(f);
627
628         p = le64toh(f->header->field_hash_table_offset);
629         s = le64toh(f->header->field_hash_table_size);
630
631         r = journal_file_move_to(f,
632                                  OBJECT_FIELD_HASH_TABLE,
633                                  true,
634                                  p, s,
635                                  &t);
636         if (r < 0)
637                 return r;
638
639         f->field_hash_table = t;
640         return 0;
641 }
642
643 static int journal_file_link_field(
644                 JournalFile *f,
645                 Object *o,
646                 uint64_t offset,
647                 uint64_t hash) {
648
649         uint64_t p, h;
650         int r;
651
652         assert(f);
653         assert(o);
654         assert(offset > 0);
655
656         if (o->object.type != OBJECT_FIELD)
657                 return -EINVAL;
658
659         /* This might alter the window we are looking at */
660
661         o->field.next_hash_offset = o->field.head_data_offset = 0;
662
663         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
664         p = le64toh(f->field_hash_table[h].tail_hash_offset);
665         if (p == 0)
666                 f->field_hash_table[h].head_hash_offset = htole64(offset);
667         else {
668                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
669                 if (r < 0)
670                         return r;
671
672                 o->field.next_hash_offset = htole64(offset);
673         }
674
675         f->field_hash_table[h].tail_hash_offset = htole64(offset);
676
677         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
678                 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
679
680         return 0;
681 }
682
683 static int journal_file_link_data(
684                 JournalFile *f,
685                 Object *o,
686                 uint64_t offset,
687                 uint64_t hash) {
688
689         uint64_t p, h;
690         int r;
691
692         assert(f);
693         assert(o);
694         assert(offset > 0);
695
696         if (o->object.type != OBJECT_DATA)
697                 return -EINVAL;
698
699         /* This might alter the window we are looking at */
700
701         o->data.next_hash_offset = o->data.next_field_offset = 0;
702         o->data.entry_offset = o->data.entry_array_offset = 0;
703         o->data.n_entries = 0;
704
705         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
706         p = le64toh(f->data_hash_table[h].tail_hash_offset);
707         if (p == 0)
708                 /* Only entry in the hash table is easy */
709                 f->data_hash_table[h].head_hash_offset = htole64(offset);
710         else {
711                 /* Move back to the previous data object, to patch in
712                  * pointer */
713
714                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
715                 if (r < 0)
716                         return r;
717
718                 o->data.next_hash_offset = htole64(offset);
719         }
720
721         f->data_hash_table[h].tail_hash_offset = htole64(offset);
722
723         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
724                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
725
726         return 0;
727 }
728
729 int journal_file_find_field_object_with_hash(
730                 JournalFile *f,
731                 const void *field, uint64_t size, uint64_t hash,
732                 Object **ret, uint64_t *offset) {
733
734         uint64_t p, osize, h;
735         int r;
736
737         assert(f);
738         assert(field && size > 0);
739
740         osize = offsetof(Object, field.payload) + size;
741
742         if (f->header->field_hash_table_size == 0)
743                 return -EBADMSG;
744
745         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
746         p = le64toh(f->field_hash_table[h].head_hash_offset);
747
748         while (p > 0) {
749                 Object *o;
750
751                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
752                 if (r < 0)
753                         return r;
754
755                 if (le64toh(o->field.hash) == hash &&
756                     le64toh(o->object.size) == osize &&
757                     memcmp(o->field.payload, field, size) == 0) {
758
759                         if (ret)
760                                 *ret = o;
761                         if (offset)
762                                 *offset = p;
763
764                         return 1;
765                 }
766
767                 p = le64toh(o->field.next_hash_offset);
768         }
769
770         return 0;
771 }
772
773 int journal_file_find_field_object(
774                 JournalFile *f,
775                 const void *field, uint64_t size,
776                 Object **ret, uint64_t *offset) {
777
778         uint64_t hash;
779
780         assert(f);
781         assert(field && size > 0);
782
783         hash = hash64(field, size);
784
785         return journal_file_find_field_object_with_hash(f,
786                                                         field, size, hash,
787                                                         ret, offset);
788 }
789
790 int journal_file_find_data_object_with_hash(
791                 JournalFile *f,
792                 const void *data, uint64_t size, uint64_t hash,
793                 Object **ret, uint64_t *offset) {
794
795         uint64_t p, osize, h;
796         int r;
797
798         assert(f);
799         assert(data || size == 0);
800
801         osize = offsetof(Object, data.payload) + size;
802
803         if (f->header->data_hash_table_size == 0)
804                 return -EBADMSG;
805
806         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
807         p = le64toh(f->data_hash_table[h].head_hash_offset);
808
809         while (p > 0) {
810                 Object *o;
811
812                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
813                 if (r < 0)
814                         return r;
815
816                 if (le64toh(o->data.hash) != hash)
817                         goto next;
818
819                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
820 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
821                         uint64_t l;
822                         size_t rsize;
823
824                         l = le64toh(o->object.size);
825                         if (l <= offsetof(Object, data.payload))
826                                 return -EBADMSG;
827
828                         l -= offsetof(Object, data.payload);
829
830                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
831                                             o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
832                         if (r < 0)
833                                 return r;
834
835                         if (rsize == size &&
836                             memcmp(f->compress_buffer, data, size) == 0) {
837
838                                 if (ret)
839                                         *ret = o;
840
841                                 if (offset)
842                                         *offset = p;
843
844                                 return 1;
845                         }
846 #else
847                         return -EPROTONOSUPPORT;
848 #endif
849                 } else if (le64toh(o->object.size) == osize &&
850                            memcmp(o->data.payload, data, size) == 0) {
851
852                         if (ret)
853                                 *ret = o;
854
855                         if (offset)
856                                 *offset = p;
857
858                         return 1;
859                 }
860
861         next:
862                 p = le64toh(o->data.next_hash_offset);
863         }
864
865         return 0;
866 }
867
868 int journal_file_find_data_object(
869                 JournalFile *f,
870                 const void *data, uint64_t size,
871                 Object **ret, uint64_t *offset) {
872
873         uint64_t hash;
874
875         assert(f);
876         assert(data || size == 0);
877
878         hash = hash64(data, size);
879
880         return journal_file_find_data_object_with_hash(f,
881                                                        data, size, hash,
882                                                        ret, offset);
883 }
884
885 static int journal_file_append_field(
886                 JournalFile *f,
887                 const void *field, uint64_t size,
888                 Object **ret, uint64_t *offset) {
889
890         uint64_t hash, p;
891         uint64_t osize;
892         Object *o;
893         int r;
894
895         assert(f);
896         assert(field && size > 0);
897
898         hash = hash64(field, size);
899
900         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
901         if (r < 0)
902                 return r;
903         else if (r > 0) {
904
905                 if (ret)
906                         *ret = o;
907
908                 if (offset)
909                         *offset = p;
910
911                 return 0;
912         }
913
914         osize = offsetof(Object, field.payload) + size;
915         r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
916         if (r < 0)
917                 return r;
918
919         o->field.hash = htole64(hash);
920         memcpy(o->field.payload, field, size);
921
922         r = journal_file_link_field(f, o, p, hash);
923         if (r < 0)
924                 return r;
925
926         /* The linking might have altered the window, so let's
927          * refresh our pointer */
928         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
929         if (r < 0)
930                 return r;
931
932 #ifdef HAVE_GCRYPT
933         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
934         if (r < 0)
935                 return r;
936 #endif
937
938         if (ret)
939                 *ret = o;
940
941         if (offset)
942                 *offset = p;
943
944         return 0;
945 }
946
947 static int journal_file_append_data(
948                 JournalFile *f,
949                 const void *data, uint64_t size,
950                 Object **ret, uint64_t *offset) {
951
952         uint64_t hash, p;
953         uint64_t osize;
954         Object *o;
955         int r, compression = 0;
956         const void *eq;
957
958         assert(f);
959         assert(data || size == 0);
960
961         hash = hash64(data, size);
962
963         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
964         if (r < 0)
965                 return r;
966         else if (r > 0) {
967
968                 if (ret)
969                         *ret = o;
970
971                 if (offset)
972                         *offset = p;
973
974                 return 0;
975         }
976
977         osize = offsetof(Object, data.payload) + size;
978         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
979         if (r < 0)
980                 return r;
981
982         o->data.hash = htole64(hash);
983
984 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
985         if (f->compress_xz &&
986             size >= COMPRESSION_SIZE_THRESHOLD) {
987                 size_t rsize;
988
989                 compression = compress_blob(data, size, o->data.payload, &rsize);
990
991                 if (compression) {
992                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
993                         o->object.flags |= compression;
994
995                         log_debug("Compressed data object %"PRIu64" -> %zu using %s",
996                                   size, rsize, object_compressed_to_string(compression));
997                 }
998         }
999 #endif
1000
1001         if (!compression && size > 0)
1002                 memcpy(o->data.payload, data, size);
1003
1004         r = journal_file_link_data(f, o, p, hash);
1005         if (r < 0)
1006                 return r;
1007
1008         /* The linking might have altered the window, so let's
1009          * refresh our pointer */
1010         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1011         if (r < 0)
1012                 return r;
1013
1014         if (!data)
1015                 eq = NULL;
1016         else
1017                 eq = memchr(data, '=', size);
1018         if (eq && eq > data) {
1019                 Object *fo = NULL;
1020                 uint64_t fp;
1021
1022                 /* Create field object ... */
1023                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1024                 if (r < 0)
1025                         return r;
1026
1027                 /* ... and link it in. */
1028                 o->data.next_field_offset = fo->field.head_data_offset;
1029                 fo->field.head_data_offset = le64toh(p);
1030         }
1031
1032 #ifdef HAVE_GCRYPT
1033         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1034         if (r < 0)
1035                 return r;
1036 #endif
1037
1038         if (ret)
1039                 *ret = o;
1040
1041         if (offset)
1042                 *offset = p;
1043
1044         return 0;
1045 }
1046
1047 uint64_t journal_file_entry_n_items(Object *o) {
1048         assert(o);
1049
1050         if (o->object.type != OBJECT_ENTRY)
1051                 return 0;
1052
1053         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1054 }
1055
1056 uint64_t journal_file_entry_array_n_items(Object *o) {
1057         assert(o);
1058
1059         if (o->object.type != OBJECT_ENTRY_ARRAY)
1060                 return 0;
1061
1062         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1063 }
1064
1065 uint64_t journal_file_hash_table_n_items(Object *o) {
1066         assert(o);
1067
1068         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1069             o->object.type != OBJECT_FIELD_HASH_TABLE)
1070                 return 0;
1071
1072         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1073 }
1074
1075 static int link_entry_into_array(JournalFile *f,
1076                                  le64_t *first,
1077                                  le64_t *idx,
1078                                  uint64_t p) {
1079         int r;
1080         uint64_t n = 0, ap = 0, q, i, a, hidx;
1081         Object *o;
1082
1083         assert(f);
1084         assert(first);
1085         assert(idx);
1086         assert(p > 0);
1087
1088         a = le64toh(*first);
1089         i = hidx = le64toh(*idx);
1090         while (a > 0) {
1091
1092                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1093                 if (r < 0)
1094                         return r;
1095
1096                 n = journal_file_entry_array_n_items(o);
1097                 if (i < n) {
1098                         o->entry_array.items[i] = htole64(p);
1099                         *idx = htole64(hidx + 1);
1100                         return 0;
1101                 }
1102
1103                 i -= n;
1104                 ap = a;
1105                 a = le64toh(o->entry_array.next_entry_array_offset);
1106         }
1107
1108         if (hidx > n)
1109                 n = (hidx+1) * 2;
1110         else
1111                 n = n * 2;
1112
1113         if (n < 4)
1114                 n = 4;
1115
1116         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1117                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1118                                        &o, &q);
1119         if (r < 0)
1120                 return r;
1121
1122 #ifdef HAVE_GCRYPT
1123         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
1124         if (r < 0)
1125                 return r;
1126 #endif
1127
1128         o->entry_array.items[i] = htole64(p);
1129
1130         if (ap == 0)
1131                 *first = htole64(q);
1132         else {
1133                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
1134                 if (r < 0)
1135                         return r;
1136
1137                 o->entry_array.next_entry_array_offset = htole64(q);
1138         }
1139
1140         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1141                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1142
1143         *idx = htole64(hidx + 1);
1144
1145         return 0;
1146 }
1147
1148 static int link_entry_into_array_plus_one(JournalFile *f,
1149                                           le64_t *extra,
1150                                           le64_t *first,
1151                                           le64_t *idx,
1152                                           uint64_t p) {
1153
1154         int r;
1155
1156         assert(f);
1157         assert(extra);
1158         assert(first);
1159         assert(idx);
1160         assert(p > 0);
1161
1162         if (*idx == 0)
1163                 *extra = htole64(p);
1164         else {
1165                 le64_t i;
1166
1167                 i = htole64(le64toh(*idx) - 1);
1168                 r = link_entry_into_array(f, first, &i, p);
1169                 if (r < 0)
1170                         return r;
1171         }
1172
1173         *idx = htole64(le64toh(*idx) + 1);
1174         return 0;
1175 }
1176
1177 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1178         uint64_t p;
1179         int r;
1180         assert(f);
1181         assert(o);
1182         assert(offset > 0);
1183
1184         p = le64toh(o->entry.items[i].object_offset);
1185         if (p == 0)
1186                 return -EINVAL;
1187
1188         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1189         if (r < 0)
1190                 return r;
1191
1192         return link_entry_into_array_plus_one(f,
1193                                               &o->data.entry_offset,
1194                                               &o->data.entry_array_offset,
1195                                               &o->data.n_entries,
1196                                               offset);
1197 }
1198
1199 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
1200         uint64_t n, i;
1201         int r;
1202
1203         assert(f);
1204         assert(o);
1205         assert(offset > 0);
1206
1207         if (o->object.type != OBJECT_ENTRY)
1208                 return -EINVAL;
1209
1210         __sync_synchronize();
1211
1212         /* Link up the entry itself */
1213         r = link_entry_into_array(f,
1214                                   &f->header->entry_array_offset,
1215                                   &f->header->n_entries,
1216                                   offset);
1217         if (r < 0)
1218                 return r;
1219
1220         /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
1221
1222         if (f->header->head_entry_realtime == 0)
1223                 f->header->head_entry_realtime = o->entry.realtime;
1224
1225         f->header->tail_entry_realtime = o->entry.realtime;
1226         f->header->tail_entry_monotonic = o->entry.monotonic;
1227
1228         f->tail_entry_monotonic_valid = true;
1229
1230         /* Link up the items */
1231         n = journal_file_entry_n_items(o);
1232         for (i = 0; i < n; i++) {
1233                 r = journal_file_link_entry_item(f, o, offset, i);
1234                 if (r < 0)
1235                         return r;
1236         }
1237
1238         return 0;
1239 }
1240
1241 static int journal_file_append_entry_internal(
1242                 JournalFile *f,
1243                 const dual_timestamp *ts,
1244                 uint64_t xor_hash,
1245                 const EntryItem items[], unsigned n_items,
1246                 uint64_t *seqnum,
1247                 Object **ret, uint64_t *offset) {
1248         uint64_t np;
1249         uint64_t osize;
1250         Object *o;
1251         int r;
1252
1253         assert(f);
1254         assert(items || n_items == 0);
1255         assert(ts);
1256
1257         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1258
1259         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1260         if (r < 0)
1261                 return r;
1262
1263         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1264         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1265         o->entry.realtime = htole64(ts->realtime);
1266         o->entry.monotonic = htole64(ts->monotonic);
1267         o->entry.xor_hash = htole64(xor_hash);
1268         o->entry.boot_id = f->header->boot_id;
1269
1270 #ifdef HAVE_GCRYPT
1271         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1272         if (r < 0)
1273                 return r;
1274 #endif
1275
1276         r = journal_file_link_entry(f, o, np);
1277         if (r < 0)
1278                 return r;
1279
1280         if (ret)
1281                 *ret = o;
1282
1283         if (offset)
1284                 *offset = np;
1285
1286         return 0;
1287 }
1288
1289 void journal_file_post_change(JournalFile *f) {
1290         assert(f);
1291
1292         /* inotify() does not receive IN_MODIFY events from file
1293          * accesses done via mmap(). After each access we hence
1294          * trigger IN_MODIFY by truncating the journal file to its
1295          * current size which triggers IN_MODIFY. */
1296
1297         __sync_synchronize();
1298
1299         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1300                 log_error_errno(errno, "Failed to truncate file to its own size: %m");
1301 }
1302
1303 static int entry_item_cmp(const void *_a, const void *_b) {
1304         const EntryItem *a = _a, *b = _b;
1305
1306         if (le64toh(a->object_offset) < le64toh(b->object_offset))
1307                 return -1;
1308         if (le64toh(a->object_offset) > le64toh(b->object_offset))
1309                 return 1;
1310         return 0;
1311 }
1312
1313 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1314         unsigned i;
1315         EntryItem *items;
1316         int r;
1317         uint64_t xor_hash = 0;
1318         struct dual_timestamp _ts;
1319
1320         assert(f);
1321         assert(iovec || n_iovec == 0);
1322
1323         if (!ts) {
1324                 dual_timestamp_get(&_ts);
1325                 ts = &_ts;
1326         }
1327
1328         if (f->tail_entry_monotonic_valid &&
1329             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1330                 return -EINVAL;
1331
1332 #ifdef HAVE_GCRYPT
1333         r = journal_file_maybe_append_tag(f, ts->realtime);
1334         if (r < 0)
1335                 return r;
1336 #endif
1337
1338         /* alloca() can't take 0, hence let's allocate at least one */
1339         items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
1340
1341         for (i = 0; i < n_iovec; i++) {
1342                 uint64_t p;
1343                 Object *o;
1344
1345                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1346                 if (r < 0)
1347                         return r;
1348
1349                 xor_hash ^= le64toh(o->data.hash);
1350                 items[i].object_offset = htole64(p);
1351                 items[i].hash = o->data.hash;
1352         }
1353
1354         /* Order by the position on disk, in order to improve seek
1355          * times for rotating media. */
1356         qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1357
1358         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1359
1360         journal_file_post_change(f);
1361
1362         return r;
1363 }
1364
/* One cached position inside an entry array chain, keyed by the offset of
 * the chain's first array. */
typedef struct ChainCacheItem {
        /* the array at the beginning of the chain */
        uint64_t first;

        /* the cached array */
        uint64_t array;

        /* the first item in the cached array */
        uint64_t begin;

        /* the total number of items in all arrays before this one in the chain */
        uint64_t total;

        /* the last index we looked at, to optimize locality when bisecting */
        uint64_t last_index;
} ChainCacheItem;
1372
1373 static void chain_cache_put(
1374                 OrderedHashmap *h,
1375                 ChainCacheItem *ci,
1376                 uint64_t first,
1377                 uint64_t array,
1378                 uint64_t begin,
1379                 uint64_t total,
1380                 uint64_t last_index) {
1381
1382         if (!ci) {
1383                 /* If the chain item to cache for this chain is the
1384                  * first one it's not worth caching anything */
1385                 if (array == first)
1386                         return;
1387
1388                 if (ordered_hashmap_size(h) >= CHAIN_CACHE_MAX) {
1389                         ci = ordered_hashmap_steal_first(h);
1390                         assert(ci);
1391                 } else {
1392                         ci = new(ChainCacheItem, 1);
1393                         if (!ci)
1394                                 return;
1395                 }
1396
1397                 ci->first = first;
1398
1399                 if (ordered_hashmap_put(h, &ci->first, ci) < 0) {
1400                         free(ci);
1401                         return;
1402                 }
1403         } else
1404                 assert(ci->first == first);
1405
1406         ci->array = array;
1407         ci->begin = begin;
1408         ci->total = total;
1409         ci->last_index = last_index;
1410 }
1411
1412 static int generic_array_get(
1413                 JournalFile *f,
1414                 uint64_t first,
1415                 uint64_t i,
1416                 Object **ret, uint64_t *offset) {
1417
1418         Object *o;
1419         uint64_t p = 0, a, t = 0;
1420         int r;
1421         ChainCacheItem *ci;
1422
1423         assert(f);
1424
1425         a = first;
1426
1427         /* Try the chain cache first */
1428         ci = ordered_hashmap_get(f->chain_cache, &first);
1429         if (ci && i > ci->total) {
1430                 a = ci->array;
1431                 i -= ci->total;
1432                 t = ci->total;
1433         }
1434
1435         while (a > 0) {
1436                 uint64_t k;
1437
1438                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1439                 if (r < 0)
1440                         return r;
1441
1442                 k = journal_file_entry_array_n_items(o);
1443                 if (i < k) {
1444                         p = le64toh(o->entry_array.items[i]);
1445                         goto found;
1446                 }
1447
1448                 i -= k;
1449                 t += k;
1450                 a = le64toh(o->entry_array.next_entry_array_offset);
1451         }
1452
1453         return 0;
1454
1455 found:
1456         /* Let's cache this item for the next invocation */
1457         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
1458
1459         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1460         if (r < 0)
1461                 return r;
1462
1463         if (ret)
1464                 *ret = o;
1465
1466         if (offset)
1467                 *offset = p;
1468
1469         return 1;
1470 }
1471
1472 static int generic_array_get_plus_one(
1473                 JournalFile *f,
1474                 uint64_t extra,
1475                 uint64_t first,
1476                 uint64_t i,
1477                 Object **ret, uint64_t *offset) {
1478
1479         Object *o;
1480
1481         assert(f);
1482
1483         if (i == 0) {
1484                 int r;
1485
1486                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1487                 if (r < 0)
1488                         return r;
1489
1490                 if (ret)
1491                         *ret = o;
1492
1493                 if (offset)
1494                         *offset = extra;
1495
1496                 return 1;
1497         }
1498
1499         return generic_array_get(f, first, i-1, ret, offset);
1500 }
1501
/* Results of the test_object() callbacks used when bisecting */
enum {
        /* the tested object matches the needle */
        TEST_FOUND = 0,

        /* the tested object's value is smaller than the needle */
        TEST_LEFT = 1,

        /* the tested object's value is larger than the needle */
        TEST_RIGHT = 2
};
1507
/* Bisects the chain of entry arrays that starts at offset 'first' for an
 * entry matching 'needle'. Only the first 'n' items of the chain are
 * considered.
 *
 * test_object() is invoked with the offset stored in an array slot and the
 * needle; a negative return is passed to the caller as error, otherwise the
 * result is compared against TEST_FOUND, TEST_LEFT and TEST_RIGHT. A
 * TEST_FOUND result is folded into TEST_RIGHT for DIRECTION_DOWN and into
 * TEST_LEFT for any other direction.
 *
 * With DIRECTION_UP the item preceding the located position is reported
 * (see subtract_one).
 *
 * Returns 1 if an entry was located, in which case 'ret', 'offset' and
 * 'idx' (each optional) receive the entry object, its file offset and its
 * index within the chain. Returns 0 if nothing suitable exists, -EBADMSG
 * if an inspected array slot contains offset 0, or a negative error from
 * test_object() or journal_file_move_to_object(). */
1508 static int generic_array_bisect(
1509                 JournalFile *f,
1510                 uint64_t first,
1511                 uint64_t n,
1512                 uint64_t needle,
1513                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1514                 direction_t direction,
1515                 Object **ret,
1516                 uint64_t *offset,
1517                 uint64_t *idx) {
1518
1519         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
1520         bool subtract_one = false;
1521         Object *o, *array = NULL;
1522         int r;
1523         ChainCacheItem *ci;
1524
1525         assert(f);
1526         assert(test_object);
1527
1528         /* Start with the first array in the chain */
1529         a = first;
1530
1531         ci = ordered_hashmap_get(f->chain_cache, &first);
1532         if (ci && n > ci->total) {
1533                 /* Ah, we have iterated this bisection array chain
1534                  * previously! Let's see if we can skip ahead in the
1535                  * chain, as far as the last time. But we can't jump
1536                  * backwards in the chain, so let's check that
1537                  * first. */
1538
1539                 r = test_object(f, ci->begin, needle);
1540                 if (r < 0)
1541                         return r;
1542
1543                 if (r == TEST_LEFT) {
1544                         /* OK, what we are looking for is right of the
1545                          * begin of this EntryArray, so let's jump
1546                          * straight to previously cached array in the
1547                          * chain */
1548
1549                         a = ci->array;
1550                         n -= ci->total;
1551                         t = ci->total;
1552                         last_index = ci->last_index;
1553                 }
1554         }
1555
        /* From here on: a is the offset of the array being examined, n the
         * number of items still to consider from that array onwards, and t
         * the number of items in the arrays already skipped. */
1556         while (a > 0) {
1557                 uint64_t left, right, k, lp;
1558
1559                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1560                 if (r < 0)
1561                         return r;
1562
1563                 k = journal_file_entry_array_n_items(array);
1564                 right = MIN(k, n);
1565                 if (right <= 0)
1566                         return 0;
1567
                /* Test the last in-range item of this array first: only if
                 * it tests TEST_RIGHT is this array bisected, otherwise we
                 * move on to the next array in the chain. */
1568                 i = right - 1;
1569                 lp = p = le64toh(array->entry_array.items[i]);
1570                 if (p <= 0)
1571                         return -EBADMSG;
1572
1573                 r = test_object(f, p, needle);
1574                 if (r < 0)
1575                         return r;
1576
1577                 if (r == TEST_FOUND)
1578                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1579
1580                 if (r == TEST_RIGHT) {
1581                         left = 0;
1582                         right -= 1;
1583
1584                         if (last_index != (uint64_t) -1) {
1585                                 assert(last_index <= right);
1586
1587                                 /* If we cached the last index we
1588                                  * looked at, let's try to not to jump
1589                                  * too wildly around and see if we can
1590                                  * limit the range to look at early to
1591                                  * the immediate neighbors of the last
1592                                  * index we looked at. */
1593
1594                                 if (last_index > 0) {
1595                                         uint64_t x = last_index - 1;
1596
1597                                         p = le64toh(array->entry_array.items[x]);
1598                                         if (p <= 0)
1599                                                 return -EBADMSG;
1600
1601                                         r = test_object(f, p, needle);
1602                                         if (r < 0)
1603                                                 return r;
1604
1605                                         if (r == TEST_FOUND)
1606                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1607
1608                                         if (r == TEST_RIGHT)
1609                                                 right = x;
1610                                         else
1611                                                 left = x + 1;
1612                                 }
1613
1614                                 if (last_index < right) {
1615                                         uint64_t y = last_index + 1;
1616
1617                                         p = le64toh(array->entry_array.items[y]);
1618                                         if (p <= 0)
1619                                                 return -EBADMSG;
1620
1621                                         r = test_object(f, p, needle);
1622                                         if (r < 0)
1623                                                 return r;
1624
1625                                         if (r == TEST_FOUND)
1626                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1627
1628                                         if (r == TEST_RIGHT)
1629                                                 right = y;
1630                                         else
1631                                                 left = y + 1;
1632                                 }
1633                         }
1634
                        /* Plain bisection of [left, right]: narrows the range
                         * until both bounds meet, which is then the first
                         * index that tested TEST_RIGHT. */
1635                         for (;;) {
1636                                 if (left == right) {
1637                                         if (direction == DIRECTION_UP)
1638                                                 subtract_one = true;
1639
1640                                         i = left;
1641                                         goto found;
1642                                 }
1643
1644                                 assert(left < right);
1645                                 i = (left + right) / 2;
1646
1647                                 p = le64toh(array->entry_array.items[i]);
1648                                 if (p <= 0)
1649                                         return -EBADMSG;
1650
1651                                 r = test_object(f, p, needle);
1652                                 if (r < 0)
1653                                         return r;
1654
1655                                 if (r == TEST_FOUND)
1656                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1657
1658                                 if (r == TEST_RIGHT)
1659                                         right = i;
1660                                 else
1661                                         left = i + 1;
1662                         }
1663                 }
1664
                /* The last in-range item did not test TEST_RIGHT. If this
                 * array covers all remaining items there is nothing further
                 * to look at: for DIRECTION_UP report the last item in
                 * range (index n-1), otherwise report nothing. */
1665                 if (k >= n) {
1666                         if (direction == DIRECTION_UP) {
1667                                 i = n;
1668                                 subtract_one = true;
1669                                 goto found;
1670                         }
1671
1672                         return 0;
1673                 }
1674
                /* Remember the item tested above, in case the result turns
                 * out to be the item preceding index 0 of the next array. */
1675                 last_p = lp;
1676
1677                 n -= k;
1678                 t += k;
1679                 last_index = (uint64_t) -1;
1680                 a = le64toh(array->entry_array.next_entry_array_offset);
1681         }
1682
1683         return 0;
1684
1685 found:
        /* Asked to step back from the very first item of the chain: there
         * is no preceding item to report. */
1686         if (subtract_one && t == 0 && i == 0)
1687                 return 0;
1688
1689         /* Let's cache this item for the next invocation */
1690         chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
1691
        /* Stepping back from index 0 of this array leads into the previous
         * array, hence use the item remembered in last_p. */
1692         if (subtract_one && i == 0)
1693                 p = last_p;
1694         else if (subtract_one)
1695                 p = le64toh(array->entry_array.items[i-1]);
1696         else
1697                 p = le64toh(array->entry_array.items[i]);
1698
1699         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1700         if (r < 0)
1701                 return r;
1702
1703         if (ret)
1704                 *ret = o;
1705
1706         if (offset)
1707                 *offset = p;
1708
        /* t + i is the chain-wide index of the located position */
1709         if (idx)
1710                 *idx = t + i + (subtract_one ? -1 : 0);
1711
1712         return 1;
1713 }
1714
1715
1716 static int generic_array_bisect_plus_one(
1717                 JournalFile *f,
1718                 uint64_t extra,
1719                 uint64_t first,
1720                 uint64_t n,
1721                 uint64_t needle,
1722                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1723                 direction_t direction,
1724                 Object **ret,
1725                 uint64_t *offset,
1726                 uint64_t *idx) {
1727
1728         int r;
1729         bool step_back = false;
1730         Object *o;
1731
1732         assert(f);
1733         assert(test_object);
1734
1735         if (n <= 0)
1736                 return 0;
1737
1738         /* This bisects the array in object 'first', but first checks
1739          * an extra  */
1740         r = test_object(f, extra, needle);
1741         if (r < 0)
1742                 return r;
1743
1744         if (r == TEST_FOUND)
1745                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1746
1747         /* if we are looking with DIRECTION_UP then we need to first
1748            see if in the actual array there is a matching entry, and
1749            return the last one of that. But if there isn't any we need
1750            to return this one. Hence remember this, and return it
1751            below. */
1752         if (r == TEST_LEFT)
1753                 step_back = direction == DIRECTION_UP;
1754
1755         if (r == TEST_RIGHT) {
1756                 if (direction == DIRECTION_DOWN)
1757                         goto found;
1758                 else
1759                         return 0;
1760         }
1761
1762         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1763
1764         if (r == 0 && step_back)
1765                 goto found;
1766
1767         if (r > 0 && idx)
1768                 (*idx) ++;
1769
1770         return r;
1771
1772 found:
1773         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1774         if (r < 0)
1775                 return r;
1776
1777         if (ret)
1778                 *ret = o;
1779
1780         if (offset)
1781                 *offset = extra;
1782
1783         if (idx)
1784                 *idx = 0;
1785
1786         return 1;
1787 }
1788
1789 _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1790         assert(f);
1791         assert(p > 0);
1792
1793         if (p == needle)
1794                 return TEST_FOUND;
1795         else if (p < needle)
1796                 return TEST_LEFT;
1797         else
1798                 return TEST_RIGHT;
1799 }
1800
1801 int journal_file_move_to_entry_by_offset(
1802                 JournalFile *f,
1803                 uint64_t p,
1804                 direction_t direction,
1805                 Object **ret,
1806                 uint64_t *offset) {
1807
1808         return generic_array_bisect(f,
1809                                     le64toh(f->header->entry_array_offset),
1810                                     le64toh(f->header->n_entries),
1811                                     p,
1812                                     test_object_offset,
1813                                     direction,
1814                                     ret, offset, NULL);
1815 }
1816
1817
1818 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1819         Object *o;
1820         int r;
1821
1822         assert(f);
1823         assert(p > 0);
1824
1825         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1826         if (r < 0)
1827                 return r;
1828
1829         if (le64toh(o->entry.seqnum) == needle)
1830                 return TEST_FOUND;
1831         else if (le64toh(o->entry.seqnum) < needle)
1832                 return TEST_LEFT;
1833         else
1834                 return TEST_RIGHT;
1835 }
1836
1837 int journal_file_move_to_entry_by_seqnum(
1838                 JournalFile *f,
1839                 uint64_t seqnum,
1840                 direction_t direction,
1841                 Object **ret,
1842                 uint64_t *offset) {
1843
1844         return generic_array_bisect(f,
1845                                     le64toh(f->header->entry_array_offset),
1846                                     le64toh(f->header->n_entries),
1847                                     seqnum,
1848                                     test_object_seqnum,
1849                                     direction,
1850                                     ret, offset, NULL);
1851 }
1852
1853 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1854         Object *o;
1855         int r;
1856
1857         assert(f);
1858         assert(p > 0);
1859
1860         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1861         if (r < 0)
1862                 return r;
1863
1864         if (le64toh(o->entry.realtime) == needle)
1865                 return TEST_FOUND;
1866         else if (le64toh(o->entry.realtime) < needle)
1867                 return TEST_LEFT;
1868         else
1869                 return TEST_RIGHT;
1870 }
1871
1872 int journal_file_move_to_entry_by_realtime(
1873                 JournalFile *f,
1874                 uint64_t realtime,
1875                 direction_t direction,
1876                 Object **ret,
1877                 uint64_t *offset) {
1878
1879         return generic_array_bisect(f,
1880                                     le64toh(f->header->entry_array_offset),
1881                                     le64toh(f->header->n_entries),
1882                                     realtime,
1883                                     test_object_realtime,
1884                                     direction,
1885                                     ret, offset, NULL);
1886 }
1887
1888 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1889         Object *o;
1890         int r;
1891
1892         assert(f);
1893         assert(p > 0);
1894
1895         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1896         if (r < 0)
1897                 return r;
1898
1899         if (le64toh(o->entry.monotonic) == needle)
1900                 return TEST_FOUND;
1901         else if (le64toh(o->entry.monotonic) < needle)
1902                 return TEST_LEFT;
1903         else
1904                 return TEST_RIGHT;
1905 }
1906
1907 static inline int find_data_object_by_boot_id(
1908                 JournalFile *f,
1909                 sd_id128_t boot_id,
1910                 Object **o,
1911                 uint64_t *b) {
1912         char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1913
1914         sd_id128_to_string(boot_id, t + 9);
1915         return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1916 }
1917
1918 int journal_file_move_to_entry_by_monotonic(
1919                 JournalFile *f,
1920                 sd_id128_t boot_id,
1921                 uint64_t monotonic,
1922                 direction_t direction,
1923                 Object **ret,
1924                 uint64_t *offset) {
1925
1926         Object *o;
1927         int r;
1928
1929         assert(f);
1930
1931         r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
1932         if (r < 0)
1933                 return r;
1934         if (r == 0)
1935                 return -ENOENT;
1936
1937         return generic_array_bisect_plus_one(f,
1938                                              le64toh(o->data.entry_offset),
1939                                              le64toh(o->data.entry_array_offset),
1940                                              le64toh(o->data.n_entries),
1941                                              monotonic,
1942                                              test_object_monotonic,
1943                                              direction,
1944                                              ret, offset, NULL);
1945 }
1946
1947 int journal_file_next_entry(
1948                 JournalFile *f,
1949                 Object *o, uint64_t p,
1950                 direction_t direction,
1951                 Object **ret, uint64_t *offset) {
1952
1953         uint64_t i, n, ofs;
1954         int r;
1955
1956         assert(f);
1957         assert(p > 0 || !o);
1958
1959         n = le64toh(f->header->n_entries);
1960         if (n <= 0)
1961                 return 0;
1962
1963         if (!o)
1964                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1965         else {
1966                 if (o->object.type != OBJECT_ENTRY)
1967                         return -EINVAL;
1968
1969                 r = generic_array_bisect(f,
1970                                          le64toh(f->header->entry_array_offset),
1971                                          le64toh(f->header->n_entries),
1972                                          p,
1973                                          test_object_offset,
1974                                          DIRECTION_DOWN,
1975                                          NULL, NULL,
1976                                          &i);
1977                 if (r <= 0)
1978                         return r;
1979
1980                 if (direction == DIRECTION_DOWN) {
1981                         if (i >= n - 1)
1982                                 return 0;
1983
1984                         i++;
1985                 } else {
1986                         if (i <= 0)
1987                                 return 0;
1988
1989                         i--;
1990                 }
1991         }
1992
1993         /* And jump to it */
1994         r = generic_array_get(f,
1995                               le64toh(f->header->entry_array_offset),
1996                               i,
1997                               ret, &ofs);
1998         if (r <= 0)
1999                 return r;
2000
2001         if (p > 0 &&
2002             (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
2003                 log_debug("%s: entry array corrupted at entry %"PRIu64,
2004                           f->path, i);
2005                 return -EBADMSG;
2006         }
2007
2008         if (offset)
2009                 *offset = ofs;
2010
2011         return 1;
2012 }
2013
2014 int journal_file_skip_entry(
2015                 JournalFile *f,
2016                 Object *o, uint64_t p,
2017                 int64_t skip,
2018                 Object **ret, uint64_t *offset) {
2019
2020         uint64_t i, n;
2021         int r;
2022
2023         assert(f);
2024         assert(o);
2025         assert(p > 0);
2026
2027         if (o->object.type != OBJECT_ENTRY)
2028                 return -EINVAL;
2029
2030         r = generic_array_bisect(f,
2031                                  le64toh(f->header->entry_array_offset),
2032                                  le64toh(f->header->n_entries),
2033                                  p,
2034                                  test_object_offset,
2035                                  DIRECTION_DOWN,
2036                                  NULL, NULL,
2037                                  &i);
2038         if (r <= 0)
2039                 return r;
2040
2041         /* Calculate new index */
2042         if (skip < 0) {
2043                 if ((uint64_t) -skip >= i)
2044                         i = 0;
2045                 else
2046                         i = i - (uint64_t) -skip;
2047         } else
2048                 i  += (uint64_t) skip;
2049
2050         n = le64toh(f->header->n_entries);
2051         if (n <= 0)
2052                 return -EBADMSG;
2053
2054         if (i >= n)
2055                 i = n-1;
2056
2057         return generic_array_get(f,
2058                                  le64toh(f->header->entry_array_offset),
2059                                  i,
2060                                  ret, offset);
2061 }
2062
2063 int journal_file_next_entry_for_data(
2064                 JournalFile *f,
2065                 Object *o, uint64_t p,
2066                 uint64_t data_offset,
2067                 direction_t direction,
2068                 Object **ret, uint64_t *offset) {
2069
2070         uint64_t n, i;
2071         int r;
2072         Object *d;
2073
2074         assert(f);
2075         assert(p > 0 || !o);
2076
2077         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2078         if (r < 0)
2079                 return r;
2080
2081         n = le64toh(d->data.n_entries);
2082         if (n <= 0)
2083                 return n;
2084
2085         if (!o)
2086                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2087         else {
2088                 if (o->object.type != OBJECT_ENTRY)
2089                         return -EINVAL;
2090
2091                 r = generic_array_bisect_plus_one(f,
2092                                                   le64toh(d->data.entry_offset),
2093                                                   le64toh(d->data.entry_array_offset),
2094                                                   le64toh(d->data.n_entries),
2095                                                   p,
2096                                                   test_object_offset,
2097                                                   DIRECTION_DOWN,
2098                                                   NULL, NULL,
2099                                                   &i);
2100
2101                 if (r <= 0)
2102                         return r;
2103
2104                 if (direction == DIRECTION_DOWN) {
2105                         if (i >= n - 1)
2106                                 return 0;
2107
2108                         i++;
2109                 } else {
2110                         if (i <= 0)
2111                                 return 0;
2112
2113                         i--;
2114                 }
2115
2116         }
2117
2118         return generic_array_get_plus_one(f,
2119                                           le64toh(d->data.entry_offset),
2120                                           le64toh(d->data.entry_array_offset),
2121                                           i,
2122                                           ret, offset);
2123 }
2124
2125 int journal_file_move_to_entry_by_offset_for_data(
2126                 JournalFile *f,
2127                 uint64_t data_offset,
2128                 uint64_t p,
2129                 direction_t direction,
2130                 Object **ret, uint64_t *offset) {
2131
2132         int r;
2133         Object *d;
2134
2135         assert(f);
2136
2137         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2138         if (r < 0)
2139                 return r;
2140
2141         return generic_array_bisect_plus_one(f,
2142                                              le64toh(d->data.entry_offset),
2143                                              le64toh(d->data.entry_array_offset),
2144                                              le64toh(d->data.n_entries),
2145                                              p,
2146                                              test_object_offset,
2147                                              direction,
2148                                              ret, offset, NULL);
2149 }
2150
/* Seeks to an entry that is referenced both by the data object at
 * data_offset and by the "_BOOT_ID=" data object of boot_id, starting from
 * the given monotonic timestamp.
 *
 * Returns 1 and fills ret / *offset (each only if non-NULL) once an entry
 * present in both entry lists is found, -ENOENT if no data object exists
 * for boot_id, and otherwise the <= 0 result of whichever lookup or
 * bisection step stopped the search. */
int journal_file_move_to_entry_by_monotonic_for_data(
                JournalFile *f,
                uint64_t data_offset,
                sd_id128_t boot_id,
                uint64_t monotonic,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *o, *d;
        int r;
        uint64_t b, z;

        assert(f);

        /* First, seek by time */
        /* b receives the offset of the boot ID data object so that it can
         * be mapped again inside the loop below */
        r = find_data_object_by_boot_id(f, boot_id, &o, &b);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOENT;

        /* z becomes the offset of the entry selected by timestamp within
         * the boot ID's entry list */
        r = generic_array_bisect_plus_one(f,
                                          le64toh(o->data.entry_offset),
                                          le64toh(o->data.entry_array_offset),
                                          le64toh(o->data.n_entries),
                                          monotonic,
                                          test_object_monotonic,
                                          direction,
                                          NULL, &z, NULL);
        if (r <= 0)
                return r;

        /* And now, continue seeking until we find an entry that
         * exists in both bisection arrays */

        for (;;) {
                Object *qo;
                uint64_t p, q;

                /* NOTE(review): both data objects are mapped afresh on every
                 * iteration, presumably because an earlier pointer may no
                 * longer be valid after other objects were mapped — confirm
                 * against journal_file_move_to_object() */
                r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
                if (r < 0)
                        return r;

                /* Candidate p: bisect the requested data object's entry list
                 * by offset, starting from z */
                r = generic_array_bisect_plus_one(f,
                                                  le64toh(d->data.entry_offset),
                                                  le64toh(d->data.entry_array_offset),
                                                  le64toh(d->data.n_entries),
                                                  z,
                                                  test_object_offset,
                                                  direction,
                                                  NULL, &p, NULL);
                if (r <= 0)
                        return r;

                r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
                if (r < 0)
                        return r;

                /* Candidate q: bisect the boot ID's entry list by offset,
                 * starting from p */
                r = generic_array_bisect_plus_one(f,
                                                  le64toh(o->data.entry_offset),
                                                  le64toh(o->data.entry_array_offset),
                                                  le64toh(o->data.n_entries),
                                                  p,
                                                  test_object_offset,
                                                  direction,
                                                  &qo, &q, NULL);

                if (r <= 0)
                        return r;

                /* Both lists agree on the same entry offset: done */
                if (p == q) {
                        if (ret)
                                *ret = qo;
                        if (offset)
                                *offset = q;

                        return 1;
                }

                /* Otherwise retry, starting from the boot ID list's candidate */
                z = q;
        }
}
2233
2234 int journal_file_move_to_entry_by_seqnum_for_data(
2235                 JournalFile *f,
2236                 uint64_t data_offset,
2237                 uint64_t seqnum,
2238                 direction_t direction,
2239                 Object **ret, uint64_t *offset) {
2240
2241         Object *d;
2242         int r;
2243
2244         assert(f);
2245
2246         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2247         if (r < 0)
2248                 return r;
2249
2250         return generic_array_bisect_plus_one(f,
2251                                              le64toh(d->data.entry_offset),
2252                                              le64toh(d->data.entry_array_offset),
2253                                              le64toh(d->data.n_entries),
2254                                              seqnum,
2255                                              test_object_seqnum,
2256                                              direction,
2257                                              ret, offset, NULL);
2258 }
2259
2260 int journal_file_move_to_entry_by_realtime_for_data(
2261                 JournalFile *f,
2262                 uint64_t data_offset,
2263                 uint64_t realtime,
2264                 direction_t direction,
2265                 Object **ret, uint64_t *offset) {
2266
2267         Object *d;
2268         int r;
2269
2270         assert(f);
2271
2272         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2273         if (r < 0)
2274                 return r;
2275
2276         return generic_array_bisect_plus_one(f,
2277                                              le64toh(d->data.entry_offset),
2278                                              le64toh(d->data.entry_array_offset),
2279                                              le64toh(d->data.n_entries),
2280                                              realtime,
2281                                              test_object_realtime,
2282                                              direction,
2283                                              ret, offset, NULL);
2284 }
2285
2286 void journal_file_dump(JournalFile *f) {
2287         Object *o;
2288         int r;
2289         uint64_t p;
2290
2291         assert(f);
2292
2293         journal_file_print_header(f);
2294
2295         p = le64toh(f->header->header_size);
2296         while (p != 0) {
2297                 r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
2298                 if (r < 0)
2299                         goto fail;
2300
2301                 switch (o->object.type) {
2302
2303                 case OBJECT_UNUSED:
2304                         printf("Type: OBJECT_UNUSED\n");
2305                         break;
2306
2307                 case OBJECT_DATA:
2308                         printf("Type: OBJECT_DATA\n");
2309                         break;
2310
2311                 case OBJECT_FIELD:
2312                         printf("Type: OBJECT_FIELD\n");
2313                         break;
2314
2315                 case OBJECT_ENTRY:
2316                         printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2317                                le64toh(o->entry.seqnum),
2318                                le64toh(o->entry.monotonic),
2319                                le64toh(o->entry.realtime));
2320                         break;
2321
2322                 case OBJECT_FIELD_HASH_TABLE:
2323                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2324                         break;
2325
2326                 case OBJECT_DATA_HASH_TABLE:
2327                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2328                         break;
2329
2330                 case OBJECT_ENTRY_ARRAY:
2331                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2332                         break;
2333
2334                 case OBJECT_TAG:
2335                         printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2336                                le64toh(o->tag.seqnum),
2337                                le64toh(o->tag.epoch));
2338                         break;
2339
2340                 default:
2341                         printf("Type: unknown (%u)\n", o->object.type);
2342                         break;
2343                 }
2344
2345                 if (o->object.flags & OBJECT_COMPRESSION_MASK)
2346                         printf("Flags: %s\n",
2347                                object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
2348
2349                 if (p == le64toh(f->header->tail_object_offset))
2350                         p = 0;
2351                 else
2352                         p = p + ALIGN64(le64toh(o->object.size));
2353         }
2354
2355         return;
2356 fail:
2357         log_error("File corrupt");
2358 }
2359
2360 static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2361         const char *x;
2362
2363         x = format_timestamp(buf, l, t);
2364         if (x)
2365                 return x;
2366         return " --- ";
2367 }
2368
2369 void journal_file_print_header(JournalFile *f) {
2370         char a[33], b[33], c[33], d[33];
2371         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
2372         struct stat st;
2373         char bytes[FORMAT_BYTES_MAX];
2374
2375         assert(f);
2376
2377         printf("File Path: %s\n"
2378                "File ID: %s\n"
2379                "Machine ID: %s\n"
2380                "Boot ID: %s\n"
2381                "Sequential Number ID: %s\n"
2382                "State: %s\n"
2383                "Compatible Flags:%s%s\n"
2384                "Incompatible Flags:%s%s%s\n"
2385                "Header size: %"PRIu64"\n"
2386                "Arena size: %"PRIu64"\n"
2387                "Data Hash Table Size: %"PRIu64"\n"
2388                "Field Hash Table Size: %"PRIu64"\n"
2389                "Rotate Suggested: %s\n"
2390                "Head Sequential Number: %"PRIu64"\n"
2391                "Tail Sequential Number: %"PRIu64"\n"
2392                "Head Realtime Timestamp: %s\n"
2393                "Tail Realtime Timestamp: %s\n"
2394                "Tail Monotonic Timestamp: %s\n"
2395                "Objects: %"PRIu64"\n"
2396                "Entry Objects: %"PRIu64"\n",
2397                f->path,
2398                sd_id128_to_string(f->header->file_id, a),
2399                sd_id128_to_string(f->header->machine_id, b),
2400                sd_id128_to_string(f->header->boot_id, c),
2401                sd_id128_to_string(f->header->seqnum_id, d),
2402                f->header->state == STATE_OFFLINE ? "OFFLINE" :
2403                f->header->state == STATE_ONLINE ? "ONLINE" :
2404                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
2405                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2406                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
2407                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
2408                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
2409                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
2410                le64toh(f->header->header_size),
2411                le64toh(f->header->arena_size),
2412                le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2413                le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2414                yes_no(journal_file_rotate_suggested(f, 0)),
2415                le64toh(f->header->head_entry_seqnum),
2416                le64toh(f->header->tail_entry_seqnum),
2417                format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2418                format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2419                format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
2420                le64toh(f->header->n_objects),
2421                le64toh(f->header->n_entries));
2422
2423         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2424                 printf("Data Objects: %"PRIu64"\n"
2425                        "Data Hash Table Fill: %.1f%%\n",
2426                        le64toh(f->header->n_data),
2427                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2428
2429         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2430                 printf("Field Objects: %"PRIu64"\n"
2431                        "Field Hash Table Fill: %.1f%%\n",
2432                        le64toh(f->header->n_fields),
2433                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2434
2435         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2436                 printf("Tag Objects: %"PRIu64"\n",
2437                        le64toh(f->header->n_tags));
2438         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2439                 printf("Entry Array Objects: %"PRIu64"\n",
2440                        le64toh(f->header->n_entry_arrays));
2441
2442         if (fstat(f->fd, &st) >= 0)
2443                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
2444 }
2445
/* Opens the journal file fname and returns a newly allocated JournalFile
 * object in *ret.
 *
 * flags must have an access mode of O_RDONLY or O_RDWR, and fname must end
 * in ".journal" or ".journal~"; otherwise -EINVAL is returned. If the file
 * is empty and opened writable, a fresh header (based on template) and the
 * hash tables are set up. If mmap_cache is NULL a private cache is
 * allocated, otherwise a reference to the given one is taken. For writable
 * files the metrics are taken from metrics if given, else from template if
 * given.
 *
 * Returns 0 on success. On failure a negative error code is returned and
 * the partially initialized object is released via journal_file_close(). */
int journal_file_open(
                const char *fname,
                int flags,
                mode_t mode,
                bool compress,
                bool seal,
                JournalMetrics *metrics,
                MMapCache *mmap_cache,
                JournalFile *template,
                JournalFile **ret) {

        JournalFile *f;
        int r;
        bool newly_created = false;

        assert(fname);
        assert(ret);

        /* Write-only access is not accepted */
        if ((flags & O_ACCMODE) != O_RDONLY &&
            (flags & O_ACCMODE) != O_RDWR)
                return -EINVAL;

        if (!endswith(fname, ".journal") &&
            !endswith(fname, ".journal~"))
                return -EINVAL;

        f = new0(JournalFile, 1);
        if (!f)
                return -ENOMEM;

        /* Mark the descriptor as not yet opened before anything can jump
         * to the fail label */
        f->fd = -1;
        f->mode = mode;

        f->flags = flags;
        f->prot = prot_from_flags(flags);
        f->writable = (flags & O_ACCMODE) != O_RDONLY;
        /* If both compressors are compiled in, only the LZ4 flag is set */
#if defined(HAVE_LZ4)
        f->compress_lz4 = compress;
#elif defined(HAVE_XZ)
        f->compress_xz = compress;
#endif
#ifdef HAVE_GCRYPT
        f->seal = seal;
#endif

        if (mmap_cache)
                f->mmap = mmap_cache_ref(mmap_cache);
        else {
                f->mmap = mmap_cache_new();
                if (!f->mmap) {
                        r = -ENOMEM;
                        goto fail;
                }
        }

        f->path = strdup(fname);
        if (!f->path) {
                r = -ENOMEM;
                goto fail;
        }

        f->chain_cache = ordered_hashmap_new(&uint64_hash_ops);
        if (!f->chain_cache) {
                r = -ENOMEM;
                goto fail;
        }

        f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
        if (f->fd < 0) {
                r = -errno;
                goto fail;
        }

        if (fstat(f->fd, &f->last_stat) < 0) {
                r = -errno;
                goto fail;
        }

        /* An empty file opened for writing is treated as newly created and
         * gets its header initialized here */
        if (f->last_stat.st_size == 0 && f->writable) {
                uint64_t crtime;

                /* Let's attach the creation time to the journal file,
                 * so that the vacuuming code knows the age of this
                 * file even if the file might end up corrupted one
                 * day... Ideally we'd just use the creation time many
                 * file systems maintain for each file, but there is
                 * currently no usable API to query this, hence let's
                 * emulate this via extended attributes. If extended
                 * attributes are not supported we'll just skip this,
                 * and rely solely on mtime/atime/ctime of the file. */

                crtime = htole64((uint64_t) now(CLOCK_REALTIME));
                /* Best effort: the result is deliberately not checked */
                fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);

#ifdef HAVE_GCRYPT
                /* Try to load the FSPRG state, and if we can't, then
                 * just don't do sealing */
                if (f->seal) {
                        r = journal_file_fss_load(f);
                        if (r < 0)
                                f->seal = false;
                }
#endif

                r = journal_file_init_header(f, template);
                if (r < 0)
                        goto fail;

                /* Refresh the cached stat data after initializing the header */
                if (fstat(f->fd, &f->last_stat) < 0) {
                        r = -errno;
                        goto fail;
                }

                newly_created = true;
        }

        /* Files smaller than the minimal header size are rejected */
        if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
                r = -EIO;
                goto fail;
        }

        f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
        if (f->header == MAP_FAILED) {
                /* Reset the pointer so that MAP_FAILED does not reach the
                 * cleanup in journal_file_close() */
                f->header = NULL;
                r = -errno;
                goto fail;
        }

        /* Only pre-existing files are verified */
        if (!newly_created) {
                r = journal_file_verify_header(f);
                if (r < 0)
                        goto fail;
        }

#ifdef HAVE_GCRYPT
        /* Unlike for new files above, a load failure is fatal here */
        if (!newly_created && f->writable) {
                r = journal_file_fss_load(f);
                if (r < 0)
                        goto fail;
        }
#endif

        if (f->writable) {
                if (metrics) {
                        /* The caller's metrics are passed to
                         * journal_default_metrics() before being copied */
                        journal_default_metrics(metrics, f->fd);
                        f->metrics = *metrics;
                } else if (template)
                        f->metrics = template->metrics;

                r = journal_file_refresh_header(f);
                if (r < 0)
                        goto fail;
        }

#ifdef HAVE_GCRYPT
        r = journal_file_hmac_setup(f);
        if (r < 0)
                goto fail;
#endif

        if (newly_created) {
                r = journal_file_setup_field_hash_table(f);
                if (r < 0)
                        goto fail;

                r = journal_file_setup_data_hash_table(f);
                if (r < 0)
                        goto fail;

#ifdef HAVE_GCRYPT
                r = journal_file_append_first_tag(f);
                if (r < 0)
                        goto fail;
#endif
        }

        r = journal_file_map_field_hash_table(f);
        if (r < 0)
                goto fail;

        r = journal_file_map_data_hash_table(f);
        if (r < 0)
                goto fail;

        *ret = f;
        return 0;

fail:
        /* Single cleanup path for everything set up so far */
        journal_file_close(f);

        return r;
}
2638
2639 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2640         _cleanup_free_ char *p = NULL;
2641         size_t l;
2642         JournalFile *old_file, *new_file = NULL;
2643         int r;
2644
2645         assert(f);
2646         assert(*f);
2647
2648         old_file = *f;
2649
2650         if (!old_file->writable)
2651                 return -EINVAL;
2652
2653         if (!endswith(old_file->path, ".journal"))
2654                 return -EINVAL;
2655
2656         l = strlen(old_file->path);
2657         r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2658                      (int) l - 8, old_file->path,
2659                      SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2660                      le64toh((*f)->header->head_entry_seqnum),
2661                      le64toh((*f)->header->head_entry_realtime));
2662         if (r < 0)
2663                 return -ENOMEM;
2664
2665         r = rename(old_file->path, p);
2666         if (r < 0)
2667                 return -errno;
2668
2669         old_file->header->state = STATE_ARCHIVED;
2670
2671         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2672         journal_file_close(old_file);
2673
2674         *f = new_file;
2675         return r;
2676 }
2677
2678 int journal_file_open_reliably(
2679                 const char *fname,
2680                 int flags,
2681                 mode_t mode,
2682                 bool compress,
2683                 bool seal,
2684                 JournalMetrics *metrics,
2685                 MMapCache *mmap_cache,
2686                 JournalFile *template,
2687                 JournalFile **ret) {
2688
2689         int r;
2690         size_t l;
2691         _cleanup_free_ char *p = NULL;
2692
2693         r = journal_file_open(fname, flags, mode, compress, seal,
2694                               metrics, mmap_cache, template, ret);
2695         if (r != -EBADMSG && /* corrupted */
2696             r != -ENODATA && /* truncated */
2697             r != -EHOSTDOWN && /* other machine */
2698             r != -EPROTONOSUPPORT && /* incompatible feature */
2699             r != -EBUSY && /* unclean shutdown */
2700             r != -ESHUTDOWN /* already archived */)
2701                 return r;
2702
2703         if ((flags & O_ACCMODE) == O_RDONLY)
2704                 return r;
2705
2706         if (!(flags & O_CREAT))
2707                 return r;
2708
2709         if (!endswith(fname, ".journal"))
2710                 return r;
2711
2712         /* The file is corrupted. Rotate it away and try it again (but only once) */
2713
2714         l = strlen(fname);
2715         if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
2716                      (int) l - 8, fname,
2717                      (unsigned long long) now(CLOCK_REALTIME),
2718                      random_u64()) < 0)
2719                 return -ENOMEM;
2720
2721         r = rename(fname, p);
2722         if (r < 0)
2723                 return -errno;
2724
2725         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2726
2727         return journal_file_open(fname, flags, mode, compress, seal,
2728                                  metrics, mmap_cache, template, ret);
2729 }
2730
2731 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2732         uint64_t i, n;
2733         uint64_t q, xor_hash = 0;
2734         int r;
2735         EntryItem *items;
2736         dual_timestamp ts;
2737
2738         assert(from);
2739         assert(to);
2740         assert(o);
2741         assert(p);
2742
2743         if (!to->writable)
2744                 return -EPERM;
2745
2746         ts.monotonic = le64toh(o->entry.monotonic);
2747         ts.realtime = le64toh(o->entry.realtime);
2748
2749         n = journal_file_entry_n_items(o);
2750         /* alloca() can't take 0, hence let's allocate at least one */
2751         items = alloca(sizeof(EntryItem) * MAX(1u, n));
2752
2753         for (i = 0; i < n; i++) {
2754                 uint64_t l, h;
2755                 le64_t le_hash;
2756                 size_t t;
2757                 void *data;
2758                 Object *u;
2759
2760                 q = le64toh(o->entry.items[i].object_offset);
2761                 le_hash = o->entry.items[i].hash;
2762
2763                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2764                 if (r < 0)
2765                         return r;
2766
2767                 if (le_hash != o->data.hash)
2768                         return -EBADMSG;
2769
2770                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2771                 t = (size_t) l;
2772
2773                 /* We hit the limit on 32bit machines */
2774                 if ((uint64_t) t != l)
2775                         return -E2BIG;
2776
2777                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
2778 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2779                         size_t rsize;
2780
2781                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
2782                                             o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
2783                         if (r < 0)
2784                                 return r;
2785
2786                         data = from->compress_buffer;
2787                         l = rsize;
2788 #else
2789                         return -EPROTONOSUPPORT;
2790 #endif
2791                 } else
2792                         data = o->data.payload;
2793
2794                 r = journal_file_append_data(to, data, l, &u, &h);
2795                 if (r < 0)
2796                         return r;
2797
2798                 xor_hash ^= le64toh(u->data.hash);
2799                 items[i].object_offset = htole64(h);
2800                 items[i].hash = u->data.hash;
2801
2802                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2803                 if (r < 0)
2804                         return r;
2805         }
2806
2807         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2808 }
2809
2810 void journal_default_metrics(JournalMetrics *m, int fd) {
2811         uint64_t fs_size = 0;
2812         struct statvfs ss;
2813         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2814
2815         assert(m);
2816         assert(fd >= 0);
2817
2818         if (fstatvfs(fd, &ss) >= 0)
2819                 fs_size = ss.f_frsize * ss.f_blocks;
2820
2821         if (m->max_use == (uint64_t) -1) {
2822
2823                 if (fs_size > 0) {
2824                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2825
2826                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2827                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2828
2829                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2830                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2831                 } else
2832                         m->max_use = DEFAULT_MAX_USE_LOWER;
2833         } else {
2834                 m->max_use = PAGE_ALIGN(m->max_use);
2835
2836                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2837                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2838         }
2839
2840         if (m->max_size == (uint64_t) -1) {
2841                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2842
2843                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2844                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2845         } else
2846                 m->max_size = PAGE_ALIGN(m->max_size);
2847
2848         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2849                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2850
2851         if (m->max_size*2 > m->max_use)
2852                 m->max_use = m->max_size*2;
2853
2854         if (m->min_size == (uint64_t) -1)
2855                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2856         else {
2857                 m->min_size = PAGE_ALIGN(m->min_size);
2858
2859                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2860                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2861
2862                 if (m->min_size > m->max_size)
2863                         m->max_size = m->min_size;
2864         }
2865
2866         if (m->keep_free == (uint64_t) -1) {
2867
2868                 if (fs_size > 0) {
2869                         m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
2870
2871                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2872                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2873
2874                 } else
2875                         m->keep_free = DEFAULT_KEEP_FREE;
2876         }
2877
2878         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2879                   format_bytes(a, sizeof(a), m->max_use),
2880                   format_bytes(b, sizeof(b), m->max_size),
2881                   format_bytes(c, sizeof(c), m->min_size),
2882                   format_bytes(d, sizeof(d), m->keep_free));
2883 }
2884
2885 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2886         assert(f);
2887         assert(from || to);
2888
2889         if (from) {
2890                 if (f->header->head_entry_realtime == 0)
2891                         return -ENOENT;
2892
2893                 *from = le64toh(f->header->head_entry_realtime);
2894         }
2895
2896         if (to) {
2897                 if (f->header->tail_entry_realtime == 0)
2898                         return -ENOENT;
2899
2900                 *to = le64toh(f->header->tail_entry_realtime);
2901         }
2902
2903         return 1;
2904 }
2905
2906 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2907         Object *o;
2908         uint64_t p;
2909         int r;
2910
2911         assert(f);
2912         assert(from || to);
2913
2914         r = find_data_object_by_boot_id(f, boot_id, &o, &p);
2915         if (r <= 0)
2916                 return r;
2917
2918         if (le64toh(o->data.n_entries) <= 0)
2919                 return 0;
2920
2921         if (from) {
2922                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2923                 if (r < 0)
2924                         return r;
2925
2926                 *from = le64toh(o->entry.monotonic);
2927         }
2928
2929         if (to) {
2930                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2931                 if (r < 0)
2932                         return r;
2933
2934                 r = generic_array_get_plus_one(f,
2935                                                le64toh(o->data.entry_offset),
2936                                                le64toh(o->data.entry_array_offset),
2937                                                le64toh(o->data.n_entries)-1,
2938                                                &o, NULL);
2939                 if (r <= 0)
2940                         return r;
2941
2942                 *to = le64toh(o->entry.monotonic);
2943         }
2944
2945         return 1;
2946 }
2947
2948 bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
2949         assert(f);
2950
2951         /* If we gained new header fields we gained new features,
2952          * hence suggest a rotation */
2953         if (le64toh(f->header->header_size) < sizeof(Header)) {
2954                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2955                 return true;
2956         }
2957
2958         /* Let's check if the hash tables grew over a certain fill
2959          * level (75%, borrowing this value from Java's hash table
2960          * implementation), and if so suggest a rotation. To calculate
2961          * the fill level we need the n_data field, which only exists
2962          * in newer versions. */
2963
2964         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2965                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2966                         log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
2967                                   f->path,
2968                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2969                                   le64toh(f->header->n_data),
2970                                   le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2971                                   (unsigned long long) f->last_stat.st_size,
2972                                   f->last_stat.st_size / le64toh(f->header->n_data));
2973                         return true;
2974                 }
2975
2976         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2977                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2978                         log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
2979                                   f->path,
2980                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2981                                   le64toh(f->header->n_fields),
2982                                   le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
2983                         return true;
2984                 }
2985
2986         /* Are the data objects properly indexed by field objects? */
2987         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2988             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2989             le64toh(f->header->n_data) > 0 &&
2990             le64toh(f->header->n_fields) == 0)
2991                 return true;
2992
2993         if (max_file_usec > 0) {
2994                 usec_t t, h;
2995
2996                 h = le64toh(f->header->head_entry_realtime);
2997                 t = now(CLOCK_REALTIME);
2998
2999                 if (h > 0 && t > h + max_file_usec)
3000                         return true;
3001         }
3002
3003         return false;
3004 }