chiark / gitweb /
Fix build without any compression enabled
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29 #include <sys/xattr.h>
30
31 #include "journal-def.h"
32 #include "journal-file.h"
33 #include "journal-authenticate.h"
34 #include "lookup3.h"
35 #include "compress.h"
36 #include "fsprg.h"
37
/* Default sizes (in bytes) of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Payloads smaller than this many bytes are never compressed */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Smallest size a journal file may have: 4 MiB */
#define JOURNAL_FILE_SIZE_MIN (4ULL * 1024ULL * 1024ULL)

/* Lower and upper bounds applied when max_use is deduced from the
 * file system size: 1 MiB and 4 GiB */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)

/* Upper bound applied when max_size is deduced from max_use: 128 MiB */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)

/* Upper bound applied when keep_free is deduced from the file system
 * size: 4 GiB */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)

/* keep_free value used when the file system size cannot be
 * determined: 1 MiB */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)

/* n_data was the first field added after the initial file format
 * design, hence a valid header reaches at least up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* Maximum number of entries kept in the entry array chain cache */
#define CHAIN_CACHE_MAX 20

/* Granularity by which the journal file grows whenever something new
 * is allocated: 8 MiB */
#define FILE_SIZE_INCREASE (8ULL * 1024ULL * 1024ULL)
70
71 static int journal_file_set_online(JournalFile *f) {
72         assert(f);
73
74         if (!f->writable)
75                 return -EPERM;
76
77         if (!(f->fd >= 0 && f->header))
78                 return -EINVAL;
79
80         switch(f->header->state) {
81                 case STATE_ONLINE:
82                         return 0;
83
84                 case STATE_OFFLINE:
85                         f->header->state = STATE_ONLINE;
86                         fsync(f->fd);
87                         return 0;
88
89                 default:
90                         return -EINVAL;
91         }
92 }
93
94 int journal_file_set_offline(JournalFile *f) {
95         assert(f);
96
97         if (!f->writable)
98                 return -EPERM;
99
100         if (!(f->fd >= 0 && f->header))
101                 return -EINVAL;
102
103         if (f->header->state != STATE_ONLINE)
104                 return 0;
105
106         fsync(f->fd);
107
108         f->header->state = STATE_OFFLINE;
109
110         fsync(f->fd);
111
112         return 0;
113 }
114
115 void journal_file_close(JournalFile *f) {
116         assert(f);
117
118 #ifdef HAVE_GCRYPT
119         /* Write the final tag */
120         if (f->seal && f->writable)
121                 journal_file_append_tag(f);
122 #endif
123
124         /* Sync everything to disk, before we mark the file offline */
125         if (f->mmap && f->fd >= 0)
126                 mmap_cache_close_fd(f->mmap, f->fd);
127
128         journal_file_set_offline(f);
129
130         if (f->header)
131                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
132
133         safe_close(f->fd);
134         free(f->path);
135
136         if (f->mmap)
137                 mmap_cache_unref(f->mmap);
138
139         hashmap_free_free(f->chain_cache);
140
141 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
142         free(f->compress_buffer);
143 #endif
144
145 #ifdef HAVE_GCRYPT
146         if (f->fss_file)
147                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
148         else if (f->fsprg_state)
149                 free(f->fsprg_state);
150
151         free(f->fsprg_seed);
152
153         if (f->hmac)
154                 gcry_md_close(f->hmac);
155 #endif
156
157         free(f);
158 }
159
160 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
161         Header h = {};
162         ssize_t k;
163         int r;
164
165         assert(f);
166
167         memcpy(h.signature, HEADER_SIGNATURE, 8);
168         h.header_size = htole64(ALIGN64(sizeof(h)));
169
170         h.incompatible_flags |= htole32(
171                 f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
172                 f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);
173
174         h.compatible_flags = htole32(
175                 f->seal * HEADER_COMPATIBLE_SEALED);
176
177         r = sd_id128_randomize(&h.file_id);
178         if (r < 0)
179                 return r;
180
181         if (template) {
182                 h.seqnum_id = template->header->seqnum_id;
183                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
184         } else
185                 h.seqnum_id = h.file_id;
186
187         k = pwrite(f->fd, &h, sizeof(h), 0);
188         if (k < 0)
189                 return -errno;
190
191         if (k != sizeof(h))
192                 return -EIO;
193
194         return 0;
195 }
196
197 static int journal_file_refresh_header(JournalFile *f) {
198         int r;
199         sd_id128_t boot_id;
200
201         assert(f);
202
203         r = sd_id128_get_machine(&f->header->machine_id);
204         if (r < 0)
205                 return r;
206
207         r = sd_id128_get_boot(&boot_id);
208         if (r < 0)
209                 return r;
210
211         if (sd_id128_equal(boot_id, f->header->boot_id))
212                 f->tail_entry_monotonic_valid = true;
213
214         f->header->boot_id = boot_id;
215
216         journal_file_set_online(f);
217
218         /* Sync the online state to disk */
219         fsync(f->fd);
220
221         return 0;
222 }
223
224 static int journal_file_verify_header(JournalFile *f) {
225         uint32_t flags;
226
227         assert(f);
228
229         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
230                 return -EBADMSG;
231
232         /* In both read and write mode we refuse to open files with
233          * incompatible flags we don't know */
234         flags = le32toh(f->header->incompatible_flags);
235         if (flags & ~HEADER_INCOMPATIBLE_SUPPORTED) {
236                 if (flags & ~HEADER_INCOMPATIBLE_ANY)
237                         log_debug("Journal file %s has unknown incompatible flags %"PRIx32,
238                                   f->path, flags & ~HEADER_INCOMPATIBLE_ANY);
239                 flags = (flags & HEADER_INCOMPATIBLE_ANY) & ~HEADER_INCOMPATIBLE_SUPPORTED;
240                 if (flags)
241                         log_debug("Journal file %s uses incompatible flags %"PRIx32
242                                   " disabled at compilation time.", f->path, flags);
243                 return -EPROTONOSUPPORT;
244         }
245
246         /* When open for writing we refuse to open files with
247          * compatible flags, too */
248         flags = le32toh(f->header->compatible_flags);
249         if (f->writable && (flags & ~HEADER_COMPATIBLE_SUPPORTED)) {
250                 if (flags & ~HEADER_COMPATIBLE_ANY)
251                         log_debug("Journal file %s has unknown compatible flags %"PRIx32,
252                                   f->path, flags & ~HEADER_COMPATIBLE_ANY);
253                 flags = (flags & HEADER_COMPATIBLE_ANY) & ~HEADER_COMPATIBLE_SUPPORTED;
254                 if (flags)
255                         log_debug("Journal file %s uses compatible flags %"PRIx32
256                                   " disabled at compilation time.", f->path, flags);
257                 return -EPROTONOSUPPORT;
258         }
259
260         if (f->header->state >= _STATE_MAX)
261                 return -EBADMSG;
262
263         /* The first addition was n_data, so check that we are at least this large */
264         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
265                 return -EBADMSG;
266
267         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
268                 return -EBADMSG;
269
270         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
271                 return -ENODATA;
272
273         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
274                 return -ENODATA;
275
276         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
277             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
278             !VALID64(le64toh(f->header->tail_object_offset)) ||
279             !VALID64(le64toh(f->header->entry_array_offset)))
280                 return -ENODATA;
281
282         if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
283             le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
284             le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
285             le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
286                 return -ENODATA;
287
288         if (f->writable) {
289                 uint8_t state;
290                 sd_id128_t machine_id;
291                 int r;
292
293                 r = sd_id128_get_machine(&machine_id);
294                 if (r < 0)
295                         return r;
296
297                 if (!sd_id128_equal(machine_id, f->header->machine_id))
298                         return -EHOSTDOWN;
299
300                 state = f->header->state;
301
302                 if (state == STATE_ONLINE) {
303                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
304                         return -EBUSY;
305                 } else if (state == STATE_ARCHIVED)
306                         return -ESHUTDOWN;
307                 else if (state != STATE_OFFLINE) {
308                         log_debug("Journal file %s has unknown state %u.", f->path, state);
309                         return -EBUSY;
310                 }
311         }
312
313         f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
314         f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);
315
316         f->seal = JOURNAL_HEADER_SEALED(f->header);
317
318         return 0;
319 }
320
321 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
322         uint64_t old_size, new_size;
323         int r;
324
325         assert(f);
326
327         /* We assume that this file is not sparse, and we know that
328          * for sure, since we always call posix_fallocate()
329          * ourselves */
330
331         old_size =
332                 le64toh(f->header->header_size) +
333                 le64toh(f->header->arena_size);
334
335         new_size = PAGE_ALIGN(offset + size);
336         if (new_size < le64toh(f->header->header_size))
337                 new_size = le64toh(f->header->header_size);
338
339         if (new_size <= old_size)
340                 return 0;
341
342         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
343                 return -E2BIG;
344
345         if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
346                 struct statvfs svfs;
347
348                 if (fstatvfs(f->fd, &svfs) >= 0) {
349                         uint64_t available;
350
351                         available = svfs.f_bfree * svfs.f_bsize;
352
353                         if (available >= f->metrics.keep_free)
354                                 available -= f->metrics.keep_free;
355                         else
356                                 available = 0;
357
358                         if (new_size - old_size > available)
359                                 return -E2BIG;
360                 }
361         }
362
363         /* Increase by larger blocks at once */
364         new_size = ((new_size+FILE_SIZE_INCREASE-1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
365         if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
366                 new_size = f->metrics.max_size;
367
368         /* Note that the glibc fallocate() fallback is very
369            inefficient, hence we try to minimize the allocation area
370            as we can. */
371         r = posix_fallocate(f->fd, old_size, new_size - old_size);
372         if (r != 0)
373                 return -r;
374
375         if (fstat(f->fd, &f->last_stat) < 0)
376                 return -errno;
377
378         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
379
380         return 0;
381 }
382
383 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
384         assert(f);
385         assert(ret);
386
387         if (size <= 0)
388                 return -EINVAL;
389
390         /* Avoid SIGBUS on invalid accesses */
391         if (offset + size > (uint64_t) f->last_stat.st_size) {
392                 /* Hmm, out of range? Let's refresh the fstat() data
393                  * first, before we trust that check. */
394
395                 if (fstat(f->fd, &f->last_stat) < 0 ||
396                     offset + size > (uint64_t) f->last_stat.st_size)
397                         return -EADDRNOTAVAIL;
398         }
399
400         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
401 }
402
403 static uint64_t minimum_header_size(Object *o) {
404
405         static const uint64_t table[] = {
406                 [OBJECT_DATA] = sizeof(DataObject),
407                 [OBJECT_FIELD] = sizeof(FieldObject),
408                 [OBJECT_ENTRY] = sizeof(EntryObject),
409                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
410                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
411                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
412                 [OBJECT_TAG] = sizeof(TagObject),
413         };
414
415         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
416                 return sizeof(ObjectHeader);
417
418         return table[o->object.type];
419 }
420
421 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
422         int r;
423         void *t;
424         Object *o;
425         uint64_t s;
426
427         assert(f);
428         assert(ret);
429
430         /* Objects may only be located at multiple of 64 bit */
431         if (!VALID64(offset))
432                 return -EFAULT;
433
434
435         r = journal_file_move_to(f, type_to_context(type), false, offset, sizeof(ObjectHeader), &t);
436         if (r < 0)
437                 return r;
438
439         o = (Object*) t;
440         s = le64toh(o->object.size);
441
442         if (s < sizeof(ObjectHeader))
443                 return -EBADMSG;
444
445         if (o->object.type <= OBJECT_UNUSED)
446                 return -EBADMSG;
447
448         if (s < minimum_header_size(o))
449                 return -EBADMSG;
450
451         if (type > 0 && o->object.type != type)
452                 return -EBADMSG;
453
454         if (s > sizeof(ObjectHeader)) {
455                 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
456                 if (r < 0)
457                         return r;
458
459                 o = (Object*) t;
460         }
461
462         *ret = o;
463         return 0;
464 }
465
466 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
467         uint64_t r;
468
469         assert(f);
470
471         r = le64toh(f->header->tail_entry_seqnum) + 1;
472
473         if (seqnum) {
474                 /* If an external seqnum counter was passed, we update
475                  * both the local and the external one, and set it to
476                  * the maximum of both */
477
478                 if (*seqnum + 1 > r)
479                         r = *seqnum + 1;
480
481                 *seqnum = r;
482         }
483
484         f->header->tail_entry_seqnum = htole64(r);
485
486         if (f->header->head_entry_seqnum == 0)
487                 f->header->head_entry_seqnum = htole64(r);
488
489         return r;
490 }
491
492 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
493         int r;
494         uint64_t p;
495         Object *tail, *o;
496         void *t;
497
498         assert(f);
499         assert(type > 0 && type < _OBJECT_TYPE_MAX);
500         assert(size >= sizeof(ObjectHeader));
501         assert(offset);
502         assert(ret);
503
504         r = journal_file_set_online(f);
505         if (r < 0)
506                 return r;
507
508         p = le64toh(f->header->tail_object_offset);
509         if (p == 0)
510                 p = le64toh(f->header->header_size);
511         else {
512                 r = journal_file_move_to_object(f, -1, p, &tail);
513                 if (r < 0)
514                         return r;
515
516                 p += ALIGN64(le64toh(tail->object.size));
517         }
518
519         r = journal_file_allocate(f, p, size);
520         if (r < 0)
521                 return r;
522
523         r = journal_file_move_to(f, type, false, p, size, &t);
524         if (r < 0)
525                 return r;
526
527         o = (Object*) t;
528
529         zero(o->object);
530         o->object.type = type;
531         o->object.size = htole64(size);
532
533         f->header->tail_object_offset = htole64(p);
534         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
535
536         *ret = o;
537         *offset = p;
538
539         return 0;
540 }
541
542 static int journal_file_setup_data_hash_table(JournalFile *f) {
543         uint64_t s, p;
544         Object *o;
545         int r;
546
547         assert(f);
548
549         /* We estimate that we need 1 hash table entry per 768 of
550            journal file and we want to make sure we never get beyond
551            75% fill level. Calculate the hash table size for the
552            maximum file size based on these metrics. */
553
554         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
555         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
556                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
557
558         log_debug("Reserving %"PRIu64" entries in hash table.", s / sizeof(HashItem));
559
560         r = journal_file_append_object(f,
561                                        OBJECT_DATA_HASH_TABLE,
562                                        offsetof(Object, hash_table.items) + s,
563                                        &o, &p);
564         if (r < 0)
565                 return r;
566
567         memzero(o->hash_table.items, s);
568
569         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
570         f->header->data_hash_table_size = htole64(s);
571
572         return 0;
573 }
574
575 static int journal_file_setup_field_hash_table(JournalFile *f) {
576         uint64_t s, p;
577         Object *o;
578         int r;
579
580         assert(f);
581
582         /* We use a fixed size hash table for the fields as this
583          * number should grow very slowly only */
584
585         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
586         r = journal_file_append_object(f,
587                                        OBJECT_FIELD_HASH_TABLE,
588                                        offsetof(Object, hash_table.items) + s,
589                                        &o, &p);
590         if (r < 0)
591                 return r;
592
593         memzero(o->hash_table.items, s);
594
595         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
596         f->header->field_hash_table_size = htole64(s);
597
598         return 0;
599 }
600
601 static int journal_file_map_data_hash_table(JournalFile *f) {
602         uint64_t s, p;
603         void *t;
604         int r;
605
606         assert(f);
607
608         p = le64toh(f->header->data_hash_table_offset);
609         s = le64toh(f->header->data_hash_table_size);
610
611         r = journal_file_move_to(f,
612                                  OBJECT_DATA_HASH_TABLE,
613                                  true,
614                                  p, s,
615                                  &t);
616         if (r < 0)
617                 return r;
618
619         f->data_hash_table = t;
620         return 0;
621 }
622
623 static int journal_file_map_field_hash_table(JournalFile *f) {
624         uint64_t s, p;
625         void *t;
626         int r;
627
628         assert(f);
629
630         p = le64toh(f->header->field_hash_table_offset);
631         s = le64toh(f->header->field_hash_table_size);
632
633         r = journal_file_move_to(f,
634                                  OBJECT_FIELD_HASH_TABLE,
635                                  true,
636                                  p, s,
637                                  &t);
638         if (r < 0)
639                 return r;
640
641         f->field_hash_table = t;
642         return 0;
643 }
644
645 static int journal_file_link_field(
646                 JournalFile *f,
647                 Object *o,
648                 uint64_t offset,
649                 uint64_t hash) {
650
651         uint64_t p, h;
652         int r;
653
654         assert(f);
655         assert(o);
656         assert(offset > 0);
657
658         if (o->object.type != OBJECT_FIELD)
659                 return -EINVAL;
660
661         /* This might alter the window we are looking at */
662
663         o->field.next_hash_offset = o->field.head_data_offset = 0;
664
665         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
666         p = le64toh(f->field_hash_table[h].tail_hash_offset);
667         if (p == 0)
668                 f->field_hash_table[h].head_hash_offset = htole64(offset);
669         else {
670                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
671                 if (r < 0)
672                         return r;
673
674                 o->field.next_hash_offset = htole64(offset);
675         }
676
677         f->field_hash_table[h].tail_hash_offset = htole64(offset);
678
679         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
680                 f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);
681
682         return 0;
683 }
684
685 static int journal_file_link_data(
686                 JournalFile *f,
687                 Object *o,
688                 uint64_t offset,
689                 uint64_t hash) {
690
691         uint64_t p, h;
692         int r;
693
694         assert(f);
695         assert(o);
696         assert(offset > 0);
697
698         if (o->object.type != OBJECT_DATA)
699                 return -EINVAL;
700
701         /* This might alter the window we are looking at */
702
703         o->data.next_hash_offset = o->data.next_field_offset = 0;
704         o->data.entry_offset = o->data.entry_array_offset = 0;
705         o->data.n_entries = 0;
706
707         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
708         p = le64toh(f->data_hash_table[h].tail_hash_offset);
709         if (p == 0)
710                 /* Only entry in the hash table is easy */
711                 f->data_hash_table[h].head_hash_offset = htole64(offset);
712         else {
713                 /* Move back to the previous data object, to patch in
714                  * pointer */
715
716                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
717                 if (r < 0)
718                         return r;
719
720                 o->data.next_hash_offset = htole64(offset);
721         }
722
723         f->data_hash_table[h].tail_hash_offset = htole64(offset);
724
725         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
726                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
727
728         return 0;
729 }
730
731 int journal_file_find_field_object_with_hash(
732                 JournalFile *f,
733                 const void *field, uint64_t size, uint64_t hash,
734                 Object **ret, uint64_t *offset) {
735
736         uint64_t p, osize, h;
737         int r;
738
739         assert(f);
740         assert(field && size > 0);
741
742         osize = offsetof(Object, field.payload) + size;
743
744         if (f->header->field_hash_table_size == 0)
745                 return -EBADMSG;
746
747         h = hash % (le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
748         p = le64toh(f->field_hash_table[h].head_hash_offset);
749
750         while (p > 0) {
751                 Object *o;
752
753                 r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
754                 if (r < 0)
755                         return r;
756
757                 if (le64toh(o->field.hash) == hash &&
758                     le64toh(o->object.size) == osize &&
759                     memcmp(o->field.payload, field, size) == 0) {
760
761                         if (ret)
762                                 *ret = o;
763                         if (offset)
764                                 *offset = p;
765
766                         return 1;
767                 }
768
769                 p = le64toh(o->field.next_hash_offset);
770         }
771
772         return 0;
773 }
774
775 int journal_file_find_field_object(
776                 JournalFile *f,
777                 const void *field, uint64_t size,
778                 Object **ret, uint64_t *offset) {
779
780         uint64_t hash;
781
782         assert(f);
783         assert(field && size > 0);
784
785         hash = hash64(field, size);
786
787         return journal_file_find_field_object_with_hash(f,
788                                                         field, size, hash,
789                                                         ret, offset);
790 }
791
792 int journal_file_find_data_object_with_hash(
793                 JournalFile *f,
794                 const void *data, uint64_t size, uint64_t hash,
795                 Object **ret, uint64_t *offset) {
796
797         uint64_t p, osize, h;
798         int r;
799
800         assert(f);
801         assert(data || size == 0);
802
803         osize = offsetof(Object, data.payload) + size;
804
805         if (f->header->data_hash_table_size == 0)
806                 return -EBADMSG;
807
808         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
809         p = le64toh(f->data_hash_table[h].head_hash_offset);
810
811         while (p > 0) {
812                 Object *o;
813
814                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
815                 if (r < 0)
816                         return r;
817
818                 if (le64toh(o->data.hash) != hash)
819                         goto next;
820
821                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
822 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
823                         uint64_t l, rsize;
824
825                         l = le64toh(o->object.size);
826                         if (l <= offsetof(Object, data.payload))
827                                 return -EBADMSG;
828
829                         l -= offsetof(Object, data.payload);
830
831                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
832                                             o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize, 0);
833                         if (r < 0)
834                                 return r;
835
836                         if (rsize == size &&
837                             memcmp(f->compress_buffer, data, size) == 0) {
838
839                                 if (ret)
840                                         *ret = o;
841
842                                 if (offset)
843                                         *offset = p;
844
845                                 return 1;
846                         }
847 #else
848                         return -EPROTONOSUPPORT;
849 #endif
850                 } else if (le64toh(o->object.size) == osize &&
851                            memcmp(o->data.payload, data, size) == 0) {
852
853                         if (ret)
854                                 *ret = o;
855
856                         if (offset)
857                                 *offset = p;
858
859                         return 1;
860                 }
861
862         next:
863                 p = le64toh(o->data.next_hash_offset);
864         }
865
866         return 0;
867 }
868
869 int journal_file_find_data_object(
870                 JournalFile *f,
871                 const void *data, uint64_t size,
872                 Object **ret, uint64_t *offset) {
873
874         uint64_t hash;
875
876         assert(f);
877         assert(data || size == 0);
878
879         hash = hash64(data, size);
880
881         return journal_file_find_data_object_with_hash(f,
882                                                        data, size, hash,
883                                                        ret, offset);
884 }
885
886 static int journal_file_append_field(
887                 JournalFile *f,
888                 const void *field, uint64_t size,
889                 Object **ret, uint64_t *offset) {
890
891         uint64_t hash, p;
892         uint64_t osize;
893         Object *o;
894         int r;
895
896         assert(f);
897         assert(field && size > 0);
898
899         hash = hash64(field, size);
900
901         r = journal_file_find_field_object_with_hash(f, field, size, hash, &o, &p);
902         if (r < 0)
903                 return r;
904         else if (r > 0) {
905
906                 if (ret)
907                         *ret = o;
908
909                 if (offset)
910                         *offset = p;
911
912                 return 0;
913         }
914
915         osize = offsetof(Object, field.payload) + size;
916         r = journal_file_append_object(f, OBJECT_FIELD, osize, &o, &p);
917         if (r < 0)
918                 return r;
919
920         o->field.hash = htole64(hash);
921         memcpy(o->field.payload, field, size);
922
923         r = journal_file_link_field(f, o, p, hash);
924         if (r < 0)
925                 return r;
926
927         /* The linking might have altered the window, so let's
928          * refresh our pointer */
929         r = journal_file_move_to_object(f, OBJECT_FIELD, p, &o);
930         if (r < 0)
931                 return r;
932
933 #ifdef HAVE_GCRYPT
934         r = journal_file_hmac_put_object(f, OBJECT_FIELD, o, p);
935         if (r < 0)
936                 return r;
937 #endif
938
939         if (ret)
940                 *ret = o;
941
942         if (offset)
943                 *offset = p;
944
945         return 0;
946 }
947
948 static int journal_file_append_data(
949                 JournalFile *f,
950                 const void *data, uint64_t size,
951                 Object **ret, uint64_t *offset) {
952
953         uint64_t hash, p;
954         uint64_t osize;
955         Object *o;
956         int r, compression = 0;
957         const void *eq;
958
959         assert(f);
960         assert(data || size == 0);
961
962         hash = hash64(data, size);
963
964         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
965         if (r < 0)
966                 return r;
967         else if (r > 0) {
968
969                 if (ret)
970                         *ret = o;
971
972                 if (offset)
973                         *offset = p;
974
975                 return 0;
976         }
977
978         osize = offsetof(Object, data.payload) + size;
979         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
980         if (r < 0)
981                 return r;
982
983         o->data.hash = htole64(hash);
984
985 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
986         if (f->compress_xz &&
987             size >= COMPRESSION_SIZE_THRESHOLD) {
988                 uint64_t rsize;
989
990                 compression = compress_blob(data, size, o->data.payload, &rsize);
991
992                 if (compression) {
993                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
994                         o->object.flags |= compression;
995
996                         log_debug("Compressed data object %"PRIu64" -> %"PRIu64" using %s",
997                                   size, rsize, object_compressed_to_string(compression));
998                 }
999         }
1000 #endif
1001
1002         if (!compression && size > 0)
1003                 memcpy(o->data.payload, data, size);
1004
1005         r = journal_file_link_data(f, o, p, hash);
1006         if (r < 0)
1007                 return r;
1008
1009         /* The linking might have altered the window, so let's
1010          * refresh our pointer */
1011         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1012         if (r < 0)
1013                 return r;
1014
1015         if (!data)
1016                 eq = NULL;
1017         else
1018                 eq = memchr(data, '=', size);
1019         if (eq && eq > data) {
1020                 Object *fo = NULL;
1021                 uint64_t fp;
1022
1023                 /* Create field object ... */
1024                 r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
1025                 if (r < 0)
1026                         return r;
1027
1028                 /* ... and link it in. */
1029                 o->data.next_field_offset = fo->field.head_data_offset;
1030                 fo->field.head_data_offset = le64toh(p);
1031         }
1032
1033 #ifdef HAVE_GCRYPT
1034         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
1035         if (r < 0)
1036                 return r;
1037 #endif
1038
1039         if (ret)
1040                 *ret = o;
1041
1042         if (offset)
1043                 *offset = p;
1044
1045         return 0;
1046 }
1047
1048 uint64_t journal_file_entry_n_items(Object *o) {
1049         assert(o);
1050
1051         if (o->object.type != OBJECT_ENTRY)
1052                 return 0;
1053
1054         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
1055 }
1056
1057 uint64_t journal_file_entry_array_n_items(Object *o) {
1058         assert(o);
1059
1060         if (o->object.type != OBJECT_ENTRY_ARRAY)
1061                 return 0;
1062
1063         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
1064 }
1065
1066 uint64_t journal_file_hash_table_n_items(Object *o) {
1067         assert(o);
1068
1069         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
1070             o->object.type != OBJECT_FIELD_HASH_TABLE)
1071                 return 0;
1072
1073         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
1074 }
1075
1076 static int link_entry_into_array(JournalFile *f,
1077                                  le64_t *first,
1078                                  le64_t *idx,
1079                                  uint64_t p) {
1080         int r;
1081         uint64_t n = 0, ap = 0, q, i, a, hidx;
1082         Object *o;
1083
1084         assert(f);
1085         assert(first);
1086         assert(idx);
1087         assert(p > 0);
1088
1089         a = le64toh(*first);
1090         i = hidx = le64toh(*idx);
1091         while (a > 0) {
1092
1093                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1094                 if (r < 0)
1095                         return r;
1096
1097                 n = journal_file_entry_array_n_items(o);
1098                 if (i < n) {
1099                         o->entry_array.items[i] = htole64(p);
1100                         *idx = htole64(hidx + 1);
1101                         return 0;
1102                 }
1103
1104                 i -= n;
1105                 ap = a;
1106                 a = le64toh(o->entry_array.next_entry_array_offset);
1107         }
1108
1109         if (hidx > n)
1110                 n = (hidx+1) * 2;
1111         else
1112                 n = n * 2;
1113
1114         if (n < 4)
1115                 n = 4;
1116
1117         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
1118                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
1119                                        &o, &q);
1120         if (r < 0)
1121                 return r;
1122
1123 #ifdef HAVE_GCRYPT
1124         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
1125         if (r < 0)
1126                 return r;
1127 #endif
1128
1129         o->entry_array.items[i] = htole64(p);
1130
1131         if (ap == 0)
1132                 *first = htole64(q);
1133         else {
1134                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
1135                 if (r < 0)
1136                         return r;
1137
1138                 o->entry_array.next_entry_array_offset = htole64(q);
1139         }
1140
1141         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1142                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
1143
1144         *idx = htole64(hidx + 1);
1145
1146         return 0;
1147 }
1148
1149 static int link_entry_into_array_plus_one(JournalFile *f,
1150                                           le64_t *extra,
1151                                           le64_t *first,
1152                                           le64_t *idx,
1153                                           uint64_t p) {
1154
1155         int r;
1156
1157         assert(f);
1158         assert(extra);
1159         assert(first);
1160         assert(idx);
1161         assert(p > 0);
1162
1163         if (*idx == 0)
1164                 *extra = htole64(p);
1165         else {
1166                 le64_t i;
1167
1168                 i = htole64(le64toh(*idx) - 1);
1169                 r = link_entry_into_array(f, first, &i, p);
1170                 if (r < 0)
1171                         return r;
1172         }
1173
1174         *idx = htole64(le64toh(*idx) + 1);
1175         return 0;
1176 }
1177
1178 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
1179         uint64_t p;
1180         int r;
1181         assert(f);
1182         assert(o);
1183         assert(offset > 0);
1184
1185         p = le64toh(o->entry.items[i].object_offset);
1186         if (p == 0)
1187                 return -EINVAL;
1188
1189         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
1190         if (r < 0)
1191                 return r;
1192
1193         return link_entry_into_array_plus_one(f,
1194                                               &o->data.entry_offset,
1195                                               &o->data.entry_array_offset,
1196                                               &o->data.n_entries,
1197                                               offset);
1198 }
1199
1200 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
1201         uint64_t n, i;
1202         int r;
1203
1204         assert(f);
1205         assert(o);
1206         assert(offset > 0);
1207
1208         if (o->object.type != OBJECT_ENTRY)
1209                 return -EINVAL;
1210
1211         __sync_synchronize();
1212
1213         /* Link up the entry itself */
1214         r = link_entry_into_array(f,
1215                                   &f->header->entry_array_offset,
1216                                   &f->header->n_entries,
1217                                   offset);
1218         if (r < 0)
1219                 return r;
1220
1221         /* log_debug("=> %s seqnr=%"PRIu64" n_entries=%"PRIu64, f->path, o->entry.seqnum, f->header->n_entries); */
1222
1223         if (f->header->head_entry_realtime == 0)
1224                 f->header->head_entry_realtime = o->entry.realtime;
1225
1226         f->header->tail_entry_realtime = o->entry.realtime;
1227         f->header->tail_entry_monotonic = o->entry.monotonic;
1228
1229         f->tail_entry_monotonic_valid = true;
1230
1231         /* Link up the items */
1232         n = journal_file_entry_n_items(o);
1233         for (i = 0; i < n; i++) {
1234                 r = journal_file_link_entry_item(f, o, offset, i);
1235                 if (r < 0)
1236                         return r;
1237         }
1238
1239         return 0;
1240 }
1241
1242 static int journal_file_append_entry_internal(
1243                 JournalFile *f,
1244                 const dual_timestamp *ts,
1245                 uint64_t xor_hash,
1246                 const EntryItem items[], unsigned n_items,
1247                 uint64_t *seqnum,
1248                 Object **ret, uint64_t *offset) {
1249         uint64_t np;
1250         uint64_t osize;
1251         Object *o;
1252         int r;
1253
1254         assert(f);
1255         assert(items || n_items == 0);
1256         assert(ts);
1257
1258         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1259
1260         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1261         if (r < 0)
1262                 return r;
1263
1264         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1265         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1266         o->entry.realtime = htole64(ts->realtime);
1267         o->entry.monotonic = htole64(ts->monotonic);
1268         o->entry.xor_hash = htole64(xor_hash);
1269         o->entry.boot_id = f->header->boot_id;
1270
1271 #ifdef HAVE_GCRYPT
1272         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1273         if (r < 0)
1274                 return r;
1275 #endif
1276
1277         r = journal_file_link_entry(f, o, np);
1278         if (r < 0)
1279                 return r;
1280
1281         if (ret)
1282                 *ret = o;
1283
1284         if (offset)
1285                 *offset = np;
1286
1287         return 0;
1288 }
1289
1290 void journal_file_post_change(JournalFile *f) {
1291         assert(f);
1292
1293         /* inotify() does not receive IN_MODIFY events from file
1294          * accesses done via mmap(). After each access we hence
1295          * trigger IN_MODIFY by truncating the journal file to its
1296          * current size which triggers IN_MODIFY. */
1297
1298         __sync_synchronize();
1299
1300         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1301                 log_error("Failed to truncate file to its own size: %m");
1302 }
1303
1304 static int entry_item_cmp(const void *_a, const void *_b) {
1305         const EntryItem *a = _a, *b = _b;
1306
1307         if (le64toh(a->object_offset) < le64toh(b->object_offset))
1308                 return -1;
1309         if (le64toh(a->object_offset) > le64toh(b->object_offset))
1310                 return 1;
1311         return 0;
1312 }
1313
1314 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1315         unsigned i;
1316         EntryItem *items;
1317         int r;
1318         uint64_t xor_hash = 0;
1319         struct dual_timestamp _ts;
1320
1321         assert(f);
1322         assert(iovec || n_iovec == 0);
1323
1324         if (!ts) {
1325                 dual_timestamp_get(&_ts);
1326                 ts = &_ts;
1327         }
1328
1329         if (f->tail_entry_monotonic_valid &&
1330             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1331                 return -EINVAL;
1332
1333 #ifdef HAVE_GCRYPT
1334         r = journal_file_maybe_append_tag(f, ts->realtime);
1335         if (r < 0)
1336                 return r;
1337 #endif
1338
1339         /* alloca() can't take 0, hence let's allocate at least one */
1340         items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));
1341
1342         for (i = 0; i < n_iovec; i++) {
1343                 uint64_t p;
1344                 Object *o;
1345
1346                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1347                 if (r < 0)
1348                         return r;
1349
1350                 xor_hash ^= le64toh(o->data.hash);
1351                 items[i].object_offset = htole64(p);
1352                 items[i].hash = o->data.hash;
1353         }
1354
1355         /* Order by the position on disk, in order to improve seek
1356          * times for rotating media. */
1357         qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);
1358
1359         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1360
1361         journal_file_post_change(f);
1362
1363         return r;
1364 }
1365
/* Remembers how far a chain of entry arrays was walked the last time, so
 * that later lookups in the same chain may skip ahead. */
typedef struct ChainCacheItem {
        /* the array at the beginning of the chain */
        uint64_t first;

        /* the cached array */
        uint64_t array;

        /* the first item in the cached array */
        uint64_t begin;

        /* the total number of items in all arrays before this one in the chain */
        uint64_t total;

        /* the last index we looked at, to optimize locality when bisecting */
        uint64_t last_index;
} ChainCacheItem;
1373
1374 static void chain_cache_put(
1375                 Hashmap *h,
1376                 ChainCacheItem *ci,
1377                 uint64_t first,
1378                 uint64_t array,
1379                 uint64_t begin,
1380                 uint64_t total,
1381                 uint64_t last_index) {
1382
1383         if (!ci) {
1384                 /* If the chain item to cache for this chain is the
1385                  * first one it's not worth caching anything */
1386                 if (array == first)
1387                         return;
1388
1389                 if (hashmap_size(h) >= CHAIN_CACHE_MAX)
1390                         ci = hashmap_steal_first(h);
1391                 else {
1392                         ci = new(ChainCacheItem, 1);
1393                         if (!ci)
1394                                 return;
1395                 }
1396
1397                 ci->first = first;
1398
1399                 if (hashmap_put(h, &ci->first, ci) < 0) {
1400                         free(ci);
1401                         return;
1402                 }
1403         } else
1404                 assert(ci->first == first);
1405
1406         ci->array = array;
1407         ci->begin = begin;
1408         ci->total = total;
1409         ci->last_index = last_index;
1410 }
1411
1412 static int generic_array_get(
1413                 JournalFile *f,
1414                 uint64_t first,
1415                 uint64_t i,
1416                 Object **ret, uint64_t *offset) {
1417
1418         Object *o;
1419         uint64_t p = 0, a, t = 0;
1420         int r;
1421         ChainCacheItem *ci;
1422
1423         assert(f);
1424
1425         a = first;
1426
1427         /* Try the chain cache first */
1428         ci = hashmap_get(f->chain_cache, &first);
1429         if (ci && i > ci->total) {
1430                 a = ci->array;
1431                 i -= ci->total;
1432                 t = ci->total;
1433         }
1434
1435         while (a > 0) {
1436                 uint64_t k;
1437
1438                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1439                 if (r < 0)
1440                         return r;
1441
1442                 k = journal_file_entry_array_n_items(o);
1443                 if (i < k) {
1444                         p = le64toh(o->entry_array.items[i]);
1445                         goto found;
1446                 }
1447
1448                 i -= k;
1449                 t += k;
1450                 a = le64toh(o->entry_array.next_entry_array_offset);
1451         }
1452
1453         return 0;
1454
1455 found:
1456         /* Let's cache this item for the next invocation */
1457         chain_cache_put(f->chain_cache, ci, first, a, le64toh(o->entry_array.items[0]), t, i);
1458
1459         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1460         if (r < 0)
1461                 return r;
1462
1463         if (ret)
1464                 *ret = o;
1465
1466         if (offset)
1467                 *offset = p;
1468
1469         return 1;
1470 }
1471
1472 static int generic_array_get_plus_one(
1473                 JournalFile *f,
1474                 uint64_t extra,
1475                 uint64_t first,
1476                 uint64_t i,
1477                 Object **ret, uint64_t *offset) {
1478
1479         Object *o;
1480
1481         assert(f);
1482
1483         if (i == 0) {
1484                 int r;
1485
1486                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1487                 if (r < 0)
1488                         return r;
1489
1490                 if (ret)
1491                         *ret = o;
1492
1493                 if (offset)
1494                         *offset = extra;
1495
1496                 return 1;
1497         }
1498
1499         return generic_array_get(f, first, i-1, ret, offset);
1500 }
1501
/* Results of the test_object() callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested entry matches the needle */
        TEST_LEFT  = 1, /* the tested entry lies before the needle */
        TEST_RIGHT = 2  /* the tested entry lies after the needle */
};
1507
1508 static int generic_array_bisect(
1509                 JournalFile *f,
1510                 uint64_t first,
1511                 uint64_t n,
1512                 uint64_t needle,
1513                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1514                 direction_t direction,
1515                 Object **ret,
1516                 uint64_t *offset,
1517                 uint64_t *idx) {
1518
1519         uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
1520         bool subtract_one = false;
1521         Object *o, *array = NULL;
1522         int r;
1523         ChainCacheItem *ci;
1524
1525         assert(f);
1526         assert(test_object);
1527
1528         /* Start with the first array in the chain */
1529         a = first;
1530
1531         ci = hashmap_get(f->chain_cache, &first);
1532         if (ci && n > ci->total) {
1533                 /* Ah, we have iterated this bisection array chain
1534                  * previously! Let's see if we can skip ahead in the
1535                  * chain, as far as the last time. But we can't jump
1536                  * backwards in the chain, so let's check that
1537                  * first. */
1538
1539                 r = test_object(f, ci->begin, needle);
1540                 if (r < 0)
1541                         return r;
1542
1543                 if (r == TEST_LEFT) {
1544                         /* OK, what we are looking for is right of the
1545                          * begin of this EntryArray, so let's jump
1546                          * straight to previously cached array in the
1547                          * chain */
1548
1549                         a = ci->array;
1550                         n -= ci->total;
1551                         t = ci->total;
1552                         last_index = ci->last_index;
1553                 }
1554         }
1555
1556         while (a > 0) {
1557                 uint64_t left, right, k, lp;
1558
1559                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1560                 if (r < 0)
1561                         return r;
1562
1563                 k = journal_file_entry_array_n_items(array);
1564                 right = MIN(k, n);
1565                 if (right <= 0)
1566                         return 0;
1567
1568                 i = right - 1;
1569                 lp = p = le64toh(array->entry_array.items[i]);
1570                 if (p <= 0)
1571                         return -EBADMSG;
1572
1573                 r = test_object(f, p, needle);
1574                 if (r < 0)
1575                         return r;
1576
1577                 if (r == TEST_FOUND)
1578                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1579
1580                 if (r == TEST_RIGHT) {
1581                         left = 0;
1582                         right -= 1;
1583
1584                         if (last_index != (uint64_t) -1) {
1585                                 assert(last_index <= right);
1586
1587                                 /* If we cached the last index we
1588                                  * looked at, let's try to not to jump
1589                                  * too wildly around and see if we can
1590                                  * limit the range to look at early to
1591                                  * the immediate neighbors of the last
1592                                  * index we looked at. */
1593
1594                                 if (last_index > 0) {
1595                                         uint64_t x = last_index - 1;
1596
1597                                         p = le64toh(array->entry_array.items[x]);
1598                                         if (p <= 0)
1599                                                 return -EBADMSG;
1600
1601                                         r = test_object(f, p, needle);
1602                                         if (r < 0)
1603                                                 return r;
1604
1605                                         if (r == TEST_FOUND)
1606                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1607
1608                                         if (r == TEST_RIGHT)
1609                                                 right = x;
1610                                         else
1611                                                 left = x + 1;
1612                                 }
1613
1614                                 if (last_index < right) {
1615                                         uint64_t y = last_index + 1;
1616
1617                                         p = le64toh(array->entry_array.items[y]);
1618                                         if (p <= 0)
1619                                                 return -EBADMSG;
1620
1621                                         r = test_object(f, p, needle);
1622                                         if (r < 0)
1623                                                 return r;
1624
1625                                         if (r == TEST_FOUND)
1626                                                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1627
1628                                         if (r == TEST_RIGHT)
1629                                                 right = y;
1630                                         else
1631                                                 left = y + 1;
1632                                 }
1633                         }
1634
1635                         for (;;) {
1636                                 if (left == right) {
1637                                         if (direction == DIRECTION_UP)
1638                                                 subtract_one = true;
1639
1640                                         i = left;
1641                                         goto found;
1642                                 }
1643
1644                                 assert(left < right);
1645                                 i = (left + right) / 2;
1646
1647                                 p = le64toh(array->entry_array.items[i]);
1648                                 if (p <= 0)
1649                                         return -EBADMSG;
1650
1651                                 r = test_object(f, p, needle);
1652                                 if (r < 0)
1653                                         return r;
1654
1655                                 if (r == TEST_FOUND)
1656                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1657
1658                                 if (r == TEST_RIGHT)
1659                                         right = i;
1660                                 else
1661                                         left = i + 1;
1662                         }
1663                 }
1664
1665                 if (k > n) {
1666                         if (direction == DIRECTION_UP) {
1667                                 i = n;
1668                                 subtract_one = true;
1669                                 goto found;
1670                         }
1671
1672                         return 0;
1673                 }
1674
1675                 last_p = lp;
1676
1677                 n -= k;
1678                 t += k;
1679                 last_index = (uint64_t) -1;
1680                 a = le64toh(array->entry_array.next_entry_array_offset);
1681         }
1682
1683         return 0;
1684
1685 found:
1686         if (subtract_one && t == 0 && i == 0)
1687                 return 0;
1688
1689         /* Let's cache this item for the next invocation */
1690         chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t, subtract_one ? (i > 0 ? i-1 : (uint64_t) -1) : i);
1691
1692         if (subtract_one && i == 0)
1693                 p = last_p;
1694         else if (subtract_one)
1695                 p = le64toh(array->entry_array.items[i-1]);
1696         else
1697                 p = le64toh(array->entry_array.items[i]);
1698
1699         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1700         if (r < 0)
1701                 return r;
1702
1703         if (ret)
1704                 *ret = o;
1705
1706         if (offset)
1707                 *offset = p;
1708
1709         if (idx)
1710                 *idx = t + i + (subtract_one ? -1 : 0);
1711
1712         return 1;
1713 }
1714
1715
1716 static int generic_array_bisect_plus_one(
1717                 JournalFile *f,
1718                 uint64_t extra,
1719                 uint64_t first,
1720                 uint64_t n,
1721                 uint64_t needle,
1722                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1723                 direction_t direction,
1724                 Object **ret,
1725                 uint64_t *offset,
1726                 uint64_t *idx) {
1727
1728         int r;
1729         bool step_back = false;
1730         Object *o;
1731
1732         assert(f);
1733         assert(test_object);
1734
1735         if (n <= 0)
1736                 return 0;
1737
1738         /* This bisects the array in object 'first', but first checks
1739          * an extra  */
1740         r = test_object(f, extra, needle);
1741         if (r < 0)
1742                 return r;
1743
1744         if (r == TEST_FOUND)
1745                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1746
1747         /* if we are looking with DIRECTION_UP then we need to first
1748            see if in the actual array there is a matching entry, and
1749            return the last one of that. But if there isn't any we need
1750            to return this one. Hence remember this, and return it
1751            below. */
1752         if (r == TEST_LEFT)
1753                 step_back = direction == DIRECTION_UP;
1754
1755         if (r == TEST_RIGHT) {
1756                 if (direction == DIRECTION_DOWN)
1757                         goto found;
1758                 else
1759                         return 0;
1760         }
1761
1762         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1763
1764         if (r == 0 && step_back)
1765                 goto found;
1766
1767         if (r > 0 && idx)
1768                 (*idx) ++;
1769
1770         return r;
1771
1772 found:
1773         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1774         if (r < 0)
1775                 return r;
1776
1777         if (ret)
1778                 *ret = o;
1779
1780         if (offset)
1781                 *offset = extra;
1782
1783         if (idx)
1784                 *idx = 0;
1785
1786         return 1;
1787 }
1788
1789 _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1790         assert(f);
1791         assert(p > 0);
1792
1793         if (p == needle)
1794                 return TEST_FOUND;
1795         else if (p < needle)
1796                 return TEST_LEFT;
1797         else
1798                 return TEST_RIGHT;
1799 }
1800
1801 int journal_file_move_to_entry_by_offset(
1802                 JournalFile *f,
1803                 uint64_t p,
1804                 direction_t direction,
1805                 Object **ret,
1806                 uint64_t *offset) {
1807
1808         return generic_array_bisect(f,
1809                                     le64toh(f->header->entry_array_offset),
1810                                     le64toh(f->header->n_entries),
1811                                     p,
1812                                     test_object_offset,
1813                                     direction,
1814                                     ret, offset, NULL);
1815 }
1816
1817
1818 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1819         Object *o;
1820         int r;
1821
1822         assert(f);
1823         assert(p > 0);
1824
1825         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1826         if (r < 0)
1827                 return r;
1828
1829         if (le64toh(o->entry.seqnum) == needle)
1830                 return TEST_FOUND;
1831         else if (le64toh(o->entry.seqnum) < needle)
1832                 return TEST_LEFT;
1833         else
1834                 return TEST_RIGHT;
1835 }
1836
1837 int journal_file_move_to_entry_by_seqnum(
1838                 JournalFile *f,
1839                 uint64_t seqnum,
1840                 direction_t direction,
1841                 Object **ret,
1842                 uint64_t *offset) {
1843
1844         return generic_array_bisect(f,
1845                                     le64toh(f->header->entry_array_offset),
1846                                     le64toh(f->header->n_entries),
1847                                     seqnum,
1848                                     test_object_seqnum,
1849                                     direction,
1850                                     ret, offset, NULL);
1851 }
1852
1853 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1854         Object *o;
1855         int r;
1856
1857         assert(f);
1858         assert(p > 0);
1859
1860         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1861         if (r < 0)
1862                 return r;
1863
1864         if (le64toh(o->entry.realtime) == needle)
1865                 return TEST_FOUND;
1866         else if (le64toh(o->entry.realtime) < needle)
1867                 return TEST_LEFT;
1868         else
1869                 return TEST_RIGHT;
1870 }
1871
1872 int journal_file_move_to_entry_by_realtime(
1873                 JournalFile *f,
1874                 uint64_t realtime,
1875                 direction_t direction,
1876                 Object **ret,
1877                 uint64_t *offset) {
1878
1879         return generic_array_bisect(f,
1880                                     le64toh(f->header->entry_array_offset),
1881                                     le64toh(f->header->n_entries),
1882                                     realtime,
1883                                     test_object_realtime,
1884                                     direction,
1885                                     ret, offset, NULL);
1886 }
1887
1888 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1889         Object *o;
1890         int r;
1891
1892         assert(f);
1893         assert(p > 0);
1894
1895         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1896         if (r < 0)
1897                 return r;
1898
1899         if (le64toh(o->entry.monotonic) == needle)
1900                 return TEST_FOUND;
1901         else if (le64toh(o->entry.monotonic) < needle)
1902                 return TEST_LEFT;
1903         else
1904                 return TEST_RIGHT;
1905 }
1906
1907 static inline int find_data_object_by_boot_id(
1908                 JournalFile *f,
1909                 sd_id128_t boot_id,
1910                 Object **o,
1911                 uint64_t *b) {
1912         char t[sizeof("_BOOT_ID=")-1 + 32 + 1] = "_BOOT_ID=";
1913
1914         sd_id128_to_string(boot_id, t + 9);
1915         return journal_file_find_data_object(f, t, sizeof(t) - 1, o, b);
1916 }
1917
1918 int journal_file_move_to_entry_by_monotonic(
1919                 JournalFile *f,
1920                 sd_id128_t boot_id,
1921                 uint64_t monotonic,
1922                 direction_t direction,
1923                 Object **ret,
1924                 uint64_t *offset) {
1925
1926         Object *o;
1927         int r;
1928
1929         assert(f);
1930
1931         r = find_data_object_by_boot_id(f, boot_id, &o, NULL);
1932         if (r < 0)
1933                 return r;
1934         if (r == 0)
1935                 return -ENOENT;
1936
1937         return generic_array_bisect_plus_one(f,
1938                                              le64toh(o->data.entry_offset),
1939                                              le64toh(o->data.entry_array_offset),
1940                                              le64toh(o->data.n_entries),
1941                                              monotonic,
1942                                              test_object_monotonic,
1943                                              direction,
1944                                              ret, offset, NULL);
1945 }
1946
1947 int journal_file_next_entry(
1948                 JournalFile *f,
1949                 Object *o, uint64_t p,
1950                 direction_t direction,
1951                 Object **ret, uint64_t *offset) {
1952
1953         uint64_t i, n, ofs;
1954         int r;
1955
1956         assert(f);
1957         assert(p > 0 || !o);
1958
1959         n = le64toh(f->header->n_entries);
1960         if (n <= 0)
1961                 return 0;
1962
1963         if (!o)
1964                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1965         else {
1966                 if (o->object.type != OBJECT_ENTRY)
1967                         return -EINVAL;
1968
1969                 r = generic_array_bisect(f,
1970                                          le64toh(f->header->entry_array_offset),
1971                                          le64toh(f->header->n_entries),
1972                                          p,
1973                                          test_object_offset,
1974                                          DIRECTION_DOWN,
1975                                          NULL, NULL,
1976                                          &i);
1977                 if (r <= 0)
1978                         return r;
1979
1980                 if (direction == DIRECTION_DOWN) {
1981                         if (i >= n - 1)
1982                                 return 0;
1983
1984                         i++;
1985                 } else {
1986                         if (i <= 0)
1987                                 return 0;
1988
1989                         i--;
1990                 }
1991         }
1992
1993         /* And jump to it */
1994         r = generic_array_get(f,
1995                               le64toh(f->header->entry_array_offset),
1996                               i,
1997                               ret, &ofs);
1998         if (r <= 0)
1999                 return r;
2000
2001         if (p > 0 &&
2002             (direction == DIRECTION_DOWN ? ofs <= p : ofs >= p)) {
2003                 log_debug("%s: entry array corrupted at entry %"PRIu64,
2004                           f->path, i);
2005                 return -EBADMSG;
2006         }
2007
2008         if (offset)
2009                 *offset = ofs;
2010
2011         return 1;
2012 }
2013
2014 int journal_file_skip_entry(
2015                 JournalFile *f,
2016                 Object *o, uint64_t p,
2017                 int64_t skip,
2018                 Object **ret, uint64_t *offset) {
2019
2020         uint64_t i, n;
2021         int r;
2022
2023         assert(f);
2024         assert(o);
2025         assert(p > 0);
2026
2027         if (o->object.type != OBJECT_ENTRY)
2028                 return -EINVAL;
2029
2030         r = generic_array_bisect(f,
2031                                  le64toh(f->header->entry_array_offset),
2032                                  le64toh(f->header->n_entries),
2033                                  p,
2034                                  test_object_offset,
2035                                  DIRECTION_DOWN,
2036                                  NULL, NULL,
2037                                  &i);
2038         if (r <= 0)
2039                 return r;
2040
2041         /* Calculate new index */
2042         if (skip < 0) {
2043                 if ((uint64_t) -skip >= i)
2044                         i = 0;
2045                 else
2046                         i = i - (uint64_t) -skip;
2047         } else
2048                 i  += (uint64_t) skip;
2049
2050         n = le64toh(f->header->n_entries);
2051         if (n <= 0)
2052                 return -EBADMSG;
2053
2054         if (i >= n)
2055                 i = n-1;
2056
2057         return generic_array_get(f,
2058                                  le64toh(f->header->entry_array_offset),
2059                                  i,
2060                                  ret, offset);
2061 }
2062
2063 int journal_file_next_entry_for_data(
2064                 JournalFile *f,
2065                 Object *o, uint64_t p,
2066                 uint64_t data_offset,
2067                 direction_t direction,
2068                 Object **ret, uint64_t *offset) {
2069
2070         uint64_t n, i;
2071         int r;
2072         Object *d;
2073
2074         assert(f);
2075         assert(p > 0 || !o);
2076
2077         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2078         if (r < 0)
2079                 return r;
2080
2081         n = le64toh(d->data.n_entries);
2082         if (n <= 0)
2083                 return n;
2084
2085         if (!o)
2086                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
2087         else {
2088                 if (o->object.type != OBJECT_ENTRY)
2089                         return -EINVAL;
2090
2091                 r = generic_array_bisect_plus_one(f,
2092                                                   le64toh(d->data.entry_offset),
2093                                                   le64toh(d->data.entry_array_offset),
2094                                                   le64toh(d->data.n_entries),
2095                                                   p,
2096                                                   test_object_offset,
2097                                                   DIRECTION_DOWN,
2098                                                   NULL, NULL,
2099                                                   &i);
2100
2101                 if (r <= 0)
2102                         return r;
2103
2104                 if (direction == DIRECTION_DOWN) {
2105                         if (i >= n - 1)
2106                                 return 0;
2107
2108                         i++;
2109                 } else {
2110                         if (i <= 0)
2111                                 return 0;
2112
2113                         i--;
2114                 }
2115
2116         }
2117
2118         return generic_array_get_plus_one(f,
2119                                           le64toh(d->data.entry_offset),
2120                                           le64toh(d->data.entry_array_offset),
2121                                           i,
2122                                           ret, offset);
2123 }
2124
2125 int journal_file_move_to_entry_by_offset_for_data(
2126                 JournalFile *f,
2127                 uint64_t data_offset,
2128                 uint64_t p,
2129                 direction_t direction,
2130                 Object **ret, uint64_t *offset) {
2131
2132         int r;
2133         Object *d;
2134
2135         assert(f);
2136
2137         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2138         if (r < 0)
2139                 return r;
2140
2141         return generic_array_bisect_plus_one(f,
2142                                              le64toh(d->data.entry_offset),
2143                                              le64toh(d->data.entry_array_offset),
2144                                              le64toh(d->data.n_entries),
2145                                              p,
2146                                              test_object_offset,
2147                                              direction,
2148                                              ret, offset, NULL);
2149 }
2150
2151 int journal_file_move_to_entry_by_monotonic_for_data(
2152                 JournalFile *f,
2153                 uint64_t data_offset,
2154                 sd_id128_t boot_id,
2155                 uint64_t monotonic,
2156                 direction_t direction,
2157                 Object **ret, uint64_t *offset) {
2158
2159         Object *o, *d;
2160         int r;
2161         uint64_t b, z;
2162
2163         assert(f);
2164
2165         /* First, seek by time */
2166         r = find_data_object_by_boot_id(f, boot_id, &o, &b);
2167         if (r < 0)
2168                 return r;
2169         if (r == 0)
2170                 return -ENOENT;
2171
2172         r = generic_array_bisect_plus_one(f,
2173                                           le64toh(o->data.entry_offset),
2174                                           le64toh(o->data.entry_array_offset),
2175                                           le64toh(o->data.n_entries),
2176                                           monotonic,
2177                                           test_object_monotonic,
2178                                           direction,
2179                                           NULL, &z, NULL);
2180         if (r <= 0)
2181                 return r;
2182
2183         /* And now, continue seeking until we find an entry that
2184          * exists in both bisection arrays */
2185
2186         for (;;) {
2187                 Object *qo;
2188                 uint64_t p, q;
2189
2190                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2191                 if (r < 0)
2192                         return r;
2193
2194                 r = generic_array_bisect_plus_one(f,
2195                                                   le64toh(d->data.entry_offset),
2196                                                   le64toh(d->data.entry_array_offset),
2197                                                   le64toh(d->data.n_entries),
2198                                                   z,
2199                                                   test_object_offset,
2200                                                   direction,
2201                                                   NULL, &p, NULL);
2202                 if (r <= 0)
2203                         return r;
2204
2205                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
2206                 if (r < 0)
2207                         return r;
2208
2209                 r = generic_array_bisect_plus_one(f,
2210                                                   le64toh(o->data.entry_offset),
2211                                                   le64toh(o->data.entry_array_offset),
2212                                                   le64toh(o->data.n_entries),
2213                                                   p,
2214                                                   test_object_offset,
2215                                                   direction,
2216                                                   &qo, &q, NULL);
2217
2218                 if (r <= 0)
2219                         return r;
2220
2221                 if (p == q) {
2222                         if (ret)
2223                                 *ret = qo;
2224                         if (offset)
2225                                 *offset = q;
2226
2227                         return 1;
2228                 }
2229
2230                 z = q;
2231         }
2232 }
2233
2234 int journal_file_move_to_entry_by_seqnum_for_data(
2235                 JournalFile *f,
2236                 uint64_t data_offset,
2237                 uint64_t seqnum,
2238                 direction_t direction,
2239                 Object **ret, uint64_t *offset) {
2240
2241         Object *d;
2242         int r;
2243
2244         assert(f);
2245
2246         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2247         if (r < 0)
2248                 return r;
2249
2250         return generic_array_bisect_plus_one(f,
2251                                              le64toh(d->data.entry_offset),
2252                                              le64toh(d->data.entry_array_offset),
2253                                              le64toh(d->data.n_entries),
2254                                              seqnum,
2255                                              test_object_seqnum,
2256                                              direction,
2257                                              ret, offset, NULL);
2258 }
2259
2260 int journal_file_move_to_entry_by_realtime_for_data(
2261                 JournalFile *f,
2262                 uint64_t data_offset,
2263                 uint64_t realtime,
2264                 direction_t direction,
2265                 Object **ret, uint64_t *offset) {
2266
2267         Object *d;
2268         int r;
2269
2270         assert(f);
2271
2272         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
2273         if (r < 0)
2274                 return r;
2275
2276         return generic_array_bisect_plus_one(f,
2277                                              le64toh(d->data.entry_offset),
2278                                              le64toh(d->data.entry_array_offset),
2279                                              le64toh(d->data.n_entries),
2280                                              realtime,
2281                                              test_object_realtime,
2282                                              direction,
2283                                              ret, offset, NULL);
2284 }
2285
2286 void journal_file_dump(JournalFile *f) {
2287         Object *o;
2288         int r;
2289         uint64_t p;
2290
2291         assert(f);
2292
2293         journal_file_print_header(f);
2294
2295         p = le64toh(f->header->header_size);
2296         while (p != 0) {
2297                 r = journal_file_move_to_object(f, -1, p, &o);
2298                 if (r < 0)
2299                         goto fail;
2300
2301                 switch (o->object.type) {
2302
2303                 case OBJECT_UNUSED:
2304                         printf("Type: OBJECT_UNUSED\n");
2305                         break;
2306
2307                 case OBJECT_DATA:
2308                         printf("Type: OBJECT_DATA\n");
2309                         break;
2310
2311                 case OBJECT_FIELD:
2312                         printf("Type: OBJECT_FIELD\n");
2313                         break;
2314
2315                 case OBJECT_ENTRY:
2316                         printf("Type: OBJECT_ENTRY seqnum=%"PRIu64" monotonic=%"PRIu64" realtime=%"PRIu64"\n",
2317                                le64toh(o->entry.seqnum),
2318                                le64toh(o->entry.monotonic),
2319                                le64toh(o->entry.realtime));
2320                         break;
2321
2322                 case OBJECT_FIELD_HASH_TABLE:
2323                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2324                         break;
2325
2326                 case OBJECT_DATA_HASH_TABLE:
2327                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2328                         break;
2329
2330                 case OBJECT_ENTRY_ARRAY:
2331                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2332                         break;
2333
2334                 case OBJECT_TAG:
2335                         printf("Type: OBJECT_TAG seqnum=%"PRIu64" epoch=%"PRIu64"\n",
2336                                le64toh(o->tag.seqnum),
2337                                le64toh(o->tag.epoch));
2338                         break;
2339
2340                 default:
2341                         printf("Type: unknown (%u)\n", o->object.type);
2342                         break;
2343                 }
2344
2345                 if (o->object.flags & OBJECT_COMPRESSION_MASK)
2346                         printf("Flags: %s\n",
2347                                object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));
2348
2349                 if (p == le64toh(f->header->tail_object_offset))
2350                         p = 0;
2351                 else
2352                         p = p + ALIGN64(le64toh(o->object.size));
2353         }
2354
2355         return;
2356 fail:
2357         log_error("File corrupt");
2358 }
2359
2360 static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
2361         const char *x;
2362
2363         x = format_timestamp(buf, l, t);
2364         if (x)
2365                 return x;
2366         return " --- ";
2367 }
2368
2369 void journal_file_print_header(JournalFile *f) {
2370         char a[33], b[33], c[33], d[33];
2371         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX], z[FORMAT_TIMESTAMP_MAX];
2372         struct stat st;
2373         char bytes[FORMAT_BYTES_MAX];
2374
2375         assert(f);
2376
2377         printf("File Path: %s\n"
2378                "File ID: %s\n"
2379                "Machine ID: %s\n"
2380                "Boot ID: %s\n"
2381                "Sequential Number ID: %s\n"
2382                "State: %s\n"
2383                "Compatible Flags:%s%s\n"
2384                "Incompatible Flags:%s%s%s\n"
2385                "Header size: %"PRIu64"\n"
2386                "Arena size: %"PRIu64"\n"
2387                "Data Hash Table Size: %"PRIu64"\n"
2388                "Field Hash Table Size: %"PRIu64"\n"
2389                "Rotate Suggested: %s\n"
2390                "Head Sequential Number: %"PRIu64"\n"
2391                "Tail Sequential Number: %"PRIu64"\n"
2392                "Head Realtime Timestamp: %s\n"
2393                "Tail Realtime Timestamp: %s\n"
2394                "Tail Monotonic Timestamp: %s\n"
2395                "Objects: %"PRIu64"\n"
2396                "Entry Objects: %"PRIu64"\n",
2397                f->path,
2398                sd_id128_to_string(f->header->file_id, a),
2399                sd_id128_to_string(f->header->machine_id, b),
2400                sd_id128_to_string(f->header->boot_id, c),
2401                sd_id128_to_string(f->header->seqnum_id, d),
2402                f->header->state == STATE_OFFLINE ? "OFFLINE" :
2403                f->header->state == STATE_ONLINE ? "ONLINE" :
2404                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
2405                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
2406                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ???" : "",
2407                JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ" : "",
2408                JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4" : "",
2409                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ???" : "",
2410                le64toh(f->header->header_size),
2411                le64toh(f->header->arena_size),
2412                le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2413                le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2414                yes_no(journal_file_rotate_suggested(f, 0)),
2415                le64toh(f->header->head_entry_seqnum),
2416                le64toh(f->header->tail_entry_seqnum),
2417                format_timestamp_safe(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2418                format_timestamp_safe(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
2419                format_timespan(z, sizeof(z), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC),
2420                le64toh(f->header->n_objects),
2421                le64toh(f->header->n_entries));
2422
2423         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2424                 printf("Data Objects: %"PRIu64"\n"
2425                        "Data Hash Table Fill: %.1f%%\n",
2426                        le64toh(f->header->n_data),
2427                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2428
2429         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2430                 printf("Field Objects: %"PRIu64"\n"
2431                        "Field Hash Table Fill: %.1f%%\n",
2432                        le64toh(f->header->n_fields),
2433                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2434
2435         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
2436                 printf("Tag Objects: %"PRIu64"\n",
2437                        le64toh(f->header->n_tags));
2438         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
2439                 printf("Entry Array Objects: %"PRIu64"\n",
2440                        le64toh(f->header->n_entry_arrays));
2441
2442         if (fstat(f->fd, &st) >= 0)
2443                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
2444 }
2445
2446 int journal_file_open(
2447                 const char *fname,
2448                 int flags,
2449                 mode_t mode,
2450                 bool compress,
2451                 bool seal,
2452                 JournalMetrics *metrics,
2453                 MMapCache *mmap_cache,
2454                 JournalFile *template,
2455                 JournalFile **ret) {
2456
2457         JournalFile *f;
2458         int r;
2459         bool newly_created = false;
2460
2461         assert(fname);
2462         assert(ret);
2463
2464         if ((flags & O_ACCMODE) != O_RDONLY &&
2465             (flags & O_ACCMODE) != O_RDWR)
2466                 return -EINVAL;
2467
2468         if (!endswith(fname, ".journal") &&
2469             !endswith(fname, ".journal~"))
2470                 return -EINVAL;
2471
2472         f = new0(JournalFile, 1);
2473         if (!f)
2474                 return -ENOMEM;
2475
2476         f->fd = -1;
2477         f->mode = mode;
2478
2479         f->flags = flags;
2480         f->prot = prot_from_flags(flags);
2481         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2482 #if defined(HAVE_LZ4)
2483         f->compress_lz4 = compress;
2484 #elif defined(HAVE_XZ)
2485         f->compress_xz = compress;
2486 #endif
2487 #ifdef HAVE_GCRYPT
2488         f->seal = seal;
2489 #endif
2490
2491         if (mmap_cache)
2492                 f->mmap = mmap_cache_ref(mmap_cache);
2493         else {
2494                 f->mmap = mmap_cache_new();
2495                 if (!f->mmap) {
2496                         r = -ENOMEM;
2497                         goto fail;
2498                 }
2499         }
2500
2501         f->path = strdup(fname);
2502         if (!f->path) {
2503                 r = -ENOMEM;
2504                 goto fail;
2505         }
2506
2507         f->chain_cache = hashmap_new(uint64_hash_func, uint64_compare_func);
2508         if (!f->chain_cache) {
2509                 r = -ENOMEM;
2510                 goto fail;
2511         }
2512
2513         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2514         if (f->fd < 0) {
2515                 r = -errno;
2516                 goto fail;
2517         }
2518
2519         if (fstat(f->fd, &f->last_stat) < 0) {
2520                 r = -errno;
2521                 goto fail;
2522         }
2523
2524         if (f->last_stat.st_size == 0 && f->writable) {
2525                 uint64_t crtime;
2526
2527                 /* Let's attach the creation time to the journal file,
2528                  * so that the vacuuming code knows the age of this
2529                  * file even if the file might end up corrupted one
2530                  * day... Ideally we'd just use the creation time many
2531                  * file systems maintain for each file, but there is
2532                  * currently no usable API to query this, hence let's
2533                  * emulate this via extended attributes. If extended
2534                  * attributes are not supported we'll just skip this,
2535                  * and rely solely on mtime/atime/ctime of the file.*/
2536
2537                 crtime = htole64((uint64_t) now(CLOCK_REALTIME));
2538                 fsetxattr(f->fd, "user.crtime_usec", &crtime, sizeof(crtime), XATTR_CREATE);
2539
2540 #ifdef HAVE_GCRYPT
2541                 /* Try to load the FSPRG state, and if we can't, then
2542                  * just don't do sealing */
2543                 if (f->seal) {
2544                         r = journal_file_fss_load(f);
2545                         if (r < 0)
2546                                 f->seal = false;
2547                 }
2548 #endif
2549
2550                 r = journal_file_init_header(f, template);
2551                 if (r < 0)
2552                         goto fail;
2553
2554                 if (fstat(f->fd, &f->last_stat) < 0) {
2555                         r = -errno;
2556                         goto fail;
2557                 }
2558
2559                 newly_created = true;
2560         }
2561
2562         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2563                 r = -EIO;
2564                 goto fail;
2565         }
2566
2567         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2568         if (f->header == MAP_FAILED) {
2569                 f->header = NULL;
2570                 r = -errno;
2571                 goto fail;
2572         }
2573
2574         if (!newly_created) {
2575                 r = journal_file_verify_header(f);
2576                 if (r < 0)
2577                         goto fail;
2578         }
2579
2580 #ifdef HAVE_GCRYPT
2581         if (!newly_created && f->writable) {
2582                 r = journal_file_fss_load(f);
2583                 if (r < 0)
2584                         goto fail;
2585         }
2586 #endif
2587
2588         if (f->writable) {
2589                 if (metrics) {
2590                         journal_default_metrics(metrics, f->fd);
2591                         f->metrics = *metrics;
2592                 } else if (template)
2593                         f->metrics = template->metrics;
2594
2595                 r = journal_file_refresh_header(f);
2596                 if (r < 0)
2597                         goto fail;
2598         }
2599
2600 #ifdef HAVE_GCRYPT
2601         r = journal_file_hmac_setup(f);
2602         if (r < 0)
2603                 goto fail;
2604 #endif
2605
2606         if (newly_created) {
2607                 r = journal_file_setup_field_hash_table(f);
2608                 if (r < 0)
2609                         goto fail;
2610
2611                 r = journal_file_setup_data_hash_table(f);
2612                 if (r < 0)
2613                         goto fail;
2614
2615 #ifdef HAVE_GCRYPT
2616                 r = journal_file_append_first_tag(f);
2617                 if (r < 0)
2618                         goto fail;
2619 #endif
2620         }
2621
2622         r = journal_file_map_field_hash_table(f);
2623         if (r < 0)
2624                 goto fail;
2625
2626         r = journal_file_map_data_hash_table(f);
2627         if (r < 0)
2628                 goto fail;
2629
2630         *ret = f;
2631         return 0;
2632
2633 fail:
2634         journal_file_close(f);
2635
2636         return r;
2637 }
2638
2639 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2640         _cleanup_free_ char *p = NULL;
2641         size_t l;
2642         JournalFile *old_file, *new_file = NULL;
2643         int r;
2644
2645         assert(f);
2646         assert(*f);
2647
2648         old_file = *f;
2649
2650         if (!old_file->writable)
2651                 return -EINVAL;
2652
2653         if (!endswith(old_file->path, ".journal"))
2654                 return -EINVAL;
2655
2656         l = strlen(old_file->path);
2657         r = asprintf(&p, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
2658                      (int) l - 8, old_file->path,
2659                      SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
2660                      le64toh((*f)->header->head_entry_seqnum),
2661                      le64toh((*f)->header->head_entry_realtime));
2662         if (r < 0)
2663                 return -ENOMEM;
2664
2665         r = rename(old_file->path, p);
2666         if (r < 0)
2667                 return -errno;
2668
2669         old_file->header->state = STATE_ARCHIVED;
2670
2671         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2672         journal_file_close(old_file);
2673
2674         *f = new_file;
2675         return r;
2676 }
2677
2678 int journal_file_open_reliably(
2679                 const char *fname,
2680                 int flags,
2681                 mode_t mode,
2682                 bool compress,
2683                 bool seal,
2684                 JournalMetrics *metrics,
2685                 MMapCache *mmap_cache,
2686                 JournalFile *template,
2687                 JournalFile **ret) {
2688
2689         int r;
2690         size_t l;
2691         _cleanup_free_ char *p = NULL;
2692
2693         r = journal_file_open(fname, flags, mode, compress, seal,
2694                               metrics, mmap_cache, template, ret);
2695         if (r != -EBADMSG && /* corrupted */
2696             r != -ENODATA && /* truncated */
2697             r != -EHOSTDOWN && /* other machine */
2698             r != -EPROTONOSUPPORT && /* incompatible feature */
2699             r != -EBUSY && /* unclean shutdown */
2700             r != -ESHUTDOWN /* already archived */)
2701                 return r;
2702
2703         if ((flags & O_ACCMODE) == O_RDONLY)
2704                 return r;
2705
2706         if (!(flags & O_CREAT))
2707                 return r;
2708
2709         if (!endswith(fname, ".journal"))
2710                 return r;
2711
2712         /* The file is corrupted. Rotate it away and try it again (but only once) */
2713
2714         l = strlen(fname);
2715         if (asprintf(&p, "%.*s@%016llx-%016" PRIx64 ".journal~",
2716                      (int) l - 8, fname,
2717                      (unsigned long long) now(CLOCK_REALTIME),
2718                      random_u64()) < 0)
2719                 return -ENOMEM;
2720
2721         r = rename(fname, p);
2722         if (r < 0)
2723                 return -errno;
2724
2725         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2726
2727         return journal_file_open(fname, flags, mode, compress, seal,
2728                                  metrics, mmap_cache, template, ret);
2729 }
2730
2731 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2732         uint64_t i, n;
2733         uint64_t q, xor_hash = 0;
2734         int r;
2735         EntryItem *items;
2736         dual_timestamp ts;
2737
2738         assert(from);
2739         assert(to);
2740         assert(o);
2741         assert(p);
2742
2743         if (!to->writable)
2744                 return -EPERM;
2745
2746         ts.monotonic = le64toh(o->entry.monotonic);
2747         ts.realtime = le64toh(o->entry.realtime);
2748
2749         n = journal_file_entry_n_items(o);
2750         /* alloca() can't take 0, hence let's allocate at least one */
2751         items = alloca(sizeof(EntryItem) * MAX(1u, n));
2752
2753         for (i = 0; i < n; i++) {
2754                 uint64_t l, h;
2755                 le64_t le_hash;
2756                 size_t t;
2757                 void *data;
2758                 Object *u;
2759
2760                 q = le64toh(o->entry.items[i].object_offset);
2761                 le_hash = o->entry.items[i].hash;
2762
2763                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2764                 if (r < 0)
2765                         return r;
2766
2767                 if (le_hash != o->data.hash)
2768                         return -EBADMSG;
2769
2770                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2771                 t = (size_t) l;
2772
2773                 /* We hit the limit on 32bit machines */
2774                 if ((uint64_t) t != l)
2775                         return -E2BIG;
2776
2777                 if (o->object.flags & OBJECT_COMPRESSION_MASK) {
2778 #if defined(HAVE_XZ) || defined(HAVE_LZ4)
2779                         uint64_t rsize;
2780
2781                         r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
2782                                             o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
2783                         if (r < 0)
2784                                 return r;
2785
2786                         data = from->compress_buffer;
2787                         l = rsize;
2788 #else
2789                         return -EPROTONOSUPPORT;
2790 #endif
2791                 } else
2792                         data = o->data.payload;
2793
2794                 r = journal_file_append_data(to, data, l, &u, &h);
2795                 if (r < 0)
2796                         return r;
2797
2798                 xor_hash ^= le64toh(u->data.hash);
2799                 items[i].object_offset = htole64(h);
2800                 items[i].hash = u->data.hash;
2801
2802                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2803                 if (r < 0)
2804                         return r;
2805         }
2806
2807         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2808 }
2809
2810 void journal_default_metrics(JournalMetrics *m, int fd) {
2811         uint64_t fs_size = 0;
2812         struct statvfs ss;
2813         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2814
2815         assert(m);
2816         assert(fd >= 0);
2817
2818         if (fstatvfs(fd, &ss) >= 0)
2819                 fs_size = ss.f_frsize * ss.f_blocks;
2820
2821         if (m->max_use == (uint64_t) -1) {
2822
2823                 if (fs_size > 0) {
2824                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2825
2826                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2827                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2828
2829                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2830                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2831                 } else
2832                         m->max_use = DEFAULT_MAX_USE_LOWER;
2833         } else {
2834                 m->max_use = PAGE_ALIGN(m->max_use);
2835
2836                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2837                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2838         }
2839
2840         if (m->max_size == (uint64_t) -1) {
2841                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2842
2843                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2844                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2845         } else
2846                 m->max_size = PAGE_ALIGN(m->max_size);
2847
2848         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2849                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2850
2851         if (m->max_size*2 > m->max_use)
2852                 m->max_use = m->max_size*2;
2853
2854         if (m->min_size == (uint64_t) -1)
2855                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2856         else {
2857                 m->min_size = PAGE_ALIGN(m->min_size);
2858
2859                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2860                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2861
2862                 if (m->min_size > m->max_size)
2863                         m->max_size = m->min_size;
2864         }
2865
2866         if (m->keep_free == (uint64_t) -1) {
2867
2868                 if (fs_size > 0) {
2869                         m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */
2870
2871                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2872                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2873
2874                 } else
2875                         m->keep_free = DEFAULT_KEEP_FREE;
2876         }
2877
2878         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2879                   format_bytes(a, sizeof(a), m->max_use),
2880                   format_bytes(b, sizeof(b), m->max_size),
2881                   format_bytes(c, sizeof(c), m->min_size),
2882                   format_bytes(d, sizeof(d), m->keep_free));
2883 }
2884
2885 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2886         assert(f);
2887         assert(from || to);
2888
2889         if (from) {
2890                 if (f->header->head_entry_realtime == 0)
2891                         return -ENOENT;
2892
2893                 *from = le64toh(f->header->head_entry_realtime);
2894         }
2895
2896         if (to) {
2897                 if (f->header->tail_entry_realtime == 0)
2898                         return -ENOENT;
2899
2900                 *to = le64toh(f->header->tail_entry_realtime);
2901         }
2902
2903         return 1;
2904 }
2905
2906 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2907         Object *o;
2908         uint64_t p;
2909         int r;
2910
2911         assert(f);
2912         assert(from || to);
2913
2914         r = find_data_object_by_boot_id(f, boot_id, &o, &p);
2915         if (r <= 0)
2916                 return r;
2917
2918         if (le64toh(o->data.n_entries) <= 0)
2919                 return 0;
2920
2921         if (from) {
2922                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2923                 if (r < 0)
2924                         return r;
2925
2926                 *from = le64toh(o->entry.monotonic);
2927         }
2928
2929         if (to) {
2930                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2931                 if (r < 0)
2932                         return r;
2933
2934                 r = generic_array_get_plus_one(f,
2935                                                le64toh(o->data.entry_offset),
2936                                                le64toh(o->data.entry_array_offset),
2937                                                le64toh(o->data.n_entries)-1,
2938                                                &o, NULL);
2939                 if (r <= 0)
2940                         return r;
2941
2942                 *to = le64toh(o->entry.monotonic);
2943         }
2944
2945         return 1;
2946 }
2947
2948 bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
2949         assert(f);
2950
2951         /* If we gained new header fields we gained new features,
2952          * hence suggest a rotation */
2953         if (le64toh(f->header->header_size) < sizeof(Header)) {
2954                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2955                 return true;
2956         }
2957
2958         /* Let's check if the hash tables grew over a certain fill
2959          * level (75%, borrowing this value from Java's hash table
2960          * implementation), and if so suggest a rotation. To calculate
2961          * the fill level we need the n_data field, which only exists
2962          * in newer versions. */
2963
2964         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2965                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2966                         log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
2967                                   f->path,
2968                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2969                                   le64toh(f->header->n_data),
2970                                   le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2971                                   (unsigned long long) f->last_stat.st_size,
2972                                   f->last_stat.st_size / le64toh(f->header->n_data));
2973                         return true;
2974                 }
2975
2976         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2977                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2978                         log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
2979                                   f->path,
2980                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2981                                   le64toh(f->header->n_fields),
2982                                   le64toh(f->header->field_hash_table_size) / sizeof(HashItem));
2983                         return true;
2984                 }
2985
2986         /* Are the data objects properly indexed by field objects? */
2987         if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
2988             JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
2989             le64toh(f->header->n_data) > 0 &&
2990             le64toh(f->header->n_fields) == 0)
2991                 return true;
2992
2993         if (max_file_usec > 0) {
2994                 usec_t t, h;
2995
2996                 h = le64toh(f->header->head_entry_realtime);
2997                 t = now(CLOCK_REALTIME);
2998
2999                 if (h > 0 && t > h + max_file_usec)
3000                         return true;
3001         }
3002
3003         return false;
3004 }