chiark / gitweb /
journal: include tag object header in hmac
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "lookup3.h"
33 #include "compress.h"
34 #include "fsprg.h"
35
/* Rounds x up to the next multiple of 8 bytes (i.e. 64 bit) */
#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)

/* Sizes, in bytes, of the hash tables created for new files */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Size of the mapping window we try to establish around an access */
#define DEFAULT_WINDOW_SIZE (8ULL << 20)                       /* 8 MiB */

/* Payloads smaller than this are never handed to the compressor */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Lower bound for the size of a journal file */
#define JOURNAL_FILE_SIZE_MIN (64ULL << 10)                    /* 64 KiB */

/* Lower and upper bounds applied when max_use is deduced from the
 * file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL << 20)                     /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL << 30)                     /* 4 GiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL << 20)                  /* 128 MiB */

/* Upper bound applied when keep_free is deduced from the file system
 * size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL << 30)                   /* 4 GiB */

/* keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1ULL << 20)                         /* 1 MiB */

/* n_data was the first field added after the initial file format
 * design, hence everything in front of it must always be present */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* True if the header of this file is large enough to contain the
 * specified field in its entirety */
#define JOURNAL_HEADER_CONTAINS(h, field) \
        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
69
70 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
71 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
72
73 void journal_file_close(JournalFile *f) {
74         int t;
75
76         assert(f);
77
78         /* Write the final tag */
79         if (f->authenticate)
80                 journal_file_append_tag(f);
81
82         /* Sync everything to disk, before we mark the file offline */
83         for (t = 0; t < _WINDOW_MAX; t++)
84                 if (f->windows[t].ptr)
85                         munmap(f->windows[t].ptr, f->windows[t].size);
86
87         if (f->writable && f->fd >= 0)
88                 fdatasync(f->fd);
89
90         if (f->header) {
91                 /* Mark the file offline. Don't override the archived state if it already is set */
92                 if (f->writable && f->header->state == STATE_ONLINE)
93                         f->header->state = STATE_OFFLINE;
94
95                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
96         }
97
98         if (f->fd >= 0)
99                 close_nointr_nofail(f->fd);
100
101         free(f->path);
102
103 #ifdef HAVE_XZ
104         free(f->compress_buffer);
105 #endif
106
107 #ifdef HAVE_GCRYPT
108         if (f->fsprg_header)
109                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
110
111         if (f->hmac)
112                 gcry_md_close(f->hmac);
113 #endif
114
115         free(f);
116 }
117
118 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
119         Header h;
120         ssize_t k;
121         int r;
122
123         assert(f);
124
125         zero(h);
126         memcpy(h.signature, HEADER_SIGNATURE, 8);
127         h.header_size = htole64(ALIGN64(sizeof(h)));
128
129         h.incompatible_flags =
130                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
131
132         h.compatible_flags =
133                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
134
135         r = sd_id128_randomize(&h.file_id);
136         if (r < 0)
137                 return r;
138
139         if (template) {
140                 h.seqnum_id = template->header->seqnum_id;
141                 h.tail_seqnum = template->header->tail_seqnum;
142         } else
143                 h.seqnum_id = h.file_id;
144
145         k = pwrite(f->fd, &h, sizeof(h), 0);
146         if (k < 0)
147                 return -errno;
148
149         if (k != sizeof(h))
150                 return -EIO;
151
152         return 0;
153 }
154
155 static int journal_file_refresh_header(JournalFile *f) {
156         int r;
157         sd_id128_t boot_id;
158
159         assert(f);
160
161         r = sd_id128_get_machine(&f->header->machine_id);
162         if (r < 0)
163                 return r;
164
165         r = sd_id128_get_boot(&boot_id);
166         if (r < 0)
167                 return r;
168
169         if (sd_id128_equal(boot_id, f->header->boot_id))
170                 f->tail_entry_monotonic_valid = true;
171
172         f->header->boot_id = boot_id;
173
174         f->header->state = STATE_ONLINE;
175
176         /* Sync the online state to disk */
177         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
178         fdatasync(f->fd);
179
180         return 0;
181 }
182
183 static int journal_file_verify_header(JournalFile *f) {
184         assert(f);
185
186         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
187                 return -EBADMSG;
188
189         /* In both read and write mode we refuse to open files with
190          * incompatible flags we don't know */
191 #ifdef HAVE_XZ
192         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
193                 return -EPROTONOSUPPORT;
194 #else
195         if (f->header->incompatible_flags != 0)
196                 return -EPROTONOSUPPORT;
197 #endif
198
199         /* When open for writing we refuse to open files with
200          * compatible flags, too */
201         if (f->writable) {
202 #ifdef HAVE_GCRYPT
203                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
204                         return -EPROTONOSUPPORT;
205 #else
206                 if (f->header->compatible_flags != 0)
207                         return -EPROTONOSUPPORT;
208 #endif
209         }
210
211         /* The first addition was n_data, so check that we are at least this large */
212         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
213                 return -EBADMSG;
214
215         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
216                 return -ENODATA;
217
218         if (f->writable) {
219                 uint8_t state;
220                 sd_id128_t machine_id;
221                 int r;
222
223                 r = sd_id128_get_machine(&machine_id);
224                 if (r < 0)
225                         return r;
226
227                 if (!sd_id128_equal(machine_id, f->header->machine_id))
228                         return -EHOSTDOWN;
229
230                 state = f->header->state;
231
232                 if (state == STATE_ONLINE) {
233                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
234                         return -EBUSY;
235                 } else if (state == STATE_ARCHIVED)
236                         return -ESHUTDOWN;
237                 else if (state != STATE_OFFLINE) {
238                         log_debug("Journal file %s has unknown state %u.", f->path, state);
239                         return -EBUSY;
240                 }
241         }
242
243         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
244         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
245
246         return 0;
247 }
248
249 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
250         uint64_t old_size, new_size;
251         int r;
252
253         assert(f);
254
255         /* We assume that this file is not sparse, and we know that
256          * for sure, since we always call posix_fallocate()
257          * ourselves */
258
259         old_size =
260                 le64toh(f->header->header_size) +
261                 le64toh(f->header->arena_size);
262
263         new_size = PAGE_ALIGN(offset + size);
264         if (new_size < le64toh(f->header->header_size))
265                 new_size = le64toh(f->header->header_size);
266
267         if (new_size <= old_size)
268                 return 0;
269
270         if (f->metrics.max_size > 0 &&
271             new_size > f->metrics.max_size)
272                 return -E2BIG;
273
274         if (new_size > f->metrics.min_size &&
275             f->metrics.keep_free > 0) {
276                 struct statvfs svfs;
277
278                 if (fstatvfs(f->fd, &svfs) >= 0) {
279                         uint64_t available;
280
281                         available = svfs.f_bfree * svfs.f_bsize;
282
283                         if (available >= f->metrics.keep_free)
284                                 available -= f->metrics.keep_free;
285                         else
286                                 available = 0;
287
288                         if (new_size - old_size > available)
289                                 return -E2BIG;
290                 }
291         }
292
293         /* Note that the glibc fallocate() fallback is very
294            inefficient, hence we try to minimize the allocation area
295            as we can. */
296         r = posix_fallocate(f->fd, old_size, new_size - old_size);
297         if (r != 0)
298                 return -r;
299
300         if (fstat(f->fd, &f->last_stat) < 0)
301                 return -errno;
302
303         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
304
305         return 0;
306 }
307
308 static int journal_file_map(
309                 JournalFile *f,
310                 uint64_t offset,
311                 uint64_t size,
312                 void **_window,
313                 uint64_t *_woffset,
314                 uint64_t *_wsize,
315                 void **ret) {
316
317         uint64_t woffset, wsize;
318         void *window;
319
320         assert(f);
321         assert(size > 0);
322         assert(ret);
323
324         woffset = offset & ~((uint64_t) page_size() - 1ULL);
325         wsize = size + (offset - woffset);
326         wsize = PAGE_ALIGN(wsize);
327
328         /* Avoid SIGBUS on invalid accesses */
329         if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
330                 return -EADDRNOTAVAIL;
331
332         window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
333         if (window == MAP_FAILED)
334                 return -errno;
335
336         if (_window)
337                 *_window = window;
338
339         if (_woffset)
340                 *_woffset = woffset;
341
342         if (_wsize)
343                 *_wsize = wsize;
344
345         *ret = (uint8_t*) window + (offset - woffset);
346
347         return 0;
348 }
349
350 static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
351         void *p = NULL;
352         uint64_t delta;
353         int r;
354         Window *w;
355
356         assert(f);
357         assert(ret);
358         assert(wt >= 0);
359         assert(wt < _WINDOW_MAX);
360
361         if (offset + size > (uint64_t) f->last_stat.st_size) {
362                 /* Hmm, out of range? Let's refresh the fstat() data
363                  * first, before we trust that check. */
364
365                 if (fstat(f->fd, &f->last_stat) < 0 ||
366                     offset + size > (uint64_t) f->last_stat.st_size)
367                         return -EADDRNOTAVAIL;
368         }
369
370         w = f->windows + wt;
371
372         if (_likely_(w->ptr &&
373                      w->offset <= offset &&
374                      w->offset + w->size >= offset + size)) {
375
376                 *ret = (uint8_t*) w->ptr + (offset - w->offset);
377                 return 0;
378         }
379
380         if (w->ptr) {
381                 if (munmap(w->ptr, w->size) < 0)
382                         return -errno;
383
384                 w->ptr = NULL;
385                 w->size = w->offset = 0;
386         }
387
388         if (size < DEFAULT_WINDOW_SIZE) {
389                 /* If the default window size is larger then what was
390                  * asked for extend the mapping a bit in the hope to
391                  * minimize needed remappings later on. We add half
392                  * the window space before and half behind the
393                  * requested mapping */
394
395                 delta = (DEFAULT_WINDOW_SIZE - size) / 2;
396
397                 if (delta > offset)
398                         delta = offset;
399
400                 offset -= delta;
401                 size = DEFAULT_WINDOW_SIZE;
402         } else
403                 delta = 0;
404
405         if (offset + size > (uint64_t) f->last_stat.st_size)
406                 size = (uint64_t) f->last_stat.st_size - offset;
407
408         if (size <= 0)
409                 return -EADDRNOTAVAIL;
410
411         r = journal_file_map(f,
412                              offset, size,
413                              &w->ptr, &w->offset, &w->size,
414                              &p);
415
416         if (r < 0)
417                 return r;
418
419         *ret = (uint8_t*) p + delta;
420         return 0;
421 }
422
423 static bool verify_hash(Object *o) {
424         uint64_t h1, h2;
425
426         assert(o);
427
428         if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
429                 h1 = le64toh(o->data.hash);
430                 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
431         } else if (o->object.type == OBJECT_FIELD) {
432                 h1 = le64toh(o->field.hash);
433                 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
434         } else
435                 return true;
436
437         return h1 == h2;
438 }
439
440 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
441         int r;
442         void *t;
443         Object *o;
444         uint64_t s;
445
446         assert(f);
447         assert(ret);
448         assert(type < _OBJECT_TYPE_MAX);
449
450         r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
451         if (r < 0)
452                 return r;
453
454         o = (Object*) t;
455         s = le64toh(o->object.size);
456
457         if (s < sizeof(ObjectHeader))
458                 return -EBADMSG;
459
460         if (type >= 0 && o->object.type != type)
461                 return -EBADMSG;
462
463         if (s > sizeof(ObjectHeader)) {
464                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
465                 if (r < 0)
466                         return r;
467
468                 o = (Object*) t;
469         }
470
471         if (!verify_hash(o))
472                 return -EBADMSG;
473
474         *ret = o;
475         return 0;
476 }
477
478 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
479         uint64_t r;
480
481         assert(f);
482
483         r = le64toh(f->header->tail_seqnum) + 1;
484
485         if (seqnum) {
486                 /* If an external seqnum counter was passed, we update
487                  * both the local and the external one, and set it to
488                  * the maximum of both */
489
490                 if (*seqnum + 1 > r)
491                         r = *seqnum + 1;
492
493                 *seqnum = r;
494         }
495
496         f->header->tail_seqnum = htole64(r);
497
498         if (f->header->head_seqnum == 0)
499                 f->header->head_seqnum = htole64(r);
500
501         return r;
502 }
503
504 static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
505         int r;
506         uint64_t p;
507         Object *tail, *o;
508         void *t;
509
510         assert(f);
511         assert(size >= sizeof(ObjectHeader));
512         assert(offset);
513         assert(ret);
514
515         p = le64toh(f->header->tail_object_offset);
516         if (p == 0)
517                 p = le64toh(f->header->header_size);
518         else {
519                 r = journal_file_move_to_object(f, -1, p, &tail);
520                 if (r < 0)
521                         return r;
522
523                 p += ALIGN64(le64toh(tail->object.size));
524         }
525
526         r = journal_file_allocate(f, p, size);
527         if (r < 0)
528                 return r;
529
530         r = journal_file_move_to(f, type, p, size, &t);
531         if (r < 0)
532                 return r;
533
534         o = (Object*) t;
535
536         zero(o->object);
537         o->object.type = type;
538         o->object.size = htole64(size);
539
540         f->header->tail_object_offset = htole64(p);
541         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
542
543         *ret = o;
544         *offset = p;
545
546         return 0;
547 }
548
549 static int journal_file_setup_data_hash_table(JournalFile *f) {
550         uint64_t s, p;
551         Object *o;
552         int r;
553
554         assert(f);
555
556         /* We estimate that we need 1 hash table entry per 768 of
557            journal file and we want to make sure we never get beyond
558            75% fill level. Calculate the hash table size for the
559            maximum file size based on these metrics. */
560
561         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
562         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
563                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
564
565         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
566
567         r = journal_file_append_object(f,
568                                        OBJECT_DATA_HASH_TABLE,
569                                        offsetof(Object, hash_table.items) + s,
570                                        &o, &p);
571         if (r < 0)
572                 return r;
573
574         memset(o->hash_table.items, 0, s);
575
576         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
577         f->header->data_hash_table_size = htole64(s);
578
579         return 0;
580 }
581
582 static int journal_file_setup_field_hash_table(JournalFile *f) {
583         uint64_t s, p;
584         Object *o;
585         int r;
586
587         assert(f);
588
589         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
590         r = journal_file_append_object(f,
591                                        OBJECT_FIELD_HASH_TABLE,
592                                        offsetof(Object, hash_table.items) + s,
593                                        &o, &p);
594         if (r < 0)
595                 return r;
596
597         memset(o->hash_table.items, 0, s);
598
599         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
600         f->header->field_hash_table_size = htole64(s);
601
602         return 0;
603 }
604
605 static int journal_file_map_data_hash_table(JournalFile *f) {
606         uint64_t s, p;
607         void *t;
608         int r;
609
610         assert(f);
611
612         p = le64toh(f->header->data_hash_table_offset);
613         s = le64toh(f->header->data_hash_table_size);
614
615         r = journal_file_move_to(f,
616                                  WINDOW_DATA_HASH_TABLE,
617                                  p, s,
618                                  &t);
619         if (r < 0)
620                 return r;
621
622         f->data_hash_table = t;
623         return 0;
624 }
625
626 static int journal_file_map_field_hash_table(JournalFile *f) {
627         uint64_t s, p;
628         void *t;
629         int r;
630
631         assert(f);
632
633         p = le64toh(f->header->field_hash_table_offset);
634         s = le64toh(f->header->field_hash_table_size);
635
636         r = journal_file_move_to(f,
637                                  WINDOW_FIELD_HASH_TABLE,
638                                  p, s,
639                                  &t);
640         if (r < 0)
641                 return r;
642
643         f->field_hash_table = t;
644         return 0;
645 }
646
647 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
648         uint64_t p, h;
649         int r;
650
651         assert(f);
652         assert(o);
653         assert(offset > 0);
654         assert(o->object.type == OBJECT_DATA);
655
656         /* This might alter the window we are looking at */
657
658         o->data.next_hash_offset = o->data.next_field_offset = 0;
659         o->data.entry_offset = o->data.entry_array_offset = 0;
660         o->data.n_entries = 0;
661
662         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
663         p = le64toh(f->data_hash_table[h].tail_hash_offset);
664         if (p == 0) {
665                 /* Only entry in the hash table is easy */
666                 f->data_hash_table[h].head_hash_offset = htole64(offset);
667         } else {
668                 /* Move back to the previous data object, to patch in
669                  * pointer */
670
671                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
672                 if (r < 0)
673                         return r;
674
675                 o->data.next_hash_offset = htole64(offset);
676         }
677
678         f->data_hash_table[h].tail_hash_offset = htole64(offset);
679
680         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
681                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
682
683         return 0;
684 }
685
686 int journal_file_find_data_object_with_hash(
687                 JournalFile *f,
688                 const void *data, uint64_t size, uint64_t hash,
689                 Object **ret, uint64_t *offset) {
690
691         uint64_t p, osize, h;
692         int r;
693
694         assert(f);
695         assert(data || size == 0);
696
697         osize = offsetof(Object, data.payload) + size;
698
699         if (f->header->data_hash_table_size == 0)
700                 return -EBADMSG;
701
702         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
703         p = le64toh(f->data_hash_table[h].head_hash_offset);
704
705         while (p > 0) {
706                 Object *o;
707
708                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
709                 if (r < 0)
710                         return r;
711
712                 if (le64toh(o->data.hash) != hash)
713                         goto next;
714
715                 if (o->object.flags & OBJECT_COMPRESSED) {
716 #ifdef HAVE_XZ
717                         uint64_t l, rsize;
718
719                         l = le64toh(o->object.size);
720                         if (l <= offsetof(Object, data.payload))
721                                 return -EBADMSG;
722
723                         l -= offsetof(Object, data.payload);
724
725                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
726                                 return -EBADMSG;
727
728                         if (rsize == size &&
729                             memcmp(f->compress_buffer, data, size) == 0) {
730
731                                 if (ret)
732                                         *ret = o;
733
734                                 if (offset)
735                                         *offset = p;
736
737                                 return 1;
738                         }
739 #else
740                         return -EPROTONOSUPPORT;
741 #endif
742
743                 } else if (le64toh(o->object.size) == osize &&
744                            memcmp(o->data.payload, data, size) == 0) {
745
746                         if (ret)
747                                 *ret = o;
748
749                         if (offset)
750                                 *offset = p;
751
752                         return 1;
753                 }
754
755         next:
756                 p = le64toh(o->data.next_hash_offset);
757         }
758
759         return 0;
760 }
761
762 int journal_file_find_data_object(
763                 JournalFile *f,
764                 const void *data, uint64_t size,
765                 Object **ret, uint64_t *offset) {
766
767         uint64_t hash;
768
769         assert(f);
770         assert(data || size == 0);
771
772         hash = hash64(data, size);
773
774         return journal_file_find_data_object_with_hash(f,
775                                                        data, size, hash,
776                                                        ret, offset);
777 }
778
779 static int journal_file_append_data(
780                 JournalFile *f,
781                 const void *data, uint64_t size,
782                 Object **ret, uint64_t *offset) {
783
784         uint64_t hash, p;
785         uint64_t osize;
786         Object *o;
787         int r;
788         bool compressed = false;
789
790         assert(f);
791         assert(data || size == 0);
792
793         hash = hash64(data, size);
794
795         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
796         if (r < 0)
797                 return r;
798         else if (r > 0) {
799
800                 if (ret)
801                         *ret = o;
802
803                 if (offset)
804                         *offset = p;
805
806                 return 0;
807         }
808
809         osize = offsetof(Object, data.payload) + size;
810         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
811         if (r < 0)
812                 return r;
813
814         o->data.hash = htole64(hash);
815
816 #ifdef HAVE_XZ
817         if (f->compress &&
818             size >= COMPRESSION_SIZE_THRESHOLD) {
819                 uint64_t rsize;
820
821                 compressed = compress_blob(data, size, o->data.payload, &rsize);
822
823                 if (compressed) {
824                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
825                         o->object.flags |= OBJECT_COMPRESSED;
826
827                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
828                 }
829         }
830 #endif
831
832         if (!compressed && size > 0)
833                 memcpy(o->data.payload, data, size);
834
835         r = journal_file_link_data(f, o, p, hash);
836         if (r < 0)
837                 return r;
838
839         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
840         if (r < 0)
841                 return r;
842
843         /* The linking might have altered the window, so let's
844          * refresh our pointer */
845         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
846         if (r < 0)
847                 return r;
848
849         if (ret)
850                 *ret = o;
851
852         if (offset)
853                 *offset = p;
854
855         return 0;
856 }
857
858 uint64_t journal_file_entry_n_items(Object *o) {
859         assert(o);
860         assert(o->object.type == OBJECT_ENTRY);
861
862         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
863 }
864
865 static uint64_t journal_file_entry_array_n_items(Object *o) {
866         assert(o);
867         assert(o->object.type == OBJECT_ENTRY_ARRAY);
868
869         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
870 }
871
872 static int link_entry_into_array(JournalFile *f,
873                                  le64_t *first,
874                                  le64_t *idx,
875                                  uint64_t p) {
876         int r;
877         uint64_t n = 0, ap = 0, q, i, a, hidx;
878         Object *o;
879
880         assert(f);
881         assert(first);
882         assert(idx);
883         assert(p > 0);
884
885         a = le64toh(*first);
886         i = hidx = le64toh(*idx);
887         while (a > 0) {
888
889                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
890                 if (r < 0)
891                         return r;
892
893                 n = journal_file_entry_array_n_items(o);
894                 if (i < n) {
895                         o->entry_array.items[i] = htole64(p);
896                         *idx = htole64(hidx + 1);
897                         return 0;
898                 }
899
900                 i -= n;
901                 ap = a;
902                 a = le64toh(o->entry_array.next_entry_array_offset);
903         }
904
905         if (hidx > n)
906                 n = (hidx+1) * 2;
907         else
908                 n = n * 2;
909
910         if (n < 4)
911                 n = 4;
912
913         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
914                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
915                                        &o, &q);
916         if (r < 0)
917                 return r;
918
919         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
920         if (r < 0)
921                 return r;
922
923         o->entry_array.items[i] = htole64(p);
924
925         if (ap == 0)
926                 *first = htole64(q);
927         else {
928                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
929                 if (r < 0)
930                         return r;
931
932                 o->entry_array.next_entry_array_offset = htole64(q);
933         }
934
935         *idx = htole64(hidx + 1);
936
937         return 0;
938 }
939
940 static int link_entry_into_array_plus_one(JournalFile *f,
941                                           le64_t *extra,
942                                           le64_t *first,
943                                           le64_t *idx,
944                                           uint64_t p) {
945
946         int r;
947
948         assert(f);
949         assert(extra);
950         assert(first);
951         assert(idx);
952         assert(p > 0);
953
954         if (*idx == 0)
955                 *extra = htole64(p);
956         else {
957                 le64_t i;
958
959                 i = htole64(le64toh(*idx) - 1);
960                 r = link_entry_into_array(f, first, &i, p);
961                 if (r < 0)
962                         return r;
963         }
964
965         *idx = htole64(le64toh(*idx) + 1);
966         return 0;
967 }
968
969 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
970         uint64_t p;
971         int r;
972         assert(f);
973         assert(o);
974         assert(offset > 0);
975
976         p = le64toh(o->entry.items[i].object_offset);
977         if (p == 0)
978                 return -EINVAL;
979
980         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
981         if (r < 0)
982                 return r;
983
984         return link_entry_into_array_plus_one(f,
985                                               &o->data.entry_offset,
986                                               &o->data.entry_array_offset,
987                                               &o->data.n_entries,
988                                               offset);
989 }
990
991 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
992         uint64_t n, i;
993         int r;
994
995         assert(f);
996         assert(o);
997         assert(offset > 0);
998         assert(o->object.type == OBJECT_ENTRY);
999
1000         __sync_synchronize();
1001
1002         /* Link up the entry itself */
1003         r = link_entry_into_array(f,
1004                                   &f->header->entry_array_offset,
1005                                   &f->header->n_entries,
1006                                   offset);
1007         if (r < 0)
1008                 return r;
1009
1010         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
1011
1012         if (f->header->head_entry_realtime == 0)
1013                 f->header->head_entry_realtime = o->entry.realtime;
1014
1015         f->header->tail_entry_realtime = o->entry.realtime;
1016         f->header->tail_entry_monotonic = o->entry.monotonic;
1017
1018         f->tail_entry_monotonic_valid = true;
1019
1020         /* Link up the items */
1021         n = journal_file_entry_n_items(o);
1022         for (i = 0; i < n; i++) {
1023                 r = journal_file_link_entry_item(f, o, offset, i);
1024                 if (r < 0)
1025                         return r;
1026         }
1027
1028         return 0;
1029 }
1030
1031 static int journal_file_append_entry_internal(
1032                 JournalFile *f,
1033                 const dual_timestamp *ts,
1034                 uint64_t xor_hash,
1035                 const EntryItem items[], unsigned n_items,
1036                 uint64_t *seqnum,
1037                 Object **ret, uint64_t *offset) {
1038         uint64_t np;
1039         uint64_t osize;
1040         Object *o;
1041         int r;
1042
1043         assert(f);
1044         assert(items || n_items == 0);
1045         assert(ts);
1046
1047         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1048
1049         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1050         if (r < 0)
1051                 return r;
1052
1053         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1054         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1055         o->entry.realtime = htole64(ts->realtime);
1056         o->entry.monotonic = htole64(ts->monotonic);
1057         o->entry.xor_hash = htole64(xor_hash);
1058         o->entry.boot_id = f->header->boot_id;
1059
1060         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1061         if (r < 0)
1062                 return r;
1063
1064         r = journal_file_link_entry(f, o, np);
1065         if (r < 0)
1066                 return r;
1067
1068         if (ret)
1069                 *ret = o;
1070
1071         if (offset)
1072                 *offset = np;
1073
1074         return 0;
1075 }
1076
1077 void journal_file_post_change(JournalFile *f) {
1078         assert(f);
1079
1080         /* inotify() does not receive IN_MODIFY events from file
1081          * accesses done via mmap(). After each access we hence
1082          * trigger IN_MODIFY by truncating the journal file to its
1083          * current size which triggers IN_MODIFY. */
1084
1085         __sync_synchronize();
1086
1087         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1088                 log_error("Failed to to truncate file to its own size: %m");
1089 }
1090
1091 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1092         unsigned i;
1093         EntryItem *items;
1094         int r;
1095         uint64_t xor_hash = 0;
1096         struct dual_timestamp _ts;
1097
1098         assert(f);
1099         assert(iovec || n_iovec == 0);
1100
1101         if (!f->writable)
1102                 return -EPERM;
1103
1104         if (!ts) {
1105                 dual_timestamp_get(&_ts);
1106                 ts = &_ts;
1107         }
1108
1109         if (f->tail_entry_monotonic_valid &&
1110             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1111                 return -EINVAL;
1112
1113         r = journal_file_maybe_append_tag(f, ts->realtime);
1114         if (r < 0)
1115                 return r;
1116
1117         /* alloca() can't take 0, hence let's allocate at least one */
1118         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1119
1120         for (i = 0; i < n_iovec; i++) {
1121                 uint64_t p;
1122                 Object *o;
1123
1124                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1125                 if (r < 0)
1126                         return r;
1127
1128                 xor_hash ^= le64toh(o->data.hash);
1129                 items[i].object_offset = htole64(p);
1130                 items[i].hash = o->data.hash;
1131         }
1132
1133         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1134
1135         journal_file_post_change(f);
1136
1137         return r;
1138 }
1139
1140 static int generic_array_get(JournalFile *f,
1141                              uint64_t first,
1142                              uint64_t i,
1143                              Object **ret, uint64_t *offset) {
1144
1145         Object *o;
1146         uint64_t p = 0, a;
1147         int r;
1148
1149         assert(f);
1150
1151         a = first;
1152         while (a > 0) {
1153                 uint64_t n;
1154
1155                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1156                 if (r < 0)
1157                         return r;
1158
1159                 n = journal_file_entry_array_n_items(o);
1160                 if (i < n) {
1161                         p = le64toh(o->entry_array.items[i]);
1162                         break;
1163                 }
1164
1165                 i -= n;
1166                 a = le64toh(o->entry_array.next_entry_array_offset);
1167         }
1168
1169         if (a <= 0 || p <= 0)
1170                 return 0;
1171
1172         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1173         if (r < 0)
1174                 return r;
1175
1176         if (ret)
1177                 *ret = o;
1178
1179         if (offset)
1180                 *offset = p;
1181
1182         return 1;
1183 }
1184
1185 static int generic_array_get_plus_one(JournalFile *f,
1186                                       uint64_t extra,
1187                                       uint64_t first,
1188                                       uint64_t i,
1189                                       Object **ret, uint64_t *offset) {
1190
1191         Object *o;
1192
1193         assert(f);
1194
1195         if (i == 0) {
1196                 int r;
1197
1198                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1199                 if (r < 0)
1200                         return r;
1201
1202                 if (ret)
1203                         *ret = o;
1204
1205                 if (offset)
1206                         *offset = extra;
1207
1208                 return 1;
1209         }
1210
1211         return generic_array_get(f, first, i-1, ret, offset);
1212 }
1213
/* Results of the test_object_xxx() comparison callbacks used for
 * bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2  /* the tested object sorts after the needle */
};
1219
1220 static int generic_array_bisect(JournalFile *f,
1221                                 uint64_t first,
1222                                 uint64_t n,
1223                                 uint64_t needle,
1224                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1225                                 direction_t direction,
1226                                 Object **ret,
1227                                 uint64_t *offset,
1228                                 uint64_t *idx) {
1229
1230         uint64_t a, p, t = 0, i = 0, last_p = 0;
1231         bool subtract_one = false;
1232         Object *o, *array = NULL;
1233         int r;
1234
1235         assert(f);
1236         assert(test_object);
1237
1238         a = first;
1239         while (a > 0) {
1240                 uint64_t left, right, k, lp;
1241
1242                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1243                 if (r < 0)
1244                         return r;
1245
1246                 k = journal_file_entry_array_n_items(array);
1247                 right = MIN(k, n);
1248                 if (right <= 0)
1249                         return 0;
1250
1251                 i = right - 1;
1252                 lp = p = le64toh(array->entry_array.items[i]);
1253                 if (p <= 0)
1254                         return -EBADMSG;
1255
1256                 r = test_object(f, p, needle);
1257                 if (r < 0)
1258                         return r;
1259
1260                 if (r == TEST_FOUND)
1261                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1262
1263                 if (r == TEST_RIGHT) {
1264                         left = 0;
1265                         right -= 1;
1266                         for (;;) {
1267                                 if (left == right) {
1268                                         if (direction == DIRECTION_UP)
1269                                                 subtract_one = true;
1270
1271                                         i = left;
1272                                         goto found;
1273                                 }
1274
1275                                 assert(left < right);
1276
1277                                 i = (left + right) / 2;
1278                                 p = le64toh(array->entry_array.items[i]);
1279                                 if (p <= 0)
1280                                         return -EBADMSG;
1281
1282                                 r = test_object(f, p, needle);
1283                                 if (r < 0)
1284                                         return r;
1285
1286                                 if (r == TEST_FOUND)
1287                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1288
1289                                 if (r == TEST_RIGHT)
1290                                         right = i;
1291                                 else
1292                                         left = i + 1;
1293                         }
1294                 }
1295
1296                 if (k > n) {
1297                         if (direction == DIRECTION_UP) {
1298                                 i = n;
1299                                 subtract_one = true;
1300                                 goto found;
1301                         }
1302
1303                         return 0;
1304                 }
1305
1306                 last_p = lp;
1307
1308                 n -= k;
1309                 t += k;
1310                 a = le64toh(array->entry_array.next_entry_array_offset);
1311         }
1312
1313         return 0;
1314
1315 found:
1316         if (subtract_one && t == 0 && i == 0)
1317                 return 0;
1318
1319         if (subtract_one && i == 0)
1320                 p = last_p;
1321         else if (subtract_one)
1322                 p = le64toh(array->entry_array.items[i-1]);
1323         else
1324                 p = le64toh(array->entry_array.items[i]);
1325
1326         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1327         if (r < 0)
1328                 return r;
1329
1330         if (ret)
1331                 *ret = o;
1332
1333         if (offset)
1334                 *offset = p;
1335
1336         if (idx)
1337                 *idx = t + i + (subtract_one ? -1 : 0);
1338
1339         return 1;
1340 }
1341
1342 static int generic_array_bisect_plus_one(JournalFile *f,
1343                                          uint64_t extra,
1344                                          uint64_t first,
1345                                          uint64_t n,
1346                                          uint64_t needle,
1347                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1348                                          direction_t direction,
1349                                          Object **ret,
1350                                          uint64_t *offset,
1351                                          uint64_t *idx) {
1352
1353         int r;
1354         bool step_back = false;
1355         Object *o;
1356
1357         assert(f);
1358         assert(test_object);
1359
1360         if (n <= 0)
1361                 return 0;
1362
1363         /* This bisects the array in object 'first', but first checks
1364          * an extra  */
1365         r = test_object(f, extra, needle);
1366         if (r < 0)
1367                 return r;
1368
1369         if (r == TEST_FOUND)
1370                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1371
1372         /* if we are looking with DIRECTION_UP then we need to first
1373            see if in the actual array there is a matching entry, and
1374            return the last one of that. But if there isn't any we need
1375            to return this one. Hence remember this, and return it
1376            below. */
1377         if (r == TEST_LEFT)
1378                 step_back = direction == DIRECTION_UP;
1379
1380         if (r == TEST_RIGHT) {
1381                 if (direction == DIRECTION_DOWN)
1382                         goto found;
1383                 else
1384                         return 0;
1385         }
1386
1387         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1388
1389         if (r == 0 && step_back)
1390                 goto found;
1391
1392         if (r > 0 && idx)
1393                 (*idx) ++;
1394
1395         return r;
1396
1397 found:
1398         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1399         if (r < 0)
1400                 return r;
1401
1402         if (ret)
1403                 *ret = o;
1404
1405         if (offset)
1406                 *offset = extra;
1407
1408         if (idx)
1409                 *idx = 0;
1410
1411         return 1;
1412 }
1413
1414 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1415         assert(f);
1416         assert(p > 0);
1417
1418         if (p == needle)
1419                 return TEST_FOUND;
1420         else if (p < needle)
1421                 return TEST_LEFT;
1422         else
1423                 return TEST_RIGHT;
1424 }
1425
1426 int journal_file_move_to_entry_by_offset(
1427                 JournalFile *f,
1428                 uint64_t p,
1429                 direction_t direction,
1430                 Object **ret,
1431                 uint64_t *offset) {
1432
1433         return generic_array_bisect(f,
1434                                     le64toh(f->header->entry_array_offset),
1435                                     le64toh(f->header->n_entries),
1436                                     p,
1437                                     test_object_offset,
1438                                     direction,
1439                                     ret, offset, NULL);
1440 }
1441
1442
1443 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1444         Object *o;
1445         int r;
1446
1447         assert(f);
1448         assert(p > 0);
1449
1450         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1451         if (r < 0)
1452                 return r;
1453
1454         if (le64toh(o->entry.seqnum) == needle)
1455                 return TEST_FOUND;
1456         else if (le64toh(o->entry.seqnum) < needle)
1457                 return TEST_LEFT;
1458         else
1459                 return TEST_RIGHT;
1460 }
1461
1462 int journal_file_move_to_entry_by_seqnum(
1463                 JournalFile *f,
1464                 uint64_t seqnum,
1465                 direction_t direction,
1466                 Object **ret,
1467                 uint64_t *offset) {
1468
1469         return generic_array_bisect(f,
1470                                     le64toh(f->header->entry_array_offset),
1471                                     le64toh(f->header->n_entries),
1472                                     seqnum,
1473                                     test_object_seqnum,
1474                                     direction,
1475                                     ret, offset, NULL);
1476 }
1477
1478 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1479         Object *o;
1480         int r;
1481
1482         assert(f);
1483         assert(p > 0);
1484
1485         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1486         if (r < 0)
1487                 return r;
1488
1489         if (le64toh(o->entry.realtime) == needle)
1490                 return TEST_FOUND;
1491         else if (le64toh(o->entry.realtime) < needle)
1492                 return TEST_LEFT;
1493         else
1494                 return TEST_RIGHT;
1495 }
1496
1497 int journal_file_move_to_entry_by_realtime(
1498                 JournalFile *f,
1499                 uint64_t realtime,
1500                 direction_t direction,
1501                 Object **ret,
1502                 uint64_t *offset) {
1503
1504         return generic_array_bisect(f,
1505                                     le64toh(f->header->entry_array_offset),
1506                                     le64toh(f->header->n_entries),
1507                                     realtime,
1508                                     test_object_realtime,
1509                                     direction,
1510                                     ret, offset, NULL);
1511 }
1512
1513 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1514         Object *o;
1515         int r;
1516
1517         assert(f);
1518         assert(p > 0);
1519
1520         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1521         if (r < 0)
1522                 return r;
1523
1524         if (le64toh(o->entry.monotonic) == needle)
1525                 return TEST_FOUND;
1526         else if (le64toh(o->entry.monotonic) < needle)
1527                 return TEST_LEFT;
1528         else
1529                 return TEST_RIGHT;
1530 }
1531
1532 int journal_file_move_to_entry_by_monotonic(
1533                 JournalFile *f,
1534                 sd_id128_t boot_id,
1535                 uint64_t monotonic,
1536                 direction_t direction,
1537                 Object **ret,
1538                 uint64_t *offset) {
1539
1540         char t[9+32+1] = "_BOOT_ID=";
1541         Object *o;
1542         int r;
1543
1544         assert(f);
1545
1546         sd_id128_to_string(boot_id, t + 9);
1547         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1548         if (r < 0)
1549                 return r;
1550         if (r == 0)
1551                 return -ENOENT;
1552
1553         return generic_array_bisect_plus_one(f,
1554                                              le64toh(o->data.entry_offset),
1555                                              le64toh(o->data.entry_array_offset),
1556                                              le64toh(o->data.n_entries),
1557                                              monotonic,
1558                                              test_object_monotonic,
1559                                              direction,
1560                                              ret, offset, NULL);
1561 }
1562
1563 int journal_file_next_entry(
1564                 JournalFile *f,
1565                 Object *o, uint64_t p,
1566                 direction_t direction,
1567                 Object **ret, uint64_t *offset) {
1568
1569         uint64_t i, n;
1570         int r;
1571
1572         assert(f);
1573         assert(p > 0 || !o);
1574
1575         n = le64toh(f->header->n_entries);
1576         if (n <= 0)
1577                 return 0;
1578
1579         if (!o)
1580                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1581         else {
1582                 if (o->object.type != OBJECT_ENTRY)
1583                         return -EINVAL;
1584
1585                 r = generic_array_bisect(f,
1586                                          le64toh(f->header->entry_array_offset),
1587                                          le64toh(f->header->n_entries),
1588                                          p,
1589                                          test_object_offset,
1590                                          DIRECTION_DOWN,
1591                                          NULL, NULL,
1592                                          &i);
1593                 if (r <= 0)
1594                         return r;
1595
1596                 if (direction == DIRECTION_DOWN) {
1597                         if (i >= n - 1)
1598                                 return 0;
1599
1600                         i++;
1601                 } else {
1602                         if (i <= 0)
1603                                 return 0;
1604
1605                         i--;
1606                 }
1607         }
1608
1609         /* And jump to it */
1610         return generic_array_get(f,
1611                                  le64toh(f->header->entry_array_offset),
1612                                  i,
1613                                  ret, offset);
1614 }
1615
1616 int journal_file_skip_entry(
1617                 JournalFile *f,
1618                 Object *o, uint64_t p,
1619                 int64_t skip,
1620                 Object **ret, uint64_t *offset) {
1621
1622         uint64_t i, n;
1623         int r;
1624
1625         assert(f);
1626         assert(o);
1627         assert(p > 0);
1628
1629         if (o->object.type != OBJECT_ENTRY)
1630                 return -EINVAL;
1631
1632         r = generic_array_bisect(f,
1633                                  le64toh(f->header->entry_array_offset),
1634                                  le64toh(f->header->n_entries),
1635                                  p,
1636                                  test_object_offset,
1637                                  DIRECTION_DOWN,
1638                                  NULL, NULL,
1639                                  &i);
1640         if (r <= 0)
1641                 return r;
1642
1643         /* Calculate new index */
1644         if (skip < 0) {
1645                 if ((uint64_t) -skip >= i)
1646                         i = 0;
1647                 else
1648                         i = i - (uint64_t) -skip;
1649         } else
1650                 i  += (uint64_t) skip;
1651
1652         n = le64toh(f->header->n_entries);
1653         if (n <= 0)
1654                 return -EBADMSG;
1655
1656         if (i >= n)
1657                 i = n-1;
1658
1659         return generic_array_get(f,
1660                                  le64toh(f->header->entry_array_offset),
1661                                  i,
1662                                  ret, offset);
1663 }
1664
1665 int journal_file_next_entry_for_data(
1666                 JournalFile *f,
1667                 Object *o, uint64_t p,
1668                 uint64_t data_offset,
1669                 direction_t direction,
1670                 Object **ret, uint64_t *offset) {
1671
1672         uint64_t n, i;
1673         int r;
1674         Object *d;
1675
1676         assert(f);
1677         assert(p > 0 || !o);
1678
1679         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1680         if (r < 0)
1681                 return r;
1682
1683         n = le64toh(d->data.n_entries);
1684         if (n <= 0)
1685                 return n;
1686
1687         if (!o)
1688                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1689         else {
1690                 if (o->object.type != OBJECT_ENTRY)
1691                         return -EINVAL;
1692
1693                 r = generic_array_bisect_plus_one(f,
1694                                                   le64toh(d->data.entry_offset),
1695                                                   le64toh(d->data.entry_array_offset),
1696                                                   le64toh(d->data.n_entries),
1697                                                   p,
1698                                                   test_object_offset,
1699                                                   DIRECTION_DOWN,
1700                                                   NULL, NULL,
1701                                                   &i);
1702
1703                 if (r <= 0)
1704                         return r;
1705
1706                 if (direction == DIRECTION_DOWN) {
1707                         if (i >= n - 1)
1708                                 return 0;
1709
1710                         i++;
1711                 } else {
1712                         if (i <= 0)
1713                                 return 0;
1714
1715                         i--;
1716                 }
1717
1718         }
1719
1720         return generic_array_get_plus_one(f,
1721                                           le64toh(d->data.entry_offset),
1722                                           le64toh(d->data.entry_array_offset),
1723                                           i,
1724                                           ret, offset);
1725 }
1726
1727 int journal_file_move_to_entry_by_offset_for_data(
1728                 JournalFile *f,
1729                 uint64_t data_offset,
1730                 uint64_t p,
1731                 direction_t direction,
1732                 Object **ret, uint64_t *offset) {
1733
1734         int r;
1735         Object *d;
1736
1737         assert(f);
1738
1739         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1740         if (r < 0)
1741                 return r;
1742
1743         return generic_array_bisect_plus_one(f,
1744                                              le64toh(d->data.entry_offset),
1745                                              le64toh(d->data.entry_array_offset),
1746                                              le64toh(d->data.n_entries),
1747                                              p,
1748                                              test_object_offset,
1749                                              direction,
1750                                              ret, offset, NULL);
1751 }
1752
1753 int journal_file_move_to_entry_by_monotonic_for_data(
1754                 JournalFile *f,
1755                 uint64_t data_offset,
1756                 sd_id128_t boot_id,
1757                 uint64_t monotonic,
1758                 direction_t direction,
1759                 Object **ret, uint64_t *offset) {
1760
1761         char t[9+32+1] = "_BOOT_ID=";
1762         Object *o, *d;
1763         int r;
1764         uint64_t b, z;
1765
1766         assert(f);
1767
1768         /* First, seek by time */
1769         sd_id128_to_string(boot_id, t + 9);
1770         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1771         if (r < 0)
1772                 return r;
1773         if (r == 0)
1774                 return -ENOENT;
1775
1776         r = generic_array_bisect_plus_one(f,
1777                                           le64toh(o->data.entry_offset),
1778                                           le64toh(o->data.entry_array_offset),
1779                                           le64toh(o->data.n_entries),
1780                                           monotonic,
1781                                           test_object_monotonic,
1782                                           direction,
1783                                           NULL, &z, NULL);
1784         if (r <= 0)
1785                 return r;
1786
1787         /* And now, continue seeking until we find an entry that
1788          * exists in both bisection arrays */
1789
1790         for (;;) {
1791                 Object *qo;
1792                 uint64_t p, q;
1793
1794                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1795                 if (r < 0)
1796                         return r;
1797
1798                 r = generic_array_bisect_plus_one(f,
1799                                                   le64toh(d->data.entry_offset),
1800                                                   le64toh(d->data.entry_array_offset),
1801                                                   le64toh(d->data.n_entries),
1802                                                   z,
1803                                                   test_object_offset,
1804                                                   direction,
1805                                                   NULL, &p, NULL);
1806                 if (r <= 0)
1807                         return r;
1808
1809                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1810                 if (r < 0)
1811                         return r;
1812
1813                 r = generic_array_bisect_plus_one(f,
1814                                                   le64toh(o->data.entry_offset),
1815                                                   le64toh(o->data.entry_array_offset),
1816                                                   le64toh(o->data.n_entries),
1817                                                   p,
1818                                                   test_object_offset,
1819                                                   direction,
1820                                                   &qo, &q, NULL);
1821
1822                 if (r <= 0)
1823                         return r;
1824
1825                 if (p == q) {
1826                         if (ret)
1827                                 *ret = qo;
1828                         if (offset)
1829                                 *offset = q;
1830
1831                         return 1;
1832                 }
1833
1834                 z = q;
1835         }
1836
1837         return 0;
1838 }
1839
1840 int journal_file_move_to_entry_by_seqnum_for_data(
1841                 JournalFile *f,
1842                 uint64_t data_offset,
1843                 uint64_t seqnum,
1844                 direction_t direction,
1845                 Object **ret, uint64_t *offset) {
1846
1847         Object *d;
1848         int r;
1849
1850         assert(f);
1851
1852         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1853         if (r < 0)
1854                 return r;
1855
1856         return generic_array_bisect_plus_one(f,
1857                                              le64toh(d->data.entry_offset),
1858                                              le64toh(d->data.entry_array_offset),
1859                                              le64toh(d->data.n_entries),
1860                                              seqnum,
1861                                              test_object_seqnum,
1862                                              direction,
1863                                              ret, offset, NULL);
1864 }
1865
1866 int journal_file_move_to_entry_by_realtime_for_data(
1867                 JournalFile *f,
1868                 uint64_t data_offset,
1869                 uint64_t realtime,
1870                 direction_t direction,
1871                 Object **ret, uint64_t *offset) {
1872
1873         Object *d;
1874         int r;
1875
1876         assert(f);
1877
1878         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1879         if (r < 0)
1880                 return r;
1881
1882         return generic_array_bisect_plus_one(f,
1883                                              le64toh(d->data.entry_offset),
1884                                              le64toh(d->data.entry_array_offset),
1885                                              le64toh(d->data.n_entries),
1886                                              realtime,
1887                                              test_object_realtime,
1888                                              direction,
1889                                              ret, offset, NULL);
1890 }
1891
1892 static void *fsprg_state(JournalFile *f) {
1893         uint64_t a, b;
1894         assert(f);
1895
1896         if (!f->authenticate)
1897                 return NULL;
1898
1899         a = le64toh(f->fsprg_header->header_size);
1900         b = le64toh(f->fsprg_header->state_size);
1901
1902         if (a + b > f->fsprg_size)
1903                 return NULL;
1904
1905         return (uint8_t*) f->fsprg_header + a;
1906 }
1907
1908 static uint64_t journal_file_tag_seqnum(JournalFile *f) {
1909         uint64_t r;
1910
1911         assert(f);
1912
1913         r = le64toh(f->header->n_tags) + 1;
1914         f->header->n_tags = htole64(r);
1915
1916         return r;
1917 }
1918
1919 int journal_file_append_tag(JournalFile *f) {
1920         Object *o;
1921         uint64_t p;
1922         int r;
1923
1924         assert(f);
1925
1926         if (!f->authenticate)
1927                 return 0;
1928
1929         if (!f->hmac_running)
1930                 return 0;
1931
1932         log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
1933
1934         assert(f->hmac);
1935
1936         r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
1937         if (r < 0)
1938                 return r;
1939
1940         o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
1941
1942         /* Add the tag object itself, so that we can protect its
1943          * header. This will exclude the actual hash value in it */
1944         r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
1945         if (r < 0)
1946                 return r;
1947
1948         /* Get the HMAC tag and store it in the object */
1949         memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
1950         f->hmac_running = false;
1951
1952         return 0;
1953 }
1954
1955 static int journal_file_hmac_start(JournalFile *f) {
1956         uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
1957
1958         assert(f);
1959
1960         if (!f->authenticate)
1961                 return 0;
1962
1963         if (f->hmac_running)
1964                 return 0;
1965
1966         /* Prepare HMAC for next cycle */
1967         gcry_md_reset(f->hmac);
1968         FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
1969         gcry_md_setkey(f->hmac, key, sizeof(key));
1970
1971         f->hmac_running = true;
1972
1973         return 0;
1974 }
1975
1976 static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
1977         uint64_t t;
1978
1979         assert(f);
1980         assert(epoch);
1981         assert(f->authenticate);
1982
1983         if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
1984             le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
1985                 return -ENOTSUP;
1986
1987         if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
1988                 return -ESTALE;
1989
1990         t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
1991         t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
1992
1993         *epoch = t;
1994         return 0;
1995 }
1996
1997 static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
1998         uint64_t goal, epoch;
1999         int r;
2000         assert(f);
2001
2002         if (!f->authenticate)
2003                 return 0;
2004
2005         r = journal_file_get_epoch(f, realtime, &goal);
2006         if (r < 0)
2007                 return r;
2008
2009         epoch = FSPRG_GetEpoch(fsprg_state(f));
2010         if (epoch > goal)
2011                 return -ESTALE;
2012
2013         return epoch != goal;
2014 }
2015
2016 static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
2017         uint64_t goal, epoch;
2018         int r;
2019
2020         assert(f);
2021
2022         if (!f->authenticate)
2023                 return 0;
2024
2025         r = journal_file_get_epoch(f, realtime, &goal);
2026         if (r < 0)
2027                 return r;
2028
2029         epoch = FSPRG_GetEpoch(fsprg_state(f));
2030         if (epoch < goal)
2031                 log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
2032
2033         for (;;) {
2034                 if (epoch > goal)
2035                         return -ESTALE;
2036                 if (epoch == goal)
2037                         return 0;
2038
2039                 FSPRG_Evolve(fsprg_state(f));
2040                 epoch = FSPRG_GetEpoch(fsprg_state(f));
2041         }
2042 }
2043
2044 static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
2045         int r;
2046
2047         assert(f);
2048
2049         if (!f->authenticate)
2050                 return 0;
2051
2052         r = journal_file_need_evolve(f, realtime);
2053         if (r <= 0)
2054                 return 0;
2055
2056         r = journal_file_append_tag(f);
2057         if (r < 0)
2058                 return r;
2059
2060         r = journal_file_evolve(f, realtime);
2061         if (r < 0)
2062                 return r;
2063
2064         r = journal_file_hmac_start(f);
2065         if (r < 0)
2066                 return r;
2067
2068         return 0;
2069 }
2070
2071 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
2072         int r;
2073         Object *o;
2074
2075         assert(f);
2076
2077         if (!f->authenticate)
2078                 return 0;
2079
2080         r = journal_file_hmac_start(f);
2081         if (r < 0)
2082                 return r;
2083
2084         r = journal_file_move_to_object(f, type, p, &o);
2085         if (r < 0)
2086                 return r;
2087
2088         gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
2089
2090         switch (o->object.type) {
2091
2092         case OBJECT_DATA:
2093                 /* All but: hash and payload are mutable */
2094                 gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
2095                 gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
2096                 break;
2097
2098         case OBJECT_ENTRY:
2099                 /* All */
2100                 gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
2101                 break;
2102
2103         case OBJECT_FIELD_HASH_TABLE:
2104         case OBJECT_DATA_HASH_TABLE:
2105         case OBJECT_ENTRY_ARRAY:
2106                 /* Nothing: everything is mutable */
2107                 break;
2108
2109         case OBJECT_TAG:
2110                 /* All but the tag itself */
2111                 gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
2112                 break;
2113         default:
2114                 return -EINVAL;
2115         }
2116
2117         return 0;
2118 }
2119
2120 static int journal_file_hmac_put_header(JournalFile *f) {
2121         int r;
2122
2123         assert(f);
2124
2125         if (!f->authenticate)
2126                 return 0;
2127
2128         r = journal_file_hmac_start(f);
2129         if (r < 0)
2130                 return r;
2131
2132         /* All but state+reserved, boot_id, arena_size,
2133          * tail_object_offset, n_objects, n_entries, tail_seqnum,
2134          * head_entry_realtime, tail_entry_realtime,
2135          * tail_entry_monotonic, n_data, n_fields, header_tag */
2136
2137         gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
2138         gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
2139         gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
2140         gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
2141         gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
2142
2143         return 0;
2144 }
2145
2146 static int journal_file_load_fsprg(JournalFile *f) {
2147         int r, fd = -1;
2148         char *p = NULL;
2149         struct stat st;
2150         FSPRGHeader *m = NULL;
2151         sd_id128_t machine;
2152
2153         assert(f);
2154
2155         if (!f->authenticate)
2156                 return 0;
2157
2158         r = sd_id128_get_machine(&machine);
2159         if (r < 0)
2160                 return r;
2161
2162         if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
2163                      SD_ID128_FORMAT_VAL(machine)) < 0)
2164                 return -ENOMEM;
2165
2166         fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
2167         if (fd < 0) {
2168                 log_error("Failed to open %s: %m", p);
2169                 r = -errno;
2170                 goto finish;
2171         }
2172
2173         if (fstat(fd, &st) < 0) {
2174                 r = -errno;
2175                 goto finish;
2176         }
2177
2178         if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
2179                 r = -ENODATA;
2180                 goto finish;
2181         }
2182
2183         m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
2184         if (m == MAP_FAILED) {
2185                 m = NULL;
2186                 r = -errno;
2187                 goto finish;
2188         }
2189
2190         if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
2191                 r = -EBADMSG;
2192                 goto finish;
2193         }
2194
2195         if (m->incompatible_flags != 0) {
2196                 r = -EPROTONOSUPPORT;
2197                 goto finish;
2198         }
2199
2200         if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
2201                 r = -EBADMSG;
2202                 goto finish;
2203         }
2204
2205         if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
2206                 r = -EBADMSG;
2207                 goto finish;
2208         }
2209
2210         f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
2211         if ((uint64_t) st.st_size < f->fsprg_size) {
2212                 r = -ENODATA;
2213                 goto finish;
2214         }
2215
2216         if (!sd_id128_equal(machine, m->machine_id)) {
2217                 r = -EHOSTDOWN;
2218                 goto finish;
2219         }
2220
2221         if (le64toh(m->fsprg_start_usec) <= 0 ||
2222             le64toh(m->fsprg_interval_usec) <= 0) {
2223                 r = -EBADMSG;
2224                 goto finish;
2225         }
2226
2227         f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2228         if (f->fsprg_header == MAP_FAILED) {
2229                 f->fsprg_header = NULL;
2230                 r = -errno;
2231                 goto finish;
2232         }
2233
2234         r = 0;
2235
2236 finish:
2237         if (m)
2238                 munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
2239
2240         if (fd >= 0)
2241                 close_nointr_nofail(fd);
2242
2243         free(p);
2244         return r;
2245 }
2246
2247 static int journal_file_setup_hmac(JournalFile *f) {
2248         gcry_error_t e;
2249
2250         if (!f->authenticate)
2251                 return 0;
2252
2253         e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
2254         if (e != 0)
2255                 return -ENOTSUP;
2256
2257         return 0;
2258 }
2259
2260 static int journal_file_append_first_tag(JournalFile *f) {
2261         int r;
2262         uint64_t p;
2263
2264         if (!f->authenticate)
2265                 return 0;
2266
2267         log_debug("Calculating first tag...");
2268
2269         r = journal_file_hmac_put_header(f);
2270         if (r < 0)
2271                 return r;
2272
2273         p = le64toh(f->header->field_hash_table_offset);
2274         if (p < offsetof(Object, hash_table.items))
2275                 return -EINVAL;
2276         p -= offsetof(Object, hash_table.items);
2277
2278         r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
2279         if (r < 0)
2280                 return r;
2281
2282         p = le64toh(f->header->data_hash_table_offset);
2283         if (p < offsetof(Object, hash_table.items))
2284                 return -EINVAL;
2285         p -= offsetof(Object, hash_table.items);
2286
2287         r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
2288         if (r < 0)
2289                 return r;
2290
2291         r = journal_file_append_tag(f);
2292         if (r < 0)
2293                 return r;
2294
2295         return 0;
2296 }
2297
2298 void journal_file_dump(JournalFile *f) {
2299         Object *o;
2300         int r;
2301         uint64_t p;
2302
2303         assert(f);
2304
2305         journal_file_print_header(f);
2306
2307         p = le64toh(f->header->header_size);
2308         while (p != 0) {
2309                 r = journal_file_move_to_object(f, -1, p, &o);
2310                 if (r < 0)
2311                         goto fail;
2312
2313                 switch (o->object.type) {
2314
2315                 case OBJECT_UNUSED:
2316                         printf("Type: OBJECT_UNUSED\n");
2317                         break;
2318
2319                 case OBJECT_DATA:
2320                         printf("Type: OBJECT_DATA\n");
2321                         break;
2322
2323                 case OBJECT_ENTRY:
2324                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
2325                                (unsigned long long) le64toh(o->entry.seqnum),
2326                                (unsigned long long) le64toh(o->entry.monotonic),
2327                                (unsigned long long) le64toh(o->entry.realtime));
2328                         break;
2329
2330                 case OBJECT_FIELD_HASH_TABLE:
2331                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
2332                         break;
2333
2334                 case OBJECT_DATA_HASH_TABLE:
2335                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
2336                         break;
2337
2338                 case OBJECT_ENTRY_ARRAY:
2339                         printf("Type: OBJECT_ENTRY_ARRAY\n");
2340                         break;
2341
2342                 case OBJECT_TAG:
2343                         printf("Type: OBJECT_TAG %llu\n",
2344                                (unsigned long long) le64toh(o->tag.seqnum));
2345                         break;
2346                 }
2347
2348                 if (o->object.flags & OBJECT_COMPRESSED)
2349                         printf("Flags: COMPRESSED\n");
2350
2351                 if (p == le64toh(f->header->tail_object_offset))
2352                         p = 0;
2353                 else
2354                         p = p + ALIGN64(le64toh(o->object.size));
2355         }
2356
2357         return;
2358 fail:
2359         log_error("File corrupt");
2360 }
2361
2362 void journal_file_print_header(JournalFile *f) {
2363         char a[33], b[33], c[33];
2364         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
2365
2366         assert(f);
2367
2368         printf("File Path: %s\n"
2369                "File ID: %s\n"
2370                "Machine ID: %s\n"
2371                "Boot ID: %s\n"
2372                "Sequential Number ID: %s\n"
2373                "State: %s\n"
2374                "Compatible Flags:%s%s\n"
2375                "Incompatible Flags:%s%s\n"
2376                "Header size: %llu\n"
2377                "Arena size: %llu\n"
2378                "Data Hash Table Size: %llu\n"
2379                "Field Hash Table Size: %llu\n"
2380                "Objects: %llu\n"
2381                "Entry Objects: %llu\n"
2382                "Rotate Suggested: %s\n"
2383                "Head Sequential Number: %llu\n"
2384                "Tail Sequential Number: %llu\n"
2385                "Head Realtime Timestamp: %s\n"
2386                "Tail Realtime Timestamp: %s\n",
2387                f->path,
2388                sd_id128_to_string(f->header->file_id, a),
2389                sd_id128_to_string(f->header->machine_id, b),
2390                sd_id128_to_string(f->header->boot_id, c),
2391                sd_id128_to_string(f->header->seqnum_id, c),
2392                f->header->state == STATE_OFFLINE ? "offline" :
2393                f->header->state == STATE_ONLINE ? "online" :
2394                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
2395                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
2396                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
2397                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
2398                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
2399                (unsigned long long) le64toh(f->header->header_size),
2400                (unsigned long long) le64toh(f->header->arena_size),
2401                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
2402                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
2403                (unsigned long long) le64toh(f->header->n_objects),
2404                (unsigned long long) le64toh(f->header->n_entries),
2405                yes_no(journal_file_rotate_suggested(f)),
2406                (unsigned long long) le64toh(f->header->head_seqnum),
2407                (unsigned long long) le64toh(f->header->tail_seqnum),
2408                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
2409                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
2410
2411         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2412                 printf("Data Objects: %llu\n"
2413                        "Data Hash Table Fill: %.1f%%\n",
2414                        (unsigned long long) le64toh(f->header->n_data),
2415                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
2416
2417         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2418                 printf("Field Objects: %llu\n"
2419                        "Field Hash Table Fill: %.1f%%\n",
2420                        (unsigned long long) le64toh(f->header->n_fields),
2421                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
2422 }
2423
2424 int journal_file_open(
2425                 const char *fname,
2426                 int flags,
2427                 mode_t mode,
2428                 bool compress,
2429                 bool authenticate,
2430                 JournalMetrics *metrics,
2431                 JournalFile *template,
2432                 JournalFile **ret) {
2433
2434         JournalFile *f;
2435         int r;
2436         bool newly_created = false;
2437
2438         assert(fname);
2439
2440         if ((flags & O_ACCMODE) != O_RDONLY &&
2441             (flags & O_ACCMODE) != O_RDWR)
2442                 return -EINVAL;
2443
2444         if (!endswith(fname, ".journal"))
2445                 return -EINVAL;
2446
2447         f = new0(JournalFile, 1);
2448         if (!f)
2449                 return -ENOMEM;
2450
2451         f->fd = -1;
2452         f->mode = mode;
2453
2454         f->flags = flags;
2455         f->prot = prot_from_flags(flags);
2456         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2457         f->compress = compress;
2458         f->authenticate = authenticate;
2459
2460         f->path = strdup(fname);
2461         if (!f->path) {
2462                 r = -ENOMEM;
2463                 goto fail;
2464         }
2465
2466         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2467         if (f->fd < 0) {
2468                 r = -errno;
2469                 goto fail;
2470         }
2471
2472         if (fstat(f->fd, &f->last_stat) < 0) {
2473                 r = -errno;
2474                 goto fail;
2475         }
2476
2477         if (f->last_stat.st_size == 0 && f->writable) {
2478                 newly_created = true;
2479
2480                 /* Try to load the FSPRG state, and if we can't, then
2481                  * just don't do authentication */
2482                 r = journal_file_load_fsprg(f);
2483                 if (r < 0)
2484                         f->authenticate = false;
2485
2486                 r = journal_file_init_header(f, template);
2487                 if (r < 0)
2488                         goto fail;
2489
2490                 if (fstat(f->fd, &f->last_stat) < 0) {
2491                         r = -errno;
2492                         goto fail;
2493                 }
2494         }
2495
2496         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2497                 r = -EIO;
2498                 goto fail;
2499         }
2500
2501         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2502         if (f->header == MAP_FAILED) {
2503                 f->header = NULL;
2504                 r = -errno;
2505                 goto fail;
2506         }
2507
2508         if (!newly_created) {
2509                 r = journal_file_verify_header(f);
2510                 if (r < 0)
2511                         goto fail;
2512         }
2513
2514         if (!newly_created && f->writable) {
2515                 r = journal_file_load_fsprg(f);
2516                 if (r < 0)
2517                         goto fail;
2518         }
2519
2520         if (f->writable) {
2521                 if (metrics) {
2522                         journal_default_metrics(metrics, f->fd);
2523                         f->metrics = *metrics;
2524                 } else if (template)
2525                         f->metrics = template->metrics;
2526
2527                 r = journal_file_refresh_header(f);
2528                 if (r < 0)
2529                         goto fail;
2530
2531                 r = journal_file_setup_hmac(f);
2532                 if (r < 0)
2533                         goto fail;
2534         }
2535
2536         if (newly_created) {
2537                 r = journal_file_setup_field_hash_table(f);
2538                 if (r < 0)
2539                         goto fail;
2540
2541                 r = journal_file_setup_data_hash_table(f);
2542                 if (r < 0)
2543                         goto fail;
2544
2545                 r = journal_file_append_first_tag(f);
2546                 if (r < 0)
2547                         goto fail;
2548         }
2549
2550         r = journal_file_map_field_hash_table(f);
2551         if (r < 0)
2552                 goto fail;
2553
2554         r = journal_file_map_data_hash_table(f);
2555         if (r < 0)
2556                 goto fail;
2557
2558         if (ret)
2559                 *ret = f;
2560
2561         return 0;
2562
2563 fail:
2564         journal_file_close(f);
2565
2566         return r;
2567 }
2568
2569 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2570         char *p;
2571         size_t l;
2572         JournalFile *old_file, *new_file = NULL;
2573         int r;
2574
2575         assert(f);
2576         assert(*f);
2577
2578         old_file = *f;
2579
2580         if (!old_file->writable)
2581                 return -EINVAL;
2582
2583         if (!endswith(old_file->path, ".journal"))
2584                 return -EINVAL;
2585
2586         l = strlen(old_file->path);
2587
2588         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2589         if (!p)
2590                 return -ENOMEM;
2591
2592         memcpy(p, old_file->path, l - 8);
2593         p[l-8] = '@';
2594         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2595         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2596                  "-%016llx-%016llx.journal",
2597                  (unsigned long long) le64toh((*f)->header->tail_seqnum),
2598                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2599
2600         r = rename(old_file->path, p);
2601         free(p);
2602
2603         if (r < 0)
2604                 return -errno;
2605
2606         old_file->header->state = STATE_ARCHIVED;
2607
2608         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file, &new_file);
2609         journal_file_close(old_file);
2610
2611         *f = new_file;
2612         return r;
2613 }
2614
2615 int journal_file_open_reliably(
2616                 const char *fname,
2617                 int flags,
2618                 mode_t mode,
2619                 bool compress,
2620                 bool authenticate,
2621                 JournalMetrics *metrics,
2622                 JournalFile *template,
2623                 JournalFile **ret) {
2624
2625         int r;
2626         size_t l;
2627         char *p;
2628
2629         r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
2630         if (r != -EBADMSG && /* corrupted */
2631             r != -ENODATA && /* truncated */
2632             r != -EHOSTDOWN && /* other machine */
2633             r != -EPROTONOSUPPORT && /* incompatible feature */
2634             r != -EBUSY && /* unclean shutdown */
2635             r != -ESHUTDOWN /* already archived */)
2636                 return r;
2637
2638         if ((flags & O_ACCMODE) == O_RDONLY)
2639                 return r;
2640
2641         if (!(flags & O_CREAT))
2642                 return r;
2643
2644         if (!endswith(fname, ".journal"))
2645                 return r;
2646
2647         /* The file is corrupted. Rotate it away and try it again (but only once) */
2648
2649         l = strlen(fname);
2650         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2651                      (int) (l-8), fname,
2652                      (unsigned long long) now(CLOCK_REALTIME),
2653                      random_ull()) < 0)
2654                 return -ENOMEM;
2655
2656         r = rename(fname, p);
2657         free(p);
2658         if (r < 0)
2659                 return -errno;
2660
2661         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2662
2663         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
2664 }
2665
2666 struct vacuum_info {
2667         off_t usage;
2668         char *filename;
2669
2670         uint64_t realtime;
2671         sd_id128_t seqnum_id;
2672         uint64_t seqnum;
2673
2674         bool have_seqnum;
2675 };
2676
2677 static int vacuum_compare(const void *_a, const void *_b) {
2678         const struct vacuum_info *a, *b;
2679
2680         a = _a;
2681         b = _b;
2682
2683         if (a->have_seqnum && b->have_seqnum &&
2684             sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
2685                 if (a->seqnum < b->seqnum)
2686                         return -1;
2687                 else if (a->seqnum > b->seqnum)
2688                         return 1;
2689                 else
2690                         return 0;
2691         }
2692
2693         if (a->realtime < b->realtime)
2694                 return -1;
2695         else if (a->realtime > b->realtime)
2696                 return 1;
2697         else if (a->have_seqnum && b->have_seqnum)
2698                 return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
2699         else
2700                 return strcmp(a->filename, b->filename);
2701 }
2702
2703 int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
2704         DIR *d;
2705         int r = 0;
2706         struct vacuum_info *list = NULL;
2707         unsigned n_list = 0, n_allocated = 0, i;
2708         uint64_t sum = 0;
2709
2710         assert(directory);
2711
2712         if (max_use <= 0)
2713                 return 0;
2714
2715         d = opendir(directory);
2716         if (!d)
2717                 return -errno;
2718
2719         for (;;) {
2720                 int k;
2721                 struct dirent buf, *de;
2722                 size_t q;
2723                 struct stat st;
2724                 char *p;
2725                 unsigned long long seqnum = 0, realtime;
2726                 sd_id128_t seqnum_id;
2727                 bool have_seqnum;
2728
2729                 k = readdir_r(d, &buf, &de);
2730                 if (k != 0) {
2731                         r = -k;
2732                         goto finish;
2733                 }
2734
2735                 if (!de)
2736                         break;
2737
2738                 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
2739                         continue;
2740
2741                 if (!S_ISREG(st.st_mode))
2742                         continue;
2743
2744                 q = strlen(de->d_name);
2745
2746                 if (endswith(de->d_name, ".journal")) {
2747
2748                         /* Vacuum archived files */
2749
2750                         if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
2751                                 continue;
2752
2753                         if (de->d_name[q-8-16-1] != '-' ||
2754                             de->d_name[q-8-16-1-16-1] != '-' ||
2755                             de->d_name[q-8-16-1-16-1-32-1] != '@')
2756                                 continue;
2757
2758                         p = strdup(de->d_name);
2759                         if (!p) {
2760                                 r = -ENOMEM;
2761                                 goto finish;
2762                         }
2763
2764                         de->d_name[q-8-16-1-16-1] = 0;
2765                         if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
2766                                 free(p);
2767                                 continue;
2768                         }
2769
2770                         if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
2771                                 free(p);
2772                                 continue;
2773                         }
2774
2775                         have_seqnum = true;
2776
2777                 } else if (endswith(de->d_name, ".journal~")) {
2778                         unsigned long long tmp;
2779
2780                         /* Vacuum corrupted files */
2781
2782                         if (q < 1 + 16 + 1 + 16 + 8 + 1)
2783                                 continue;
2784
2785                         if (de->d_name[q-1-8-16-1] != '-' ||
2786                             de->d_name[q-1-8-16-1-16-1] != '@')
2787                                 continue;
2788
2789                         p = strdup(de->d_name);
2790                         if (!p) {
2791                                 r = -ENOMEM;
2792                                 goto finish;
2793                         }
2794
2795                         if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
2796                                 free(p);
2797                                 continue;
2798                         }
2799
2800                         have_seqnum = false;
2801                 } else
2802                         continue;
2803
2804                 if (n_list >= n_allocated) {
2805                         struct vacuum_info *j;
2806
2807                         n_allocated = MAX(n_allocated * 2U, 8U);
2808                         j = realloc(list, n_allocated * sizeof(struct vacuum_info));
2809                         if (!j) {
2810                                 free(p);
2811                                 r = -ENOMEM;
2812                                 goto finish;
2813                         }
2814
2815                         list = j;
2816                 }
2817
2818                 list[n_list].filename = p;
2819                 list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
2820                 list[n_list].seqnum = seqnum;
2821                 list[n_list].realtime = realtime;
2822                 list[n_list].seqnum_id = seqnum_id;
2823                 list[n_list].have_seqnum = have_seqnum;
2824
2825                 sum += list[n_list].usage;
2826
2827                 n_list ++;
2828         }
2829
2830         if (n_list > 0)
2831                 qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
2832
2833         for(i = 0; i < n_list; i++) {
2834                 struct statvfs ss;
2835
2836                 if (fstatvfs(dirfd(d), &ss) < 0) {
2837                         r = -errno;
2838                         goto finish;
2839                 }
2840
2841                 if (sum <= max_use &&
2842                     (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
2843                         break;
2844
2845                 if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
2846                         log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
2847                         sum -= list[i].usage;
2848                 } else if (errno != ENOENT)
2849                         log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
2850         }
2851
2852 finish:
2853         for (i = 0; i < n_list; i++)
2854                 free(list[i].filename);
2855
2856         free(list);
2857
2858         if (d)
2859                 closedir(d);
2860
2861         return r;
2862 }
2863
2864 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2865         uint64_t i, n;
2866         uint64_t q, xor_hash = 0;
2867         int r;
2868         EntryItem *items;
2869         dual_timestamp ts;
2870
2871         assert(from);
2872         assert(to);
2873         assert(o);
2874         assert(p);
2875
2876         if (!to->writable)
2877                 return -EPERM;
2878
2879         ts.monotonic = le64toh(o->entry.monotonic);
2880         ts.realtime = le64toh(o->entry.realtime);
2881
2882         if (to->tail_entry_monotonic_valid &&
2883             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2884                 return -EINVAL;
2885
2886         n = journal_file_entry_n_items(o);
2887         items = alloca(sizeof(EntryItem) * n);
2888
2889         for (i = 0; i < n; i++) {
2890                 uint64_t l, h;
2891                 le64_t le_hash;
2892                 size_t t;
2893                 void *data;
2894                 Object *u;
2895
2896                 q = le64toh(o->entry.items[i].object_offset);
2897                 le_hash = o->entry.items[i].hash;
2898
2899                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2900                 if (r < 0)
2901                         return r;
2902
2903                 if (le_hash != o->data.hash)
2904                         return -EBADMSG;
2905
2906                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2907                 t = (size_t) l;
2908
2909                 /* We hit the limit on 32bit machines */
2910                 if ((uint64_t) t != l)
2911                         return -E2BIG;
2912
2913                 if (o->object.flags & OBJECT_COMPRESSED) {
2914 #ifdef HAVE_XZ
2915                         uint64_t rsize;
2916
2917                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2918                                 return -EBADMSG;
2919
2920                         data = from->compress_buffer;
2921                         l = rsize;
2922 #else
2923                         return -EPROTONOSUPPORT;
2924 #endif
2925                 } else
2926                         data = o->data.payload;
2927
2928                 r = journal_file_append_data(to, data, l, &u, &h);
2929                 if (r < 0)
2930                         return r;
2931
2932                 xor_hash ^= le64toh(u->data.hash);
2933                 items[i].object_offset = htole64(h);
2934                 items[i].hash = u->data.hash;
2935
2936                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2937                 if (r < 0)
2938                         return r;
2939         }
2940
2941         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2942 }
2943
2944 void journal_default_metrics(JournalMetrics *m, int fd) {
2945         uint64_t fs_size = 0;
2946         struct statvfs ss;
2947         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2948
2949         assert(m);
2950         assert(fd >= 0);
2951
2952         if (fstatvfs(fd, &ss) >= 0)
2953                 fs_size = ss.f_frsize * ss.f_blocks;
2954
2955         if (m->max_use == (uint64_t) -1) {
2956
2957                 if (fs_size > 0) {
2958                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2959
2960                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2961                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2962
2963                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2964                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2965                 } else
2966                         m->max_use = DEFAULT_MAX_USE_LOWER;
2967         } else {
2968                 m->max_use = PAGE_ALIGN(m->max_use);
2969
2970                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2971                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2972         }
2973
2974         if (m->max_size == (uint64_t) -1) {
2975                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2976
2977                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2978                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2979         } else
2980                 m->max_size = PAGE_ALIGN(m->max_size);
2981
2982         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2983                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2984
2985         if (m->max_size*2 > m->max_use)
2986                 m->max_use = m->max_size*2;
2987
2988         if (m->min_size == (uint64_t) -1)
2989                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2990         else {
2991                 m->min_size = PAGE_ALIGN(m->min_size);
2992
2993                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2994                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2995
2996                 if (m->min_size > m->max_size)
2997                         m->max_size = m->min_size;
2998         }
2999
3000         if (m->keep_free == (uint64_t) -1) {
3001
3002                 if (fs_size > 0) {
3003                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
3004
3005                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
3006                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
3007
3008                 } else
3009                         m->keep_free = DEFAULT_KEEP_FREE;
3010         }
3011
3012         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
3013                  format_bytes(a, sizeof(a), m->max_use),
3014                  format_bytes(b, sizeof(b), m->max_size),
3015                  format_bytes(c, sizeof(c), m->min_size),
3016                  format_bytes(d, sizeof(d), m->keep_free));
3017 }
3018
3019 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
3020         assert(f);
3021         assert(from || to);
3022
3023         if (from) {
3024                 if (f->header->head_entry_realtime == 0)
3025                         return -ENOENT;
3026
3027                 *from = le64toh(f->header->head_entry_realtime);
3028         }
3029
3030         if (to) {
3031                 if (f->header->tail_entry_realtime == 0)
3032                         return -ENOENT;
3033
3034                 *to = le64toh(f->header->tail_entry_realtime);
3035         }
3036
3037         return 1;
3038 }
3039
3040 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
3041         char t[9+32+1] = "_BOOT_ID=";
3042         Object *o;
3043         uint64_t p;
3044         int r;
3045
3046         assert(f);
3047         assert(from || to);
3048
3049         sd_id128_to_string(boot_id, t + 9);
3050
3051         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
3052         if (r <= 0)
3053                 return r;
3054
3055         if (le64toh(o->data.n_entries) <= 0)
3056                 return 0;
3057
3058         if (from) {
3059                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
3060                 if (r < 0)
3061                         return r;
3062
3063                 *from = le64toh(o->entry.monotonic);
3064         }
3065
3066         if (to) {
3067                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
3068                 if (r < 0)
3069                         return r;
3070
3071                 r = generic_array_get_plus_one(f,
3072                                                le64toh(o->data.entry_offset),
3073                                                le64toh(o->data.entry_array_offset),
3074                                                le64toh(o->data.n_entries)-1,
3075                                                &o, NULL);
3076                 if (r <= 0)
3077                         return r;
3078
3079                 *to = le64toh(o->entry.monotonic);
3080         }
3081
3082         return 1;
3083 }
3084
3085 bool journal_file_rotate_suggested(JournalFile *f) {
3086         assert(f);
3087
3088         /* If we gained new header fields we gained new features,
3089          * hence suggest a rotation */
3090         if (le64toh(f->header->header_size) < sizeof(Header)) {
3091                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
3092                 return true;
3093         }
3094
3095         /* Let's check if the hash tables grew over a certain fill
3096          * level (75%, borrowing this value from Java's hash table
3097          * implementation), and if so suggest a rotation. To calculate
3098          * the fill level we need the n_data field, which only exists
3099          * in newer versions. */
3100
3101         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
3102                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3103                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
3104                                   f->path,
3105                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
3106                                   (unsigned long long) le64toh(f->header->n_data),
3107                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
3108                                   (unsigned long long) (f->last_stat.st_size),
3109                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
3110                         return true;
3111                 }
3112
3113         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
3114                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
3115                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
3116                                   f->path,
3117                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
3118                                   (unsigned long long) le64toh(f->header->n_fields),
3119                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
3120                         return true;
3121                 }
3122
3123         return false;
3124 }