chiark / gitweb /
81952a08eeac72cf6d2e853f51d6bddd94b5d62d
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
39
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
41
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */
44
45 /* These are the lower and upper bounds if we deduce the max_use value
46  * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
49
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
52
53 /* This is the upper bound if we deduce the keep_free value from the
54  * file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57 /* This is the keep_free value when we can't determine the file
58  * system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MB */
60
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->authenticate)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fsprg_header)
100                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
101
102         if (f->hmac)
103                 gcry_md_close(f->hmac);
104 #endif
105
106         free(f);
107 }
108
109 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
110         Header h;
111         ssize_t k;
112         int r;
113
114         assert(f);
115
116         zero(h);
117         memcpy(h.signature, HEADER_SIGNATURE, 8);
118         h.header_size = htole64(ALIGN64(sizeof(h)));
119
120         h.incompatible_flags =
121                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
122
123         h.compatible_flags =
124                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
125
126         r = sd_id128_randomize(&h.file_id);
127         if (r < 0)
128                 return r;
129
130         if (template) {
131                 h.seqnum_id = template->header->seqnum_id;
132                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
133         } else
134                 h.seqnum_id = h.file_id;
135
136         k = pwrite(f->fd, &h, sizeof(h), 0);
137         if (k < 0)
138                 return -errno;
139
140         if (k != sizeof(h))
141                 return -EIO;
142
143         return 0;
144 }
145
146 static int journal_file_refresh_header(JournalFile *f) {
147         int r;
148         sd_id128_t boot_id;
149
150         assert(f);
151
152         r = sd_id128_get_machine(&f->header->machine_id);
153         if (r < 0)
154                 return r;
155
156         r = sd_id128_get_boot(&boot_id);
157         if (r < 0)
158                 return r;
159
160         if (sd_id128_equal(boot_id, f->header->boot_id))
161                 f->tail_entry_monotonic_valid = true;
162
163         f->header->boot_id = boot_id;
164
165         f->header->state = STATE_ONLINE;
166
167         /* Sync the online state to disk */
168         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
169         fdatasync(f->fd);
170
171         return 0;
172 }
173
174 static int journal_file_verify_header(JournalFile *f) {
175         assert(f);
176
177         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
178                 return -EBADMSG;
179
180         /* In both read and write mode we refuse to open files with
181          * incompatible flags we don't know */
182 #ifdef HAVE_XZ
183         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
184                 return -EPROTONOSUPPORT;
185 #else
186         if (f->header->incompatible_flags != 0)
187                 return -EPROTONOSUPPORT;
188 #endif
189
190         /* When open for writing we refuse to open files with
191          * compatible flags, too */
192         if (f->writable) {
193 #ifdef HAVE_GCRYPT
194                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
195                         return -EPROTONOSUPPORT;
196 #else
197                 if (f->header->compatible_flags != 0)
198                         return -EPROTONOSUPPORT;
199 #endif
200         }
201
202         /* The first addition was n_data, so check that we are at least this large */
203         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
204                 return -EBADMSG;
205
206         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
207                 !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
208                 return -EBADMSG;
209
210         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
211                 return -ENODATA;
212
213         if (f->writable) {
214                 uint8_t state;
215                 sd_id128_t machine_id;
216                 int r;
217
218                 r = sd_id128_get_machine(&machine_id);
219                 if (r < 0)
220                         return r;
221
222                 if (!sd_id128_equal(machine_id, f->header->machine_id))
223                         return -EHOSTDOWN;
224
225                 state = f->header->state;
226
227                 if (state == STATE_ONLINE) {
228                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
229                         return -EBUSY;
230                 } else if (state == STATE_ARCHIVED)
231                         return -ESHUTDOWN;
232                 else if (state != STATE_OFFLINE) {
233                         log_debug("Journal file %s has unknown state %u.", f->path, state);
234                         return -EBUSY;
235                 }
236         }
237
238         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
239         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
240
241         return 0;
242 }
243
244 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
245         uint64_t old_size, new_size;
246         int r;
247
248         assert(f);
249
250         /* We assume that this file is not sparse, and we know that
251          * for sure, since we always call posix_fallocate()
252          * ourselves */
253
254         old_size =
255                 le64toh(f->header->header_size) +
256                 le64toh(f->header->arena_size);
257
258         new_size = PAGE_ALIGN(offset + size);
259         if (new_size < le64toh(f->header->header_size))
260                 new_size = le64toh(f->header->header_size);
261
262         if (new_size <= old_size)
263                 return 0;
264
265         if (f->metrics.max_size > 0 &&
266             new_size > f->metrics.max_size)
267                 return -E2BIG;
268
269         if (new_size > f->metrics.min_size &&
270             f->metrics.keep_free > 0) {
271                 struct statvfs svfs;
272
273                 if (fstatvfs(f->fd, &svfs) >= 0) {
274                         uint64_t available;
275
276                         available = svfs.f_bfree * svfs.f_bsize;
277
278                         if (available >= f->metrics.keep_free)
279                                 available -= f->metrics.keep_free;
280                         else
281                                 available = 0;
282
283                         if (new_size - old_size > available)
284                                 return -E2BIG;
285                 }
286         }
287
288         /* Note that the glibc fallocate() fallback is very
289            inefficient, hence we try to minimize the allocation area
290            as we can. */
291         r = posix_fallocate(f->fd, old_size, new_size - old_size);
292         if (r != 0)
293                 return -r;
294
295         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
296
297         if (fstat(f->fd, &f->last_stat) < 0)
298                 return -errno;
299
300         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
301
302         return 0;
303 }
304
305 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
306         assert(f);
307         assert(ret);
308
309         /* Avoid SIGBUS on invalid accesses */
310         if (offset + size > (uint64_t) f->last_stat.st_size) {
311                 /* Hmm, out of range? Let's refresh the fstat() data
312                  * first, before we trust that check. */
313
314                 if (fstat(f->fd, &f->last_stat) < 0 ||
315                     offset + size > (uint64_t) f->last_stat.st_size)
316                         return -EADDRNOTAVAIL;
317         }
318
319         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
320 }
321
322 static uint64_t minimum_header_size(Object *o) {
323
324         static uint64_t table[] = {
325                 [OBJECT_DATA] = sizeof(DataObject),
326                 [OBJECT_FIELD] = sizeof(FieldObject),
327                 [OBJECT_ENTRY] = sizeof(EntryObject),
328                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
329                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
330                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
331                 [OBJECT_TAG] = sizeof(TagObject),
332         };
333
334         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
335                 return sizeof(ObjectHeader);
336
337         return table[o->object.type];
338 }
339
340 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
341         int r;
342         void *t;
343         Object *o;
344         uint64_t s;
345         unsigned context;
346
347         assert(f);
348         assert(ret);
349
350         /* One context for each type, plus one catch-all for the rest */
351         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
352
353         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
354         if (r < 0)
355                 return r;
356
357         o = (Object*) t;
358         s = le64toh(o->object.size);
359
360         if (s < sizeof(ObjectHeader))
361                 return -EBADMSG;
362
363         if (o->object.type <= OBJECT_UNUSED)
364                 return -EBADMSG;
365
366         if (s < minimum_header_size(o))
367                 return -EBADMSG;
368
369         if (type >= 0 && o->object.type != type)
370                 return -EBADMSG;
371
372         if (s > sizeof(ObjectHeader)) {
373                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
374                 if (r < 0)
375                         return r;
376
377                 o = (Object*) t;
378         }
379
380         *ret = o;
381         return 0;
382 }
383
384 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
385         uint64_t r;
386
387         assert(f);
388
389         r = le64toh(f->header->tail_entry_seqnum) + 1;
390
391         if (seqnum) {
392                 /* If an external seqnum counter was passed, we update
393                  * both the local and the external one, and set it to
394                  * the maximum of both */
395
396                 if (*seqnum + 1 > r)
397                         r = *seqnum + 1;
398
399                 *seqnum = r;
400         }
401
402         f->header->tail_entry_seqnum = htole64(r);
403
404         if (f->header->head_entry_seqnum == 0)
405                 f->header->head_entry_seqnum = htole64(r);
406
407         return r;
408 }
409
410 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
411         int r;
412         uint64_t p;
413         Object *tail, *o;
414         void *t;
415
416         assert(f);
417         assert(type > 0 && type < _OBJECT_TYPE_MAX);
418         assert(size >= sizeof(ObjectHeader));
419         assert(offset);
420         assert(ret);
421
422         p = le64toh(f->header->tail_object_offset);
423         if (p == 0)
424                 p = le64toh(f->header->header_size);
425         else {
426                 r = journal_file_move_to_object(f, -1, p, &tail);
427                 if (r < 0)
428                         return r;
429
430                 p += ALIGN64(le64toh(tail->object.size));
431         }
432
433         r = journal_file_allocate(f, p, size);
434         if (r < 0)
435                 return r;
436
437         r = journal_file_move_to(f, type, p, size, &t);
438         if (r < 0)
439                 return r;
440
441         o = (Object*) t;
442
443         zero(o->object);
444         o->object.type = type;
445         o->object.size = htole64(size);
446
447         f->header->tail_object_offset = htole64(p);
448         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
449
450         *ret = o;
451         *offset = p;
452
453         return 0;
454 }
455
456 static int journal_file_setup_data_hash_table(JournalFile *f) {
457         uint64_t s, p;
458         Object *o;
459         int r;
460
461         assert(f);
462
463         /* We estimate that we need 1 hash table entry per 768 of
464            journal file and we want to make sure we never get beyond
465            75% fill level. Calculate the hash table size for the
466            maximum file size based on these metrics. */
467
468         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
469         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
470                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
471
472         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
473
474         r = journal_file_append_object(f,
475                                        OBJECT_DATA_HASH_TABLE,
476                                        offsetof(Object, hash_table.items) + s,
477                                        &o, &p);
478         if (r < 0)
479                 return r;
480
481         memset(o->hash_table.items, 0, s);
482
483         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
484         f->header->data_hash_table_size = htole64(s);
485
486         return 0;
487 }
488
489 static int journal_file_setup_field_hash_table(JournalFile *f) {
490         uint64_t s, p;
491         Object *o;
492         int r;
493
494         assert(f);
495
496         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
497         r = journal_file_append_object(f,
498                                        OBJECT_FIELD_HASH_TABLE,
499                                        offsetof(Object, hash_table.items) + s,
500                                        &o, &p);
501         if (r < 0)
502                 return r;
503
504         memset(o->hash_table.items, 0, s);
505
506         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
507         f->header->field_hash_table_size = htole64(s);
508
509         return 0;
510 }
511
512 static int journal_file_map_data_hash_table(JournalFile *f) {
513         uint64_t s, p;
514         void *t;
515         int r;
516
517         assert(f);
518
519         p = le64toh(f->header->data_hash_table_offset);
520         s = le64toh(f->header->data_hash_table_size);
521
522         r = journal_file_move_to(f,
523                                  OBJECT_DATA_HASH_TABLE,
524                                  p, s,
525                                  &t);
526         if (r < 0)
527                 return r;
528
529         f->data_hash_table = t;
530         return 0;
531 }
532
533 static int journal_file_map_field_hash_table(JournalFile *f) {
534         uint64_t s, p;
535         void *t;
536         int r;
537
538         assert(f);
539
540         p = le64toh(f->header->field_hash_table_offset);
541         s = le64toh(f->header->field_hash_table_size);
542
543         r = journal_file_move_to(f,
544                                  OBJECT_FIELD_HASH_TABLE,
545                                  p, s,
546                                  &t);
547         if (r < 0)
548                 return r;
549
550         f->field_hash_table = t;
551         return 0;
552 }
553
554 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
555         uint64_t p, h;
556         int r;
557
558         assert(f);
559         assert(o);
560         assert(offset > 0);
561         assert(o->object.type == OBJECT_DATA);
562
563         /* This might alter the window we are looking at */
564
565         o->data.next_hash_offset = o->data.next_field_offset = 0;
566         o->data.entry_offset = o->data.entry_array_offset = 0;
567         o->data.n_entries = 0;
568
569         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
570         p = le64toh(f->data_hash_table[h].tail_hash_offset);
571         if (p == 0) {
572                 /* Only entry in the hash table is easy */
573                 f->data_hash_table[h].head_hash_offset = htole64(offset);
574         } else {
575                 /* Move back to the previous data object, to patch in
576                  * pointer */
577
578                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
579                 if (r < 0)
580                         return r;
581
582                 o->data.next_hash_offset = htole64(offset);
583         }
584
585         f->data_hash_table[h].tail_hash_offset = htole64(offset);
586
587         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
588                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
589
590         return 0;
591 }
592
593 int journal_file_find_data_object_with_hash(
594                 JournalFile *f,
595                 const void *data, uint64_t size, uint64_t hash,
596                 Object **ret, uint64_t *offset) {
597
598         uint64_t p, osize, h;
599         int r;
600
601         assert(f);
602         assert(data || size == 0);
603
604         osize = offsetof(Object, data.payload) + size;
605
606         if (f->header->data_hash_table_size == 0)
607                 return -EBADMSG;
608
609         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
610         p = le64toh(f->data_hash_table[h].head_hash_offset);
611
612         while (p > 0) {
613                 Object *o;
614
615                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
616                 if (r < 0)
617                         return r;
618
619                 if (le64toh(o->data.hash) != hash)
620                         goto next;
621
622                 if (o->object.flags & OBJECT_COMPRESSED) {
623 #ifdef HAVE_XZ
624                         uint64_t l, rsize;
625
626                         l = le64toh(o->object.size);
627                         if (l <= offsetof(Object, data.payload))
628                                 return -EBADMSG;
629
630                         l -= offsetof(Object, data.payload);
631
632                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
633                                 return -EBADMSG;
634
635                         if (rsize == size &&
636                             memcmp(f->compress_buffer, data, size) == 0) {
637
638                                 if (ret)
639                                         *ret = o;
640
641                                 if (offset)
642                                         *offset = p;
643
644                                 return 1;
645                         }
646 #else
647                         return -EPROTONOSUPPORT;
648 #endif
649
650                 } else if (le64toh(o->object.size) == osize &&
651                            memcmp(o->data.payload, data, size) == 0) {
652
653                         if (ret)
654                                 *ret = o;
655
656                         if (offset)
657                                 *offset = p;
658
659                         return 1;
660                 }
661
662         next:
663                 p = le64toh(o->data.next_hash_offset);
664         }
665
666         return 0;
667 }
668
669 int journal_file_find_data_object(
670                 JournalFile *f,
671                 const void *data, uint64_t size,
672                 Object **ret, uint64_t *offset) {
673
674         uint64_t hash;
675
676         assert(f);
677         assert(data || size == 0);
678
679         hash = hash64(data, size);
680
681         return journal_file_find_data_object_with_hash(f,
682                                                        data, size, hash,
683                                                        ret, offset);
684 }
685
686 static int journal_file_append_data(
687                 JournalFile *f,
688                 const void *data, uint64_t size,
689                 Object **ret, uint64_t *offset) {
690
691         uint64_t hash, p;
692         uint64_t osize;
693         Object *o;
694         int r;
695         bool compressed = false;
696
697         assert(f);
698         assert(data || size == 0);
699
700         hash = hash64(data, size);
701
702         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
703         if (r < 0)
704                 return r;
705         else if (r > 0) {
706
707                 if (ret)
708                         *ret = o;
709
710                 if (offset)
711                         *offset = p;
712
713                 return 0;
714         }
715
716         osize = offsetof(Object, data.payload) + size;
717         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
718         if (r < 0)
719                 return r;
720
721         o->data.hash = htole64(hash);
722
723 #ifdef HAVE_XZ
724         if (f->compress &&
725             size >= COMPRESSION_SIZE_THRESHOLD) {
726                 uint64_t rsize;
727
728                 compressed = compress_blob(data, size, o->data.payload, &rsize);
729
730                 if (compressed) {
731                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
732                         o->object.flags |= OBJECT_COMPRESSED;
733
734                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
735                 }
736         }
737 #endif
738
739         if (!compressed && size > 0)
740                 memcpy(o->data.payload, data, size);
741
742         r = journal_file_link_data(f, o, p, hash);
743         if (r < 0)
744                 return r;
745
746         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
747         if (r < 0)
748                 return r;
749
750         /* The linking might have altered the window, so let's
751          * refresh our pointer */
752         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
753         if (r < 0)
754                 return r;
755
756         if (ret)
757                 *ret = o;
758
759         if (offset)
760                 *offset = p;
761
762         return 0;
763 }
764
765 uint64_t journal_file_entry_n_items(Object *o) {
766         assert(o);
767         assert(o->object.type == OBJECT_ENTRY);
768
769         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
770 }
771
772 uint64_t journal_file_entry_array_n_items(Object *o) {
773         assert(o);
774         assert(o->object.type == OBJECT_ENTRY_ARRAY);
775
776         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
777 }
778
779 static int link_entry_into_array(JournalFile *f,
780                                  le64_t *first,
781                                  le64_t *idx,
782                                  uint64_t p) {
783         int r;
784         uint64_t n = 0, ap = 0, q, i, a, hidx;
785         Object *o;
786
787         assert(f);
788         assert(first);
789         assert(idx);
790         assert(p > 0);
791
792         a = le64toh(*first);
793         i = hidx = le64toh(*idx);
794         while (a > 0) {
795
796                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
797                 if (r < 0)
798                         return r;
799
800                 n = journal_file_entry_array_n_items(o);
801                 if (i < n) {
802                         o->entry_array.items[i] = htole64(p);
803                         *idx = htole64(hidx + 1);
804                         return 0;
805                 }
806
807                 i -= n;
808                 ap = a;
809                 a = le64toh(o->entry_array.next_entry_array_offset);
810         }
811
812         if (hidx > n)
813                 n = (hidx+1) * 2;
814         else
815                 n = n * 2;
816
817         if (n < 4)
818                 n = 4;
819
820         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
821                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
822                                        &o, &q);
823         if (r < 0)
824                 return r;
825
826         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
827         if (r < 0)
828                 return r;
829
830         o->entry_array.items[i] = htole64(p);
831
832         if (ap == 0)
833                 *first = htole64(q);
834         else {
835                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
836                 if (r < 0)
837                         return r;
838
839                 o->entry_array.next_entry_array_offset = htole64(q);
840         }
841
842         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
843                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
844
845         *idx = htole64(hidx + 1);
846
847         return 0;
848 }
849
850 static int link_entry_into_array_plus_one(JournalFile *f,
851                                           le64_t *extra,
852                                           le64_t *first,
853                                           le64_t *idx,
854                                           uint64_t p) {
855
856         int r;
857
858         assert(f);
859         assert(extra);
860         assert(first);
861         assert(idx);
862         assert(p > 0);
863
864         if (*idx == 0)
865                 *extra = htole64(p);
866         else {
867                 le64_t i;
868
869                 i = htole64(le64toh(*idx) - 1);
870                 r = link_entry_into_array(f, first, &i, p);
871                 if (r < 0)
872                         return r;
873         }
874
875         *idx = htole64(le64toh(*idx) + 1);
876         return 0;
877 }
878
879 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
880         uint64_t p;
881         int r;
882         assert(f);
883         assert(o);
884         assert(offset > 0);
885
886         p = le64toh(o->entry.items[i].object_offset);
887         if (p == 0)
888                 return -EINVAL;
889
890         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
891         if (r < 0)
892                 return r;
893
894         return link_entry_into_array_plus_one(f,
895                                               &o->data.entry_offset,
896                                               &o->data.entry_array_offset,
897                                               &o->data.n_entries,
898                                               offset);
899 }
900
901 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
902         uint64_t n, i;
903         int r;
904
905         assert(f);
906         assert(o);
907         assert(offset > 0);
908         assert(o->object.type == OBJECT_ENTRY);
909
910         __sync_synchronize();
911
912         /* Link up the entry itself */
913         r = link_entry_into_array(f,
914                                   &f->header->entry_array_offset,
915                                   &f->header->n_entries,
916                                   offset);
917         if (r < 0)
918                 return r;
919
920         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
921
922         if (f->header->head_entry_realtime == 0)
923                 f->header->head_entry_realtime = o->entry.realtime;
924
925         f->header->tail_entry_realtime = o->entry.realtime;
926         f->header->tail_entry_monotonic = o->entry.monotonic;
927
928         f->tail_entry_monotonic_valid = true;
929
930         /* Link up the items */
931         n = journal_file_entry_n_items(o);
932         for (i = 0; i < n; i++) {
933                 r = journal_file_link_entry_item(f, o, offset, i);
934                 if (r < 0)
935                         return r;
936         }
937
938         return 0;
939 }
940
941 static int journal_file_append_entry_internal(
942                 JournalFile *f,
943                 const dual_timestamp *ts,
944                 uint64_t xor_hash,
945                 const EntryItem items[], unsigned n_items,
946                 uint64_t *seqnum,
947                 Object **ret, uint64_t *offset) {
948         uint64_t np;
949         uint64_t osize;
950         Object *o;
951         int r;
952
953         assert(f);
954         assert(items || n_items == 0);
955         assert(ts);
956
957         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
958
959         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
960         if (r < 0)
961                 return r;
962
963         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
964         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
965         o->entry.realtime = htole64(ts->realtime);
966         o->entry.monotonic = htole64(ts->monotonic);
967         o->entry.xor_hash = htole64(xor_hash);
968         o->entry.boot_id = f->header->boot_id;
969
970         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
971         if (r < 0)
972                 return r;
973
974         r = journal_file_link_entry(f, o, np);
975         if (r < 0)
976                 return r;
977
978         if (ret)
979                 *ret = o;
980
981         if (offset)
982                 *offset = np;
983
984         return 0;
985 }
986
987 void journal_file_post_change(JournalFile *f) {
988         assert(f);
989
990         /* inotify() does not receive IN_MODIFY events from file
991          * accesses done via mmap(). After each access we hence
992          * trigger IN_MODIFY by truncating the journal file to its
993          * current size which triggers IN_MODIFY. */
994
995         __sync_synchronize();
996
997         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
998                 log_error("Failed to to truncate file to its own size: %m");
999 }
1000
1001 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1002         unsigned i;
1003         EntryItem *items;
1004         int r;
1005         uint64_t xor_hash = 0;
1006         struct dual_timestamp _ts;
1007
1008         assert(f);
1009         assert(iovec || n_iovec == 0);
1010
1011         if (!f->writable)
1012                 return -EPERM;
1013
1014         if (!ts) {
1015                 dual_timestamp_get(&_ts);
1016                 ts = &_ts;
1017         }
1018
1019         if (f->tail_entry_monotonic_valid &&
1020             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1021                 return -EINVAL;
1022
1023         r = journal_file_maybe_append_tag(f, ts->realtime);
1024         if (r < 0)
1025                 return r;
1026
1027         /* alloca() can't take 0, hence let's allocate at least one */
1028         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1029
1030         for (i = 0; i < n_iovec; i++) {
1031                 uint64_t p;
1032                 Object *o;
1033
1034                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1035                 if (r < 0)
1036                         return r;
1037
1038                 xor_hash ^= le64toh(o->data.hash);
1039                 items[i].object_offset = htole64(p);
1040                 items[i].hash = o->data.hash;
1041         }
1042
1043         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1044
1045         journal_file_post_change(f);
1046
1047         return r;
1048 }
1049
1050 static int generic_array_get(JournalFile *f,
1051                              uint64_t first,
1052                              uint64_t i,
1053                              Object **ret, uint64_t *offset) {
1054
1055         Object *o;
1056         uint64_t p = 0, a;
1057         int r;
1058
1059         assert(f);
1060
1061         a = first;
1062         while (a > 0) {
1063                 uint64_t n;
1064
1065                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1066                 if (r < 0)
1067                         return r;
1068
1069                 n = journal_file_entry_array_n_items(o);
1070                 if (i < n) {
1071                         p = le64toh(o->entry_array.items[i]);
1072                         break;
1073                 }
1074
1075                 i -= n;
1076                 a = le64toh(o->entry_array.next_entry_array_offset);
1077         }
1078
1079         if (a <= 0 || p <= 0)
1080                 return 0;
1081
1082         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1083         if (r < 0)
1084                 return r;
1085
1086         if (ret)
1087                 *ret = o;
1088
1089         if (offset)
1090                 *offset = p;
1091
1092         return 1;
1093 }
1094
1095 static int generic_array_get_plus_one(JournalFile *f,
1096                                       uint64_t extra,
1097                                       uint64_t first,
1098                                       uint64_t i,
1099                                       Object **ret, uint64_t *offset) {
1100
1101         Object *o;
1102
1103         assert(f);
1104
1105         if (i == 0) {
1106                 int r;
1107
1108                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1109                 if (r < 0)
1110                         return r;
1111
1112                 if (ret)
1113                         *ret = o;
1114
1115                 if (offset)
1116                         *offset = extra;
1117
1118                 return 1;
1119         }
1120
1121         return generic_array_get(f, first, i-1, ret, offset);
1122 }
1123
/* Verdicts returned by the test_object callbacks that drive the
 * bisection helpers. */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object sorts before the needle */
        TEST_RIGHT = 2, /* the tested object sorts after the needle */
};
1129
1130 static int generic_array_bisect(JournalFile *f,
1131                                 uint64_t first,
1132                                 uint64_t n,
1133                                 uint64_t needle,
1134                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1135                                 direction_t direction,
1136                                 Object **ret,
1137                                 uint64_t *offset,
1138                                 uint64_t *idx) {
1139
1140         uint64_t a, p, t = 0, i = 0, last_p = 0;
1141         bool subtract_one = false;
1142         Object *o, *array = NULL;
1143         int r;
1144
1145         assert(f);
1146         assert(test_object);
1147
1148         a = first;
1149         while (a > 0) {
1150                 uint64_t left, right, k, lp;
1151
1152                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1153                 if (r < 0)
1154                         return r;
1155
1156                 k = journal_file_entry_array_n_items(array);
1157                 right = MIN(k, n);
1158                 if (right <= 0)
1159                         return 0;
1160
1161                 i = right - 1;
1162                 lp = p = le64toh(array->entry_array.items[i]);
1163                 if (p <= 0)
1164                         return -EBADMSG;
1165
1166                 r = test_object(f, p, needle);
1167                 if (r < 0)
1168                         return r;
1169
1170                 if (r == TEST_FOUND)
1171                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1172
1173                 if (r == TEST_RIGHT) {
1174                         left = 0;
1175                         right -= 1;
1176                         for (;;) {
1177                                 if (left == right) {
1178                                         if (direction == DIRECTION_UP)
1179                                                 subtract_one = true;
1180
1181                                         i = left;
1182                                         goto found;
1183                                 }
1184
1185                                 assert(left < right);
1186
1187                                 i = (left + right) / 2;
1188                                 p = le64toh(array->entry_array.items[i]);
1189                                 if (p <= 0)
1190                                         return -EBADMSG;
1191
1192                                 r = test_object(f, p, needle);
1193                                 if (r < 0)
1194                                         return r;
1195
1196                                 if (r == TEST_FOUND)
1197                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1198
1199                                 if (r == TEST_RIGHT)
1200                                         right = i;
1201                                 else
1202                                         left = i + 1;
1203                         }
1204                 }
1205
1206                 if (k > n) {
1207                         if (direction == DIRECTION_UP) {
1208                                 i = n;
1209                                 subtract_one = true;
1210                                 goto found;
1211                         }
1212
1213                         return 0;
1214                 }
1215
1216                 last_p = lp;
1217
1218                 n -= k;
1219                 t += k;
1220                 a = le64toh(array->entry_array.next_entry_array_offset);
1221         }
1222
1223         return 0;
1224
1225 found:
1226         if (subtract_one && t == 0 && i == 0)
1227                 return 0;
1228
1229         if (subtract_one && i == 0)
1230                 p = last_p;
1231         else if (subtract_one)
1232                 p = le64toh(array->entry_array.items[i-1]);
1233         else
1234                 p = le64toh(array->entry_array.items[i]);
1235
1236         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1237         if (r < 0)
1238                 return r;
1239
1240         if (ret)
1241                 *ret = o;
1242
1243         if (offset)
1244                 *offset = p;
1245
1246         if (idx)
1247                 *idx = t + i + (subtract_one ? -1 : 0);
1248
1249         return 1;
1250 }
1251
1252 static int generic_array_bisect_plus_one(JournalFile *f,
1253                                          uint64_t extra,
1254                                          uint64_t first,
1255                                          uint64_t n,
1256                                          uint64_t needle,
1257                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1258                                          direction_t direction,
1259                                          Object **ret,
1260                                          uint64_t *offset,
1261                                          uint64_t *idx) {
1262
1263         int r;
1264         bool step_back = false;
1265         Object *o;
1266
1267         assert(f);
1268         assert(test_object);
1269
1270         if (n <= 0)
1271                 return 0;
1272
1273         /* This bisects the array in object 'first', but first checks
1274          * an extra  */
1275         r = test_object(f, extra, needle);
1276         if (r < 0)
1277                 return r;
1278
1279         if (r == TEST_FOUND)
1280                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1281
1282         /* if we are looking with DIRECTION_UP then we need to first
1283            see if in the actual array there is a matching entry, and
1284            return the last one of that. But if there isn't any we need
1285            to return this one. Hence remember this, and return it
1286            below. */
1287         if (r == TEST_LEFT)
1288                 step_back = direction == DIRECTION_UP;
1289
1290         if (r == TEST_RIGHT) {
1291                 if (direction == DIRECTION_DOWN)
1292                         goto found;
1293                 else
1294                         return 0;
1295         }
1296
1297         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1298
1299         if (r == 0 && step_back)
1300                 goto found;
1301
1302         if (r > 0 && idx)
1303                 (*idx) ++;
1304
1305         return r;
1306
1307 found:
1308         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1309         if (r < 0)
1310                 return r;
1311
1312         if (ret)
1313                 *ret = o;
1314
1315         if (offset)
1316                 *offset = extra;
1317
1318         if (idx)
1319                 *idx = 0;
1320
1321         return 1;
1322 }
1323
1324 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1325         assert(f);
1326         assert(p > 0);
1327
1328         if (p == needle)
1329                 return TEST_FOUND;
1330         else if (p < needle)
1331                 return TEST_LEFT;
1332         else
1333                 return TEST_RIGHT;
1334 }
1335
1336 int journal_file_move_to_entry_by_offset(
1337                 JournalFile *f,
1338                 uint64_t p,
1339                 direction_t direction,
1340                 Object **ret,
1341                 uint64_t *offset) {
1342
1343         return generic_array_bisect(f,
1344                                     le64toh(f->header->entry_array_offset),
1345                                     le64toh(f->header->n_entries),
1346                                     p,
1347                                     test_object_offset,
1348                                     direction,
1349                                     ret, offset, NULL);
1350 }
1351
1352
1353 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1354         Object *o;
1355         int r;
1356
1357         assert(f);
1358         assert(p > 0);
1359
1360         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1361         if (r < 0)
1362                 return r;
1363
1364         if (le64toh(o->entry.seqnum) == needle)
1365                 return TEST_FOUND;
1366         else if (le64toh(o->entry.seqnum) < needle)
1367                 return TEST_LEFT;
1368         else
1369                 return TEST_RIGHT;
1370 }
1371
1372 int journal_file_move_to_entry_by_seqnum(
1373                 JournalFile *f,
1374                 uint64_t seqnum,
1375                 direction_t direction,
1376                 Object **ret,
1377                 uint64_t *offset) {
1378
1379         return generic_array_bisect(f,
1380                                     le64toh(f->header->entry_array_offset),
1381                                     le64toh(f->header->n_entries),
1382                                     seqnum,
1383                                     test_object_seqnum,
1384                                     direction,
1385                                     ret, offset, NULL);
1386 }
1387
1388 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1389         Object *o;
1390         int r;
1391
1392         assert(f);
1393         assert(p > 0);
1394
1395         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1396         if (r < 0)
1397                 return r;
1398
1399         if (le64toh(o->entry.realtime) == needle)
1400                 return TEST_FOUND;
1401         else if (le64toh(o->entry.realtime) < needle)
1402                 return TEST_LEFT;
1403         else
1404                 return TEST_RIGHT;
1405 }
1406
1407 int journal_file_move_to_entry_by_realtime(
1408                 JournalFile *f,
1409                 uint64_t realtime,
1410                 direction_t direction,
1411                 Object **ret,
1412                 uint64_t *offset) {
1413
1414         return generic_array_bisect(f,
1415                                     le64toh(f->header->entry_array_offset),
1416                                     le64toh(f->header->n_entries),
1417                                     realtime,
1418                                     test_object_realtime,
1419                                     direction,
1420                                     ret, offset, NULL);
1421 }
1422
1423 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1424         Object *o;
1425         int r;
1426
1427         assert(f);
1428         assert(p > 0);
1429
1430         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1431         if (r < 0)
1432                 return r;
1433
1434         if (le64toh(o->entry.monotonic) == needle)
1435                 return TEST_FOUND;
1436         else if (le64toh(o->entry.monotonic) < needle)
1437                 return TEST_LEFT;
1438         else
1439                 return TEST_RIGHT;
1440 }
1441
1442 int journal_file_move_to_entry_by_monotonic(
1443                 JournalFile *f,
1444                 sd_id128_t boot_id,
1445                 uint64_t monotonic,
1446                 direction_t direction,
1447                 Object **ret,
1448                 uint64_t *offset) {
1449
1450         char t[9+32+1] = "_BOOT_ID=";
1451         Object *o;
1452         int r;
1453
1454         assert(f);
1455
1456         sd_id128_to_string(boot_id, t + 9);
1457         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1458         if (r < 0)
1459                 return r;
1460         if (r == 0)
1461                 return -ENOENT;
1462
1463         return generic_array_bisect_plus_one(f,
1464                                              le64toh(o->data.entry_offset),
1465                                              le64toh(o->data.entry_array_offset),
1466                                              le64toh(o->data.n_entries),
1467                                              monotonic,
1468                                              test_object_monotonic,
1469                                              direction,
1470                                              ret, offset, NULL);
1471 }
1472
1473 int journal_file_next_entry(
1474                 JournalFile *f,
1475                 Object *o, uint64_t p,
1476                 direction_t direction,
1477                 Object **ret, uint64_t *offset) {
1478
1479         uint64_t i, n;
1480         int r;
1481
1482         assert(f);
1483         assert(p > 0 || !o);
1484
1485         n = le64toh(f->header->n_entries);
1486         if (n <= 0)
1487                 return 0;
1488
1489         if (!o)
1490                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1491         else {
1492                 if (o->object.type != OBJECT_ENTRY)
1493                         return -EINVAL;
1494
1495                 r = generic_array_bisect(f,
1496                                          le64toh(f->header->entry_array_offset),
1497                                          le64toh(f->header->n_entries),
1498                                          p,
1499                                          test_object_offset,
1500                                          DIRECTION_DOWN,
1501                                          NULL, NULL,
1502                                          &i);
1503                 if (r <= 0)
1504                         return r;
1505
1506                 if (direction == DIRECTION_DOWN) {
1507                         if (i >= n - 1)
1508                                 return 0;
1509
1510                         i++;
1511                 } else {
1512                         if (i <= 0)
1513                                 return 0;
1514
1515                         i--;
1516                 }
1517         }
1518
1519         /* And jump to it */
1520         return generic_array_get(f,
1521                                  le64toh(f->header->entry_array_offset),
1522                                  i,
1523                                  ret, offset);
1524 }
1525
1526 int journal_file_skip_entry(
1527                 JournalFile *f,
1528                 Object *o, uint64_t p,
1529                 int64_t skip,
1530                 Object **ret, uint64_t *offset) {
1531
1532         uint64_t i, n;
1533         int r;
1534
1535         assert(f);
1536         assert(o);
1537         assert(p > 0);
1538
1539         if (o->object.type != OBJECT_ENTRY)
1540                 return -EINVAL;
1541
1542         r = generic_array_bisect(f,
1543                                  le64toh(f->header->entry_array_offset),
1544                                  le64toh(f->header->n_entries),
1545                                  p,
1546                                  test_object_offset,
1547                                  DIRECTION_DOWN,
1548                                  NULL, NULL,
1549                                  &i);
1550         if (r <= 0)
1551                 return r;
1552
1553         /* Calculate new index */
1554         if (skip < 0) {
1555                 if ((uint64_t) -skip >= i)
1556                         i = 0;
1557                 else
1558                         i = i - (uint64_t) -skip;
1559         } else
1560                 i  += (uint64_t) skip;
1561
1562         n = le64toh(f->header->n_entries);
1563         if (n <= 0)
1564                 return -EBADMSG;
1565
1566         if (i >= n)
1567                 i = n-1;
1568
1569         return generic_array_get(f,
1570                                  le64toh(f->header->entry_array_offset),
1571                                  i,
1572                                  ret, offset);
1573 }
1574
1575 int journal_file_next_entry_for_data(
1576                 JournalFile *f,
1577                 Object *o, uint64_t p,
1578                 uint64_t data_offset,
1579                 direction_t direction,
1580                 Object **ret, uint64_t *offset) {
1581
1582         uint64_t n, i;
1583         int r;
1584         Object *d;
1585
1586         assert(f);
1587         assert(p > 0 || !o);
1588
1589         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1590         if (r < 0)
1591                 return r;
1592
1593         n = le64toh(d->data.n_entries);
1594         if (n <= 0)
1595                 return n;
1596
1597         if (!o)
1598                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1599         else {
1600                 if (o->object.type != OBJECT_ENTRY)
1601                         return -EINVAL;
1602
1603                 r = generic_array_bisect_plus_one(f,
1604                                                   le64toh(d->data.entry_offset),
1605                                                   le64toh(d->data.entry_array_offset),
1606                                                   le64toh(d->data.n_entries),
1607                                                   p,
1608                                                   test_object_offset,
1609                                                   DIRECTION_DOWN,
1610                                                   NULL, NULL,
1611                                                   &i);
1612
1613                 if (r <= 0)
1614                         return r;
1615
1616                 if (direction == DIRECTION_DOWN) {
1617                         if (i >= n - 1)
1618                                 return 0;
1619
1620                         i++;
1621                 } else {
1622                         if (i <= 0)
1623                                 return 0;
1624
1625                         i--;
1626                 }
1627
1628         }
1629
1630         return generic_array_get_plus_one(f,
1631                                           le64toh(d->data.entry_offset),
1632                                           le64toh(d->data.entry_array_offset),
1633                                           i,
1634                                           ret, offset);
1635 }
1636
1637 int journal_file_move_to_entry_by_offset_for_data(
1638                 JournalFile *f,
1639                 uint64_t data_offset,
1640                 uint64_t p,
1641                 direction_t direction,
1642                 Object **ret, uint64_t *offset) {
1643
1644         int r;
1645         Object *d;
1646
1647         assert(f);
1648
1649         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1650         if (r < 0)
1651                 return r;
1652
1653         return generic_array_bisect_plus_one(f,
1654                                              le64toh(d->data.entry_offset),
1655                                              le64toh(d->data.entry_array_offset),
1656                                              le64toh(d->data.n_entries),
1657                                              p,
1658                                              test_object_offset,
1659                                              direction,
1660                                              ret, offset, NULL);
1661 }
1662
1663 int journal_file_move_to_entry_by_monotonic_for_data(
1664                 JournalFile *f,
1665                 uint64_t data_offset,
1666                 sd_id128_t boot_id,
1667                 uint64_t monotonic,
1668                 direction_t direction,
1669                 Object **ret, uint64_t *offset) {
1670
1671         char t[9+32+1] = "_BOOT_ID=";
1672         Object *o, *d;
1673         int r;
1674         uint64_t b, z;
1675
1676         assert(f);
1677
1678         /* First, seek by time */
1679         sd_id128_to_string(boot_id, t + 9);
1680         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1681         if (r < 0)
1682                 return r;
1683         if (r == 0)
1684                 return -ENOENT;
1685
1686         r = generic_array_bisect_plus_one(f,
1687                                           le64toh(o->data.entry_offset),
1688                                           le64toh(o->data.entry_array_offset),
1689                                           le64toh(o->data.n_entries),
1690                                           monotonic,
1691                                           test_object_monotonic,
1692                                           direction,
1693                                           NULL, &z, NULL);
1694         if (r <= 0)
1695                 return r;
1696
1697         /* And now, continue seeking until we find an entry that
1698          * exists in both bisection arrays */
1699
1700         for (;;) {
1701                 Object *qo;
1702                 uint64_t p, q;
1703
1704                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1705                 if (r < 0)
1706                         return r;
1707
1708                 r = generic_array_bisect_plus_one(f,
1709                                                   le64toh(d->data.entry_offset),
1710                                                   le64toh(d->data.entry_array_offset),
1711                                                   le64toh(d->data.n_entries),
1712                                                   z,
1713                                                   test_object_offset,
1714                                                   direction,
1715                                                   NULL, &p, NULL);
1716                 if (r <= 0)
1717                         return r;
1718
1719                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1720                 if (r < 0)
1721                         return r;
1722
1723                 r = generic_array_bisect_plus_one(f,
1724                                                   le64toh(o->data.entry_offset),
1725                                                   le64toh(o->data.entry_array_offset),
1726                                                   le64toh(o->data.n_entries),
1727                                                   p,
1728                                                   test_object_offset,
1729                                                   direction,
1730                                                   &qo, &q, NULL);
1731
1732                 if (r <= 0)
1733                         return r;
1734
1735                 if (p == q) {
1736                         if (ret)
1737                                 *ret = qo;
1738                         if (offset)
1739                                 *offset = q;
1740
1741                         return 1;
1742                 }
1743
1744                 z = q;
1745         }
1746
1747         return 0;
1748 }
1749
1750 int journal_file_move_to_entry_by_seqnum_for_data(
1751                 JournalFile *f,
1752                 uint64_t data_offset,
1753                 uint64_t seqnum,
1754                 direction_t direction,
1755                 Object **ret, uint64_t *offset) {
1756
1757         Object *d;
1758         int r;
1759
1760         assert(f);
1761
1762         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1763         if (r < 0)
1764                 return r;
1765
1766         return generic_array_bisect_plus_one(f,
1767                                              le64toh(d->data.entry_offset),
1768                                              le64toh(d->data.entry_array_offset),
1769                                              le64toh(d->data.n_entries),
1770                                              seqnum,
1771                                              test_object_seqnum,
1772                                              direction,
1773                                              ret, offset, NULL);
1774 }
1775
1776 int journal_file_move_to_entry_by_realtime_for_data(
1777                 JournalFile *f,
1778                 uint64_t data_offset,
1779                 uint64_t realtime,
1780                 direction_t direction,
1781                 Object **ret, uint64_t *offset) {
1782
1783         Object *d;
1784         int r;
1785
1786         assert(f);
1787
1788         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1789         if (r < 0)
1790                 return r;
1791
1792         return generic_array_bisect_plus_one(f,
1793                                              le64toh(d->data.entry_offset),
1794                                              le64toh(d->data.entry_array_offset),
1795                                              le64toh(d->data.n_entries),
1796                                              realtime,
1797                                              test_object_realtime,
1798                                              direction,
1799                                              ret, offset, NULL);
1800 }
1801
1802 void journal_file_dump(JournalFile *f) {
1803         Object *o;
1804         int r;
1805         uint64_t p;
1806
1807         assert(f);
1808
1809         journal_file_print_header(f);
1810
1811         p = le64toh(f->header->header_size);
1812         while (p != 0) {
1813                 r = journal_file_move_to_object(f, -1, p, &o);
1814                 if (r < 0)
1815                         goto fail;
1816
1817                 switch (o->object.type) {
1818
1819                 case OBJECT_UNUSED:
1820                         printf("Type: OBJECT_UNUSED\n");
1821                         break;
1822
1823                 case OBJECT_DATA:
1824                         printf("Type: OBJECT_DATA\n");
1825                         break;
1826
1827                 case OBJECT_ENTRY:
1828                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1829                                (unsigned long long) le64toh(o->entry.seqnum),
1830                                (unsigned long long) le64toh(o->entry.monotonic),
1831                                (unsigned long long) le64toh(o->entry.realtime));
1832                         break;
1833
1834                 case OBJECT_FIELD_HASH_TABLE:
1835                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1836                         break;
1837
1838                 case OBJECT_DATA_HASH_TABLE:
1839                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1840                         break;
1841
1842                 case OBJECT_ENTRY_ARRAY:
1843                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1844                         break;
1845
1846                 case OBJECT_TAG:
1847                         printf("Type: OBJECT_TAG %llu\n",
1848                                (unsigned long long) le64toh(o->tag.seqnum));
1849                         break;
1850                 }
1851
1852                 if (o->object.flags & OBJECT_COMPRESSED)
1853                         printf("Flags: COMPRESSED\n");
1854
1855                 if (p == le64toh(f->header->tail_object_offset))
1856                         p = 0;
1857                 else
1858                         p = p + ALIGN64(le64toh(o->object.size));
1859         }
1860
1861         return;
1862 fail:
1863         log_error("File corrupt");
1864 }
1865
1866 void journal_file_print_header(JournalFile *f) {
1867         char a[33], b[33], c[33];
1868         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1869
1870         assert(f);
1871
1872         printf("File Path: %s\n"
1873                "File ID: %s\n"
1874                "Machine ID: %s\n"
1875                "Boot ID: %s\n"
1876                "Sequential Number ID: %s\n"
1877                "State: %s\n"
1878                "Compatible Flags:%s%s\n"
1879                "Incompatible Flags:%s%s\n"
1880                "Header size: %llu\n"
1881                "Arena size: %llu\n"
1882                "Data Hash Table Size: %llu\n"
1883                "Field Hash Table Size: %llu\n"
1884                "Objects: %llu\n"
1885                "Entry Objects: %llu\n"
1886                "Rotate Suggested: %s\n"
1887                "Head Sequential Number: %llu\n"
1888                "Tail Sequential Number: %llu\n"
1889                "Head Realtime Timestamp: %s\n"
1890                "Tail Realtime Timestamp: %s\n",
1891                f->path,
1892                sd_id128_to_string(f->header->file_id, a),
1893                sd_id128_to_string(f->header->machine_id, b),
1894                sd_id128_to_string(f->header->boot_id, c),
1895                sd_id128_to_string(f->header->seqnum_id, c),
1896                f->header->state == STATE_OFFLINE ? "offline" :
1897                f->header->state == STATE_ONLINE ? "online" :
1898                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
1899                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
1900                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
1901                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1902                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1903                (unsigned long long) le64toh(f->header->header_size),
1904                (unsigned long long) le64toh(f->header->arena_size),
1905                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1906                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1907                (unsigned long long) le64toh(f->header->n_objects),
1908                (unsigned long long) le64toh(f->header->n_entries),
1909                yes_no(journal_file_rotate_suggested(f)),
1910                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1911                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1912                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1913                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
1914
1915         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1916                 printf("Data Objects: %llu\n"
1917                        "Data Hash Table Fill: %.1f%%\n",
1918                        (unsigned long long) le64toh(f->header->n_data),
1919                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1920
1921         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1922                 printf("Field Objects: %llu\n"
1923                        "Field Hash Table Fill: %.1f%%\n",
1924                        (unsigned long long) le64toh(f->header->n_fields),
1925                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1926 }
1927
1928 int journal_file_open(
1929                 const char *fname,
1930                 int flags,
1931                 mode_t mode,
1932                 bool compress,
1933                 bool authenticate,
1934                 JournalMetrics *metrics,
1935                 MMapCache *mmap_cache,
1936                 JournalFile *template,
1937                 JournalFile **ret) {
1938
1939         JournalFile *f;
1940         int r;
1941         bool newly_created = false;
1942
1943         assert(fname);
1944
1945         if ((flags & O_ACCMODE) != O_RDONLY &&
1946             (flags & O_ACCMODE) != O_RDWR)
1947                 return -EINVAL;
1948
1949         if (!endswith(fname, ".journal"))
1950                 return -EINVAL;
1951
1952         f = new0(JournalFile, 1);
1953         if (!f)
1954                 return -ENOMEM;
1955
1956         f->fd = -1;
1957         f->mode = mode;
1958
1959         f->flags = flags;
1960         f->prot = prot_from_flags(flags);
1961         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1962         f->compress = compress;
1963         f->authenticate = authenticate;
1964
1965         if (mmap_cache)
1966                 f->mmap = mmap_cache_ref(mmap_cache);
1967         else {
1968                 /* One context for each type, plus the zeroth catchall
1969                  * context. One fd for the file plus one for each type
1970                  * (which we need during verification */
1971                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
1972                 if (!f->mmap) {
1973                         r = -ENOMEM;
1974                         goto fail;
1975                 }
1976         }
1977
1978         f->path = strdup(fname);
1979         if (!f->path) {
1980                 r = -ENOMEM;
1981                 goto fail;
1982         }
1983
1984         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1985         if (f->fd < 0) {
1986                 r = -errno;
1987                 goto fail;
1988         }
1989
1990         if (fstat(f->fd, &f->last_stat) < 0) {
1991                 r = -errno;
1992                 goto fail;
1993         }
1994
1995         if (f->last_stat.st_size == 0 && f->writable) {
1996                 newly_created = true;
1997
1998                 /* Try to load the FSPRG state, and if we can't, then
1999                  * just don't do authentication */
2000                 r = journal_file_load_fsprg(f);
2001                 if (r < 0)
2002                         f->authenticate = false;
2003
2004                 r = journal_file_init_header(f, template);
2005                 if (r < 0)
2006                         goto fail;
2007
2008                 if (fstat(f->fd, &f->last_stat) < 0) {
2009                         r = -errno;
2010                         goto fail;
2011                 }
2012         }
2013
2014         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2015                 r = -EIO;
2016                 goto fail;
2017         }
2018
2019         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2020         if (f->header == MAP_FAILED) {
2021                 f->header = NULL;
2022                 r = -errno;
2023                 goto fail;
2024         }
2025
2026         if (!newly_created) {
2027                 r = journal_file_verify_header(f);
2028                 if (r < 0)
2029                         goto fail;
2030         }
2031
2032         if (!newly_created && f->writable) {
2033                 r = journal_file_load_fsprg(f);
2034                 if (r < 0)
2035                         goto fail;
2036         }
2037
2038         if (f->writable) {
2039                 if (metrics) {
2040                         journal_default_metrics(metrics, f->fd);
2041                         f->metrics = *metrics;
2042                 } else if (template)
2043                         f->metrics = template->metrics;
2044
2045                 r = journal_file_refresh_header(f);
2046                 if (r < 0)
2047                         goto fail;
2048
2049                 r = journal_file_setup_hmac(f);
2050                 if (r < 0)
2051                         goto fail;
2052         }
2053
2054         if (newly_created) {
2055                 r = journal_file_setup_field_hash_table(f);
2056                 if (r < 0)
2057                         goto fail;
2058
2059                 r = journal_file_setup_data_hash_table(f);
2060                 if (r < 0)
2061                         goto fail;
2062
2063                 r = journal_file_append_first_tag(f);
2064                 if (r < 0)
2065                         goto fail;
2066         }
2067
2068         r = journal_file_map_field_hash_table(f);
2069         if (r < 0)
2070                 goto fail;
2071
2072         r = journal_file_map_data_hash_table(f);
2073         if (r < 0)
2074                 goto fail;
2075
2076         if (ret)
2077                 *ret = f;
2078
2079         return 0;
2080
2081 fail:
2082         journal_file_close(f);
2083
2084         return r;
2085 }
2086
2087 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2088         char *p;
2089         size_t l;
2090         JournalFile *old_file, *new_file = NULL;
2091         int r;
2092
2093         assert(f);
2094         assert(*f);
2095
2096         old_file = *f;
2097
2098         if (!old_file->writable)
2099                 return -EINVAL;
2100
2101         if (!endswith(old_file->path, ".journal"))
2102                 return -EINVAL;
2103
2104         l = strlen(old_file->path);
2105
2106         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2107         if (!p)
2108                 return -ENOMEM;
2109
2110         memcpy(p, old_file->path, l - 8);
2111         p[l-8] = '@';
2112         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2113         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2114                  "-%016llx-%016llx.journal",
2115                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2116                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2117
2118         r = rename(old_file->path, p);
2119         free(p);
2120
2121         if (r < 0)
2122                 return -errno;
2123
2124         old_file->header->state = STATE_ARCHIVED;
2125
2126         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
2127         journal_file_close(old_file);
2128
2129         *f = new_file;
2130         return r;
2131 }
2132
2133 int journal_file_open_reliably(
2134                 const char *fname,
2135                 int flags,
2136                 mode_t mode,
2137                 bool compress,
2138                 bool authenticate,
2139                 JournalMetrics *metrics,
2140                 MMapCache *mmap_cache,
2141                 JournalFile *template,
2142                 JournalFile **ret) {
2143
2144         int r;
2145         size_t l;
2146         char *p;
2147
2148         r = journal_file_open(fname, flags, mode, compress, authenticate,
2149                               metrics, mmap_cache, template, ret);
2150         if (r != -EBADMSG && /* corrupted */
2151             r != -ENODATA && /* truncated */
2152             r != -EHOSTDOWN && /* other machine */
2153             r != -EPROTONOSUPPORT && /* incompatible feature */
2154             r != -EBUSY && /* unclean shutdown */
2155             r != -ESHUTDOWN /* already archived */)
2156                 return r;
2157
2158         if ((flags & O_ACCMODE) == O_RDONLY)
2159                 return r;
2160
2161         if (!(flags & O_CREAT))
2162                 return r;
2163
2164         if (!endswith(fname, ".journal"))
2165                 return r;
2166
2167         /* The file is corrupted. Rotate it away and try it again (but only once) */
2168
2169         l = strlen(fname);
2170         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2171                      (int) (l-8), fname,
2172                      (unsigned long long) now(CLOCK_REALTIME),
2173                      random_ull()) < 0)
2174                 return -ENOMEM;
2175
2176         r = rename(fname, p);
2177         free(p);
2178         if (r < 0)
2179                 return -errno;
2180
2181         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2182
2183         return journal_file_open(fname, flags, mode, compress, authenticate,
2184                                  metrics, mmap_cache, template, ret);
2185 }
2186
2187
2188 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2189         uint64_t i, n;
2190         uint64_t q, xor_hash = 0;
2191         int r;
2192         EntryItem *items;
2193         dual_timestamp ts;
2194
2195         assert(from);
2196         assert(to);
2197         assert(o);
2198         assert(p);
2199
2200         if (!to->writable)
2201                 return -EPERM;
2202
2203         ts.monotonic = le64toh(o->entry.monotonic);
2204         ts.realtime = le64toh(o->entry.realtime);
2205
2206         if (to->tail_entry_monotonic_valid &&
2207             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2208                 return -EINVAL;
2209
2210         n = journal_file_entry_n_items(o);
2211         items = alloca(sizeof(EntryItem) * n);
2212
2213         for (i = 0; i < n; i++) {
2214                 uint64_t l, h;
2215                 le64_t le_hash;
2216                 size_t t;
2217                 void *data;
2218                 Object *u;
2219
2220                 q = le64toh(o->entry.items[i].object_offset);
2221                 le_hash = o->entry.items[i].hash;
2222
2223                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2224                 if (r < 0)
2225                         return r;
2226
2227                 if (le_hash != o->data.hash)
2228                         return -EBADMSG;
2229
2230                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2231                 t = (size_t) l;
2232
2233                 /* We hit the limit on 32bit machines */
2234                 if ((uint64_t) t != l)
2235                         return -E2BIG;
2236
2237                 if (o->object.flags & OBJECT_COMPRESSED) {
2238 #ifdef HAVE_XZ
2239                         uint64_t rsize;
2240
2241                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2242                                 return -EBADMSG;
2243
2244                         data = from->compress_buffer;
2245                         l = rsize;
2246 #else
2247                         return -EPROTONOSUPPORT;
2248 #endif
2249                 } else
2250                         data = o->data.payload;
2251
2252                 r = journal_file_append_data(to, data, l, &u, &h);
2253                 if (r < 0)
2254                         return r;
2255
2256                 xor_hash ^= le64toh(u->data.hash);
2257                 items[i].object_offset = htole64(h);
2258                 items[i].hash = u->data.hash;
2259
2260                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2261                 if (r < 0)
2262                         return r;
2263         }
2264
2265         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2266 }
2267
2268 void journal_default_metrics(JournalMetrics *m, int fd) {
2269         uint64_t fs_size = 0;
2270         struct statvfs ss;
2271         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2272
2273         assert(m);
2274         assert(fd >= 0);
2275
2276         if (fstatvfs(fd, &ss) >= 0)
2277                 fs_size = ss.f_frsize * ss.f_blocks;
2278
2279         if (m->max_use == (uint64_t) -1) {
2280
2281                 if (fs_size > 0) {
2282                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2283
2284                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2285                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2286
2287                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2288                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2289                 } else
2290                         m->max_use = DEFAULT_MAX_USE_LOWER;
2291         } else {
2292                 m->max_use = PAGE_ALIGN(m->max_use);
2293
2294                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2295                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2296         }
2297
2298         if (m->max_size == (uint64_t) -1) {
2299                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2300
2301                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2302                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2303         } else
2304                 m->max_size = PAGE_ALIGN(m->max_size);
2305
2306         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2307                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2308
2309         if (m->max_size*2 > m->max_use)
2310                 m->max_use = m->max_size*2;
2311
2312         if (m->min_size == (uint64_t) -1)
2313                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2314         else {
2315                 m->min_size = PAGE_ALIGN(m->min_size);
2316
2317                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2318                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2319
2320                 if (m->min_size > m->max_size)
2321                         m->max_size = m->min_size;
2322         }
2323
2324         if (m->keep_free == (uint64_t) -1) {
2325
2326                 if (fs_size > 0) {
2327                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2328
2329                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2330                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2331
2332                 } else
2333                         m->keep_free = DEFAULT_KEEP_FREE;
2334         }
2335
2336         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2337                  format_bytes(a, sizeof(a), m->max_use),
2338                  format_bytes(b, sizeof(b), m->max_size),
2339                  format_bytes(c, sizeof(c), m->min_size),
2340                  format_bytes(d, sizeof(d), m->keep_free));
2341 }
2342
2343 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2344         assert(f);
2345         assert(from || to);
2346
2347         if (from) {
2348                 if (f->header->head_entry_realtime == 0)
2349                         return -ENOENT;
2350
2351                 *from = le64toh(f->header->head_entry_realtime);
2352         }
2353
2354         if (to) {
2355                 if (f->header->tail_entry_realtime == 0)
2356                         return -ENOENT;
2357
2358                 *to = le64toh(f->header->tail_entry_realtime);
2359         }
2360
2361         return 1;
2362 }
2363
2364 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2365         char t[9+32+1] = "_BOOT_ID=";
2366         Object *o;
2367         uint64_t p;
2368         int r;
2369
2370         assert(f);
2371         assert(from || to);
2372
2373         sd_id128_to_string(boot_id, t + 9);
2374
2375         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2376         if (r <= 0)
2377                 return r;
2378
2379         if (le64toh(o->data.n_entries) <= 0)
2380                 return 0;
2381
2382         if (from) {
2383                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2384                 if (r < 0)
2385                         return r;
2386
2387                 *from = le64toh(o->entry.monotonic);
2388         }
2389
2390         if (to) {
2391                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2392                 if (r < 0)
2393                         return r;
2394
2395                 r = generic_array_get_plus_one(f,
2396                                                le64toh(o->data.entry_offset),
2397                                                le64toh(o->data.entry_array_offset),
2398                                                le64toh(o->data.n_entries)-1,
2399                                                &o, NULL);
2400                 if (r <= 0)
2401                         return r;
2402
2403                 *to = le64toh(o->entry.monotonic);
2404         }
2405
2406         return 1;
2407 }
2408
2409 bool journal_file_rotate_suggested(JournalFile *f) {
2410         assert(f);
2411
2412         /* If we gained new header fields we gained new features,
2413          * hence suggest a rotation */
2414         if (le64toh(f->header->header_size) < sizeof(Header)) {
2415                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2416                 return true;
2417         }
2418
2419         /* Let's check if the hash tables grew over a certain fill
2420          * level (75%, borrowing this value from Java's hash table
2421          * implementation), and if so suggest a rotation. To calculate
2422          * the fill level we need the n_data field, which only exists
2423          * in newer versions. */
2424
2425         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2426                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2427                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2428                                   f->path,
2429                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2430                                   (unsigned long long) le64toh(f->header->n_data),
2431                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2432                                   (unsigned long long) (f->last_stat.st_size),
2433                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2434                         return true;
2435                 }
2436
2437         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2438                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2439                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2440                                   f->path,
2441                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2442                                   (unsigned long long) le64toh(f->header->n_fields),
2443                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2444                         return true;
2445                 }
2446
2447         return false;
2448 }