chiark / gitweb /
journal: even more simple static object tests
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
39
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
41
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */
44
45 /* These are the lower and upper bounds if we deduce the max_use value
46  * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
49
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
52
53 /* This is the upper bound if we deduce the keep_free value from the
54  * file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57 /* This is the keep_free value when we can't determine the file
58  * system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */
60
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->seal && f->writable)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fss_file)
100                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
101         else if (f->fsprg_state)
102                 free(f->fsprg_state);
103
104         free(f->fsprg_seed);
105
106         if (f->hmac)
107                 gcry_md_close(f->hmac);
108 #endif
109
110         free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114         Header h;
115         ssize_t k;
116         int r;
117
118         assert(f);
119
120         zero(h);
121         memcpy(h.signature, HEADER_SIGNATURE, 8);
122         h.header_size = htole64(ALIGN64(sizeof(h)));
123
124         h.incompatible_flags =
125                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127         h.compatible_flags =
128                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
129
130         r = sd_id128_randomize(&h.file_id);
131         if (r < 0)
132                 return r;
133
134         if (template) {
135                 h.seqnum_id = template->header->seqnum_id;
136                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
137         } else
138                 h.seqnum_id = h.file_id;
139
140         k = pwrite(f->fd, &h, sizeof(h), 0);
141         if (k < 0)
142                 return -errno;
143
144         if (k != sizeof(h))
145                 return -EIO;
146
147         return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151         int r;
152         sd_id128_t boot_id;
153
154         assert(f);
155
156         r = sd_id128_get_machine(&f->header->machine_id);
157         if (r < 0)
158                 return r;
159
160         r = sd_id128_get_boot(&boot_id);
161         if (r < 0)
162                 return r;
163
164         if (sd_id128_equal(boot_id, f->header->boot_id))
165                 f->tail_entry_monotonic_valid = true;
166
167         f->header->boot_id = boot_id;
168
169         f->header->state = STATE_ONLINE;
170
171         /* Sync the online state to disk */
172         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173         fdatasync(f->fd);
174
175         return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179         assert(f);
180
181         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182                 return -EBADMSG;
183
184         /* In both read and write mode we refuse to open files with
185          * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188                 return -EPROTONOSUPPORT;
189 #else
190         if (f->header->incompatible_flags != 0)
191                 return -EPROTONOSUPPORT;
192 #endif
193
194         /* When open for writing we refuse to open files with
195          * compatible flags, too */
196         if (f->writable) {
197 #ifdef HAVE_GCRYPT
198                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
199                         return -EPROTONOSUPPORT;
200 #else
201                 if (f->header->compatible_flags != 0)
202                         return -EPROTONOSUPPORT;
203 #endif
204         }
205
206         if (f->header->state >= _STATE_MAX)
207                 return -EBADMSG;
208
209         /* The first addition was n_data, so check that we are at least this large */
210         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
211                 return -EBADMSG;
212
213         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
214                 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
215                 return -EBADMSG;
216
217         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
218                 return -ENODATA;
219
220         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
221                 return -ENODATA;
222
223         if (!VALID64(f->header->data_hash_table_offset) ||
224             !VALID64(f->header->field_hash_table_offset) ||
225             !VALID64(f->header->tail_object_offset) ||
226             !VALID64(f->header->entry_array_offset))
227                 return -ENODATA;
228
229         if (f->writable) {
230                 uint8_t state;
231                 sd_id128_t machine_id;
232                 int r;
233
234                 r = sd_id128_get_machine(&machine_id);
235                 if (r < 0)
236                         return r;
237
238                 if (!sd_id128_equal(machine_id, f->header->machine_id))
239                         return -EHOSTDOWN;
240
241                 state = f->header->state;
242
243                 if (state == STATE_ONLINE) {
244                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
245                         return -EBUSY;
246                 } else if (state == STATE_ARCHIVED)
247                         return -ESHUTDOWN;
248                 else if (state != STATE_OFFLINE) {
249                         log_debug("Journal file %s has unknown state %u.", f->path, state);
250                         return -EBUSY;
251                 }
252         }
253
254         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
255
256         if (f->writable)
257                 f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
258
259         return 0;
260 }
261
262 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
263         uint64_t old_size, new_size;
264         int r;
265
266         assert(f);
267
268         /* We assume that this file is not sparse, and we know that
269          * for sure, since we always call posix_fallocate()
270          * ourselves */
271
272         old_size =
273                 le64toh(f->header->header_size) +
274                 le64toh(f->header->arena_size);
275
276         new_size = PAGE_ALIGN(offset + size);
277         if (new_size < le64toh(f->header->header_size))
278                 new_size = le64toh(f->header->header_size);
279
280         if (new_size <= old_size)
281                 return 0;
282
283         if (f->metrics.max_size > 0 &&
284             new_size > f->metrics.max_size)
285                 return -E2BIG;
286
287         if (new_size > f->metrics.min_size &&
288             f->metrics.keep_free > 0) {
289                 struct statvfs svfs;
290
291                 if (fstatvfs(f->fd, &svfs) >= 0) {
292                         uint64_t available;
293
294                         available = svfs.f_bfree * svfs.f_bsize;
295
296                         if (available >= f->metrics.keep_free)
297                                 available -= f->metrics.keep_free;
298                         else
299                                 available = 0;
300
301                         if (new_size - old_size > available)
302                                 return -E2BIG;
303                 }
304         }
305
306         /* Note that the glibc fallocate() fallback is very
307            inefficient, hence we try to minimize the allocation area
308            as we can. */
309         r = posix_fallocate(f->fd, old_size, new_size - old_size);
310         if (r != 0)
311                 return -r;
312
313         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
314
315         if (fstat(f->fd, &f->last_stat) < 0)
316                 return -errno;
317
318         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
319
320         return 0;
321 }
322
323 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
324         assert(f);
325         assert(ret);
326
327         /* Avoid SIGBUS on invalid accesses */
328         if (offset + size > (uint64_t) f->last_stat.st_size) {
329                 /* Hmm, out of range? Let's refresh the fstat() data
330                  * first, before we trust that check. */
331
332                 if (fstat(f->fd, &f->last_stat) < 0 ||
333                     offset + size > (uint64_t) f->last_stat.st_size)
334                         return -EADDRNOTAVAIL;
335         }
336
337         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
338 }
339
340 static uint64_t minimum_header_size(Object *o) {
341
342         static uint64_t table[] = {
343                 [OBJECT_DATA] = sizeof(DataObject),
344                 [OBJECT_FIELD] = sizeof(FieldObject),
345                 [OBJECT_ENTRY] = sizeof(EntryObject),
346                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
347                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
348                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
349                 [OBJECT_TAG] = sizeof(TagObject),
350         };
351
352         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
353                 return sizeof(ObjectHeader);
354
355         return table[o->object.type];
356 }
357
358 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
359         int r;
360         void *t;
361         Object *o;
362         uint64_t s;
363         unsigned context;
364
365         assert(f);
366         assert(ret);
367
368         /* Objects may only be located at multiple of 64 bit */
369         if (!VALID64(offset))
370                 return -EFAULT;
371
372         /* One context for each type, plus one catch-all for the rest */
373         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
374
375         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
376         if (r < 0)
377                 return r;
378
379         o = (Object*) t;
380         s = le64toh(o->object.size);
381
382         if (s < sizeof(ObjectHeader))
383                 return -EBADMSG;
384
385         if (o->object.type <= OBJECT_UNUSED)
386                 return -EBADMSG;
387
388         if (s < minimum_header_size(o))
389                 return -EBADMSG;
390
391         if (type >= 0 && o->object.type != type)
392                 return -EBADMSG;
393
394         if (s > sizeof(ObjectHeader)) {
395                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
396                 if (r < 0)
397                         return r;
398
399                 o = (Object*) t;
400         }
401
402         *ret = o;
403         return 0;
404 }
405
406 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
407         uint64_t r;
408
409         assert(f);
410
411         r = le64toh(f->header->tail_entry_seqnum) + 1;
412
413         if (seqnum) {
414                 /* If an external seqnum counter was passed, we update
415                  * both the local and the external one, and set it to
416                  * the maximum of both */
417
418                 if (*seqnum + 1 > r)
419                         r = *seqnum + 1;
420
421                 *seqnum = r;
422         }
423
424         f->header->tail_entry_seqnum = htole64(r);
425
426         if (f->header->head_entry_seqnum == 0)
427                 f->header->head_entry_seqnum = htole64(r);
428
429         return r;
430 }
431
432 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
433         int r;
434         uint64_t p;
435         Object *tail, *o;
436         void *t;
437
438         assert(f);
439         assert(type > 0 && type < _OBJECT_TYPE_MAX);
440         assert(size >= sizeof(ObjectHeader));
441         assert(offset);
442         assert(ret);
443
444         p = le64toh(f->header->tail_object_offset);
445         if (p == 0)
446                 p = le64toh(f->header->header_size);
447         else {
448                 r = journal_file_move_to_object(f, -1, p, &tail);
449                 if (r < 0)
450                         return r;
451
452                 p += ALIGN64(le64toh(tail->object.size));
453         }
454
455         r = journal_file_allocate(f, p, size);
456         if (r < 0)
457                 return r;
458
459         r = journal_file_move_to(f, type, p, size, &t);
460         if (r < 0)
461                 return r;
462
463         o = (Object*) t;
464
465         zero(o->object);
466         o->object.type = type;
467         o->object.size = htole64(size);
468
469         f->header->tail_object_offset = htole64(p);
470         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
471
472         *ret = o;
473         *offset = p;
474
475         return 0;
476 }
477
478 static int journal_file_setup_data_hash_table(JournalFile *f) {
479         uint64_t s, p;
480         Object *o;
481         int r;
482
483         assert(f);
484
485         /* We estimate that we need 1 hash table entry per 768 of
486            journal file and we want to make sure we never get beyond
487            75% fill level. Calculate the hash table size for the
488            maximum file size based on these metrics. */
489
490         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
491         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
492                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
493
494         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
495
496         r = journal_file_append_object(f,
497                                        OBJECT_DATA_HASH_TABLE,
498                                        offsetof(Object, hash_table.items) + s,
499                                        &o, &p);
500         if (r < 0)
501                 return r;
502
503         memset(o->hash_table.items, 0, s);
504
505         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
506         f->header->data_hash_table_size = htole64(s);
507
508         return 0;
509 }
510
511 static int journal_file_setup_field_hash_table(JournalFile *f) {
512         uint64_t s, p;
513         Object *o;
514         int r;
515
516         assert(f);
517
518         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
519         r = journal_file_append_object(f,
520                                        OBJECT_FIELD_HASH_TABLE,
521                                        offsetof(Object, hash_table.items) + s,
522                                        &o, &p);
523         if (r < 0)
524                 return r;
525
526         memset(o->hash_table.items, 0, s);
527
528         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
529         f->header->field_hash_table_size = htole64(s);
530
531         return 0;
532 }
533
534 static int journal_file_map_data_hash_table(JournalFile *f) {
535         uint64_t s, p;
536         void *t;
537         int r;
538
539         assert(f);
540
541         p = le64toh(f->header->data_hash_table_offset);
542         s = le64toh(f->header->data_hash_table_size);
543
544         r = journal_file_move_to(f,
545                                  OBJECT_DATA_HASH_TABLE,
546                                  p, s,
547                                  &t);
548         if (r < 0)
549                 return r;
550
551         f->data_hash_table = t;
552         return 0;
553 }
554
555 static int journal_file_map_field_hash_table(JournalFile *f) {
556         uint64_t s, p;
557         void *t;
558         int r;
559
560         assert(f);
561
562         p = le64toh(f->header->field_hash_table_offset);
563         s = le64toh(f->header->field_hash_table_size);
564
565         r = journal_file_move_to(f,
566                                  OBJECT_FIELD_HASH_TABLE,
567                                  p, s,
568                                  &t);
569         if (r < 0)
570                 return r;
571
572         f->field_hash_table = t;
573         return 0;
574 }
575
576 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
577         uint64_t p, h;
578         int r;
579
580         assert(f);
581         assert(o);
582         assert(offset > 0);
583         assert(o->object.type == OBJECT_DATA);
584
585         /* This might alter the window we are looking at */
586
587         o->data.next_hash_offset = o->data.next_field_offset = 0;
588         o->data.entry_offset = o->data.entry_array_offset = 0;
589         o->data.n_entries = 0;
590
591         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
592         p = le64toh(f->data_hash_table[h].tail_hash_offset);
593         if (p == 0) {
594                 /* Only entry in the hash table is easy */
595                 f->data_hash_table[h].head_hash_offset = htole64(offset);
596         } else {
597                 /* Move back to the previous data object, to patch in
598                  * pointer */
599
600                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
601                 if (r < 0)
602                         return r;
603
604                 o->data.next_hash_offset = htole64(offset);
605         }
606
607         f->data_hash_table[h].tail_hash_offset = htole64(offset);
608
609         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
610                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
611
612         return 0;
613 }
614
615 int journal_file_find_data_object_with_hash(
616                 JournalFile *f,
617                 const void *data, uint64_t size, uint64_t hash,
618                 Object **ret, uint64_t *offset) {
619
620         uint64_t p, osize, h;
621         int r;
622
623         assert(f);
624         assert(data || size == 0);
625
626         osize = offsetof(Object, data.payload) + size;
627
628         if (f->header->data_hash_table_size == 0)
629                 return -EBADMSG;
630
631         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
632         p = le64toh(f->data_hash_table[h].head_hash_offset);
633
634         while (p > 0) {
635                 Object *o;
636
637                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
638                 if (r < 0)
639                         return r;
640
641                 if (le64toh(o->data.hash) != hash)
642                         goto next;
643
644                 if (o->object.flags & OBJECT_COMPRESSED) {
645 #ifdef HAVE_XZ
646                         uint64_t l, rsize;
647
648                         l = le64toh(o->object.size);
649                         if (l <= offsetof(Object, data.payload))
650                                 return -EBADMSG;
651
652                         l -= offsetof(Object, data.payload);
653
654                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
655                                 return -EBADMSG;
656
657                         if (rsize == size &&
658                             memcmp(f->compress_buffer, data, size) == 0) {
659
660                                 if (ret)
661                                         *ret = o;
662
663                                 if (offset)
664                                         *offset = p;
665
666                                 return 1;
667                         }
668 #else
669                         return -EPROTONOSUPPORT;
670 #endif
671
672                 } else if (le64toh(o->object.size) == osize &&
673                            memcmp(o->data.payload, data, size) == 0) {
674
675                         if (ret)
676                                 *ret = o;
677
678                         if (offset)
679                                 *offset = p;
680
681                         return 1;
682                 }
683
684         next:
685                 p = le64toh(o->data.next_hash_offset);
686         }
687
688         return 0;
689 }
690
691 int journal_file_find_data_object(
692                 JournalFile *f,
693                 const void *data, uint64_t size,
694                 Object **ret, uint64_t *offset) {
695
696         uint64_t hash;
697
698         assert(f);
699         assert(data || size == 0);
700
701         hash = hash64(data, size);
702
703         return journal_file_find_data_object_with_hash(f,
704                                                        data, size, hash,
705                                                        ret, offset);
706 }
707
708 static int journal_file_append_data(
709                 JournalFile *f,
710                 const void *data, uint64_t size,
711                 Object **ret, uint64_t *offset) {
712
713         uint64_t hash, p;
714         uint64_t osize;
715         Object *o;
716         int r;
717         bool compressed = false;
718
719         assert(f);
720         assert(data || size == 0);
721
722         hash = hash64(data, size);
723
724         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
725         if (r < 0)
726                 return r;
727         else if (r > 0) {
728
729                 if (ret)
730                         *ret = o;
731
732                 if (offset)
733                         *offset = p;
734
735                 return 0;
736         }
737
738         osize = offsetof(Object, data.payload) + size;
739         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
740         if (r < 0)
741                 return r;
742
743         o->data.hash = htole64(hash);
744
745 #ifdef HAVE_XZ
746         if (f->compress &&
747             size >= COMPRESSION_SIZE_THRESHOLD) {
748                 uint64_t rsize;
749
750                 compressed = compress_blob(data, size, o->data.payload, &rsize);
751
752                 if (compressed) {
753                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
754                         o->object.flags |= OBJECT_COMPRESSED;
755
756                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
757                 }
758         }
759 #endif
760
761         if (!compressed && size > 0)
762                 memcpy(o->data.payload, data, size);
763
764         r = journal_file_link_data(f, o, p, hash);
765         if (r < 0)
766                 return r;
767
768         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
769         if (r < 0)
770                 return r;
771
772         /* The linking might have altered the window, so let's
773          * refresh our pointer */
774         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
775         if (r < 0)
776                 return r;
777
778         if (ret)
779                 *ret = o;
780
781         if (offset)
782                 *offset = p;
783
784         return 0;
785 }
786
787 uint64_t journal_file_entry_n_items(Object *o) {
788         assert(o);
789         assert(o->object.type == OBJECT_ENTRY);
790
791         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
792 }
793
794 uint64_t journal_file_entry_array_n_items(Object *o) {
795         assert(o);
796         assert(o->object.type == OBJECT_ENTRY_ARRAY);
797
798         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
799 }
800
801 uint64_t journal_file_hash_table_n_items(Object *o) {
802         assert(o);
803         assert(o->object.type == OBJECT_DATA_HASH_TABLE ||
804                o->object.type == OBJECT_FIELD_HASH_TABLE);
805
806         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
807 }
808
809 static int link_entry_into_array(JournalFile *f,
810                                  le64_t *first,
811                                  le64_t *idx,
812                                  uint64_t p) {
813         int r;
814         uint64_t n = 0, ap = 0, q, i, a, hidx;
815         Object *o;
816
817         assert(f);
818         assert(first);
819         assert(idx);
820         assert(p > 0);
821
822         a = le64toh(*first);
823         i = hidx = le64toh(*idx);
824         while (a > 0) {
825
826                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
827                 if (r < 0)
828                         return r;
829
830                 n = journal_file_entry_array_n_items(o);
831                 if (i < n) {
832                         o->entry_array.items[i] = htole64(p);
833                         *idx = htole64(hidx + 1);
834                         return 0;
835                 }
836
837                 i -= n;
838                 ap = a;
839                 a = le64toh(o->entry_array.next_entry_array_offset);
840         }
841
842         if (hidx > n)
843                 n = (hidx+1) * 2;
844         else
845                 n = n * 2;
846
847         if (n < 4)
848                 n = 4;
849
850         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
851                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
852                                        &o, &q);
853         if (r < 0)
854                 return r;
855
856         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
857         if (r < 0)
858                 return r;
859
860         o->entry_array.items[i] = htole64(p);
861
862         if (ap == 0)
863                 *first = htole64(q);
864         else {
865                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
866                 if (r < 0)
867                         return r;
868
869                 o->entry_array.next_entry_array_offset = htole64(q);
870         }
871
872         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
873                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
874
875         *idx = htole64(hidx + 1);
876
877         return 0;
878 }
879
880 static int link_entry_into_array_plus_one(JournalFile *f,
881                                           le64_t *extra,
882                                           le64_t *first,
883                                           le64_t *idx,
884                                           uint64_t p) {
885
886         int r;
887
888         assert(f);
889         assert(extra);
890         assert(first);
891         assert(idx);
892         assert(p > 0);
893
894         if (*idx == 0)
895                 *extra = htole64(p);
896         else {
897                 le64_t i;
898
899                 i = htole64(le64toh(*idx) - 1);
900                 r = link_entry_into_array(f, first, &i, p);
901                 if (r < 0)
902                         return r;
903         }
904
905         *idx = htole64(le64toh(*idx) + 1);
906         return 0;
907 }
908
909 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
910         uint64_t p;
911         int r;
912         assert(f);
913         assert(o);
914         assert(offset > 0);
915
916         p = le64toh(o->entry.items[i].object_offset);
917         if (p == 0)
918                 return -EINVAL;
919
920         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
921         if (r < 0)
922                 return r;
923
924         return link_entry_into_array_plus_one(f,
925                                               &o->data.entry_offset,
926                                               &o->data.entry_array_offset,
927                                               &o->data.n_entries,
928                                               offset);
929 }
930
931 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
932         uint64_t n, i;
933         int r;
934
935         assert(f);
936         assert(o);
937         assert(offset > 0);
938         assert(o->object.type == OBJECT_ENTRY);
939
940         __sync_synchronize();
941
942         /* Link up the entry itself */
943         r = link_entry_into_array(f,
944                                   &f->header->entry_array_offset,
945                                   &f->header->n_entries,
946                                   offset);
947         if (r < 0)
948                 return r;
949
950         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
951
952         if (f->header->head_entry_realtime == 0)
953                 f->header->head_entry_realtime = o->entry.realtime;
954
955         f->header->tail_entry_realtime = o->entry.realtime;
956         f->header->tail_entry_monotonic = o->entry.monotonic;
957
958         f->tail_entry_monotonic_valid = true;
959
960         /* Link up the items */
961         n = journal_file_entry_n_items(o);
962         for (i = 0; i < n; i++) {
963                 r = journal_file_link_entry_item(f, o, offset, i);
964                 if (r < 0)
965                         return r;
966         }
967
968         return 0;
969 }
970
971 static int journal_file_append_entry_internal(
972                 JournalFile *f,
973                 const dual_timestamp *ts,
974                 uint64_t xor_hash,
975                 const EntryItem items[], unsigned n_items,
976                 uint64_t *seqnum,
977                 Object **ret, uint64_t *offset) {
978         uint64_t np;
979         uint64_t osize;
980         Object *o;
981         int r;
982
983         assert(f);
984         assert(items || n_items == 0);
985         assert(ts);
986
987         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
988
989         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
990         if (r < 0)
991                 return r;
992
993         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
994         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
995         o->entry.realtime = htole64(ts->realtime);
996         o->entry.monotonic = htole64(ts->monotonic);
997         o->entry.xor_hash = htole64(xor_hash);
998         o->entry.boot_id = f->header->boot_id;
999
1000         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
1001         if (r < 0)
1002                 return r;
1003
1004         r = journal_file_link_entry(f, o, np);
1005         if (r < 0)
1006                 return r;
1007
1008         if (ret)
1009                 *ret = o;
1010
1011         if (offset)
1012                 *offset = np;
1013
1014         return 0;
1015 }
1016
1017 void journal_file_post_change(JournalFile *f) {
1018         assert(f);
1019
1020         /* inotify() does not receive IN_MODIFY events from file
1021          * accesses done via mmap(). After each access we hence
1022          * trigger IN_MODIFY by truncating the journal file to its
1023          * current size which triggers IN_MODIFY. */
1024
1025         __sync_synchronize();
1026
1027         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1028                 log_error("Failed to to truncate file to its own size: %m");
1029 }
1030
1031 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1032         unsigned i;
1033         EntryItem *items;
1034         int r;
1035         uint64_t xor_hash = 0;
1036         struct dual_timestamp _ts;
1037
1038         assert(f);
1039         assert(iovec || n_iovec == 0);
1040
1041         if (!f->writable)
1042                 return -EPERM;
1043
1044         if (!ts) {
1045                 dual_timestamp_get(&_ts);
1046                 ts = &_ts;
1047         }
1048
1049         if (f->tail_entry_monotonic_valid &&
1050             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1051                 return -EINVAL;
1052
1053         r = journal_file_maybe_append_tag(f, ts->realtime);
1054         if (r < 0)
1055                 return r;
1056
1057         /* alloca() can't take 0, hence let's allocate at least one */
1058         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1059
1060         for (i = 0; i < n_iovec; i++) {
1061                 uint64_t p;
1062                 Object *o;
1063
1064                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1065                 if (r < 0)
1066                         return r;
1067
1068                 xor_hash ^= le64toh(o->data.hash);
1069                 items[i].object_offset = htole64(p);
1070                 items[i].hash = o->data.hash;
1071         }
1072
1073         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1074
1075         journal_file_post_change(f);
1076
1077         return r;
1078 }
1079
1080 static int generic_array_get(JournalFile *f,
1081                              uint64_t first,
1082                              uint64_t i,
1083                              Object **ret, uint64_t *offset) {
1084
1085         Object *o;
1086         uint64_t p = 0, a;
1087         int r;
1088
1089         assert(f);
1090
1091         a = first;
1092         while (a > 0) {
1093                 uint64_t n;
1094
1095                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1096                 if (r < 0)
1097                         return r;
1098
1099                 n = journal_file_entry_array_n_items(o);
1100                 if (i < n) {
1101                         p = le64toh(o->entry_array.items[i]);
1102                         break;
1103                 }
1104
1105                 i -= n;
1106                 a = le64toh(o->entry_array.next_entry_array_offset);
1107         }
1108
1109         if (a <= 0 || p <= 0)
1110                 return 0;
1111
1112         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1113         if (r < 0)
1114                 return r;
1115
1116         if (ret)
1117                 *ret = o;
1118
1119         if (offset)
1120                 *offset = p;
1121
1122         return 1;
1123 }
1124
1125 static int generic_array_get_plus_one(JournalFile *f,
1126                                       uint64_t extra,
1127                                       uint64_t first,
1128                                       uint64_t i,
1129                                       Object **ret, uint64_t *offset) {
1130
1131         Object *o;
1132
1133         assert(f);
1134
1135         if (i == 0) {
1136                 int r;
1137
1138                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1139                 if (r < 0)
1140                         return r;
1141
1142                 if (ret)
1143                         *ret = o;
1144
1145                 if (offset)
1146                         *offset = extra;
1147
1148                 return 1;
1149         }
1150
1151         return generic_array_get(f, first, i-1, ret, offset);
1152 }
1153
/* Results of the test_object callbacks used for bisection: they tell
 * how the tested object relates to the needle. */
enum {
        TEST_FOUND = 0,  /* the object matches the needle */
        TEST_LEFT  = 1,  /* the object sorts before the needle */
        TEST_RIGHT = 2   /* the object sorts after the needle */
};
1159
1160 static int generic_array_bisect(JournalFile *f,
1161                                 uint64_t first,
1162                                 uint64_t n,
1163                                 uint64_t needle,
1164                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1165                                 direction_t direction,
1166                                 Object **ret,
1167                                 uint64_t *offset,
1168                                 uint64_t *idx) {
1169
1170         uint64_t a, p, t = 0, i = 0, last_p = 0;
1171         bool subtract_one = false;
1172         Object *o, *array = NULL;
1173         int r;
1174
1175         assert(f);
1176         assert(test_object);
1177
1178         a = first;
1179         while (a > 0) {
1180                 uint64_t left, right, k, lp;
1181
1182                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1183                 if (r < 0)
1184                         return r;
1185
1186                 k = journal_file_entry_array_n_items(array);
1187                 right = MIN(k, n);
1188                 if (right <= 0)
1189                         return 0;
1190
1191                 i = right - 1;
1192                 lp = p = le64toh(array->entry_array.items[i]);
1193                 if (p <= 0)
1194                         return -EBADMSG;
1195
1196                 r = test_object(f, p, needle);
1197                 if (r < 0)
1198                         return r;
1199
1200                 if (r == TEST_FOUND)
1201                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1202
1203                 if (r == TEST_RIGHT) {
1204                         left = 0;
1205                         right -= 1;
1206                         for (;;) {
1207                                 if (left == right) {
1208                                         if (direction == DIRECTION_UP)
1209                                                 subtract_one = true;
1210
1211                                         i = left;
1212                                         goto found;
1213                                 }
1214
1215                                 assert(left < right);
1216
1217                                 i = (left + right) / 2;
1218                                 p = le64toh(array->entry_array.items[i]);
1219                                 if (p <= 0)
1220                                         return -EBADMSG;
1221
1222                                 r = test_object(f, p, needle);
1223                                 if (r < 0)
1224                                         return r;
1225
1226                                 if (r == TEST_FOUND)
1227                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1228
1229                                 if (r == TEST_RIGHT)
1230                                         right = i;
1231                                 else
1232                                         left = i + 1;
1233                         }
1234                 }
1235
1236                 if (k > n) {
1237                         if (direction == DIRECTION_UP) {
1238                                 i = n;
1239                                 subtract_one = true;
1240                                 goto found;
1241                         }
1242
1243                         return 0;
1244                 }
1245
1246                 last_p = lp;
1247
1248                 n -= k;
1249                 t += k;
1250                 a = le64toh(array->entry_array.next_entry_array_offset);
1251         }
1252
1253         return 0;
1254
1255 found:
1256         if (subtract_one && t == 0 && i == 0)
1257                 return 0;
1258
1259         if (subtract_one && i == 0)
1260                 p = last_p;
1261         else if (subtract_one)
1262                 p = le64toh(array->entry_array.items[i-1]);
1263         else
1264                 p = le64toh(array->entry_array.items[i]);
1265
1266         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1267         if (r < 0)
1268                 return r;
1269
1270         if (ret)
1271                 *ret = o;
1272
1273         if (offset)
1274                 *offset = p;
1275
1276         if (idx)
1277                 *idx = t + i + (subtract_one ? -1 : 0);
1278
1279         return 1;
1280 }
1281
1282 static int generic_array_bisect_plus_one(JournalFile *f,
1283                                          uint64_t extra,
1284                                          uint64_t first,
1285                                          uint64_t n,
1286                                          uint64_t needle,
1287                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1288                                          direction_t direction,
1289                                          Object **ret,
1290                                          uint64_t *offset,
1291                                          uint64_t *idx) {
1292
1293         int r;
1294         bool step_back = false;
1295         Object *o;
1296
1297         assert(f);
1298         assert(test_object);
1299
1300         if (n <= 0)
1301                 return 0;
1302
1303         /* This bisects the array in object 'first', but first checks
1304          * an extra  */
1305         r = test_object(f, extra, needle);
1306         if (r < 0)
1307                 return r;
1308
1309         if (r == TEST_FOUND)
1310                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1311
1312         /* if we are looking with DIRECTION_UP then we need to first
1313            see if in the actual array there is a matching entry, and
1314            return the last one of that. But if there isn't any we need
1315            to return this one. Hence remember this, and return it
1316            below. */
1317         if (r == TEST_LEFT)
1318                 step_back = direction == DIRECTION_UP;
1319
1320         if (r == TEST_RIGHT) {
1321                 if (direction == DIRECTION_DOWN)
1322                         goto found;
1323                 else
1324                         return 0;
1325         }
1326
1327         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1328
1329         if (r == 0 && step_back)
1330                 goto found;
1331
1332         if (r > 0 && idx)
1333                 (*idx) ++;
1334
1335         return r;
1336
1337 found:
1338         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1339         if (r < 0)
1340                 return r;
1341
1342         if (ret)
1343                 *ret = o;
1344
1345         if (offset)
1346                 *offset = extra;
1347
1348         if (idx)
1349                 *idx = 0;
1350
1351         return 1;
1352 }
1353
1354 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1355         assert(f);
1356         assert(p > 0);
1357
1358         if (p == needle)
1359                 return TEST_FOUND;
1360         else if (p < needle)
1361                 return TEST_LEFT;
1362         else
1363                 return TEST_RIGHT;
1364 }
1365
1366 int journal_file_move_to_entry_by_offset(
1367                 JournalFile *f,
1368                 uint64_t p,
1369                 direction_t direction,
1370                 Object **ret,
1371                 uint64_t *offset) {
1372
1373         return generic_array_bisect(f,
1374                                     le64toh(f->header->entry_array_offset),
1375                                     le64toh(f->header->n_entries),
1376                                     p,
1377                                     test_object_offset,
1378                                     direction,
1379                                     ret, offset, NULL);
1380 }
1381
1382
1383 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1384         Object *o;
1385         int r;
1386
1387         assert(f);
1388         assert(p > 0);
1389
1390         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1391         if (r < 0)
1392                 return r;
1393
1394         if (le64toh(o->entry.seqnum) == needle)
1395                 return TEST_FOUND;
1396         else if (le64toh(o->entry.seqnum) < needle)
1397                 return TEST_LEFT;
1398         else
1399                 return TEST_RIGHT;
1400 }
1401
1402 int journal_file_move_to_entry_by_seqnum(
1403                 JournalFile *f,
1404                 uint64_t seqnum,
1405                 direction_t direction,
1406                 Object **ret,
1407                 uint64_t *offset) {
1408
1409         return generic_array_bisect(f,
1410                                     le64toh(f->header->entry_array_offset),
1411                                     le64toh(f->header->n_entries),
1412                                     seqnum,
1413                                     test_object_seqnum,
1414                                     direction,
1415                                     ret, offset, NULL);
1416 }
1417
1418 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1419         Object *o;
1420         int r;
1421
1422         assert(f);
1423         assert(p > 0);
1424
1425         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1426         if (r < 0)
1427                 return r;
1428
1429         if (le64toh(o->entry.realtime) == needle)
1430                 return TEST_FOUND;
1431         else if (le64toh(o->entry.realtime) < needle)
1432                 return TEST_LEFT;
1433         else
1434                 return TEST_RIGHT;
1435 }
1436
1437 int journal_file_move_to_entry_by_realtime(
1438                 JournalFile *f,
1439                 uint64_t realtime,
1440                 direction_t direction,
1441                 Object **ret,
1442                 uint64_t *offset) {
1443
1444         return generic_array_bisect(f,
1445                                     le64toh(f->header->entry_array_offset),
1446                                     le64toh(f->header->n_entries),
1447                                     realtime,
1448                                     test_object_realtime,
1449                                     direction,
1450                                     ret, offset, NULL);
1451 }
1452
1453 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1454         Object *o;
1455         int r;
1456
1457         assert(f);
1458         assert(p > 0);
1459
1460         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1461         if (r < 0)
1462                 return r;
1463
1464         if (le64toh(o->entry.monotonic) == needle)
1465                 return TEST_FOUND;
1466         else if (le64toh(o->entry.monotonic) < needle)
1467                 return TEST_LEFT;
1468         else
1469                 return TEST_RIGHT;
1470 }
1471
1472 int journal_file_move_to_entry_by_monotonic(
1473                 JournalFile *f,
1474                 sd_id128_t boot_id,
1475                 uint64_t monotonic,
1476                 direction_t direction,
1477                 Object **ret,
1478                 uint64_t *offset) {
1479
1480         char t[9+32+1] = "_BOOT_ID=";
1481         Object *o;
1482         int r;
1483
1484         assert(f);
1485
1486         sd_id128_to_string(boot_id, t + 9);
1487         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1488         if (r < 0)
1489                 return r;
1490         if (r == 0)
1491                 return -ENOENT;
1492
1493         return generic_array_bisect_plus_one(f,
1494                                              le64toh(o->data.entry_offset),
1495                                              le64toh(o->data.entry_array_offset),
1496                                              le64toh(o->data.n_entries),
1497                                              monotonic,
1498                                              test_object_monotonic,
1499                                              direction,
1500                                              ret, offset, NULL);
1501 }
1502
1503 int journal_file_next_entry(
1504                 JournalFile *f,
1505                 Object *o, uint64_t p,
1506                 direction_t direction,
1507                 Object **ret, uint64_t *offset) {
1508
1509         uint64_t i, n;
1510         int r;
1511
1512         assert(f);
1513         assert(p > 0 || !o);
1514
1515         n = le64toh(f->header->n_entries);
1516         if (n <= 0)
1517                 return 0;
1518
1519         if (!o)
1520                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1521         else {
1522                 if (o->object.type != OBJECT_ENTRY)
1523                         return -EINVAL;
1524
1525                 r = generic_array_bisect(f,
1526                                          le64toh(f->header->entry_array_offset),
1527                                          le64toh(f->header->n_entries),
1528                                          p,
1529                                          test_object_offset,
1530                                          DIRECTION_DOWN,
1531                                          NULL, NULL,
1532                                          &i);
1533                 if (r <= 0)
1534                         return r;
1535
1536                 if (direction == DIRECTION_DOWN) {
1537                         if (i >= n - 1)
1538                                 return 0;
1539
1540                         i++;
1541                 } else {
1542                         if (i <= 0)
1543                                 return 0;
1544
1545                         i--;
1546                 }
1547         }
1548
1549         /* And jump to it */
1550         return generic_array_get(f,
1551                                  le64toh(f->header->entry_array_offset),
1552                                  i,
1553                                  ret, offset);
1554 }
1555
1556 int journal_file_skip_entry(
1557                 JournalFile *f,
1558                 Object *o, uint64_t p,
1559                 int64_t skip,
1560                 Object **ret, uint64_t *offset) {
1561
1562         uint64_t i, n;
1563         int r;
1564
1565         assert(f);
1566         assert(o);
1567         assert(p > 0);
1568
1569         if (o->object.type != OBJECT_ENTRY)
1570                 return -EINVAL;
1571
1572         r = generic_array_bisect(f,
1573                                  le64toh(f->header->entry_array_offset),
1574                                  le64toh(f->header->n_entries),
1575                                  p,
1576                                  test_object_offset,
1577                                  DIRECTION_DOWN,
1578                                  NULL, NULL,
1579                                  &i);
1580         if (r <= 0)
1581                 return r;
1582
1583         /* Calculate new index */
1584         if (skip < 0) {
1585                 if ((uint64_t) -skip >= i)
1586                         i = 0;
1587                 else
1588                         i = i - (uint64_t) -skip;
1589         } else
1590                 i  += (uint64_t) skip;
1591
1592         n = le64toh(f->header->n_entries);
1593         if (n <= 0)
1594                 return -EBADMSG;
1595
1596         if (i >= n)
1597                 i = n-1;
1598
1599         return generic_array_get(f,
1600                                  le64toh(f->header->entry_array_offset),
1601                                  i,
1602                                  ret, offset);
1603 }
1604
1605 int journal_file_next_entry_for_data(
1606                 JournalFile *f,
1607                 Object *o, uint64_t p,
1608                 uint64_t data_offset,
1609                 direction_t direction,
1610                 Object **ret, uint64_t *offset) {
1611
1612         uint64_t n, i;
1613         int r;
1614         Object *d;
1615
1616         assert(f);
1617         assert(p > 0 || !o);
1618
1619         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1620         if (r < 0)
1621                 return r;
1622
1623         n = le64toh(d->data.n_entries);
1624         if (n <= 0)
1625                 return n;
1626
1627         if (!o)
1628                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1629         else {
1630                 if (o->object.type != OBJECT_ENTRY)
1631                         return -EINVAL;
1632
1633                 r = generic_array_bisect_plus_one(f,
1634                                                   le64toh(d->data.entry_offset),
1635                                                   le64toh(d->data.entry_array_offset),
1636                                                   le64toh(d->data.n_entries),
1637                                                   p,
1638                                                   test_object_offset,
1639                                                   DIRECTION_DOWN,
1640                                                   NULL, NULL,
1641                                                   &i);
1642
1643                 if (r <= 0)
1644                         return r;
1645
1646                 if (direction == DIRECTION_DOWN) {
1647                         if (i >= n - 1)
1648                                 return 0;
1649
1650                         i++;
1651                 } else {
1652                         if (i <= 0)
1653                                 return 0;
1654
1655                         i--;
1656                 }
1657
1658         }
1659
1660         return generic_array_get_plus_one(f,
1661                                           le64toh(d->data.entry_offset),
1662                                           le64toh(d->data.entry_array_offset),
1663                                           i,
1664                                           ret, offset);
1665 }
1666
1667 int journal_file_move_to_entry_by_offset_for_data(
1668                 JournalFile *f,
1669                 uint64_t data_offset,
1670                 uint64_t p,
1671                 direction_t direction,
1672                 Object **ret, uint64_t *offset) {
1673
1674         int r;
1675         Object *d;
1676
1677         assert(f);
1678
1679         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1680         if (r < 0)
1681                 return r;
1682
1683         return generic_array_bisect_plus_one(f,
1684                                              le64toh(d->data.entry_offset),
1685                                              le64toh(d->data.entry_array_offset),
1686                                              le64toh(d->data.n_entries),
1687                                              p,
1688                                              test_object_offset,
1689                                              direction,
1690                                              ret, offset, NULL);
1691 }
1692
1693 int journal_file_move_to_entry_by_monotonic_for_data(
1694                 JournalFile *f,
1695                 uint64_t data_offset,
1696                 sd_id128_t boot_id,
1697                 uint64_t monotonic,
1698                 direction_t direction,
1699                 Object **ret, uint64_t *offset) {
1700
1701         char t[9+32+1] = "_BOOT_ID=";
1702         Object *o, *d;
1703         int r;
1704         uint64_t b, z;
1705
1706         assert(f);
1707
1708         /* First, seek by time */
1709         sd_id128_to_string(boot_id, t + 9);
1710         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1711         if (r < 0)
1712                 return r;
1713         if (r == 0)
1714                 return -ENOENT;
1715
1716         r = generic_array_bisect_plus_one(f,
1717                                           le64toh(o->data.entry_offset),
1718                                           le64toh(o->data.entry_array_offset),
1719                                           le64toh(o->data.n_entries),
1720                                           monotonic,
1721                                           test_object_monotonic,
1722                                           direction,
1723                                           NULL, &z, NULL);
1724         if (r <= 0)
1725                 return r;
1726
1727         /* And now, continue seeking until we find an entry that
1728          * exists in both bisection arrays */
1729
1730         for (;;) {
1731                 Object *qo;
1732                 uint64_t p, q;
1733
1734                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1735                 if (r < 0)
1736                         return r;
1737
1738                 r = generic_array_bisect_plus_one(f,
1739                                                   le64toh(d->data.entry_offset),
1740                                                   le64toh(d->data.entry_array_offset),
1741                                                   le64toh(d->data.n_entries),
1742                                                   z,
1743                                                   test_object_offset,
1744                                                   direction,
1745                                                   NULL, &p, NULL);
1746                 if (r <= 0)
1747                         return r;
1748
1749                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1750                 if (r < 0)
1751                         return r;
1752
1753                 r = generic_array_bisect_plus_one(f,
1754                                                   le64toh(o->data.entry_offset),
1755                                                   le64toh(o->data.entry_array_offset),
1756                                                   le64toh(o->data.n_entries),
1757                                                   p,
1758                                                   test_object_offset,
1759                                                   direction,
1760                                                   &qo, &q, NULL);
1761
1762                 if (r <= 0)
1763                         return r;
1764
1765                 if (p == q) {
1766                         if (ret)
1767                                 *ret = qo;
1768                         if (offset)
1769                                 *offset = q;
1770
1771                         return 1;
1772                 }
1773
1774                 z = q;
1775         }
1776
1777         return 0;
1778 }
1779
1780 int journal_file_move_to_entry_by_seqnum_for_data(
1781                 JournalFile *f,
1782                 uint64_t data_offset,
1783                 uint64_t seqnum,
1784                 direction_t direction,
1785                 Object **ret, uint64_t *offset) {
1786
1787         Object *d;
1788         int r;
1789
1790         assert(f);
1791
1792         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1793         if (r < 0)
1794                 return r;
1795
1796         return generic_array_bisect_plus_one(f,
1797                                              le64toh(d->data.entry_offset),
1798                                              le64toh(d->data.entry_array_offset),
1799                                              le64toh(d->data.n_entries),
1800                                              seqnum,
1801                                              test_object_seqnum,
1802                                              direction,
1803                                              ret, offset, NULL);
1804 }
1805
1806 int journal_file_move_to_entry_by_realtime_for_data(
1807                 JournalFile *f,
1808                 uint64_t data_offset,
1809                 uint64_t realtime,
1810                 direction_t direction,
1811                 Object **ret, uint64_t *offset) {
1812
1813         Object *d;
1814         int r;
1815
1816         assert(f);
1817
1818         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1819         if (r < 0)
1820                 return r;
1821
1822         return generic_array_bisect_plus_one(f,
1823                                              le64toh(d->data.entry_offset),
1824                                              le64toh(d->data.entry_array_offset),
1825                                              le64toh(d->data.n_entries),
1826                                              realtime,
1827                                              test_object_realtime,
1828                                              direction,
1829                                              ret, offset, NULL);
1830 }
1831
1832 void journal_file_dump(JournalFile *f) {
1833         Object *o;
1834         int r;
1835         uint64_t p;
1836
1837         assert(f);
1838
1839         journal_file_print_header(f);
1840
1841         p = le64toh(f->header->header_size);
1842         while (p != 0) {
1843                 r = journal_file_move_to_object(f, -1, p, &o);
1844                 if (r < 0)
1845                         goto fail;
1846
1847                 switch (o->object.type) {
1848
1849                 case OBJECT_UNUSED:
1850                         printf("Type: OBJECT_UNUSED\n");
1851                         break;
1852
1853                 case OBJECT_DATA:
1854                         printf("Type: OBJECT_DATA\n");
1855                         break;
1856
1857                 case OBJECT_ENTRY:
1858                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1859                                (unsigned long long) le64toh(o->entry.seqnum),
1860                                (unsigned long long) le64toh(o->entry.monotonic),
1861                                (unsigned long long) le64toh(o->entry.realtime));
1862                         break;
1863
1864                 case OBJECT_FIELD_HASH_TABLE:
1865                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1866                         break;
1867
1868                 case OBJECT_DATA_HASH_TABLE:
1869                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1870                         break;
1871
1872                 case OBJECT_ENTRY_ARRAY:
1873                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1874                         break;
1875
1876                 case OBJECT_TAG:
1877                         printf("Type: OBJECT_TAG %llu\n",
1878                                (unsigned long long) le64toh(o->tag.seqnum));
1879                         break;
1880                 }
1881
1882                 if (o->object.flags & OBJECT_COMPRESSED)
1883                         printf("Flags: COMPRESSED\n");
1884
1885                 if (p == le64toh(f->header->tail_object_offset))
1886                         p = 0;
1887                 else
1888                         p = p + ALIGN64(le64toh(o->object.size));
1889         }
1890
1891         return;
1892 fail:
1893         log_error("File corrupt");
1894 }
1895
1896 void journal_file_print_header(JournalFile *f) {
1897         char a[33], b[33], c[33];
1898         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1899
1900         assert(f);
1901
1902         printf("File Path: %s\n"
1903                "File ID: %s\n"
1904                "Machine ID: %s\n"
1905                "Boot ID: %s\n"
1906                "Sequential Number ID: %s\n"
1907                "State: %s\n"
1908                "Compatible Flags:%s%s\n"
1909                "Incompatible Flags:%s%s\n"
1910                "Header size: %llu\n"
1911                "Arena size: %llu\n"
1912                "Data Hash Table Size: %llu\n"
1913                "Field Hash Table Size: %llu\n"
1914                "Rotate Suggested: %s\n"
1915                "Head Sequential Number: %llu\n"
1916                "Tail Sequential Number: %llu\n"
1917                "Head Realtime Timestamp: %s\n"
1918                "Tail Realtime Timestamp: %s\n"
1919                "Objects: %llu\n"
1920                "Entry Objects: %llu\n",
1921                f->path,
1922                sd_id128_to_string(f->header->file_id, a),
1923                sd_id128_to_string(f->header->machine_id, b),
1924                sd_id128_to_string(f->header->boot_id, c),
1925                sd_id128_to_string(f->header->seqnum_id, c),
1926                f->header->state == STATE_OFFLINE ? "OFFLINE" :
1927                f->header->state == STATE_ONLINE ? "ONLINE" :
1928                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1929                (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1930                (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1931                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1932                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1933                (unsigned long long) le64toh(f->header->header_size),
1934                (unsigned long long) le64toh(f->header->arena_size),
1935                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1936                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1937                yes_no(journal_file_rotate_suggested(f)),
1938                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1939                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1940                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1941                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1942                (unsigned long long) le64toh(f->header->n_objects),
1943                (unsigned long long) le64toh(f->header->n_entries));
1944
1945         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1946                 printf("Data Objects: %llu\n"
1947                        "Data Hash Table Fill: %.1f%%\n",
1948                        (unsigned long long) le64toh(f->header->n_data),
1949                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1950
1951         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1952                 printf("Field Objects: %llu\n"
1953                        "Field Hash Table Fill: %.1f%%\n",
1954                        (unsigned long long) le64toh(f->header->n_fields),
1955                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1956
1957         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1958                 printf("Tag Objects: %llu\n",
1959                        (unsigned long long) le64toh(f->header->n_tags));
1960         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1961                 printf("Entry Array Objects: %llu\n",
1962                        (unsigned long long) le64toh(f->header->n_entry_arrays));
1963 }
1964
1965 int journal_file_open(
1966                 const char *fname,
1967                 int flags,
1968                 mode_t mode,
1969                 bool compress,
1970                 bool seal,
1971                 JournalMetrics *metrics,
1972                 MMapCache *mmap_cache,
1973                 JournalFile *template,
1974                 JournalFile **ret) {
1975
1976         JournalFile *f;
1977         int r;
1978         bool newly_created = false;
1979
1980         assert(fname);
1981
1982         if ((flags & O_ACCMODE) != O_RDONLY &&
1983             (flags & O_ACCMODE) != O_RDWR)
1984                 return -EINVAL;
1985
1986         if (!endswith(fname, ".journal") &&
1987             !endswith(fname, ".journal~"))
1988                 return -EINVAL;
1989
1990         f = new0(JournalFile, 1);
1991         if (!f)
1992                 return -ENOMEM;
1993
1994         f->fd = -1;
1995         f->mode = mode;
1996
1997         f->flags = flags;
1998         f->prot = prot_from_flags(flags);
1999         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2000         f->compress = compress;
2001         f->seal = seal;
2002
2003         if (mmap_cache)
2004                 f->mmap = mmap_cache_ref(mmap_cache);
2005         else {
2006                 /* One context for each type, plus the zeroth catchall
2007                  * context. One fd for the file plus one for each type
2008                  * (which we need during verification */
2009                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2010                 if (!f->mmap) {
2011                         r = -ENOMEM;
2012                         goto fail;
2013                 }
2014         }
2015
2016         f->path = strdup(fname);
2017         if (!f->path) {
2018                 r = -ENOMEM;
2019                 goto fail;
2020         }
2021
2022         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2023         if (f->fd < 0) {
2024                 r = -errno;
2025                 goto fail;
2026         }
2027
2028         if (fstat(f->fd, &f->last_stat) < 0) {
2029                 r = -errno;
2030                 goto fail;
2031         }
2032
2033         if (f->last_stat.st_size == 0 && f->writable) {
2034                 newly_created = true;
2035
2036                 /* Try to load the FSPRG state, and if we can't, then
2037                  * just don't do sealing */
2038                 r = journal_file_fss_load(f);
2039                 if (r < 0)
2040                         f->seal = false;
2041
2042                 r = journal_file_init_header(f, template);
2043                 if (r < 0)
2044                         goto fail;
2045
2046                 if (fstat(f->fd, &f->last_stat) < 0) {
2047                         r = -errno;
2048                         goto fail;
2049                 }
2050         }
2051
2052         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2053                 r = -EIO;
2054                 goto fail;
2055         }
2056
2057         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2058         if (f->header == MAP_FAILED) {
2059                 f->header = NULL;
2060                 r = -errno;
2061                 goto fail;
2062         }
2063
2064         if (!newly_created) {
2065                 r = journal_file_verify_header(f);
2066                 if (r < 0)
2067                         goto fail;
2068         }
2069
2070         if (!newly_created && f->writable) {
2071                 r = journal_file_fss_load(f);
2072                 if (r < 0)
2073                         goto fail;
2074         }
2075
2076         if (f->writable) {
2077                 if (metrics) {
2078                         journal_default_metrics(metrics, f->fd);
2079                         f->metrics = *metrics;
2080                 } else if (template)
2081                         f->metrics = template->metrics;
2082
2083                 r = journal_file_refresh_header(f);
2084                 if (r < 0)
2085                         goto fail;
2086         }
2087
2088         r = journal_file_hmac_setup(f);
2089         if (r < 0)
2090                 goto fail;
2091
2092         if (newly_created) {
2093                 r = journal_file_setup_field_hash_table(f);
2094                 if (r < 0)
2095                         goto fail;
2096
2097                 r = journal_file_setup_data_hash_table(f);
2098                 if (r < 0)
2099                         goto fail;
2100
2101                 r = journal_file_append_first_tag(f);
2102                 if (r < 0)
2103                         goto fail;
2104         }
2105
2106         r = journal_file_map_field_hash_table(f);
2107         if (r < 0)
2108                 goto fail;
2109
2110         r = journal_file_map_data_hash_table(f);
2111         if (r < 0)
2112                 goto fail;
2113
2114         if (ret)
2115                 *ret = f;
2116
2117         return 0;
2118
2119 fail:
2120         journal_file_close(f);
2121
2122         return r;
2123 }
2124
2125 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2126         char *p;
2127         size_t l;
2128         JournalFile *old_file, *new_file = NULL;
2129         int r;
2130
2131         assert(f);
2132         assert(*f);
2133
2134         old_file = *f;
2135
2136         if (!old_file->writable)
2137                 return -EINVAL;
2138
2139         if (!endswith(old_file->path, ".journal"))
2140                 return -EINVAL;
2141
2142         l = strlen(old_file->path);
2143
2144         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2145         if (!p)
2146                 return -ENOMEM;
2147
2148         memcpy(p, old_file->path, l - 8);
2149         p[l-8] = '@';
2150         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2151         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2152                  "-%016llx-%016llx.journal",
2153                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2154                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2155
2156         r = rename(old_file->path, p);
2157         free(p);
2158
2159         if (r < 0)
2160                 return -errno;
2161
2162         old_file->header->state = STATE_ARCHIVED;
2163
2164         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2165         journal_file_close(old_file);
2166
2167         *f = new_file;
2168         return r;
2169 }
2170
2171 int journal_file_open_reliably(
2172                 const char *fname,
2173                 int flags,
2174                 mode_t mode,
2175                 bool compress,
2176                 bool seal,
2177                 JournalMetrics *metrics,
2178                 MMapCache *mmap_cache,
2179                 JournalFile *template,
2180                 JournalFile **ret) {
2181
2182         int r;
2183         size_t l;
2184         char *p;
2185
2186         r = journal_file_open(fname, flags, mode, compress, seal,
2187                               metrics, mmap_cache, template, ret);
2188         if (r != -EBADMSG && /* corrupted */
2189             r != -ENODATA && /* truncated */
2190             r != -EHOSTDOWN && /* other machine */
2191             r != -EPROTONOSUPPORT && /* incompatible feature */
2192             r != -EBUSY && /* unclean shutdown */
2193             r != -ESHUTDOWN /* already archived */)
2194                 return r;
2195
2196         if ((flags & O_ACCMODE) == O_RDONLY)
2197                 return r;
2198
2199         if (!(flags & O_CREAT))
2200                 return r;
2201
2202         if (!endswith(fname, ".journal"))
2203                 return r;
2204
2205         /* The file is corrupted. Rotate it away and try it again (but only once) */
2206
2207         l = strlen(fname);
2208         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2209                      (int) (l-8), fname,
2210                      (unsigned long long) now(CLOCK_REALTIME),
2211                      random_ull()) < 0)
2212                 return -ENOMEM;
2213
2214         r = rename(fname, p);
2215         free(p);
2216         if (r < 0)
2217                 return -errno;
2218
2219         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2220
2221         return journal_file_open(fname, flags, mode, compress, seal,
2222                                  metrics, mmap_cache, template, ret);
2223 }
2224
2225
2226 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2227         uint64_t i, n;
2228         uint64_t q, xor_hash = 0;
2229         int r;
2230         EntryItem *items;
2231         dual_timestamp ts;
2232
2233         assert(from);
2234         assert(to);
2235         assert(o);
2236         assert(p);
2237
2238         if (!to->writable)
2239                 return -EPERM;
2240
2241         ts.monotonic = le64toh(o->entry.monotonic);
2242         ts.realtime = le64toh(o->entry.realtime);
2243
2244         if (to->tail_entry_monotonic_valid &&
2245             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2246                 return -EINVAL;
2247
2248         n = journal_file_entry_n_items(o);
2249         items = alloca(sizeof(EntryItem) * n);
2250
2251         for (i = 0; i < n; i++) {
2252                 uint64_t l, h;
2253                 le64_t le_hash;
2254                 size_t t;
2255                 void *data;
2256                 Object *u;
2257
2258                 q = le64toh(o->entry.items[i].object_offset);
2259                 le_hash = o->entry.items[i].hash;
2260
2261                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2262                 if (r < 0)
2263                         return r;
2264
2265                 if (le_hash != o->data.hash)
2266                         return -EBADMSG;
2267
2268                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2269                 t = (size_t) l;
2270
2271                 /* We hit the limit on 32bit machines */
2272                 if ((uint64_t) t != l)
2273                         return -E2BIG;
2274
2275                 if (o->object.flags & OBJECT_COMPRESSED) {
2276 #ifdef HAVE_XZ
2277                         uint64_t rsize;
2278
2279                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2280                                 return -EBADMSG;
2281
2282                         data = from->compress_buffer;
2283                         l = rsize;
2284 #else
2285                         return -EPROTONOSUPPORT;
2286 #endif
2287                 } else
2288                         data = o->data.payload;
2289
2290                 r = journal_file_append_data(to, data, l, &u, &h);
2291                 if (r < 0)
2292                         return r;
2293
2294                 xor_hash ^= le64toh(u->data.hash);
2295                 items[i].object_offset = htole64(h);
2296                 items[i].hash = u->data.hash;
2297
2298                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2299                 if (r < 0)
2300                         return r;
2301         }
2302
2303         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2304 }
2305
2306 void journal_default_metrics(JournalMetrics *m, int fd) {
2307         uint64_t fs_size = 0;
2308         struct statvfs ss;
2309         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2310
2311         assert(m);
2312         assert(fd >= 0);
2313
2314         if (fstatvfs(fd, &ss) >= 0)
2315                 fs_size = ss.f_frsize * ss.f_blocks;
2316
2317         if (m->max_use == (uint64_t) -1) {
2318
2319                 if (fs_size > 0) {
2320                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2321
2322                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2323                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2324
2325                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2326                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2327                 } else
2328                         m->max_use = DEFAULT_MAX_USE_LOWER;
2329         } else {
2330                 m->max_use = PAGE_ALIGN(m->max_use);
2331
2332                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2333                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2334         }
2335
2336         if (m->max_size == (uint64_t) -1) {
2337                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2338
2339                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2340                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2341         } else
2342                 m->max_size = PAGE_ALIGN(m->max_size);
2343
2344         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2345                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2346
2347         if (m->max_size*2 > m->max_use)
2348                 m->max_use = m->max_size*2;
2349
2350         if (m->min_size == (uint64_t) -1)
2351                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2352         else {
2353                 m->min_size = PAGE_ALIGN(m->min_size);
2354
2355                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2356                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2357
2358                 if (m->min_size > m->max_size)
2359                         m->max_size = m->min_size;
2360         }
2361
2362         if (m->keep_free == (uint64_t) -1) {
2363
2364                 if (fs_size > 0) {
2365                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2366
2367                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2368                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2369
2370                 } else
2371                         m->keep_free = DEFAULT_KEEP_FREE;
2372         }
2373
2374         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2375                  format_bytes(a, sizeof(a), m->max_use),
2376                  format_bytes(b, sizeof(b), m->max_size),
2377                  format_bytes(c, sizeof(c), m->min_size),
2378                  format_bytes(d, sizeof(d), m->keep_free));
2379 }
2380
2381 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2382         assert(f);
2383         assert(from || to);
2384
2385         if (from) {
2386                 if (f->header->head_entry_realtime == 0)
2387                         return -ENOENT;
2388
2389                 *from = le64toh(f->header->head_entry_realtime);
2390         }
2391
2392         if (to) {
2393                 if (f->header->tail_entry_realtime == 0)
2394                         return -ENOENT;
2395
2396                 *to = le64toh(f->header->tail_entry_realtime);
2397         }
2398
2399         return 1;
2400 }
2401
2402 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2403         char t[9+32+1] = "_BOOT_ID=";
2404         Object *o;
2405         uint64_t p;
2406         int r;
2407
2408         assert(f);
2409         assert(from || to);
2410
2411         sd_id128_to_string(boot_id, t + 9);
2412
2413         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2414         if (r <= 0)
2415                 return r;
2416
2417         if (le64toh(o->data.n_entries) <= 0)
2418                 return 0;
2419
2420         if (from) {
2421                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2422                 if (r < 0)
2423                         return r;
2424
2425                 *from = le64toh(o->entry.monotonic);
2426         }
2427
2428         if (to) {
2429                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2430                 if (r < 0)
2431                         return r;
2432
2433                 r = generic_array_get_plus_one(f,
2434                                                le64toh(o->data.entry_offset),
2435                                                le64toh(o->data.entry_array_offset),
2436                                                le64toh(o->data.n_entries)-1,
2437                                                &o, NULL);
2438                 if (r <= 0)
2439                         return r;
2440
2441                 *to = le64toh(o->entry.monotonic);
2442         }
2443
2444         return 1;
2445 }
2446
2447 bool journal_file_rotate_suggested(JournalFile *f) {
2448         assert(f);
2449
2450         /* If we gained new header fields we gained new features,
2451          * hence suggest a rotation */
2452         if (le64toh(f->header->header_size) < sizeof(Header)) {
2453                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2454                 return true;
2455         }
2456
2457         /* Let's check if the hash tables grew over a certain fill
2458          * level (75%, borrowing this value from Java's hash table
2459          * implementation), and if so suggest a rotation. To calculate
2460          * the fill level we need the n_data field, which only exists
2461          * in newer versions. */
2462
2463         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2464                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2465                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2466                                   f->path,
2467                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2468                                   (unsigned long long) le64toh(f->header->n_data),
2469                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2470                                   (unsigned long long) (f->last_stat.st_size),
2471                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2472                         return true;
2473                 }
2474
2475         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2476                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2477                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2478                                   f->path,
2479                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2480                                   (unsigned long long) le64toh(f->header->n_fields),
2481                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2482                         return true;
2483                 }
2484
2485         return false;
2486 }