chiark / gitweb /
journal: fix verification without key
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
39
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
41
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */
44
45 /* These are the lower and upper bounds if we deduce the max_use value
46  * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
49
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
52
53 /* This is the upper bound if we deduce the keep_free value from the
54  * file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57 /* This is the keep_free value when we can't determine the file
58  * system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */
60
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->seal && f->writable)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fss_file)
100                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
101         else if (f->fsprg_state)
102                 free(f->fsprg_state);
103
104         free(f->fsprg_seed);
105
106         if (f->hmac)
107                 gcry_md_close(f->hmac);
108 #endif
109
110         free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114         Header h;
115         ssize_t k;
116         int r;
117
118         assert(f);
119
120         zero(h);
121         memcpy(h.signature, HEADER_SIGNATURE, 8);
122         h.header_size = htole64(ALIGN64(sizeof(h)));
123
124         h.incompatible_flags =
125                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127         h.compatible_flags =
128                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
129
130         r = sd_id128_randomize(&h.file_id);
131         if (r < 0)
132                 return r;
133
134         if (template) {
135                 h.seqnum_id = template->header->seqnum_id;
136                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
137         } else
138                 h.seqnum_id = h.file_id;
139
140         k = pwrite(f->fd, &h, sizeof(h), 0);
141         if (k < 0)
142                 return -errno;
143
144         if (k != sizeof(h))
145                 return -EIO;
146
147         return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151         int r;
152         sd_id128_t boot_id;
153
154         assert(f);
155
156         r = sd_id128_get_machine(&f->header->machine_id);
157         if (r < 0)
158                 return r;
159
160         r = sd_id128_get_boot(&boot_id);
161         if (r < 0)
162                 return r;
163
164         if (sd_id128_equal(boot_id, f->header->boot_id))
165                 f->tail_entry_monotonic_valid = true;
166
167         f->header->boot_id = boot_id;
168
169         f->header->state = STATE_ONLINE;
170
171         /* Sync the online state to disk */
172         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173         fdatasync(f->fd);
174
175         return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179         assert(f);
180
181         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182                 return -EBADMSG;
183
184         /* In both read and write mode we refuse to open files with
185          * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188                 return -EPROTONOSUPPORT;
189 #else
190         if (f->header->incompatible_flags != 0)
191                 return -EPROTONOSUPPORT;
192 #endif
193
194         /* When open for writing we refuse to open files with
195          * compatible flags, too */
196         if (f->writable) {
197 #ifdef HAVE_GCRYPT
198                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
199                         return -EPROTONOSUPPORT;
200 #else
201                 if (f->header->compatible_flags != 0)
202                         return -EPROTONOSUPPORT;
203 #endif
204         }
205
206         if (f->header->state >= _STATE_MAX)
207                 return -EBADMSG;
208
209         /* The first addition was n_data, so check that we are at least this large */
210         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
211                 return -EBADMSG;
212
213         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
214                 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
215                 return -EBADMSG;
216
217         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
218                 return -ENODATA;
219
220         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
221                 return -ENODATA;
222
223         if (!VALID64(f->header->data_hash_table_offset) ||
224             !VALID64(f->header->field_hash_table_offset) ||
225             !VALID64(f->header->tail_object_offset) ||
226             !VALID64(f->header->entry_array_offset))
227                 return -ENODATA;
228
229         if (f->writable) {
230                 uint8_t state;
231                 sd_id128_t machine_id;
232                 int r;
233
234                 r = sd_id128_get_machine(&machine_id);
235                 if (r < 0)
236                         return r;
237
238                 if (!sd_id128_equal(machine_id, f->header->machine_id))
239                         return -EHOSTDOWN;
240
241                 state = f->header->state;
242
243                 if (state == STATE_ONLINE) {
244                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
245                         return -EBUSY;
246                 } else if (state == STATE_ARCHIVED)
247                         return -ESHUTDOWN;
248                 else if (state != STATE_OFFLINE) {
249                         log_debug("Journal file %s has unknown state %u.", f->path, state);
250                         return -EBUSY;
251                 }
252         }
253
254         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
255
256         if (f->writable)
257                 f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
258
259         return 0;
260 }
261
262 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
263         uint64_t old_size, new_size;
264         int r;
265
266         assert(f);
267
268         /* We assume that this file is not sparse, and we know that
269          * for sure, since we always call posix_fallocate()
270          * ourselves */
271
272         old_size =
273                 le64toh(f->header->header_size) +
274                 le64toh(f->header->arena_size);
275
276         new_size = PAGE_ALIGN(offset + size);
277         if (new_size < le64toh(f->header->header_size))
278                 new_size = le64toh(f->header->header_size);
279
280         if (new_size <= old_size)
281                 return 0;
282
283         if (f->metrics.max_size > 0 &&
284             new_size > f->metrics.max_size)
285                 return -E2BIG;
286
287         if (new_size > f->metrics.min_size &&
288             f->metrics.keep_free > 0) {
289                 struct statvfs svfs;
290
291                 if (fstatvfs(f->fd, &svfs) >= 0) {
292                         uint64_t available;
293
294                         available = svfs.f_bfree * svfs.f_bsize;
295
296                         if (available >= f->metrics.keep_free)
297                                 available -= f->metrics.keep_free;
298                         else
299                                 available = 0;
300
301                         if (new_size - old_size > available)
302                                 return -E2BIG;
303                 }
304         }
305
306         /* Note that the glibc fallocate() fallback is very
307            inefficient, hence we try to minimize the allocation area
308            as we can. */
309         r = posix_fallocate(f->fd, old_size, new_size - old_size);
310         if (r != 0)
311                 return -r;
312
313         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
314
315         if (fstat(f->fd, &f->last_stat) < 0)
316                 return -errno;
317
318         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
319
320         return 0;
321 }
322
323 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
324         assert(f);
325         assert(ret);
326
327         /* Avoid SIGBUS on invalid accesses */
328         if (offset + size > (uint64_t) f->last_stat.st_size) {
329                 /* Hmm, out of range? Let's refresh the fstat() data
330                  * first, before we trust that check. */
331
332                 if (fstat(f->fd, &f->last_stat) < 0 ||
333                     offset + size > (uint64_t) f->last_stat.st_size)
334                         return -EADDRNOTAVAIL;
335         }
336
337         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
338 }
339
340 static uint64_t minimum_header_size(Object *o) {
341
342         static uint64_t table[] = {
343                 [OBJECT_DATA] = sizeof(DataObject),
344                 [OBJECT_FIELD] = sizeof(FieldObject),
345                 [OBJECT_ENTRY] = sizeof(EntryObject),
346                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
347                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
348                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
349                 [OBJECT_TAG] = sizeof(TagObject),
350         };
351
352         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
353                 return sizeof(ObjectHeader);
354
355         return table[o->object.type];
356 }
357
358 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
359         int r;
360         void *t;
361         Object *o;
362         uint64_t s;
363         unsigned context;
364
365         assert(f);
366         assert(ret);
367
368         /* Objects may only be located at multiple of 64 bit */
369         if (!VALID64(offset))
370                 return -EFAULT;
371
372         /* One context for each type, plus one catch-all for the rest */
373         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
374
375         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
376         if (r < 0)
377                 return r;
378
379         o = (Object*) t;
380         s = le64toh(o->object.size);
381
382         if (s < sizeof(ObjectHeader))
383                 return -EBADMSG;
384
385         if (o->object.type <= OBJECT_UNUSED)
386                 return -EBADMSG;
387
388         if (s < minimum_header_size(o))
389                 return -EBADMSG;
390
391         if (type >= 0 && o->object.type != type)
392                 return -EBADMSG;
393
394         if (s > sizeof(ObjectHeader)) {
395                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
396                 if (r < 0)
397                         return r;
398
399                 o = (Object*) t;
400         }
401
402         *ret = o;
403         return 0;
404 }
405
406 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
407         uint64_t r;
408
409         assert(f);
410
411         r = le64toh(f->header->tail_entry_seqnum) + 1;
412
413         if (seqnum) {
414                 /* If an external seqnum counter was passed, we update
415                  * both the local and the external one, and set it to
416                  * the maximum of both */
417
418                 if (*seqnum + 1 > r)
419                         r = *seqnum + 1;
420
421                 *seqnum = r;
422         }
423
424         f->header->tail_entry_seqnum = htole64(r);
425
426         if (f->header->head_entry_seqnum == 0)
427                 f->header->head_entry_seqnum = htole64(r);
428
429         return r;
430 }
431
432 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
433         int r;
434         uint64_t p;
435         Object *tail, *o;
436         void *t;
437
438         assert(f);
439         assert(type > 0 && type < _OBJECT_TYPE_MAX);
440         assert(size >= sizeof(ObjectHeader));
441         assert(offset);
442         assert(ret);
443
444         p = le64toh(f->header->tail_object_offset);
445         if (p == 0)
446                 p = le64toh(f->header->header_size);
447         else {
448                 r = journal_file_move_to_object(f, -1, p, &tail);
449                 if (r < 0)
450                         return r;
451
452                 p += ALIGN64(le64toh(tail->object.size));
453         }
454
455         r = journal_file_allocate(f, p, size);
456         if (r < 0)
457                 return r;
458
459         r = journal_file_move_to(f, type, p, size, &t);
460         if (r < 0)
461                 return r;
462
463         o = (Object*) t;
464
465         zero(o->object);
466         o->object.type = type;
467         o->object.size = htole64(size);
468
469         f->header->tail_object_offset = htole64(p);
470         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
471
472         *ret = o;
473         *offset = p;
474
475         return 0;
476 }
477
478 static int journal_file_setup_data_hash_table(JournalFile *f) {
479         uint64_t s, p;
480         Object *o;
481         int r;
482
483         assert(f);
484
485         /* We estimate that we need 1 hash table entry per 768 of
486            journal file and we want to make sure we never get beyond
487            75% fill level. Calculate the hash table size for the
488            maximum file size based on these metrics. */
489
490         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
491         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
492                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
493
494         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
495
496         r = journal_file_append_object(f,
497                                        OBJECT_DATA_HASH_TABLE,
498                                        offsetof(Object, hash_table.items) + s,
499                                        &o, &p);
500         if (r < 0)
501                 return r;
502
503         memset(o->hash_table.items, 0, s);
504
505         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
506         f->header->data_hash_table_size = htole64(s);
507
508         return 0;
509 }
510
511 static int journal_file_setup_field_hash_table(JournalFile *f) {
512         uint64_t s, p;
513         Object *o;
514         int r;
515
516         assert(f);
517
518         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
519         r = journal_file_append_object(f,
520                                        OBJECT_FIELD_HASH_TABLE,
521                                        offsetof(Object, hash_table.items) + s,
522                                        &o, &p);
523         if (r < 0)
524                 return r;
525
526         memset(o->hash_table.items, 0, s);
527
528         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
529         f->header->field_hash_table_size = htole64(s);
530
531         return 0;
532 }
533
534 static int journal_file_map_data_hash_table(JournalFile *f) {
535         uint64_t s, p;
536         void *t;
537         int r;
538
539         assert(f);
540
541         p = le64toh(f->header->data_hash_table_offset);
542         s = le64toh(f->header->data_hash_table_size);
543
544         r = journal_file_move_to(f,
545                                  OBJECT_DATA_HASH_TABLE,
546                                  p, s,
547                                  &t);
548         if (r < 0)
549                 return r;
550
551         f->data_hash_table = t;
552         return 0;
553 }
554
555 static int journal_file_map_field_hash_table(JournalFile *f) {
556         uint64_t s, p;
557         void *t;
558         int r;
559
560         assert(f);
561
562         p = le64toh(f->header->field_hash_table_offset);
563         s = le64toh(f->header->field_hash_table_size);
564
565         r = journal_file_move_to(f,
566                                  OBJECT_FIELD_HASH_TABLE,
567                                  p, s,
568                                  &t);
569         if (r < 0)
570                 return r;
571
572         f->field_hash_table = t;
573         return 0;
574 }
575
576 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
577         uint64_t p, h;
578         int r;
579
580         assert(f);
581         assert(o);
582         assert(offset > 0);
583         assert(o->object.type == OBJECT_DATA);
584
585         /* This might alter the window we are looking at */
586
587         o->data.next_hash_offset = o->data.next_field_offset = 0;
588         o->data.entry_offset = o->data.entry_array_offset = 0;
589         o->data.n_entries = 0;
590
591         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
592         p = le64toh(f->data_hash_table[h].tail_hash_offset);
593         if (p == 0) {
594                 /* Only entry in the hash table is easy */
595                 f->data_hash_table[h].head_hash_offset = htole64(offset);
596         } else {
597                 /* Move back to the previous data object, to patch in
598                  * pointer */
599
600                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
601                 if (r < 0)
602                         return r;
603
604                 o->data.next_hash_offset = htole64(offset);
605         }
606
607         f->data_hash_table[h].tail_hash_offset = htole64(offset);
608
609         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
610                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
611
612         return 0;
613 }
614
615 int journal_file_find_data_object_with_hash(
616                 JournalFile *f,
617                 const void *data, uint64_t size, uint64_t hash,
618                 Object **ret, uint64_t *offset) {
619
620         uint64_t p, osize, h;
621         int r;
622
623         assert(f);
624         assert(data || size == 0);
625
626         osize = offsetof(Object, data.payload) + size;
627
628         if (f->header->data_hash_table_size == 0)
629                 return -EBADMSG;
630
631         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
632         p = le64toh(f->data_hash_table[h].head_hash_offset);
633
634         while (p > 0) {
635                 Object *o;
636
637                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
638                 if (r < 0)
639                         return r;
640
641                 if (le64toh(o->data.hash) != hash)
642                         goto next;
643
644                 if (o->object.flags & OBJECT_COMPRESSED) {
645 #ifdef HAVE_XZ
646                         uint64_t l, rsize;
647
648                         l = le64toh(o->object.size);
649                         if (l <= offsetof(Object, data.payload))
650                                 return -EBADMSG;
651
652                         l -= offsetof(Object, data.payload);
653
654                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
655                                 return -EBADMSG;
656
657                         if (rsize == size &&
658                             memcmp(f->compress_buffer, data, size) == 0) {
659
660                                 if (ret)
661                                         *ret = o;
662
663                                 if (offset)
664                                         *offset = p;
665
666                                 return 1;
667                         }
668 #else
669                         return -EPROTONOSUPPORT;
670 #endif
671
672                 } else if (le64toh(o->object.size) == osize &&
673                            memcmp(o->data.payload, data, size) == 0) {
674
675                         if (ret)
676                                 *ret = o;
677
678                         if (offset)
679                                 *offset = p;
680
681                         return 1;
682                 }
683
684         next:
685                 p = le64toh(o->data.next_hash_offset);
686         }
687
688         return 0;
689 }
690
691 int journal_file_find_data_object(
692                 JournalFile *f,
693                 const void *data, uint64_t size,
694                 Object **ret, uint64_t *offset) {
695
696         uint64_t hash;
697
698         assert(f);
699         assert(data || size == 0);
700
701         hash = hash64(data, size);
702
703         return journal_file_find_data_object_with_hash(f,
704                                                        data, size, hash,
705                                                        ret, offset);
706 }
707
708 static int journal_file_append_data(
709                 JournalFile *f,
710                 const void *data, uint64_t size,
711                 Object **ret, uint64_t *offset) {
712
713         uint64_t hash, p;
714         uint64_t osize;
715         Object *o;
716         int r;
717         bool compressed = false;
718
719         assert(f);
720         assert(data || size == 0);
721
722         hash = hash64(data, size);
723
724         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
725         if (r < 0)
726                 return r;
727         else if (r > 0) {
728
729                 if (ret)
730                         *ret = o;
731
732                 if (offset)
733                         *offset = p;
734
735                 return 0;
736         }
737
738         osize = offsetof(Object, data.payload) + size;
739         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
740         if (r < 0)
741                 return r;
742
743         o->data.hash = htole64(hash);
744
745 #ifdef HAVE_XZ
746         if (f->compress &&
747             size >= COMPRESSION_SIZE_THRESHOLD) {
748                 uint64_t rsize;
749
750                 compressed = compress_blob(data, size, o->data.payload, &rsize);
751
752                 if (compressed) {
753                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
754                         o->object.flags |= OBJECT_COMPRESSED;
755
756                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
757                 }
758         }
759 #endif
760
761         if (!compressed && size > 0)
762                 memcpy(o->data.payload, data, size);
763
764         r = journal_file_link_data(f, o, p, hash);
765         if (r < 0)
766                 return r;
767
768         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
769         if (r < 0)
770                 return r;
771
772         /* The linking might have altered the window, so let's
773          * refresh our pointer */
774         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
775         if (r < 0)
776                 return r;
777
778         if (ret)
779                 *ret = o;
780
781         if (offset)
782                 *offset = p;
783
784         return 0;
785 }
786
787 uint64_t journal_file_entry_n_items(Object *o) {
788         assert(o);
789         assert(o->object.type == OBJECT_ENTRY);
790
791         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
792 }
793
794 uint64_t journal_file_entry_array_n_items(Object *o) {
795         assert(o);
796         assert(o->object.type == OBJECT_ENTRY_ARRAY);
797
798         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
799 }
800
801 static int link_entry_into_array(JournalFile *f,
802                                  le64_t *first,
803                                  le64_t *idx,
804                                  uint64_t p) {
805         int r;
806         uint64_t n = 0, ap = 0, q, i, a, hidx;
807         Object *o;
808
809         assert(f);
810         assert(first);
811         assert(idx);
812         assert(p > 0);
813
814         a = le64toh(*first);
815         i = hidx = le64toh(*idx);
816         while (a > 0) {
817
818                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
819                 if (r < 0)
820                         return r;
821
822                 n = journal_file_entry_array_n_items(o);
823                 if (i < n) {
824                         o->entry_array.items[i] = htole64(p);
825                         *idx = htole64(hidx + 1);
826                         return 0;
827                 }
828
829                 i -= n;
830                 ap = a;
831                 a = le64toh(o->entry_array.next_entry_array_offset);
832         }
833
834         if (hidx > n)
835                 n = (hidx+1) * 2;
836         else
837                 n = n * 2;
838
839         if (n < 4)
840                 n = 4;
841
842         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
843                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
844                                        &o, &q);
845         if (r < 0)
846                 return r;
847
848         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
849         if (r < 0)
850                 return r;
851
852         o->entry_array.items[i] = htole64(p);
853
854         if (ap == 0)
855                 *first = htole64(q);
856         else {
857                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
858                 if (r < 0)
859                         return r;
860
861                 o->entry_array.next_entry_array_offset = htole64(q);
862         }
863
864         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
865                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
866
867         *idx = htole64(hidx + 1);
868
869         return 0;
870 }
871
872 static int link_entry_into_array_plus_one(JournalFile *f,
873                                           le64_t *extra,
874                                           le64_t *first,
875                                           le64_t *idx,
876                                           uint64_t p) {
877
878         int r;
879
880         assert(f);
881         assert(extra);
882         assert(first);
883         assert(idx);
884         assert(p > 0);
885
886         if (*idx == 0)
887                 *extra = htole64(p);
888         else {
889                 le64_t i;
890
891                 i = htole64(le64toh(*idx) - 1);
892                 r = link_entry_into_array(f, first, &i, p);
893                 if (r < 0)
894                         return r;
895         }
896
897         *idx = htole64(le64toh(*idx) + 1);
898         return 0;
899 }
900
901 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
902         uint64_t p;
903         int r;
904         assert(f);
905         assert(o);
906         assert(offset > 0);
907
908         p = le64toh(o->entry.items[i].object_offset);
909         if (p == 0)
910                 return -EINVAL;
911
912         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
913         if (r < 0)
914                 return r;
915
916         return link_entry_into_array_plus_one(f,
917                                               &o->data.entry_offset,
918                                               &o->data.entry_array_offset,
919                                               &o->data.n_entries,
920                                               offset);
921 }
922
923 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
924         uint64_t n, i;
925         int r;
926
927         assert(f);
928         assert(o);
929         assert(offset > 0);
930         assert(o->object.type == OBJECT_ENTRY);
931
932         __sync_synchronize();
933
934         /* Link up the entry itself */
935         r = link_entry_into_array(f,
936                                   &f->header->entry_array_offset,
937                                   &f->header->n_entries,
938                                   offset);
939         if (r < 0)
940                 return r;
941
942         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
943
944         if (f->header->head_entry_realtime == 0)
945                 f->header->head_entry_realtime = o->entry.realtime;
946
947         f->header->tail_entry_realtime = o->entry.realtime;
948         f->header->tail_entry_monotonic = o->entry.monotonic;
949
950         f->tail_entry_monotonic_valid = true;
951
952         /* Link up the items */
953         n = journal_file_entry_n_items(o);
954         for (i = 0; i < n; i++) {
955                 r = journal_file_link_entry_item(f, o, offset, i);
956                 if (r < 0)
957                         return r;
958         }
959
960         return 0;
961 }
962
963 static int journal_file_append_entry_internal(
964                 JournalFile *f,
965                 const dual_timestamp *ts,
966                 uint64_t xor_hash,
967                 const EntryItem items[], unsigned n_items,
968                 uint64_t *seqnum,
969                 Object **ret, uint64_t *offset) {
970         uint64_t np;
971         uint64_t osize;
972         Object *o;
973         int r;
974
975         assert(f);
976         assert(items || n_items == 0);
977         assert(ts);
978
979         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
980
981         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
982         if (r < 0)
983                 return r;
984
985         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
986         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
987         o->entry.realtime = htole64(ts->realtime);
988         o->entry.monotonic = htole64(ts->monotonic);
989         o->entry.xor_hash = htole64(xor_hash);
990         o->entry.boot_id = f->header->boot_id;
991
992         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
993         if (r < 0)
994                 return r;
995
996         r = journal_file_link_entry(f, o, np);
997         if (r < 0)
998                 return r;
999
1000         if (ret)
1001                 *ret = o;
1002
1003         if (offset)
1004                 *offset = np;
1005
1006         return 0;
1007 }
1008
1009 void journal_file_post_change(JournalFile *f) {
1010         assert(f);
1011
1012         /* inotify() does not receive IN_MODIFY events from file
1013          * accesses done via mmap(). After each access we hence
1014          * trigger IN_MODIFY by truncating the journal file to its
1015          * current size which triggers IN_MODIFY. */
1016
1017         __sync_synchronize();
1018
1019         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1020                 log_error("Failed to to truncate file to its own size: %m");
1021 }
1022
1023 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1024         unsigned i;
1025         EntryItem *items;
1026         int r;
1027         uint64_t xor_hash = 0;
1028         struct dual_timestamp _ts;
1029
1030         assert(f);
1031         assert(iovec || n_iovec == 0);
1032
1033         if (!f->writable)
1034                 return -EPERM;
1035
1036         if (!ts) {
1037                 dual_timestamp_get(&_ts);
1038                 ts = &_ts;
1039         }
1040
1041         if (f->tail_entry_monotonic_valid &&
1042             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1043                 return -EINVAL;
1044
1045         r = journal_file_maybe_append_tag(f, ts->realtime);
1046         if (r < 0)
1047                 return r;
1048
1049         /* alloca() can't take 0, hence let's allocate at least one */
1050         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1051
1052         for (i = 0; i < n_iovec; i++) {
1053                 uint64_t p;
1054                 Object *o;
1055
1056                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1057                 if (r < 0)
1058                         return r;
1059
1060                 xor_hash ^= le64toh(o->data.hash);
1061                 items[i].object_offset = htole64(p);
1062                 items[i].hash = o->data.hash;
1063         }
1064
1065         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1066
1067         journal_file_post_change(f);
1068
1069         return r;
1070 }
1071
1072 static int generic_array_get(JournalFile *f,
1073                              uint64_t first,
1074                              uint64_t i,
1075                              Object **ret, uint64_t *offset) {
1076
1077         Object *o;
1078         uint64_t p = 0, a;
1079         int r;
1080
1081         assert(f);
1082
1083         a = first;
1084         while (a > 0) {
1085                 uint64_t n;
1086
1087                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1088                 if (r < 0)
1089                         return r;
1090
1091                 n = journal_file_entry_array_n_items(o);
1092                 if (i < n) {
1093                         p = le64toh(o->entry_array.items[i]);
1094                         break;
1095                 }
1096
1097                 i -= n;
1098                 a = le64toh(o->entry_array.next_entry_array_offset);
1099         }
1100
1101         if (a <= 0 || p <= 0)
1102                 return 0;
1103
1104         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1105         if (r < 0)
1106                 return r;
1107
1108         if (ret)
1109                 *ret = o;
1110
1111         if (offset)
1112                 *offset = p;
1113
1114         return 1;
1115 }
1116
1117 static int generic_array_get_plus_one(JournalFile *f,
1118                                       uint64_t extra,
1119                                       uint64_t first,
1120                                       uint64_t i,
1121                                       Object **ret, uint64_t *offset) {
1122
1123         Object *o;
1124
1125         assert(f);
1126
1127         if (i == 0) {
1128                 int r;
1129
1130                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1131                 if (r < 0)
1132                         return r;
1133
1134                 if (ret)
1135                         *ret = o;
1136
1137                 if (offset)
1138                         *offset = extra;
1139
1140                 return 1;
1141         }
1142
1143         return generic_array_get(f, first, i-1, ret, offset);
1144 }
1145
/* Results of the test_object() callbacks used for bisection */
enum {
        TEST_FOUND = 0,         /* the tested object matches the needle */
        TEST_LEFT = 1,          /* the tested object is smaller than the needle */
        TEST_RIGHT = 2          /* the tested object is larger than the needle */
};
1151
1152 static int generic_array_bisect(JournalFile *f,
1153                                 uint64_t first,
1154                                 uint64_t n,
1155                                 uint64_t needle,
1156                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1157                                 direction_t direction,
1158                                 Object **ret,
1159                                 uint64_t *offset,
1160                                 uint64_t *idx) {
1161
1162         uint64_t a, p, t = 0, i = 0, last_p = 0;
1163         bool subtract_one = false;
1164         Object *o, *array = NULL;
1165         int r;
1166
1167         assert(f);
1168         assert(test_object);
1169
1170         a = first;
1171         while (a > 0) {
1172                 uint64_t left, right, k, lp;
1173
1174                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1175                 if (r < 0)
1176                         return r;
1177
1178                 k = journal_file_entry_array_n_items(array);
1179                 right = MIN(k, n);
1180                 if (right <= 0)
1181                         return 0;
1182
1183                 i = right - 1;
1184                 lp = p = le64toh(array->entry_array.items[i]);
1185                 if (p <= 0)
1186                         return -EBADMSG;
1187
1188                 r = test_object(f, p, needle);
1189                 if (r < 0)
1190                         return r;
1191
1192                 if (r == TEST_FOUND)
1193                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1194
1195                 if (r == TEST_RIGHT) {
1196                         left = 0;
1197                         right -= 1;
1198                         for (;;) {
1199                                 if (left == right) {
1200                                         if (direction == DIRECTION_UP)
1201                                                 subtract_one = true;
1202
1203                                         i = left;
1204                                         goto found;
1205                                 }
1206
1207                                 assert(left < right);
1208
1209                                 i = (left + right) / 2;
1210                                 p = le64toh(array->entry_array.items[i]);
1211                                 if (p <= 0)
1212                                         return -EBADMSG;
1213
1214                                 r = test_object(f, p, needle);
1215                                 if (r < 0)
1216                                         return r;
1217
1218                                 if (r == TEST_FOUND)
1219                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1220
1221                                 if (r == TEST_RIGHT)
1222                                         right = i;
1223                                 else
1224                                         left = i + 1;
1225                         }
1226                 }
1227
1228                 if (k > n) {
1229                         if (direction == DIRECTION_UP) {
1230                                 i = n;
1231                                 subtract_one = true;
1232                                 goto found;
1233                         }
1234
1235                         return 0;
1236                 }
1237
1238                 last_p = lp;
1239
1240                 n -= k;
1241                 t += k;
1242                 a = le64toh(array->entry_array.next_entry_array_offset);
1243         }
1244
1245         return 0;
1246
1247 found:
1248         if (subtract_one && t == 0 && i == 0)
1249                 return 0;
1250
1251         if (subtract_one && i == 0)
1252                 p = last_p;
1253         else if (subtract_one)
1254                 p = le64toh(array->entry_array.items[i-1]);
1255         else
1256                 p = le64toh(array->entry_array.items[i]);
1257
1258         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1259         if (r < 0)
1260                 return r;
1261
1262         if (ret)
1263                 *ret = o;
1264
1265         if (offset)
1266                 *offset = p;
1267
1268         if (idx)
1269                 *idx = t + i + (subtract_one ? -1 : 0);
1270
1271         return 1;
1272 }
1273
1274 static int generic_array_bisect_plus_one(JournalFile *f,
1275                                          uint64_t extra,
1276                                          uint64_t first,
1277                                          uint64_t n,
1278                                          uint64_t needle,
1279                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1280                                          direction_t direction,
1281                                          Object **ret,
1282                                          uint64_t *offset,
1283                                          uint64_t *idx) {
1284
1285         int r;
1286         bool step_back = false;
1287         Object *o;
1288
1289         assert(f);
1290         assert(test_object);
1291
1292         if (n <= 0)
1293                 return 0;
1294
1295         /* This bisects the array in object 'first', but first checks
1296          * an extra  */
1297         r = test_object(f, extra, needle);
1298         if (r < 0)
1299                 return r;
1300
1301         if (r == TEST_FOUND)
1302                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1303
1304         /* if we are looking with DIRECTION_UP then we need to first
1305            see if in the actual array there is a matching entry, and
1306            return the last one of that. But if there isn't any we need
1307            to return this one. Hence remember this, and return it
1308            below. */
1309         if (r == TEST_LEFT)
1310                 step_back = direction == DIRECTION_UP;
1311
1312         if (r == TEST_RIGHT) {
1313                 if (direction == DIRECTION_DOWN)
1314                         goto found;
1315                 else
1316                         return 0;
1317         }
1318
1319         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1320
1321         if (r == 0 && step_back)
1322                 goto found;
1323
1324         if (r > 0 && idx)
1325                 (*idx) ++;
1326
1327         return r;
1328
1329 found:
1330         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1331         if (r < 0)
1332                 return r;
1333
1334         if (ret)
1335                 *ret = o;
1336
1337         if (offset)
1338                 *offset = extra;
1339
1340         if (idx)
1341                 *idx = 0;
1342
1343         return 1;
1344 }
1345
1346 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1347         assert(f);
1348         assert(p > 0);
1349
1350         if (p == needle)
1351                 return TEST_FOUND;
1352         else if (p < needle)
1353                 return TEST_LEFT;
1354         else
1355                 return TEST_RIGHT;
1356 }
1357
1358 int journal_file_move_to_entry_by_offset(
1359                 JournalFile *f,
1360                 uint64_t p,
1361                 direction_t direction,
1362                 Object **ret,
1363                 uint64_t *offset) {
1364
1365         return generic_array_bisect(f,
1366                                     le64toh(f->header->entry_array_offset),
1367                                     le64toh(f->header->n_entries),
1368                                     p,
1369                                     test_object_offset,
1370                                     direction,
1371                                     ret, offset, NULL);
1372 }
1373
1374
1375 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1376         Object *o;
1377         int r;
1378
1379         assert(f);
1380         assert(p > 0);
1381
1382         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1383         if (r < 0)
1384                 return r;
1385
1386         if (le64toh(o->entry.seqnum) == needle)
1387                 return TEST_FOUND;
1388         else if (le64toh(o->entry.seqnum) < needle)
1389                 return TEST_LEFT;
1390         else
1391                 return TEST_RIGHT;
1392 }
1393
1394 int journal_file_move_to_entry_by_seqnum(
1395                 JournalFile *f,
1396                 uint64_t seqnum,
1397                 direction_t direction,
1398                 Object **ret,
1399                 uint64_t *offset) {
1400
1401         return generic_array_bisect(f,
1402                                     le64toh(f->header->entry_array_offset),
1403                                     le64toh(f->header->n_entries),
1404                                     seqnum,
1405                                     test_object_seqnum,
1406                                     direction,
1407                                     ret, offset, NULL);
1408 }
1409
1410 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1411         Object *o;
1412         int r;
1413
1414         assert(f);
1415         assert(p > 0);
1416
1417         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1418         if (r < 0)
1419                 return r;
1420
1421         if (le64toh(o->entry.realtime) == needle)
1422                 return TEST_FOUND;
1423         else if (le64toh(o->entry.realtime) < needle)
1424                 return TEST_LEFT;
1425         else
1426                 return TEST_RIGHT;
1427 }
1428
1429 int journal_file_move_to_entry_by_realtime(
1430                 JournalFile *f,
1431                 uint64_t realtime,
1432                 direction_t direction,
1433                 Object **ret,
1434                 uint64_t *offset) {
1435
1436         return generic_array_bisect(f,
1437                                     le64toh(f->header->entry_array_offset),
1438                                     le64toh(f->header->n_entries),
1439                                     realtime,
1440                                     test_object_realtime,
1441                                     direction,
1442                                     ret, offset, NULL);
1443 }
1444
1445 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1446         Object *o;
1447         int r;
1448
1449         assert(f);
1450         assert(p > 0);
1451
1452         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1453         if (r < 0)
1454                 return r;
1455
1456         if (le64toh(o->entry.monotonic) == needle)
1457                 return TEST_FOUND;
1458         else if (le64toh(o->entry.monotonic) < needle)
1459                 return TEST_LEFT;
1460         else
1461                 return TEST_RIGHT;
1462 }
1463
1464 int journal_file_move_to_entry_by_monotonic(
1465                 JournalFile *f,
1466                 sd_id128_t boot_id,
1467                 uint64_t monotonic,
1468                 direction_t direction,
1469                 Object **ret,
1470                 uint64_t *offset) {
1471
1472         char t[9+32+1] = "_BOOT_ID=";
1473         Object *o;
1474         int r;
1475
1476         assert(f);
1477
1478         sd_id128_to_string(boot_id, t + 9);
1479         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1480         if (r < 0)
1481                 return r;
1482         if (r == 0)
1483                 return -ENOENT;
1484
1485         return generic_array_bisect_plus_one(f,
1486                                              le64toh(o->data.entry_offset),
1487                                              le64toh(o->data.entry_array_offset),
1488                                              le64toh(o->data.n_entries),
1489                                              monotonic,
1490                                              test_object_monotonic,
1491                                              direction,
1492                                              ret, offset, NULL);
1493 }
1494
1495 int journal_file_next_entry(
1496                 JournalFile *f,
1497                 Object *o, uint64_t p,
1498                 direction_t direction,
1499                 Object **ret, uint64_t *offset) {
1500
1501         uint64_t i, n;
1502         int r;
1503
1504         assert(f);
1505         assert(p > 0 || !o);
1506
1507         n = le64toh(f->header->n_entries);
1508         if (n <= 0)
1509                 return 0;
1510
1511         if (!o)
1512                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1513         else {
1514                 if (o->object.type != OBJECT_ENTRY)
1515                         return -EINVAL;
1516
1517                 r = generic_array_bisect(f,
1518                                          le64toh(f->header->entry_array_offset),
1519                                          le64toh(f->header->n_entries),
1520                                          p,
1521                                          test_object_offset,
1522                                          DIRECTION_DOWN,
1523                                          NULL, NULL,
1524                                          &i);
1525                 if (r <= 0)
1526                         return r;
1527
1528                 if (direction == DIRECTION_DOWN) {
1529                         if (i >= n - 1)
1530                                 return 0;
1531
1532                         i++;
1533                 } else {
1534                         if (i <= 0)
1535                                 return 0;
1536
1537                         i--;
1538                 }
1539         }
1540
1541         /* And jump to it */
1542         return generic_array_get(f,
1543                                  le64toh(f->header->entry_array_offset),
1544                                  i,
1545                                  ret, offset);
1546 }
1547
1548 int journal_file_skip_entry(
1549                 JournalFile *f,
1550                 Object *o, uint64_t p,
1551                 int64_t skip,
1552                 Object **ret, uint64_t *offset) {
1553
1554         uint64_t i, n;
1555         int r;
1556
1557         assert(f);
1558         assert(o);
1559         assert(p > 0);
1560
1561         if (o->object.type != OBJECT_ENTRY)
1562                 return -EINVAL;
1563
1564         r = generic_array_bisect(f,
1565                                  le64toh(f->header->entry_array_offset),
1566                                  le64toh(f->header->n_entries),
1567                                  p,
1568                                  test_object_offset,
1569                                  DIRECTION_DOWN,
1570                                  NULL, NULL,
1571                                  &i);
1572         if (r <= 0)
1573                 return r;
1574
1575         /* Calculate new index */
1576         if (skip < 0) {
1577                 if ((uint64_t) -skip >= i)
1578                         i = 0;
1579                 else
1580                         i = i - (uint64_t) -skip;
1581         } else
1582                 i  += (uint64_t) skip;
1583
1584         n = le64toh(f->header->n_entries);
1585         if (n <= 0)
1586                 return -EBADMSG;
1587
1588         if (i >= n)
1589                 i = n-1;
1590
1591         return generic_array_get(f,
1592                                  le64toh(f->header->entry_array_offset),
1593                                  i,
1594                                  ret, offset);
1595 }
1596
1597 int journal_file_next_entry_for_data(
1598                 JournalFile *f,
1599                 Object *o, uint64_t p,
1600                 uint64_t data_offset,
1601                 direction_t direction,
1602                 Object **ret, uint64_t *offset) {
1603
1604         uint64_t n, i;
1605         int r;
1606         Object *d;
1607
1608         assert(f);
1609         assert(p > 0 || !o);
1610
1611         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1612         if (r < 0)
1613                 return r;
1614
1615         n = le64toh(d->data.n_entries);
1616         if (n <= 0)
1617                 return n;
1618
1619         if (!o)
1620                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1621         else {
1622                 if (o->object.type != OBJECT_ENTRY)
1623                         return -EINVAL;
1624
1625                 r = generic_array_bisect_plus_one(f,
1626                                                   le64toh(d->data.entry_offset),
1627                                                   le64toh(d->data.entry_array_offset),
1628                                                   le64toh(d->data.n_entries),
1629                                                   p,
1630                                                   test_object_offset,
1631                                                   DIRECTION_DOWN,
1632                                                   NULL, NULL,
1633                                                   &i);
1634
1635                 if (r <= 0)
1636                         return r;
1637
1638                 if (direction == DIRECTION_DOWN) {
1639                         if (i >= n - 1)
1640                                 return 0;
1641
1642                         i++;
1643                 } else {
1644                         if (i <= 0)
1645                                 return 0;
1646
1647                         i--;
1648                 }
1649
1650         }
1651
1652         return generic_array_get_plus_one(f,
1653                                           le64toh(d->data.entry_offset),
1654                                           le64toh(d->data.entry_array_offset),
1655                                           i,
1656                                           ret, offset);
1657 }
1658
1659 int journal_file_move_to_entry_by_offset_for_data(
1660                 JournalFile *f,
1661                 uint64_t data_offset,
1662                 uint64_t p,
1663                 direction_t direction,
1664                 Object **ret, uint64_t *offset) {
1665
1666         int r;
1667         Object *d;
1668
1669         assert(f);
1670
1671         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1672         if (r < 0)
1673                 return r;
1674
1675         return generic_array_bisect_plus_one(f,
1676                                              le64toh(d->data.entry_offset),
1677                                              le64toh(d->data.entry_array_offset),
1678                                              le64toh(d->data.n_entries),
1679                                              p,
1680                                              test_object_offset,
1681                                              direction,
1682                                              ret, offset, NULL);
1683 }
1684
1685 int journal_file_move_to_entry_by_monotonic_for_data(
1686                 JournalFile *f,
1687                 uint64_t data_offset,
1688                 sd_id128_t boot_id,
1689                 uint64_t monotonic,
1690                 direction_t direction,
1691                 Object **ret, uint64_t *offset) {
1692
1693         char t[9+32+1] = "_BOOT_ID=";
1694         Object *o, *d;
1695         int r;
1696         uint64_t b, z;
1697
1698         assert(f);
1699
1700         /* First, seek by time */
1701         sd_id128_to_string(boot_id, t + 9);
1702         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1703         if (r < 0)
1704                 return r;
1705         if (r == 0)
1706                 return -ENOENT;
1707
1708         r = generic_array_bisect_plus_one(f,
1709                                           le64toh(o->data.entry_offset),
1710                                           le64toh(o->data.entry_array_offset),
1711                                           le64toh(o->data.n_entries),
1712                                           monotonic,
1713                                           test_object_monotonic,
1714                                           direction,
1715                                           NULL, &z, NULL);
1716         if (r <= 0)
1717                 return r;
1718
1719         /* And now, continue seeking until we find an entry that
1720          * exists in both bisection arrays */
1721
1722         for (;;) {
1723                 Object *qo;
1724                 uint64_t p, q;
1725
1726                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1727                 if (r < 0)
1728                         return r;
1729
1730                 r = generic_array_bisect_plus_one(f,
1731                                                   le64toh(d->data.entry_offset),
1732                                                   le64toh(d->data.entry_array_offset),
1733                                                   le64toh(d->data.n_entries),
1734                                                   z,
1735                                                   test_object_offset,
1736                                                   direction,
1737                                                   NULL, &p, NULL);
1738                 if (r <= 0)
1739                         return r;
1740
1741                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1742                 if (r < 0)
1743                         return r;
1744
1745                 r = generic_array_bisect_plus_one(f,
1746                                                   le64toh(o->data.entry_offset),
1747                                                   le64toh(o->data.entry_array_offset),
1748                                                   le64toh(o->data.n_entries),
1749                                                   p,
1750                                                   test_object_offset,
1751                                                   direction,
1752                                                   &qo, &q, NULL);
1753
1754                 if (r <= 0)
1755                         return r;
1756
1757                 if (p == q) {
1758                         if (ret)
1759                                 *ret = qo;
1760                         if (offset)
1761                                 *offset = q;
1762
1763                         return 1;
1764                 }
1765
1766                 z = q;
1767         }
1768
1769         return 0;
1770 }
1771
1772 int journal_file_move_to_entry_by_seqnum_for_data(
1773                 JournalFile *f,
1774                 uint64_t data_offset,
1775                 uint64_t seqnum,
1776                 direction_t direction,
1777                 Object **ret, uint64_t *offset) {
1778
1779         Object *d;
1780         int r;
1781
1782         assert(f);
1783
1784         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1785         if (r < 0)
1786                 return r;
1787
1788         return generic_array_bisect_plus_one(f,
1789                                              le64toh(d->data.entry_offset),
1790                                              le64toh(d->data.entry_array_offset),
1791                                              le64toh(d->data.n_entries),
1792                                              seqnum,
1793                                              test_object_seqnum,
1794                                              direction,
1795                                              ret, offset, NULL);
1796 }
1797
1798 int journal_file_move_to_entry_by_realtime_for_data(
1799                 JournalFile *f,
1800                 uint64_t data_offset,
1801                 uint64_t realtime,
1802                 direction_t direction,
1803                 Object **ret, uint64_t *offset) {
1804
1805         Object *d;
1806         int r;
1807
1808         assert(f);
1809
1810         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1811         if (r < 0)
1812                 return r;
1813
1814         return generic_array_bisect_plus_one(f,
1815                                              le64toh(d->data.entry_offset),
1816                                              le64toh(d->data.entry_array_offset),
1817                                              le64toh(d->data.n_entries),
1818                                              realtime,
1819                                              test_object_realtime,
1820                                              direction,
1821                                              ret, offset, NULL);
1822 }
1823
1824 void journal_file_dump(JournalFile *f) {
1825         Object *o;
1826         int r;
1827         uint64_t p;
1828
1829         assert(f);
1830
1831         journal_file_print_header(f);
1832
1833         p = le64toh(f->header->header_size);
1834         while (p != 0) {
1835                 r = journal_file_move_to_object(f, -1, p, &o);
1836                 if (r < 0)
1837                         goto fail;
1838
1839                 switch (o->object.type) {
1840
1841                 case OBJECT_UNUSED:
1842                         printf("Type: OBJECT_UNUSED\n");
1843                         break;
1844
1845                 case OBJECT_DATA:
1846                         printf("Type: OBJECT_DATA\n");
1847                         break;
1848
1849                 case OBJECT_ENTRY:
1850                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1851                                (unsigned long long) le64toh(o->entry.seqnum),
1852                                (unsigned long long) le64toh(o->entry.monotonic),
1853                                (unsigned long long) le64toh(o->entry.realtime));
1854                         break;
1855
1856                 case OBJECT_FIELD_HASH_TABLE:
1857                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1858                         break;
1859
1860                 case OBJECT_DATA_HASH_TABLE:
1861                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1862                         break;
1863
1864                 case OBJECT_ENTRY_ARRAY:
1865                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1866                         break;
1867
1868                 case OBJECT_TAG:
1869                         printf("Type: OBJECT_TAG %llu\n",
1870                                (unsigned long long) le64toh(o->tag.seqnum));
1871                         break;
1872                 }
1873
1874                 if (o->object.flags & OBJECT_COMPRESSED)
1875                         printf("Flags: COMPRESSED\n");
1876
1877                 if (p == le64toh(f->header->tail_object_offset))
1878                         p = 0;
1879                 else
1880                         p = p + ALIGN64(le64toh(o->object.size));
1881         }
1882
1883         return;
1884 fail:
1885         log_error("File corrupt");
1886 }
1887
1888 void journal_file_print_header(JournalFile *f) {
1889         char a[33], b[33], c[33];
1890         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1891
1892         assert(f);
1893
1894         printf("File Path: %s\n"
1895                "File ID: %s\n"
1896                "Machine ID: %s\n"
1897                "Boot ID: %s\n"
1898                "Sequential Number ID: %s\n"
1899                "State: %s\n"
1900                "Compatible Flags:%s%s\n"
1901                "Incompatible Flags:%s%s\n"
1902                "Header size: %llu\n"
1903                "Arena size: %llu\n"
1904                "Data Hash Table Size: %llu\n"
1905                "Field Hash Table Size: %llu\n"
1906                "Rotate Suggested: %s\n"
1907                "Head Sequential Number: %llu\n"
1908                "Tail Sequential Number: %llu\n"
1909                "Head Realtime Timestamp: %s\n"
1910                "Tail Realtime Timestamp: %s\n"
1911                "Objects: %llu\n"
1912                "Entry Objects: %llu\n",
1913                f->path,
1914                sd_id128_to_string(f->header->file_id, a),
1915                sd_id128_to_string(f->header->machine_id, b),
1916                sd_id128_to_string(f->header->boot_id, c),
1917                sd_id128_to_string(f->header->seqnum_id, c),
1918                f->header->state == STATE_OFFLINE ? "OFFLINE" :
1919                f->header->state == STATE_ONLINE ? "ONLINE" :
1920                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1921                (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1922                (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1923                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1924                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1925                (unsigned long long) le64toh(f->header->header_size),
1926                (unsigned long long) le64toh(f->header->arena_size),
1927                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1928                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1929                yes_no(journal_file_rotate_suggested(f)),
1930                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1931                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1932                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1933                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1934                (unsigned long long) le64toh(f->header->n_objects),
1935                (unsigned long long) le64toh(f->header->n_entries));
1936
1937         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1938                 printf("Data Objects: %llu\n"
1939                        "Data Hash Table Fill: %.1f%%\n",
1940                        (unsigned long long) le64toh(f->header->n_data),
1941                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1942
1943         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1944                 printf("Field Objects: %llu\n"
1945                        "Field Hash Table Fill: %.1f%%\n",
1946                        (unsigned long long) le64toh(f->header->n_fields),
1947                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1948
1949         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1950                 printf("Tag Objects: %llu\n",
1951                        (unsigned long long) le64toh(f->header->n_tags));
1952         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1953                 printf("Entry Array Objects: %llu\n",
1954                        (unsigned long long) le64toh(f->header->n_entry_arrays));
1955 }
1956
1957 int journal_file_open(
1958                 const char *fname,
1959                 int flags,
1960                 mode_t mode,
1961                 bool compress,
1962                 bool seal,
1963                 JournalMetrics *metrics,
1964                 MMapCache *mmap_cache,
1965                 JournalFile *template,
1966                 JournalFile **ret) {
1967
1968         JournalFile *f;
1969         int r;
1970         bool newly_created = false;
1971
1972         assert(fname);
1973
1974         if ((flags & O_ACCMODE) != O_RDONLY &&
1975             (flags & O_ACCMODE) != O_RDWR)
1976                 return -EINVAL;
1977
1978         if (!endswith(fname, ".journal") &&
1979             !endswith(fname, ".journal~"))
1980                 return -EINVAL;
1981
1982         f = new0(JournalFile, 1);
1983         if (!f)
1984                 return -ENOMEM;
1985
1986         f->fd = -1;
1987         f->mode = mode;
1988
1989         f->flags = flags;
1990         f->prot = prot_from_flags(flags);
1991         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1992         f->compress = compress;
1993         f->seal = seal;
1994
1995         if (mmap_cache)
1996                 f->mmap = mmap_cache_ref(mmap_cache);
1997         else {
1998                 /* One context for each type, plus the zeroth catchall
1999                  * context. One fd for the file plus one for each type
2000                  * (which we need during verification */
2001                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2002                 if (!f->mmap) {
2003                         r = -ENOMEM;
2004                         goto fail;
2005                 }
2006         }
2007
2008         f->path = strdup(fname);
2009         if (!f->path) {
2010                 r = -ENOMEM;
2011                 goto fail;
2012         }
2013
2014         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2015         if (f->fd < 0) {
2016                 r = -errno;
2017                 goto fail;
2018         }
2019
2020         if (fstat(f->fd, &f->last_stat) < 0) {
2021                 r = -errno;
2022                 goto fail;
2023         }
2024
2025         if (f->last_stat.st_size == 0 && f->writable) {
2026                 newly_created = true;
2027
2028                 /* Try to load the FSPRG state, and if we can't, then
2029                  * just don't do sealing */
2030                 r = journal_file_fss_load(f);
2031                 if (r < 0)
2032                         f->seal = false;
2033
2034                 r = journal_file_init_header(f, template);
2035                 if (r < 0)
2036                         goto fail;
2037
2038                 if (fstat(f->fd, &f->last_stat) < 0) {
2039                         r = -errno;
2040                         goto fail;
2041                 }
2042         }
2043
2044         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2045                 r = -EIO;
2046                 goto fail;
2047         }
2048
2049         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2050         if (f->header == MAP_FAILED) {
2051                 f->header = NULL;
2052                 r = -errno;
2053                 goto fail;
2054         }
2055
2056         if (!newly_created) {
2057                 r = journal_file_verify_header(f);
2058                 if (r < 0)
2059                         goto fail;
2060         }
2061
2062         if (!newly_created && f->writable) {
2063                 r = journal_file_fss_load(f);
2064                 if (r < 0)
2065                         goto fail;
2066         }
2067
2068         if (f->writable) {
2069                 if (metrics) {
2070                         journal_default_metrics(metrics, f->fd);
2071                         f->metrics = *metrics;
2072                 } else if (template)
2073                         f->metrics = template->metrics;
2074
2075                 r = journal_file_refresh_header(f);
2076                 if (r < 0)
2077                         goto fail;
2078         }
2079
2080         r = journal_file_hmac_setup(f);
2081         if (r < 0)
2082                 goto fail;
2083
2084         if (newly_created) {
2085                 r = journal_file_setup_field_hash_table(f);
2086                 if (r < 0)
2087                         goto fail;
2088
2089                 r = journal_file_setup_data_hash_table(f);
2090                 if (r < 0)
2091                         goto fail;
2092
2093                 r = journal_file_append_first_tag(f);
2094                 if (r < 0)
2095                         goto fail;
2096         }
2097
2098         r = journal_file_map_field_hash_table(f);
2099         if (r < 0)
2100                 goto fail;
2101
2102         r = journal_file_map_data_hash_table(f);
2103         if (r < 0)
2104                 goto fail;
2105
2106         if (ret)
2107                 *ret = f;
2108
2109         return 0;
2110
2111 fail:
2112         journal_file_close(f);
2113
2114         return r;
2115 }
2116
2117 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2118         char *p;
2119         size_t l;
2120         JournalFile *old_file, *new_file = NULL;
2121         int r;
2122
2123         assert(f);
2124         assert(*f);
2125
2126         old_file = *f;
2127
2128         if (!old_file->writable)
2129                 return -EINVAL;
2130
2131         if (!endswith(old_file->path, ".journal"))
2132                 return -EINVAL;
2133
2134         l = strlen(old_file->path);
2135
2136         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2137         if (!p)
2138                 return -ENOMEM;
2139
2140         memcpy(p, old_file->path, l - 8);
2141         p[l-8] = '@';
2142         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2143         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2144                  "-%016llx-%016llx.journal",
2145                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2146                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2147
2148         r = rename(old_file->path, p);
2149         free(p);
2150
2151         if (r < 0)
2152                 return -errno;
2153
2154         old_file->header->state = STATE_ARCHIVED;
2155
2156         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2157         journal_file_close(old_file);
2158
2159         *f = new_file;
2160         return r;
2161 }
2162
2163 int journal_file_open_reliably(
2164                 const char *fname,
2165                 int flags,
2166                 mode_t mode,
2167                 bool compress,
2168                 bool seal,
2169                 JournalMetrics *metrics,
2170                 MMapCache *mmap_cache,
2171                 JournalFile *template,
2172                 JournalFile **ret) {
2173
2174         int r;
2175         size_t l;
2176         char *p;
2177
2178         r = journal_file_open(fname, flags, mode, compress, seal,
2179                               metrics, mmap_cache, template, ret);
2180         if (r != -EBADMSG && /* corrupted */
2181             r != -ENODATA && /* truncated */
2182             r != -EHOSTDOWN && /* other machine */
2183             r != -EPROTONOSUPPORT && /* incompatible feature */
2184             r != -EBUSY && /* unclean shutdown */
2185             r != -ESHUTDOWN /* already archived */)
2186                 return r;
2187
2188         if ((flags & O_ACCMODE) == O_RDONLY)
2189                 return r;
2190
2191         if (!(flags & O_CREAT))
2192                 return r;
2193
2194         if (!endswith(fname, ".journal"))
2195                 return r;
2196
2197         /* The file is corrupted. Rotate it away and try it again (but only once) */
2198
2199         l = strlen(fname);
2200         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2201                      (int) (l-8), fname,
2202                      (unsigned long long) now(CLOCK_REALTIME),
2203                      random_ull()) < 0)
2204                 return -ENOMEM;
2205
2206         r = rename(fname, p);
2207         free(p);
2208         if (r < 0)
2209                 return -errno;
2210
2211         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2212
2213         return journal_file_open(fname, flags, mode, compress, seal,
2214                                  metrics, mmap_cache, template, ret);
2215 }
2216
2217
2218 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2219         uint64_t i, n;
2220         uint64_t q, xor_hash = 0;
2221         int r;
2222         EntryItem *items;
2223         dual_timestamp ts;
2224
2225         assert(from);
2226         assert(to);
2227         assert(o);
2228         assert(p);
2229
2230         if (!to->writable)
2231                 return -EPERM;
2232
2233         ts.monotonic = le64toh(o->entry.monotonic);
2234         ts.realtime = le64toh(o->entry.realtime);
2235
2236         if (to->tail_entry_monotonic_valid &&
2237             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2238                 return -EINVAL;
2239
2240         n = journal_file_entry_n_items(o);
2241         items = alloca(sizeof(EntryItem) * n);
2242
2243         for (i = 0; i < n; i++) {
2244                 uint64_t l, h;
2245                 le64_t le_hash;
2246                 size_t t;
2247                 void *data;
2248                 Object *u;
2249
2250                 q = le64toh(o->entry.items[i].object_offset);
2251                 le_hash = o->entry.items[i].hash;
2252
2253                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2254                 if (r < 0)
2255                         return r;
2256
2257                 if (le_hash != o->data.hash)
2258                         return -EBADMSG;
2259
2260                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2261                 t = (size_t) l;
2262
2263                 /* We hit the limit on 32bit machines */
2264                 if ((uint64_t) t != l)
2265                         return -E2BIG;
2266
2267                 if (o->object.flags & OBJECT_COMPRESSED) {
2268 #ifdef HAVE_XZ
2269                         uint64_t rsize;
2270
2271                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2272                                 return -EBADMSG;
2273
2274                         data = from->compress_buffer;
2275                         l = rsize;
2276 #else
2277                         return -EPROTONOSUPPORT;
2278 #endif
2279                 } else
2280                         data = o->data.payload;
2281
2282                 r = journal_file_append_data(to, data, l, &u, &h);
2283                 if (r < 0)
2284                         return r;
2285
2286                 xor_hash ^= le64toh(u->data.hash);
2287                 items[i].object_offset = htole64(h);
2288                 items[i].hash = u->data.hash;
2289
2290                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2291                 if (r < 0)
2292                         return r;
2293         }
2294
2295         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2296 }
2297
2298 void journal_default_metrics(JournalMetrics *m, int fd) {
2299         uint64_t fs_size = 0;
2300         struct statvfs ss;
2301         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2302
2303         assert(m);
2304         assert(fd >= 0);
2305
2306         if (fstatvfs(fd, &ss) >= 0)
2307                 fs_size = ss.f_frsize * ss.f_blocks;
2308
2309         if (m->max_use == (uint64_t) -1) {
2310
2311                 if (fs_size > 0) {
2312                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2313
2314                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2315                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2316
2317                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2318                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2319                 } else
2320                         m->max_use = DEFAULT_MAX_USE_LOWER;
2321         } else {
2322                 m->max_use = PAGE_ALIGN(m->max_use);
2323
2324                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2325                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2326         }
2327
2328         if (m->max_size == (uint64_t) -1) {
2329                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2330
2331                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2332                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2333         } else
2334                 m->max_size = PAGE_ALIGN(m->max_size);
2335
2336         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2337                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2338
2339         if (m->max_size*2 > m->max_use)
2340                 m->max_use = m->max_size*2;
2341
2342         if (m->min_size == (uint64_t) -1)
2343                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2344         else {
2345                 m->min_size = PAGE_ALIGN(m->min_size);
2346
2347                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2348                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2349
2350                 if (m->min_size > m->max_size)
2351                         m->max_size = m->min_size;
2352         }
2353
2354         if (m->keep_free == (uint64_t) -1) {
2355
2356                 if (fs_size > 0) {
2357                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2358
2359                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2360                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2361
2362                 } else
2363                         m->keep_free = DEFAULT_KEEP_FREE;
2364         }
2365
2366         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2367                  format_bytes(a, sizeof(a), m->max_use),
2368                  format_bytes(b, sizeof(b), m->max_size),
2369                  format_bytes(c, sizeof(c), m->min_size),
2370                  format_bytes(d, sizeof(d), m->keep_free));
2371 }
2372
2373 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2374         assert(f);
2375         assert(from || to);
2376
2377         if (from) {
2378                 if (f->header->head_entry_realtime == 0)
2379                         return -ENOENT;
2380
2381                 *from = le64toh(f->header->head_entry_realtime);
2382         }
2383
2384         if (to) {
2385                 if (f->header->tail_entry_realtime == 0)
2386                         return -ENOENT;
2387
2388                 *to = le64toh(f->header->tail_entry_realtime);
2389         }
2390
2391         return 1;
2392 }
2393
2394 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2395         char t[9+32+1] = "_BOOT_ID=";
2396         Object *o;
2397         uint64_t p;
2398         int r;
2399
2400         assert(f);
2401         assert(from || to);
2402
2403         sd_id128_to_string(boot_id, t + 9);
2404
2405         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2406         if (r <= 0)
2407                 return r;
2408
2409         if (le64toh(o->data.n_entries) <= 0)
2410                 return 0;
2411
2412         if (from) {
2413                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2414                 if (r < 0)
2415                         return r;
2416
2417                 *from = le64toh(o->entry.monotonic);
2418         }
2419
2420         if (to) {
2421                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2422                 if (r < 0)
2423                         return r;
2424
2425                 r = generic_array_get_plus_one(f,
2426                                                le64toh(o->data.entry_offset),
2427                                                le64toh(o->data.entry_array_offset),
2428                                                le64toh(o->data.n_entries)-1,
2429                                                &o, NULL);
2430                 if (r <= 0)
2431                         return r;
2432
2433                 *to = le64toh(o->entry.monotonic);
2434         }
2435
2436         return 1;
2437 }
2438
2439 bool journal_file_rotate_suggested(JournalFile *f) {
2440         assert(f);
2441
2442         /* If we gained new header fields we gained new features,
2443          * hence suggest a rotation */
2444         if (le64toh(f->header->header_size) < sizeof(Header)) {
2445                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2446                 return true;
2447         }
2448
2449         /* Let's check if the hash tables grew over a certain fill
2450          * level (75%, borrowing this value from Java's hash table
2451          * implementation), and if so suggest a rotation. To calculate
2452          * the fill level we need the n_data field, which only exists
2453          * in newer versions. */
2454
2455         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2456                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2457                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2458                                   f->path,
2459                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2460                                   (unsigned long long) le64toh(f->header->n_data),
2461                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2462                                   (unsigned long long) (f->last_stat.st_size),
2463                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2464                         return true;
2465                 }
2466
2467         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2468                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2469                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2470                                   f->path,
2471                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2472                                   (unsigned long long) le64toh(f->header->n_fields),
2473                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2474                         return true;
2475                 }
2476
2477         return false;
2478 }