chiark / gitweb /
journald: add additional simple static tests to verifier
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Default sizes, in bytes, of the data and field hash tables. The data
 * hash table value doubles as the lower bound when the table is sized
 * from the configured maximum file size. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for
 * compression when compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence a valid header is at least large enough to reach it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->seal)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fss_file)
100                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
101         else if (f->fsprg_state)
102                 free(f->fsprg_state);
103
104         free(f->fsprg_seed);
105
106         if (f->hmac)
107                 gcry_md_close(f->hmac);
108 #endif
109
110         free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114         Header h;
115         ssize_t k;
116         int r;
117
118         assert(f);
119
120         zero(h);
121         memcpy(h.signature, HEADER_SIGNATURE, 8);
122         h.header_size = htole64(ALIGN64(sizeof(h)));
123
124         h.incompatible_flags =
125                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127         h.compatible_flags =
128                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
129
130         r = sd_id128_randomize(&h.file_id);
131         if (r < 0)
132                 return r;
133
134         if (template) {
135                 h.seqnum_id = template->header->seqnum_id;
136                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
137         } else
138                 h.seqnum_id = h.file_id;
139
140         k = pwrite(f->fd, &h, sizeof(h), 0);
141         if (k < 0)
142                 return -errno;
143
144         if (k != sizeof(h))
145                 return -EIO;
146
147         return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151         int r;
152         sd_id128_t boot_id;
153
154         assert(f);
155
156         r = sd_id128_get_machine(&f->header->machine_id);
157         if (r < 0)
158                 return r;
159
160         r = sd_id128_get_boot(&boot_id);
161         if (r < 0)
162                 return r;
163
164         if (sd_id128_equal(boot_id, f->header->boot_id))
165                 f->tail_entry_monotonic_valid = true;
166
167         f->header->boot_id = boot_id;
168
169         f->header->state = STATE_ONLINE;
170
171         /* Sync the online state to disk */
172         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173         fdatasync(f->fd);
174
175         return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179         assert(f);
180
181         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182                 return -EBADMSG;
183
184         /* In both read and write mode we refuse to open files with
185          * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188                 return -EPROTONOSUPPORT;
189 #else
190         if (f->header->incompatible_flags != 0)
191                 return -EPROTONOSUPPORT;
192 #endif
193
194         /* When open for writing we refuse to open files with
195          * compatible flags, too */
196         if (f->writable) {
197 #ifdef HAVE_GCRYPT
198                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
199                         return -EPROTONOSUPPORT;
200 #else
201                 if (f->header->compatible_flags != 0)
202                         return -EPROTONOSUPPORT;
203 #endif
204         }
205
206         if (f->header->state >= _STATE_MAX)
207                 return -EBADMSG;
208
209         /* The first addition was n_data, so check that we are at least this large */
210         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
211                 return -EBADMSG;
212
213         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
214                 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
215                 return -EBADMSG;
216
217         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
218                 return -ENODATA;
219
220         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
221                 return -ENODATA;
222
223         if (!VALID64(f->header->data_hash_table_offset) ||
224             !VALID64(f->header->field_hash_table_offset) ||
225             !VALID64(f->header->tail_object_offset) ||
226             !VALID64(f->header->entry_array_offset))
227                 return -ENODATA;
228
229         if (f->writable) {
230                 uint8_t state;
231                 sd_id128_t machine_id;
232                 int r;
233
234                 r = sd_id128_get_machine(&machine_id);
235                 if (r < 0)
236                         return r;
237
238                 if (!sd_id128_equal(machine_id, f->header->machine_id))
239                         return -EHOSTDOWN;
240
241                 state = f->header->state;
242
243                 if (state == STATE_ONLINE) {
244                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
245                         return -EBUSY;
246                 } else if (state == STATE_ARCHIVED)
247                         return -ESHUTDOWN;
248                 else if (state != STATE_OFFLINE) {
249                         log_debug("Journal file %s has unknown state %u.", f->path, state);
250                         return -EBUSY;
251                 }
252         }
253
254         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
255         f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
256
257         return 0;
258 }
259
260 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
261         uint64_t old_size, new_size;
262         int r;
263
264         assert(f);
265
266         /* We assume that this file is not sparse, and we know that
267          * for sure, since we always call posix_fallocate()
268          * ourselves */
269
270         old_size =
271                 le64toh(f->header->header_size) +
272                 le64toh(f->header->arena_size);
273
274         new_size = PAGE_ALIGN(offset + size);
275         if (new_size < le64toh(f->header->header_size))
276                 new_size = le64toh(f->header->header_size);
277
278         if (new_size <= old_size)
279                 return 0;
280
281         if (f->metrics.max_size > 0 &&
282             new_size > f->metrics.max_size)
283                 return -E2BIG;
284
285         if (new_size > f->metrics.min_size &&
286             f->metrics.keep_free > 0) {
287                 struct statvfs svfs;
288
289                 if (fstatvfs(f->fd, &svfs) >= 0) {
290                         uint64_t available;
291
292                         available = svfs.f_bfree * svfs.f_bsize;
293
294                         if (available >= f->metrics.keep_free)
295                                 available -= f->metrics.keep_free;
296                         else
297                                 available = 0;
298
299                         if (new_size - old_size > available)
300                                 return -E2BIG;
301                 }
302         }
303
304         /* Note that the glibc fallocate() fallback is very
305            inefficient, hence we try to minimize the allocation area
306            as we can. */
307         r = posix_fallocate(f->fd, old_size, new_size - old_size);
308         if (r != 0)
309                 return -r;
310
311         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
312
313         if (fstat(f->fd, &f->last_stat) < 0)
314                 return -errno;
315
316         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
317
318         return 0;
319 }
320
321 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
322         assert(f);
323         assert(ret);
324
325         /* Avoid SIGBUS on invalid accesses */
326         if (offset + size > (uint64_t) f->last_stat.st_size) {
327                 /* Hmm, out of range? Let's refresh the fstat() data
328                  * first, before we trust that check. */
329
330                 if (fstat(f->fd, &f->last_stat) < 0 ||
331                     offset + size > (uint64_t) f->last_stat.st_size)
332                         return -EADDRNOTAVAIL;
333         }
334
335         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
336 }
337
338 static uint64_t minimum_header_size(Object *o) {
339
340         static uint64_t table[] = {
341                 [OBJECT_DATA] = sizeof(DataObject),
342                 [OBJECT_FIELD] = sizeof(FieldObject),
343                 [OBJECT_ENTRY] = sizeof(EntryObject),
344                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
345                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
346                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
347                 [OBJECT_TAG] = sizeof(TagObject),
348         };
349
350         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
351                 return sizeof(ObjectHeader);
352
353         return table[o->object.type];
354 }
355
356 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
357         int r;
358         void *t;
359         Object *o;
360         uint64_t s;
361         unsigned context;
362
363         assert(f);
364         assert(ret);
365
366         /* Objects may only be located at multiple of 64 bit */
367         if (!VALID64(offset))
368                 return -EFAULT;
369
370         /* One context for each type, plus one catch-all for the rest */
371         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
372
373         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
374         if (r < 0)
375                 return r;
376
377         o = (Object*) t;
378         s = le64toh(o->object.size);
379
380         if (s < sizeof(ObjectHeader))
381                 return -EBADMSG;
382
383         if (o->object.type <= OBJECT_UNUSED)
384                 return -EBADMSG;
385
386         if (s < minimum_header_size(o))
387                 return -EBADMSG;
388
389         if (type >= 0 && o->object.type != type)
390                 return -EBADMSG;
391
392         if (s > sizeof(ObjectHeader)) {
393                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
394                 if (r < 0)
395                         return r;
396
397                 o = (Object*) t;
398         }
399
400         *ret = o;
401         return 0;
402 }
403
404 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
405         uint64_t r;
406
407         assert(f);
408
409         r = le64toh(f->header->tail_entry_seqnum) + 1;
410
411         if (seqnum) {
412                 /* If an external seqnum counter was passed, we update
413                  * both the local and the external one, and set it to
414                  * the maximum of both */
415
416                 if (*seqnum + 1 > r)
417                         r = *seqnum + 1;
418
419                 *seqnum = r;
420         }
421
422         f->header->tail_entry_seqnum = htole64(r);
423
424         if (f->header->head_entry_seqnum == 0)
425                 f->header->head_entry_seqnum = htole64(r);
426
427         return r;
428 }
429
430 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
431         int r;
432         uint64_t p;
433         Object *tail, *o;
434         void *t;
435
436         assert(f);
437         assert(type > 0 && type < _OBJECT_TYPE_MAX);
438         assert(size >= sizeof(ObjectHeader));
439         assert(offset);
440         assert(ret);
441
442         p = le64toh(f->header->tail_object_offset);
443         if (p == 0)
444                 p = le64toh(f->header->header_size);
445         else {
446                 r = journal_file_move_to_object(f, -1, p, &tail);
447                 if (r < 0)
448                         return r;
449
450                 p += ALIGN64(le64toh(tail->object.size));
451         }
452
453         r = journal_file_allocate(f, p, size);
454         if (r < 0)
455                 return r;
456
457         r = journal_file_move_to(f, type, p, size, &t);
458         if (r < 0)
459                 return r;
460
461         o = (Object*) t;
462
463         zero(o->object);
464         o->object.type = type;
465         o->object.size = htole64(size);
466
467         f->header->tail_object_offset = htole64(p);
468         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
469
470         *ret = o;
471         *offset = p;
472
473         return 0;
474 }
475
476 static int journal_file_setup_data_hash_table(JournalFile *f) {
477         uint64_t s, p;
478         Object *o;
479         int r;
480
481         assert(f);
482
483         /* We estimate that we need 1 hash table entry per 768 of
484            journal file and we want to make sure we never get beyond
485            75% fill level. Calculate the hash table size for the
486            maximum file size based on these metrics. */
487
488         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
489         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
490                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
491
492         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
493
494         r = journal_file_append_object(f,
495                                        OBJECT_DATA_HASH_TABLE,
496                                        offsetof(Object, hash_table.items) + s,
497                                        &o, &p);
498         if (r < 0)
499                 return r;
500
501         memset(o->hash_table.items, 0, s);
502
503         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
504         f->header->data_hash_table_size = htole64(s);
505
506         return 0;
507 }
508
509 static int journal_file_setup_field_hash_table(JournalFile *f) {
510         uint64_t s, p;
511         Object *o;
512         int r;
513
514         assert(f);
515
516         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
517         r = journal_file_append_object(f,
518                                        OBJECT_FIELD_HASH_TABLE,
519                                        offsetof(Object, hash_table.items) + s,
520                                        &o, &p);
521         if (r < 0)
522                 return r;
523
524         memset(o->hash_table.items, 0, s);
525
526         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
527         f->header->field_hash_table_size = htole64(s);
528
529         return 0;
530 }
531
532 static int journal_file_map_data_hash_table(JournalFile *f) {
533         uint64_t s, p;
534         void *t;
535         int r;
536
537         assert(f);
538
539         p = le64toh(f->header->data_hash_table_offset);
540         s = le64toh(f->header->data_hash_table_size);
541
542         r = journal_file_move_to(f,
543                                  OBJECT_DATA_HASH_TABLE,
544                                  p, s,
545                                  &t);
546         if (r < 0)
547                 return r;
548
549         f->data_hash_table = t;
550         return 0;
551 }
552
553 static int journal_file_map_field_hash_table(JournalFile *f) {
554         uint64_t s, p;
555         void *t;
556         int r;
557
558         assert(f);
559
560         p = le64toh(f->header->field_hash_table_offset);
561         s = le64toh(f->header->field_hash_table_size);
562
563         r = journal_file_move_to(f,
564                                  OBJECT_FIELD_HASH_TABLE,
565                                  p, s,
566                                  &t);
567         if (r < 0)
568                 return r;
569
570         f->field_hash_table = t;
571         return 0;
572 }
573
574 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
575         uint64_t p, h;
576         int r;
577
578         assert(f);
579         assert(o);
580         assert(offset > 0);
581         assert(o->object.type == OBJECT_DATA);
582
583         /* This might alter the window we are looking at */
584
585         o->data.next_hash_offset = o->data.next_field_offset = 0;
586         o->data.entry_offset = o->data.entry_array_offset = 0;
587         o->data.n_entries = 0;
588
589         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
590         p = le64toh(f->data_hash_table[h].tail_hash_offset);
591         if (p == 0) {
592                 /* Only entry in the hash table is easy */
593                 f->data_hash_table[h].head_hash_offset = htole64(offset);
594         } else {
595                 /* Move back to the previous data object, to patch in
596                  * pointer */
597
598                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
599                 if (r < 0)
600                         return r;
601
602                 o->data.next_hash_offset = htole64(offset);
603         }
604
605         f->data_hash_table[h].tail_hash_offset = htole64(offset);
606
607         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
608                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
609
610         return 0;
611 }
612
613 int journal_file_find_data_object_with_hash(
614                 JournalFile *f,
615                 const void *data, uint64_t size, uint64_t hash,
616                 Object **ret, uint64_t *offset) {
617
618         uint64_t p, osize, h;
619         int r;
620
621         assert(f);
622         assert(data || size == 0);
623
624         osize = offsetof(Object, data.payload) + size;
625
626         if (f->header->data_hash_table_size == 0)
627                 return -EBADMSG;
628
629         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
630         p = le64toh(f->data_hash_table[h].head_hash_offset);
631
632         while (p > 0) {
633                 Object *o;
634
635                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
636                 if (r < 0)
637                         return r;
638
639                 if (le64toh(o->data.hash) != hash)
640                         goto next;
641
642                 if (o->object.flags & OBJECT_COMPRESSED) {
643 #ifdef HAVE_XZ
644                         uint64_t l, rsize;
645
646                         l = le64toh(o->object.size);
647                         if (l <= offsetof(Object, data.payload))
648                                 return -EBADMSG;
649
650                         l -= offsetof(Object, data.payload);
651
652                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
653                                 return -EBADMSG;
654
655                         if (rsize == size &&
656                             memcmp(f->compress_buffer, data, size) == 0) {
657
658                                 if (ret)
659                                         *ret = o;
660
661                                 if (offset)
662                                         *offset = p;
663
664                                 return 1;
665                         }
666 #else
667                         return -EPROTONOSUPPORT;
668 #endif
669
670                 } else if (le64toh(o->object.size) == osize &&
671                            memcmp(o->data.payload, data, size) == 0) {
672
673                         if (ret)
674                                 *ret = o;
675
676                         if (offset)
677                                 *offset = p;
678
679                         return 1;
680                 }
681
682         next:
683                 p = le64toh(o->data.next_hash_offset);
684         }
685
686         return 0;
687 }
688
689 int journal_file_find_data_object(
690                 JournalFile *f,
691                 const void *data, uint64_t size,
692                 Object **ret, uint64_t *offset) {
693
694         uint64_t hash;
695
696         assert(f);
697         assert(data || size == 0);
698
699         hash = hash64(data, size);
700
701         return journal_file_find_data_object_with_hash(f,
702                                                        data, size, hash,
703                                                        ret, offset);
704 }
705
706 static int journal_file_append_data(
707                 JournalFile *f,
708                 const void *data, uint64_t size,
709                 Object **ret, uint64_t *offset) {
710
711         uint64_t hash, p;
712         uint64_t osize;
713         Object *o;
714         int r;
715         bool compressed = false;
716
717         assert(f);
718         assert(data || size == 0);
719
720         hash = hash64(data, size);
721
722         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
723         if (r < 0)
724                 return r;
725         else if (r > 0) {
726
727                 if (ret)
728                         *ret = o;
729
730                 if (offset)
731                         *offset = p;
732
733                 return 0;
734         }
735
736         osize = offsetof(Object, data.payload) + size;
737         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
738         if (r < 0)
739                 return r;
740
741         o->data.hash = htole64(hash);
742
743 #ifdef HAVE_XZ
744         if (f->compress &&
745             size >= COMPRESSION_SIZE_THRESHOLD) {
746                 uint64_t rsize;
747
748                 compressed = compress_blob(data, size, o->data.payload, &rsize);
749
750                 if (compressed) {
751                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
752                         o->object.flags |= OBJECT_COMPRESSED;
753
754                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
755                 }
756         }
757 #endif
758
759         if (!compressed && size > 0)
760                 memcpy(o->data.payload, data, size);
761
762         r = journal_file_link_data(f, o, p, hash);
763         if (r < 0)
764                 return r;
765
766         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
767         if (r < 0)
768                 return r;
769
770         /* The linking might have altered the window, so let's
771          * refresh our pointer */
772         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
773         if (r < 0)
774                 return r;
775
776         if (ret)
777                 *ret = o;
778
779         if (offset)
780                 *offset = p;
781
782         return 0;
783 }
784
785 uint64_t journal_file_entry_n_items(Object *o) {
786         assert(o);
787         assert(o->object.type == OBJECT_ENTRY);
788
789         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
790 }
791
792 uint64_t journal_file_entry_array_n_items(Object *o) {
793         assert(o);
794         assert(o->object.type == OBJECT_ENTRY_ARRAY);
795
796         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
797 }
798
/* Stores the entry offset p at position *idx of a chained list of entry
 * array objects and increments *idx.
 *
 * first points to the (little-endian) offset of the first array of the
 * chain, idx to the (little-endian) number of items stored so far. If
 * the position lies beyond the existing arrays, a new array object is
 * appended to the file and hooked up at the end of the chain (or as
 * first array, if the chain is empty).
 *
 * Returns 0 on success, a negative errno-style value on failure. */
static int link_entry_into_array(JournalFile *f,
                                 le64_t *first,
                                 le64_t *idx,
                                 uint64_t p) {
        int r;
        uint64_t n = 0, ap = 0, q, i, a, hidx;
        Object *o;

        assert(f);
        assert(first);
        assert(idx);
        assert(p > 0);

        /* Walk the chain. i is the index relative to the array currently
         * looked at, hidx remembers the overall index. */
        a = le64toh(*first);
        i = hidx = le64toh(*idx);
        while (a > 0) {

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
                if (r < 0)
                        return r;

                n = journal_file_entry_array_n_items(o);
                if (i < n) {
                        /* The slot exists in this array, fill it in */
                        o->entry_array.items[i] = htole64(p);
                        *idx = htole64(hidx + 1);
                        return 0;
                }

                /* Not in this array: skip its items and remember it as
                 * the predecessor of whatever comes next */
                i -= n;
                ap = a;
                a = le64toh(o->entry_array.next_entry_array_offset);
        }

        /* No array in the chain holds the slot, so a new one is needed.
         * Its size is based on twice the larger of the overall index
         * (plus one) and the size of the last array seen, with a minimum
         * of 4 items. */
        if (hidx > n)
                n = (hidx+1) * 2;
        else
                n = n * 2;

        if (n < 4)
                n = 4;

        r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
                                       offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
                                       &o, &q);
        if (r < 0)
                return r;

        r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
        if (r < 0)
                return r;

        /* i is now the index relative to the new array */
        o->entry_array.items[i] = htole64(p);

        if (ap == 0)
                /* The chain was empty, the new array becomes its head */
                *first = htole64(q);
        else {
                /* Otherwise patch the link in the previous last array */
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
                if (r < 0)
                        return r;

                o->entry_array.next_entry_array_offset = htole64(q);
        }

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);

        *idx = htole64(hidx + 1);

        return 0;
}
869
870 static int link_entry_into_array_plus_one(JournalFile *f,
871                                           le64_t *extra,
872                                           le64_t *first,
873                                           le64_t *idx,
874                                           uint64_t p) {
875
876         int r;
877
878         assert(f);
879         assert(extra);
880         assert(first);
881         assert(idx);
882         assert(p > 0);
883
884         if (*idx == 0)
885                 *extra = htole64(p);
886         else {
887                 le64_t i;
888
889                 i = htole64(le64toh(*idx) - 1);
890                 r = link_entry_into_array(f, first, &i, p);
891                 if (r < 0)
892                         return r;
893         }
894
895         *idx = htole64(le64toh(*idx) + 1);
896         return 0;
897 }
898
899 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
900         uint64_t p;
901         int r;
902         assert(f);
903         assert(o);
904         assert(offset > 0);
905
906         p = le64toh(o->entry.items[i].object_offset);
907         if (p == 0)
908                 return -EINVAL;
909
910         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
911         if (r < 0)
912                 return r;
913
914         return link_entry_into_array_plus_one(f,
915                                               &o->data.entry_offset,
916                                               &o->data.entry_array_offset,
917                                               &o->data.n_entries,
918                                               offset);
919 }
920
921 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
922         uint64_t n, i;
923         int r;
924
925         assert(f);
926         assert(o);
927         assert(offset > 0);
928         assert(o->object.type == OBJECT_ENTRY);
929
930         __sync_synchronize();
931
932         /* Link up the entry itself */
933         r = link_entry_into_array(f,
934                                   &f->header->entry_array_offset,
935                                   &f->header->n_entries,
936                                   offset);
937         if (r < 0)
938                 return r;
939
940         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
941
942         if (f->header->head_entry_realtime == 0)
943                 f->header->head_entry_realtime = o->entry.realtime;
944
945         f->header->tail_entry_realtime = o->entry.realtime;
946         f->header->tail_entry_monotonic = o->entry.monotonic;
947
948         f->tail_entry_monotonic_valid = true;
949
950         /* Link up the items */
951         n = journal_file_entry_n_items(o);
952         for (i = 0; i < n; i++) {
953                 r = journal_file_link_entry_item(f, o, offset, i);
954                 if (r < 0)
955                         return r;
956         }
957
958         return 0;
959 }
960
961 static int journal_file_append_entry_internal(
962                 JournalFile *f,
963                 const dual_timestamp *ts,
964                 uint64_t xor_hash,
965                 const EntryItem items[], unsigned n_items,
966                 uint64_t *seqnum,
967                 Object **ret, uint64_t *offset) {
968         uint64_t np;
969         uint64_t osize;
970         Object *o;
971         int r;
972
973         assert(f);
974         assert(items || n_items == 0);
975         assert(ts);
976
977         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
978
979         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
980         if (r < 0)
981                 return r;
982
983         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
984         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
985         o->entry.realtime = htole64(ts->realtime);
986         o->entry.monotonic = htole64(ts->monotonic);
987         o->entry.xor_hash = htole64(xor_hash);
988         o->entry.boot_id = f->header->boot_id;
989
990         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
991         if (r < 0)
992                 return r;
993
994         r = journal_file_link_entry(f, o, np);
995         if (r < 0)
996                 return r;
997
998         if (ret)
999                 *ret = o;
1000
1001         if (offset)
1002                 *offset = np;
1003
1004         return 0;
1005 }
1006
1007 void journal_file_post_change(JournalFile *f) {
1008         assert(f);
1009
1010         /* inotify() does not receive IN_MODIFY events from file
1011          * accesses done via mmap(). After each access we hence
1012          * trigger IN_MODIFY by truncating the journal file to its
1013          * current size which triggers IN_MODIFY. */
1014
1015         __sync_synchronize();
1016
1017         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1018                 log_error("Failed to to truncate file to its own size: %m");
1019 }
1020
1021 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1022         unsigned i;
1023         EntryItem *items;
1024         int r;
1025         uint64_t xor_hash = 0;
1026         struct dual_timestamp _ts;
1027
1028         assert(f);
1029         assert(iovec || n_iovec == 0);
1030
1031         if (!f->writable)
1032                 return -EPERM;
1033
1034         if (!ts) {
1035                 dual_timestamp_get(&_ts);
1036                 ts = &_ts;
1037         }
1038
1039         if (f->tail_entry_monotonic_valid &&
1040             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1041                 return -EINVAL;
1042
1043         r = journal_file_maybe_append_tag(f, ts->realtime);
1044         if (r < 0)
1045                 return r;
1046
1047         /* alloca() can't take 0, hence let's allocate at least one */
1048         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1049
1050         for (i = 0; i < n_iovec; i++) {
1051                 uint64_t p;
1052                 Object *o;
1053
1054                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1055                 if (r < 0)
1056                         return r;
1057
1058                 xor_hash ^= le64toh(o->data.hash);
1059                 items[i].object_offset = htole64(p);
1060                 items[i].hash = o->data.hash;
1061         }
1062
1063         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1064
1065         journal_file_post_change(f);
1066
1067         return r;
1068 }
1069
1070 static int generic_array_get(JournalFile *f,
1071                              uint64_t first,
1072                              uint64_t i,
1073                              Object **ret, uint64_t *offset) {
1074
1075         Object *o;
1076         uint64_t p = 0, a;
1077         int r;
1078
1079         assert(f);
1080
1081         a = first;
1082         while (a > 0) {
1083                 uint64_t n;
1084
1085                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1086                 if (r < 0)
1087                         return r;
1088
1089                 n = journal_file_entry_array_n_items(o);
1090                 if (i < n) {
1091                         p = le64toh(o->entry_array.items[i]);
1092                         break;
1093                 }
1094
1095                 i -= n;
1096                 a = le64toh(o->entry_array.next_entry_array_offset);
1097         }
1098
1099         if (a <= 0 || p <= 0)
1100                 return 0;
1101
1102         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1103         if (r < 0)
1104                 return r;
1105
1106         if (ret)
1107                 *ret = o;
1108
1109         if (offset)
1110                 *offset = p;
1111
1112         return 1;
1113 }
1114
1115 static int generic_array_get_plus_one(JournalFile *f,
1116                                       uint64_t extra,
1117                                       uint64_t first,
1118                                       uint64_t i,
1119                                       Object **ret, uint64_t *offset) {
1120
1121         Object *o;
1122
1123         assert(f);
1124
1125         if (i == 0) {
1126                 int r;
1127
1128                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1129                 if (r < 0)
1130                         return r;
1131
1132                 if (ret)
1133                         *ret = o;
1134
1135                 if (offset)
1136                         *offset = extra;
1137
1138                 return 1;
1139         }
1140
1141         return generic_array_get(f, first, i-1, ret, offset);
1142 }
1143
/* Results of the test_object() callbacks used by the bisection code */
enum {
        TEST_FOUND = 0,  /* the tested object matches the needle */
        TEST_LEFT  = 1,  /* the tested object sorts before the needle */
        TEST_RIGHT = 2   /* the tested object sorts after the needle */
};
1149
1150 static int generic_array_bisect(JournalFile *f,
1151                                 uint64_t first,
1152                                 uint64_t n,
1153                                 uint64_t needle,
1154                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1155                                 direction_t direction,
1156                                 Object **ret,
1157                                 uint64_t *offset,
1158                                 uint64_t *idx) {
1159
1160         uint64_t a, p, t = 0, i = 0, last_p = 0;
1161         bool subtract_one = false;
1162         Object *o, *array = NULL;
1163         int r;
1164
1165         assert(f);
1166         assert(test_object);
1167
1168         a = first;
1169         while (a > 0) {
1170                 uint64_t left, right, k, lp;
1171
1172                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1173                 if (r < 0)
1174                         return r;
1175
1176                 k = journal_file_entry_array_n_items(array);
1177                 right = MIN(k, n);
1178                 if (right <= 0)
1179                         return 0;
1180
1181                 i = right - 1;
1182                 lp = p = le64toh(array->entry_array.items[i]);
1183                 if (p <= 0)
1184                         return -EBADMSG;
1185
1186                 r = test_object(f, p, needle);
1187                 if (r < 0)
1188                         return r;
1189
1190                 if (r == TEST_FOUND)
1191                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1192
1193                 if (r == TEST_RIGHT) {
1194                         left = 0;
1195                         right -= 1;
1196                         for (;;) {
1197                                 if (left == right) {
1198                                         if (direction == DIRECTION_UP)
1199                                                 subtract_one = true;
1200
1201                                         i = left;
1202                                         goto found;
1203                                 }
1204
1205                                 assert(left < right);
1206
1207                                 i = (left + right) / 2;
1208                                 p = le64toh(array->entry_array.items[i]);
1209                                 if (p <= 0)
1210                                         return -EBADMSG;
1211
1212                                 r = test_object(f, p, needle);
1213                                 if (r < 0)
1214                                         return r;
1215
1216                                 if (r == TEST_FOUND)
1217                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1218
1219                                 if (r == TEST_RIGHT)
1220                                         right = i;
1221                                 else
1222                                         left = i + 1;
1223                         }
1224                 }
1225
1226                 if (k > n) {
1227                         if (direction == DIRECTION_UP) {
1228                                 i = n;
1229                                 subtract_one = true;
1230                                 goto found;
1231                         }
1232
1233                         return 0;
1234                 }
1235
1236                 last_p = lp;
1237
1238                 n -= k;
1239                 t += k;
1240                 a = le64toh(array->entry_array.next_entry_array_offset);
1241         }
1242
1243         return 0;
1244
1245 found:
1246         if (subtract_one && t == 0 && i == 0)
1247                 return 0;
1248
1249         if (subtract_one && i == 0)
1250                 p = last_p;
1251         else if (subtract_one)
1252                 p = le64toh(array->entry_array.items[i-1]);
1253         else
1254                 p = le64toh(array->entry_array.items[i]);
1255
1256         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1257         if (r < 0)
1258                 return r;
1259
1260         if (ret)
1261                 *ret = o;
1262
1263         if (offset)
1264                 *offset = p;
1265
1266         if (idx)
1267                 *idx = t + i + (subtract_one ? -1 : 0);
1268
1269         return 1;
1270 }
1271
1272 static int generic_array_bisect_plus_one(JournalFile *f,
1273                                          uint64_t extra,
1274                                          uint64_t first,
1275                                          uint64_t n,
1276                                          uint64_t needle,
1277                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1278                                          direction_t direction,
1279                                          Object **ret,
1280                                          uint64_t *offset,
1281                                          uint64_t *idx) {
1282
1283         int r;
1284         bool step_back = false;
1285         Object *o;
1286
1287         assert(f);
1288         assert(test_object);
1289
1290         if (n <= 0)
1291                 return 0;
1292
1293         /* This bisects the array in object 'first', but first checks
1294          * an extra  */
1295         r = test_object(f, extra, needle);
1296         if (r < 0)
1297                 return r;
1298
1299         if (r == TEST_FOUND)
1300                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1301
1302         /* if we are looking with DIRECTION_UP then we need to first
1303            see if in the actual array there is a matching entry, and
1304            return the last one of that. But if there isn't any we need
1305            to return this one. Hence remember this, and return it
1306            below. */
1307         if (r == TEST_LEFT)
1308                 step_back = direction == DIRECTION_UP;
1309
1310         if (r == TEST_RIGHT) {
1311                 if (direction == DIRECTION_DOWN)
1312                         goto found;
1313                 else
1314                         return 0;
1315         }
1316
1317         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1318
1319         if (r == 0 && step_back)
1320                 goto found;
1321
1322         if (r > 0 && idx)
1323                 (*idx) ++;
1324
1325         return r;
1326
1327 found:
1328         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1329         if (r < 0)
1330                 return r;
1331
1332         if (ret)
1333                 *ret = o;
1334
1335         if (offset)
1336                 *offset = extra;
1337
1338         if (idx)
1339                 *idx = 0;
1340
1341         return 1;
1342 }
1343
1344 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1345         assert(f);
1346         assert(p > 0);
1347
1348         if (p == needle)
1349                 return TEST_FOUND;
1350         else if (p < needle)
1351                 return TEST_LEFT;
1352         else
1353                 return TEST_RIGHT;
1354 }
1355
1356 int journal_file_move_to_entry_by_offset(
1357                 JournalFile *f,
1358                 uint64_t p,
1359                 direction_t direction,
1360                 Object **ret,
1361                 uint64_t *offset) {
1362
1363         return generic_array_bisect(f,
1364                                     le64toh(f->header->entry_array_offset),
1365                                     le64toh(f->header->n_entries),
1366                                     p,
1367                                     test_object_offset,
1368                                     direction,
1369                                     ret, offset, NULL);
1370 }
1371
1372
1373 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1374         Object *o;
1375         int r;
1376
1377         assert(f);
1378         assert(p > 0);
1379
1380         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1381         if (r < 0)
1382                 return r;
1383
1384         if (le64toh(o->entry.seqnum) == needle)
1385                 return TEST_FOUND;
1386         else if (le64toh(o->entry.seqnum) < needle)
1387                 return TEST_LEFT;
1388         else
1389                 return TEST_RIGHT;
1390 }
1391
1392 int journal_file_move_to_entry_by_seqnum(
1393                 JournalFile *f,
1394                 uint64_t seqnum,
1395                 direction_t direction,
1396                 Object **ret,
1397                 uint64_t *offset) {
1398
1399         return generic_array_bisect(f,
1400                                     le64toh(f->header->entry_array_offset),
1401                                     le64toh(f->header->n_entries),
1402                                     seqnum,
1403                                     test_object_seqnum,
1404                                     direction,
1405                                     ret, offset, NULL);
1406 }
1407
1408 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1409         Object *o;
1410         int r;
1411
1412         assert(f);
1413         assert(p > 0);
1414
1415         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1416         if (r < 0)
1417                 return r;
1418
1419         if (le64toh(o->entry.realtime) == needle)
1420                 return TEST_FOUND;
1421         else if (le64toh(o->entry.realtime) < needle)
1422                 return TEST_LEFT;
1423         else
1424                 return TEST_RIGHT;
1425 }
1426
1427 int journal_file_move_to_entry_by_realtime(
1428                 JournalFile *f,
1429                 uint64_t realtime,
1430                 direction_t direction,
1431                 Object **ret,
1432                 uint64_t *offset) {
1433
1434         return generic_array_bisect(f,
1435                                     le64toh(f->header->entry_array_offset),
1436                                     le64toh(f->header->n_entries),
1437                                     realtime,
1438                                     test_object_realtime,
1439                                     direction,
1440                                     ret, offset, NULL);
1441 }
1442
1443 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1444         Object *o;
1445         int r;
1446
1447         assert(f);
1448         assert(p > 0);
1449
1450         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1451         if (r < 0)
1452                 return r;
1453
1454         if (le64toh(o->entry.monotonic) == needle)
1455                 return TEST_FOUND;
1456         else if (le64toh(o->entry.monotonic) < needle)
1457                 return TEST_LEFT;
1458         else
1459                 return TEST_RIGHT;
1460 }
1461
1462 int journal_file_move_to_entry_by_monotonic(
1463                 JournalFile *f,
1464                 sd_id128_t boot_id,
1465                 uint64_t monotonic,
1466                 direction_t direction,
1467                 Object **ret,
1468                 uint64_t *offset) {
1469
1470         char t[9+32+1] = "_BOOT_ID=";
1471         Object *o;
1472         int r;
1473
1474         assert(f);
1475
1476         sd_id128_to_string(boot_id, t + 9);
1477         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1478         if (r < 0)
1479                 return r;
1480         if (r == 0)
1481                 return -ENOENT;
1482
1483         return generic_array_bisect_plus_one(f,
1484                                              le64toh(o->data.entry_offset),
1485                                              le64toh(o->data.entry_array_offset),
1486                                              le64toh(o->data.n_entries),
1487                                              monotonic,
1488                                              test_object_monotonic,
1489                                              direction,
1490                                              ret, offset, NULL);
1491 }
1492
1493 int journal_file_next_entry(
1494                 JournalFile *f,
1495                 Object *o, uint64_t p,
1496                 direction_t direction,
1497                 Object **ret, uint64_t *offset) {
1498
1499         uint64_t i, n;
1500         int r;
1501
1502         assert(f);
1503         assert(p > 0 || !o);
1504
1505         n = le64toh(f->header->n_entries);
1506         if (n <= 0)
1507                 return 0;
1508
1509         if (!o)
1510                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1511         else {
1512                 if (o->object.type != OBJECT_ENTRY)
1513                         return -EINVAL;
1514
1515                 r = generic_array_bisect(f,
1516                                          le64toh(f->header->entry_array_offset),
1517                                          le64toh(f->header->n_entries),
1518                                          p,
1519                                          test_object_offset,
1520                                          DIRECTION_DOWN,
1521                                          NULL, NULL,
1522                                          &i);
1523                 if (r <= 0)
1524                         return r;
1525
1526                 if (direction == DIRECTION_DOWN) {
1527                         if (i >= n - 1)
1528                                 return 0;
1529
1530                         i++;
1531                 } else {
1532                         if (i <= 0)
1533                                 return 0;
1534
1535                         i--;
1536                 }
1537         }
1538
1539         /* And jump to it */
1540         return generic_array_get(f,
1541                                  le64toh(f->header->entry_array_offset),
1542                                  i,
1543                                  ret, offset);
1544 }
1545
1546 int journal_file_skip_entry(
1547                 JournalFile *f,
1548                 Object *o, uint64_t p,
1549                 int64_t skip,
1550                 Object **ret, uint64_t *offset) {
1551
1552         uint64_t i, n;
1553         int r;
1554
1555         assert(f);
1556         assert(o);
1557         assert(p > 0);
1558
1559         if (o->object.type != OBJECT_ENTRY)
1560                 return -EINVAL;
1561
1562         r = generic_array_bisect(f,
1563                                  le64toh(f->header->entry_array_offset),
1564                                  le64toh(f->header->n_entries),
1565                                  p,
1566                                  test_object_offset,
1567                                  DIRECTION_DOWN,
1568                                  NULL, NULL,
1569                                  &i);
1570         if (r <= 0)
1571                 return r;
1572
1573         /* Calculate new index */
1574         if (skip < 0) {
1575                 if ((uint64_t) -skip >= i)
1576                         i = 0;
1577                 else
1578                         i = i - (uint64_t) -skip;
1579         } else
1580                 i  += (uint64_t) skip;
1581
1582         n = le64toh(f->header->n_entries);
1583         if (n <= 0)
1584                 return -EBADMSG;
1585
1586         if (i >= n)
1587                 i = n-1;
1588
1589         return generic_array_get(f,
1590                                  le64toh(f->header->entry_array_offset),
1591                                  i,
1592                                  ret, offset);
1593 }
1594
1595 int journal_file_next_entry_for_data(
1596                 JournalFile *f,
1597                 Object *o, uint64_t p,
1598                 uint64_t data_offset,
1599                 direction_t direction,
1600                 Object **ret, uint64_t *offset) {
1601
1602         uint64_t n, i;
1603         int r;
1604         Object *d;
1605
1606         assert(f);
1607         assert(p > 0 || !o);
1608
1609         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1610         if (r < 0)
1611                 return r;
1612
1613         n = le64toh(d->data.n_entries);
1614         if (n <= 0)
1615                 return n;
1616
1617         if (!o)
1618                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1619         else {
1620                 if (o->object.type != OBJECT_ENTRY)
1621                         return -EINVAL;
1622
1623                 r = generic_array_bisect_plus_one(f,
1624                                                   le64toh(d->data.entry_offset),
1625                                                   le64toh(d->data.entry_array_offset),
1626                                                   le64toh(d->data.n_entries),
1627                                                   p,
1628                                                   test_object_offset,
1629                                                   DIRECTION_DOWN,
1630                                                   NULL, NULL,
1631                                                   &i);
1632
1633                 if (r <= 0)
1634                         return r;
1635
1636                 if (direction == DIRECTION_DOWN) {
1637                         if (i >= n - 1)
1638                                 return 0;
1639
1640                         i++;
1641                 } else {
1642                         if (i <= 0)
1643                                 return 0;
1644
1645                         i--;
1646                 }
1647
1648         }
1649
1650         return generic_array_get_plus_one(f,
1651                                           le64toh(d->data.entry_offset),
1652                                           le64toh(d->data.entry_array_offset),
1653                                           i,
1654                                           ret, offset);
1655 }
1656
1657 int journal_file_move_to_entry_by_offset_for_data(
1658                 JournalFile *f,
1659                 uint64_t data_offset,
1660                 uint64_t p,
1661                 direction_t direction,
1662                 Object **ret, uint64_t *offset) {
1663
1664         int r;
1665         Object *d;
1666
1667         assert(f);
1668
1669         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1670         if (r < 0)
1671                 return r;
1672
1673         return generic_array_bisect_plus_one(f,
1674                                              le64toh(d->data.entry_offset),
1675                                              le64toh(d->data.entry_array_offset),
1676                                              le64toh(d->data.n_entries),
1677                                              p,
1678                                              test_object_offset,
1679                                              direction,
1680                                              ret, offset, NULL);
1681 }
1682
1683 int journal_file_move_to_entry_by_monotonic_for_data(
1684                 JournalFile *f,
1685                 uint64_t data_offset,
1686                 sd_id128_t boot_id,
1687                 uint64_t monotonic,
1688                 direction_t direction,
1689                 Object **ret, uint64_t *offset) {
1690
1691         char t[9+32+1] = "_BOOT_ID=";
1692         Object *o, *d;
1693         int r;
1694         uint64_t b, z;
1695
1696         assert(f);
1697
1698         /* First, seek by time */
1699         sd_id128_to_string(boot_id, t + 9);
1700         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1701         if (r < 0)
1702                 return r;
1703         if (r == 0)
1704                 return -ENOENT;
1705
1706         r = generic_array_bisect_plus_one(f,
1707                                           le64toh(o->data.entry_offset),
1708                                           le64toh(o->data.entry_array_offset),
1709                                           le64toh(o->data.n_entries),
1710                                           monotonic,
1711                                           test_object_monotonic,
1712                                           direction,
1713                                           NULL, &z, NULL);
1714         if (r <= 0)
1715                 return r;
1716
1717         /* And now, continue seeking until we find an entry that
1718          * exists in both bisection arrays */
1719
1720         for (;;) {
1721                 Object *qo;
1722                 uint64_t p, q;
1723
1724                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1725                 if (r < 0)
1726                         return r;
1727
1728                 r = generic_array_bisect_plus_one(f,
1729                                                   le64toh(d->data.entry_offset),
1730                                                   le64toh(d->data.entry_array_offset),
1731                                                   le64toh(d->data.n_entries),
1732                                                   z,
1733                                                   test_object_offset,
1734                                                   direction,
1735                                                   NULL, &p, NULL);
1736                 if (r <= 0)
1737                         return r;
1738
1739                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1740                 if (r < 0)
1741                         return r;
1742
1743                 r = generic_array_bisect_plus_one(f,
1744                                                   le64toh(o->data.entry_offset),
1745                                                   le64toh(o->data.entry_array_offset),
1746                                                   le64toh(o->data.n_entries),
1747                                                   p,
1748                                                   test_object_offset,
1749                                                   direction,
1750                                                   &qo, &q, NULL);
1751
1752                 if (r <= 0)
1753                         return r;
1754
1755                 if (p == q) {
1756                         if (ret)
1757                                 *ret = qo;
1758                         if (offset)
1759                                 *offset = q;
1760
1761                         return 1;
1762                 }
1763
1764                 z = q;
1765         }
1766
1767         return 0;
1768 }
1769
1770 int journal_file_move_to_entry_by_seqnum_for_data(
1771                 JournalFile *f,
1772                 uint64_t data_offset,
1773                 uint64_t seqnum,
1774                 direction_t direction,
1775                 Object **ret, uint64_t *offset) {
1776
1777         Object *d;
1778         int r;
1779
1780         assert(f);
1781
1782         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1783         if (r < 0)
1784                 return r;
1785
1786         return generic_array_bisect_plus_one(f,
1787                                              le64toh(d->data.entry_offset),
1788                                              le64toh(d->data.entry_array_offset),
1789                                              le64toh(d->data.n_entries),
1790                                              seqnum,
1791                                              test_object_seqnum,
1792                                              direction,
1793                                              ret, offset, NULL);
1794 }
1795
1796 int journal_file_move_to_entry_by_realtime_for_data(
1797                 JournalFile *f,
1798                 uint64_t data_offset,
1799                 uint64_t realtime,
1800                 direction_t direction,
1801                 Object **ret, uint64_t *offset) {
1802
1803         Object *d;
1804         int r;
1805
1806         assert(f);
1807
1808         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1809         if (r < 0)
1810                 return r;
1811
1812         return generic_array_bisect_plus_one(f,
1813                                              le64toh(d->data.entry_offset),
1814                                              le64toh(d->data.entry_array_offset),
1815                                              le64toh(d->data.n_entries),
1816                                              realtime,
1817                                              test_object_realtime,
1818                                              direction,
1819                                              ret, offset, NULL);
1820 }
1821
1822 void journal_file_dump(JournalFile *f) {
1823         Object *o;
1824         int r;
1825         uint64_t p;
1826
1827         assert(f);
1828
1829         journal_file_print_header(f);
1830
1831         p = le64toh(f->header->header_size);
1832         while (p != 0) {
1833                 r = journal_file_move_to_object(f, -1, p, &o);
1834                 if (r < 0)
1835                         goto fail;
1836
1837                 switch (o->object.type) {
1838
1839                 case OBJECT_UNUSED:
1840                         printf("Type: OBJECT_UNUSED\n");
1841                         break;
1842
1843                 case OBJECT_DATA:
1844                         printf("Type: OBJECT_DATA\n");
1845                         break;
1846
1847                 case OBJECT_ENTRY:
1848                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1849                                (unsigned long long) le64toh(o->entry.seqnum),
1850                                (unsigned long long) le64toh(o->entry.monotonic),
1851                                (unsigned long long) le64toh(o->entry.realtime));
1852                         break;
1853
1854                 case OBJECT_FIELD_HASH_TABLE:
1855                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1856                         break;
1857
1858                 case OBJECT_DATA_HASH_TABLE:
1859                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1860                         break;
1861
1862                 case OBJECT_ENTRY_ARRAY:
1863                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1864                         break;
1865
1866                 case OBJECT_TAG:
1867                         printf("Type: OBJECT_TAG %llu\n",
1868                                (unsigned long long) le64toh(o->tag.seqnum));
1869                         break;
1870                 }
1871
1872                 if (o->object.flags & OBJECT_COMPRESSED)
1873                         printf("Flags: COMPRESSED\n");
1874
1875                 if (p == le64toh(f->header->tail_object_offset))
1876                         p = 0;
1877                 else
1878                         p = p + ALIGN64(le64toh(o->object.size));
1879         }
1880
1881         return;
1882 fail:
1883         log_error("File corrupt");
1884 }
1885
1886 void journal_file_print_header(JournalFile *f) {
1887         char a[33], b[33], c[33];
1888         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1889
1890         assert(f);
1891
1892         printf("File Path: %s\n"
1893                "File ID: %s\n"
1894                "Machine ID: %s\n"
1895                "Boot ID: %s\n"
1896                "Sequential Number ID: %s\n"
1897                "State: %s\n"
1898                "Compatible Flags:%s%s\n"
1899                "Incompatible Flags:%s%s\n"
1900                "Header size: %llu\n"
1901                "Arena size: %llu\n"
1902                "Data Hash Table Size: %llu\n"
1903                "Field Hash Table Size: %llu\n"
1904                "Rotate Suggested: %s\n"
1905                "Head Sequential Number: %llu\n"
1906                "Tail Sequential Number: %llu\n"
1907                "Head Realtime Timestamp: %s\n"
1908                "Tail Realtime Timestamp: %s\n"
1909                "Objects: %llu\n"
1910                "Entry Objects: %llu\n",
1911                f->path,
1912                sd_id128_to_string(f->header->file_id, a),
1913                sd_id128_to_string(f->header->machine_id, b),
1914                sd_id128_to_string(f->header->boot_id, c),
1915                sd_id128_to_string(f->header->seqnum_id, c),
1916                f->header->state == STATE_OFFLINE ? "OFFLINE" :
1917                f->header->state == STATE_ONLINE ? "ONLINE" :
1918                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1919                (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1920                (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1921                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1922                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1923                (unsigned long long) le64toh(f->header->header_size),
1924                (unsigned long long) le64toh(f->header->arena_size),
1925                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1926                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1927                yes_no(journal_file_rotate_suggested(f)),
1928                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1929                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1930                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1931                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1932                (unsigned long long) le64toh(f->header->n_objects),
1933                (unsigned long long) le64toh(f->header->n_entries));
1934
1935         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1936                 printf("Data Objects: %llu\n"
1937                        "Data Hash Table Fill: %.1f%%\n",
1938                        (unsigned long long) le64toh(f->header->n_data),
1939                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1940
1941         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1942                 printf("Field Objects: %llu\n"
1943                        "Field Hash Table Fill: %.1f%%\n",
1944                        (unsigned long long) le64toh(f->header->n_fields),
1945                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1946
1947         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1948                 printf("Tag Objects: %llu\n",
1949                        (unsigned long long) le64toh(f->header->n_tags));
1950         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1951                 printf("Entry Array Objects: %llu\n",
1952                        (unsigned long long) le64toh(f->header->n_entry_arrays));
1953 }
1954
1955 int journal_file_open(
1956                 const char *fname,
1957                 int flags,
1958                 mode_t mode,
1959                 bool compress,
1960                 bool seal,
1961                 JournalMetrics *metrics,
1962                 MMapCache *mmap_cache,
1963                 JournalFile *template,
1964                 JournalFile **ret) {
1965
1966         JournalFile *f;
1967         int r;
1968         bool newly_created = false;
1969
1970         assert(fname);
1971
1972         if ((flags & O_ACCMODE) != O_RDONLY &&
1973             (flags & O_ACCMODE) != O_RDWR)
1974                 return -EINVAL;
1975
1976         if (!endswith(fname, ".journal") &&
1977             !endswith(fname, ".journal~"))
1978                 return -EINVAL;
1979
1980         f = new0(JournalFile, 1);
1981         if (!f)
1982                 return -ENOMEM;
1983
1984         f->fd = -1;
1985         f->mode = mode;
1986
1987         f->flags = flags;
1988         f->prot = prot_from_flags(flags);
1989         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1990         f->compress = compress;
1991         f->seal = seal;
1992
1993         if (mmap_cache)
1994                 f->mmap = mmap_cache_ref(mmap_cache);
1995         else {
1996                 /* One context for each type, plus the zeroth catchall
1997                  * context. One fd for the file plus one for each type
1998                  * (which we need during verification */
1999                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
2000                 if (!f->mmap) {
2001                         r = -ENOMEM;
2002                         goto fail;
2003                 }
2004         }
2005
2006         f->path = strdup(fname);
2007         if (!f->path) {
2008                 r = -ENOMEM;
2009                 goto fail;
2010         }
2011
2012         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2013         if (f->fd < 0) {
2014                 r = -errno;
2015                 goto fail;
2016         }
2017
2018         if (fstat(f->fd, &f->last_stat) < 0) {
2019                 r = -errno;
2020                 goto fail;
2021         }
2022
2023         if (f->last_stat.st_size == 0 && f->writable) {
2024                 newly_created = true;
2025
2026                 /* Try to load the FSPRG state, and if we can't, then
2027                  * just don't do sealing */
2028                 r = journal_file_fss_load(f);
2029                 if (r < 0)
2030                         f->seal = false;
2031
2032                 r = journal_file_init_header(f, template);
2033                 if (r < 0)
2034                         goto fail;
2035
2036                 if (fstat(f->fd, &f->last_stat) < 0) {
2037                         r = -errno;
2038                         goto fail;
2039                 }
2040         }
2041
2042         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2043                 r = -EIO;
2044                 goto fail;
2045         }
2046
2047         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2048         if (f->header == MAP_FAILED) {
2049                 f->header = NULL;
2050                 r = -errno;
2051                 goto fail;
2052         }
2053
2054         if (!newly_created) {
2055                 r = journal_file_verify_header(f);
2056                 if (r < 0)
2057                         goto fail;
2058         }
2059
2060         if (!newly_created && f->writable) {
2061                 r = journal_file_fss_load(f);
2062                 if (r < 0)
2063                         goto fail;
2064         }
2065
2066         if (f->writable) {
2067                 if (metrics) {
2068                         journal_default_metrics(metrics, f->fd);
2069                         f->metrics = *metrics;
2070                 } else if (template)
2071                         f->metrics = template->metrics;
2072
2073                 r = journal_file_refresh_header(f);
2074                 if (r < 0)
2075                         goto fail;
2076         }
2077
2078         r = journal_file_hmac_setup(f);
2079         if (r < 0)
2080                 goto fail;
2081
2082         if (newly_created) {
2083                 r = journal_file_setup_field_hash_table(f);
2084                 if (r < 0)
2085                         goto fail;
2086
2087                 r = journal_file_setup_data_hash_table(f);
2088                 if (r < 0)
2089                         goto fail;
2090
2091                 r = journal_file_append_first_tag(f);
2092                 if (r < 0)
2093                         goto fail;
2094         }
2095
2096         r = journal_file_map_field_hash_table(f);
2097         if (r < 0)
2098                 goto fail;
2099
2100         r = journal_file_map_data_hash_table(f);
2101         if (r < 0)
2102                 goto fail;
2103
2104         if (ret)
2105                 *ret = f;
2106
2107         return 0;
2108
2109 fail:
2110         journal_file_close(f);
2111
2112         return r;
2113 }
2114
2115 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2116         char *p;
2117         size_t l;
2118         JournalFile *old_file, *new_file = NULL;
2119         int r;
2120
2121         assert(f);
2122         assert(*f);
2123
2124         old_file = *f;
2125
2126         if (!old_file->writable)
2127                 return -EINVAL;
2128
2129         if (!endswith(old_file->path, ".journal"))
2130                 return -EINVAL;
2131
2132         l = strlen(old_file->path);
2133
2134         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2135         if (!p)
2136                 return -ENOMEM;
2137
2138         memcpy(p, old_file->path, l - 8);
2139         p[l-8] = '@';
2140         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2141         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2142                  "-%016llx-%016llx.journal",
2143                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2144                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2145
2146         r = rename(old_file->path, p);
2147         free(p);
2148
2149         if (r < 0)
2150                 return -errno;
2151
2152         old_file->header->state = STATE_ARCHIVED;
2153
2154         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2155         journal_file_close(old_file);
2156
2157         *f = new_file;
2158         return r;
2159 }
2160
2161 int journal_file_open_reliably(
2162                 const char *fname,
2163                 int flags,
2164                 mode_t mode,
2165                 bool compress,
2166                 bool seal,
2167                 JournalMetrics *metrics,
2168                 MMapCache *mmap_cache,
2169                 JournalFile *template,
2170                 JournalFile **ret) {
2171
2172         int r;
2173         size_t l;
2174         char *p;
2175
2176         r = journal_file_open(fname, flags, mode, compress, seal,
2177                               metrics, mmap_cache, template, ret);
2178         if (r != -EBADMSG && /* corrupted */
2179             r != -ENODATA && /* truncated */
2180             r != -EHOSTDOWN && /* other machine */
2181             r != -EPROTONOSUPPORT && /* incompatible feature */
2182             r != -EBUSY && /* unclean shutdown */
2183             r != -ESHUTDOWN /* already archived */)
2184                 return r;
2185
2186         if ((flags & O_ACCMODE) == O_RDONLY)
2187                 return r;
2188
2189         if (!(flags & O_CREAT))
2190                 return r;
2191
2192         if (!endswith(fname, ".journal"))
2193                 return r;
2194
2195         /* The file is corrupted. Rotate it away and try it again (but only once) */
2196
2197         l = strlen(fname);
2198         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2199                      (int) (l-8), fname,
2200                      (unsigned long long) now(CLOCK_REALTIME),
2201                      random_ull()) < 0)
2202                 return -ENOMEM;
2203
2204         r = rename(fname, p);
2205         free(p);
2206         if (r < 0)
2207                 return -errno;
2208
2209         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2210
2211         return journal_file_open(fname, flags, mode, compress, seal,
2212                                  metrics, mmap_cache, template, ret);
2213 }
2214
2215
2216 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2217         uint64_t i, n;
2218         uint64_t q, xor_hash = 0;
2219         int r;
2220         EntryItem *items;
2221         dual_timestamp ts;
2222
2223         assert(from);
2224         assert(to);
2225         assert(o);
2226         assert(p);
2227
2228         if (!to->writable)
2229                 return -EPERM;
2230
2231         ts.monotonic = le64toh(o->entry.monotonic);
2232         ts.realtime = le64toh(o->entry.realtime);
2233
2234         if (to->tail_entry_monotonic_valid &&
2235             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2236                 return -EINVAL;
2237
2238         n = journal_file_entry_n_items(o);
2239         items = alloca(sizeof(EntryItem) * n);
2240
2241         for (i = 0; i < n; i++) {
2242                 uint64_t l, h;
2243                 le64_t le_hash;
2244                 size_t t;
2245                 void *data;
2246                 Object *u;
2247
2248                 q = le64toh(o->entry.items[i].object_offset);
2249                 le_hash = o->entry.items[i].hash;
2250
2251                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2252                 if (r < 0)
2253                         return r;
2254
2255                 if (le_hash != o->data.hash)
2256                         return -EBADMSG;
2257
2258                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2259                 t = (size_t) l;
2260
2261                 /* We hit the limit on 32bit machines */
2262                 if ((uint64_t) t != l)
2263                         return -E2BIG;
2264
2265                 if (o->object.flags & OBJECT_COMPRESSED) {
2266 #ifdef HAVE_XZ
2267                         uint64_t rsize;
2268
2269                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2270                                 return -EBADMSG;
2271
2272                         data = from->compress_buffer;
2273                         l = rsize;
2274 #else
2275                         return -EPROTONOSUPPORT;
2276 #endif
2277                 } else
2278                         data = o->data.payload;
2279
2280                 r = journal_file_append_data(to, data, l, &u, &h);
2281                 if (r < 0)
2282                         return r;
2283
2284                 xor_hash ^= le64toh(u->data.hash);
2285                 items[i].object_offset = htole64(h);
2286                 items[i].hash = u->data.hash;
2287
2288                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2289                 if (r < 0)
2290                         return r;
2291         }
2292
2293         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2294 }
2295
2296 void journal_default_metrics(JournalMetrics *m, int fd) {
2297         uint64_t fs_size = 0;
2298         struct statvfs ss;
2299         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2300
2301         assert(m);
2302         assert(fd >= 0);
2303
2304         if (fstatvfs(fd, &ss) >= 0)
2305                 fs_size = ss.f_frsize * ss.f_blocks;
2306
2307         if (m->max_use == (uint64_t) -1) {
2308
2309                 if (fs_size > 0) {
2310                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2311
2312                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2313                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2314
2315                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2316                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2317                 } else
2318                         m->max_use = DEFAULT_MAX_USE_LOWER;
2319         } else {
2320                 m->max_use = PAGE_ALIGN(m->max_use);
2321
2322                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2323                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2324         }
2325
2326         if (m->max_size == (uint64_t) -1) {
2327                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2328
2329                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2330                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2331         } else
2332                 m->max_size = PAGE_ALIGN(m->max_size);
2333
2334         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2335                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2336
2337         if (m->max_size*2 > m->max_use)
2338                 m->max_use = m->max_size*2;
2339
2340         if (m->min_size == (uint64_t) -1)
2341                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2342         else {
2343                 m->min_size = PAGE_ALIGN(m->min_size);
2344
2345                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2346                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2347
2348                 if (m->min_size > m->max_size)
2349                         m->max_size = m->min_size;
2350         }
2351
2352         if (m->keep_free == (uint64_t) -1) {
2353
2354                 if (fs_size > 0) {
2355                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2356
2357                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2358                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2359
2360                 } else
2361                         m->keep_free = DEFAULT_KEEP_FREE;
2362         }
2363
2364         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2365                  format_bytes(a, sizeof(a), m->max_use),
2366                  format_bytes(b, sizeof(b), m->max_size),
2367                  format_bytes(c, sizeof(c), m->min_size),
2368                  format_bytes(d, sizeof(d), m->keep_free));
2369 }
2370
2371 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2372         assert(f);
2373         assert(from || to);
2374
2375         if (from) {
2376                 if (f->header->head_entry_realtime == 0)
2377                         return -ENOENT;
2378
2379                 *from = le64toh(f->header->head_entry_realtime);
2380         }
2381
2382         if (to) {
2383                 if (f->header->tail_entry_realtime == 0)
2384                         return -ENOENT;
2385
2386                 *to = le64toh(f->header->tail_entry_realtime);
2387         }
2388
2389         return 1;
2390 }
2391
2392 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2393         char t[9+32+1] = "_BOOT_ID=";
2394         Object *o;
2395         uint64_t p;
2396         int r;
2397
2398         assert(f);
2399         assert(from || to);
2400
2401         sd_id128_to_string(boot_id, t + 9);
2402
2403         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2404         if (r <= 0)
2405                 return r;
2406
2407         if (le64toh(o->data.n_entries) <= 0)
2408                 return 0;
2409
2410         if (from) {
2411                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2412                 if (r < 0)
2413                         return r;
2414
2415                 *from = le64toh(o->entry.monotonic);
2416         }
2417
2418         if (to) {
2419                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2420                 if (r < 0)
2421                         return r;
2422
2423                 r = generic_array_get_plus_one(f,
2424                                                le64toh(o->data.entry_offset),
2425                                                le64toh(o->data.entry_array_offset),
2426                                                le64toh(o->data.n_entries)-1,
2427                                                &o, NULL);
2428                 if (r <= 0)
2429                         return r;
2430
2431                 *to = le64toh(o->entry.monotonic);
2432         }
2433
2434         return 1;
2435 }
2436
2437 bool journal_file_rotate_suggested(JournalFile *f) {
2438         assert(f);
2439
2440         /* If we gained new header fields we gained new features,
2441          * hence suggest a rotation */
2442         if (le64toh(f->header->header_size) < sizeof(Header)) {
2443                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2444                 return true;
2445         }
2446
2447         /* Let's check if the hash tables grew over a certain fill
2448          * level (75%, borrowing this value from Java's hash table
2449          * implementation), and if so suggest a rotation. To calculate
2450          * the fill level we need the n_data field, which only exists
2451          * in newer versions. */
2452
2453         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2454                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2455                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2456                                   f->path,
2457                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2458                                   (unsigned long long) le64toh(f->header->n_data),
2459                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2460                                   (unsigned long long) (f->last_stat.st_size),
2461                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2462                         return true;
2463                 }
2464
2465         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2466                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2467                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2468                                   f->path,
2469                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2470                                   (unsigned long long) le64toh(f->header->n_fields),
2471                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2472                         return true;
2473                 }
2474
2475         return false;
2476 }