chiark / gitweb /
journal: split up journal-file.c
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Hash table sizes in bytes, expressed as a number of HashItem slots.
 * The data value is used as a lower bound when the table is sized from
 * max_size; the field value is used as-is. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL * sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL * sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for
 * compression */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* Minimum journal file size: 64 KiB */
#define JOURNAL_FILE_SIZE_MIN (64ULL * 1024ULL)

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size: 1 MiB and 4 GiB */
#define DEFAULT_MAX_USE_LOWER (1ULL * 1024ULL * 1024ULL)
#define DEFAULT_MAX_USE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)

/* Upper bound applied when max_size is deduced from max_use: 128 MiB */
#define DEFAULT_MAX_SIZE_UPPER (128ULL * 1024ULL * 1024ULL)

/* Upper bound applied when the keep_free value is deduced from the
 * file system size: 4 GiB */
#define DEFAULT_KEEP_FREE_UPPER (4ULL * 1024ULL * 1024ULL * 1024ULL)

/* keep_free value used when the file system size cannot be
 * determined: 1 MiB */
#define DEFAULT_KEEP_FREE (1024ULL * 1024ULL)

/* n_data was the first field added to the header after the initial
 * file format design, so every valid header extends at least up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->authenticate)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fsprg_header)
100                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
101
102         if (f->hmac)
103                 gcry_md_close(f->hmac);
104 #endif
105
106         free(f);
107 }
108
109 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
110         Header h;
111         ssize_t k;
112         int r;
113
114         assert(f);
115
116         zero(h);
117         memcpy(h.signature, HEADER_SIGNATURE, 8);
118         h.header_size = htole64(ALIGN64(sizeof(h)));
119
120         h.incompatible_flags =
121                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
122
123         h.compatible_flags =
124                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
125
126         r = sd_id128_randomize(&h.file_id);
127         if (r < 0)
128                 return r;
129
130         if (template) {
131                 h.seqnum_id = template->header->seqnum_id;
132                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
133         } else
134                 h.seqnum_id = h.file_id;
135
136         k = pwrite(f->fd, &h, sizeof(h), 0);
137         if (k < 0)
138                 return -errno;
139
140         if (k != sizeof(h))
141                 return -EIO;
142
143         return 0;
144 }
145
146 static int journal_file_refresh_header(JournalFile *f) {
147         int r;
148         sd_id128_t boot_id;
149
150         assert(f);
151
152         r = sd_id128_get_machine(&f->header->machine_id);
153         if (r < 0)
154                 return r;
155
156         r = sd_id128_get_boot(&boot_id);
157         if (r < 0)
158                 return r;
159
160         if (sd_id128_equal(boot_id, f->header->boot_id))
161                 f->tail_entry_monotonic_valid = true;
162
163         f->header->boot_id = boot_id;
164
165         f->header->state = STATE_ONLINE;
166
167         /* Sync the online state to disk */
168         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
169         fdatasync(f->fd);
170
171         return 0;
172 }
173
174 static int journal_file_verify_header(JournalFile *f) {
175         assert(f);
176
177         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
178                 return -EBADMSG;
179
180         /* In both read and write mode we refuse to open files with
181          * incompatible flags we don't know */
182 #ifdef HAVE_XZ
183         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
184                 return -EPROTONOSUPPORT;
185 #else
186         if (f->header->incompatible_flags != 0)
187                 return -EPROTONOSUPPORT;
188 #endif
189
190         /* When open for writing we refuse to open files with
191          * compatible flags, too */
192         if (f->writable) {
193 #ifdef HAVE_GCRYPT
194                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
195                         return -EPROTONOSUPPORT;
196 #else
197                 if (f->header->compatible_flags != 0)
198                         return -EPROTONOSUPPORT;
199 #endif
200         }
201
202         /* The first addition was n_data, so check that we are at least this large */
203         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
204                 return -EBADMSG;
205
206         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
207                 !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
208                 return -EBADMSG;
209
210         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
211                 return -ENODATA;
212
213         if (f->writable) {
214                 uint8_t state;
215                 sd_id128_t machine_id;
216                 int r;
217
218                 r = sd_id128_get_machine(&machine_id);
219                 if (r < 0)
220                         return r;
221
222                 if (!sd_id128_equal(machine_id, f->header->machine_id))
223                         return -EHOSTDOWN;
224
225                 state = f->header->state;
226
227                 if (state == STATE_ONLINE) {
228                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
229                         return -EBUSY;
230                 } else if (state == STATE_ARCHIVED)
231                         return -ESHUTDOWN;
232                 else if (state != STATE_OFFLINE) {
233                         log_debug("Journal file %s has unknown state %u.", f->path, state);
234                         return -EBUSY;
235                 }
236         }
237
238         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
239         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
240
241         return 0;
242 }
243
244 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
245         uint64_t old_size, new_size;
246         int r;
247
248         assert(f);
249
250         /* We assume that this file is not sparse, and we know that
251          * for sure, since we always call posix_fallocate()
252          * ourselves */
253
254         old_size =
255                 le64toh(f->header->header_size) +
256                 le64toh(f->header->arena_size);
257
258         new_size = PAGE_ALIGN(offset + size);
259         if (new_size < le64toh(f->header->header_size))
260                 new_size = le64toh(f->header->header_size);
261
262         if (new_size <= old_size)
263                 return 0;
264
265         if (f->metrics.max_size > 0 &&
266             new_size > f->metrics.max_size)
267                 return -E2BIG;
268
269         if (new_size > f->metrics.min_size &&
270             f->metrics.keep_free > 0) {
271                 struct statvfs svfs;
272
273                 if (fstatvfs(f->fd, &svfs) >= 0) {
274                         uint64_t available;
275
276                         available = svfs.f_bfree * svfs.f_bsize;
277
278                         if (available >= f->metrics.keep_free)
279                                 available -= f->metrics.keep_free;
280                         else
281                                 available = 0;
282
283                         if (new_size - old_size > available)
284                                 return -E2BIG;
285                 }
286         }
287
288         /* Note that the glibc fallocate() fallback is very
289            inefficient, hence we try to minimize the allocation area
290            as we can. */
291         r = posix_fallocate(f->fd, old_size, new_size - old_size);
292         if (r != 0)
293                 return -r;
294
295         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
296
297         if (fstat(f->fd, &f->last_stat) < 0)
298                 return -errno;
299
300         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
301
302         return 0;
303 }
304
305 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
306         assert(f);
307         assert(ret);
308
309         /* Avoid SIGBUS on invalid accesses */
310         if (offset + size > (uint64_t) f->last_stat.st_size) {
311                 /* Hmm, out of range? Let's refresh the fstat() data
312                  * first, before we trust that check. */
313
314                 if (fstat(f->fd, &f->last_stat) < 0 ||
315                     offset + size > (uint64_t) f->last_stat.st_size)
316                         return -EADDRNOTAVAIL;
317         }
318
319         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
320 }
321
322 static bool verify_hash(Object *o) {
323         uint64_t h1, h2;
324
325         assert(o);
326
327         if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
328                 h1 = le64toh(o->data.hash);
329                 h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
330         } else if (o->object.type == OBJECT_FIELD) {
331                 h1 = le64toh(o->field.hash);
332                 h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
333         } else
334                 return true;
335
336         return h1 == h2;
337 }
338
339 static uint64_t minimum_header_size(Object *o) {
340
341         static uint64_t table[] = {
342                 [OBJECT_DATA] = sizeof(DataObject),
343                 [OBJECT_FIELD] = sizeof(FieldObject),
344                 [OBJECT_ENTRY] = sizeof(EntryObject),
345                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
346                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
347                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
348                 [OBJECT_TAG] = sizeof(TagObject),
349         };
350
351         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
352                 return sizeof(ObjectHeader);
353
354         return table[o->object.type];
355 }
356
357 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
358         int r;
359         void *t;
360         Object *o;
361         uint64_t s;
362         unsigned context;
363
364         assert(f);
365         assert(ret);
366
367         /* One context for each type, plus one catch-all for the rest */
368         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
369
370         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
371         if (r < 0)
372                 return r;
373
374         o = (Object*) t;
375         s = le64toh(o->object.size);
376
377         if (s < sizeof(ObjectHeader))
378                 return -EBADMSG;
379
380         if (o->object.type <= OBJECT_UNUSED)
381                 return -EBADMSG;
382
383         if (s < minimum_header_size(o))
384                 return -EBADMSG;
385
386         if (type >= 0 && o->object.type != type)
387                 return -EBADMSG;
388
389         if (s > sizeof(ObjectHeader)) {
390                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
391                 if (r < 0)
392                         return r;
393
394                 o = (Object*) t;
395         }
396
397         if (!verify_hash(o))
398                 return -EBADMSG;
399
400         *ret = o;
401         return 0;
402 }
403
404 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
405         uint64_t r;
406
407         assert(f);
408
409         r = le64toh(f->header->tail_entry_seqnum) + 1;
410
411         if (seqnum) {
412                 /* If an external seqnum counter was passed, we update
413                  * both the local and the external one, and set it to
414                  * the maximum of both */
415
416                 if (*seqnum + 1 > r)
417                         r = *seqnum + 1;
418
419                 *seqnum = r;
420         }
421
422         f->header->tail_entry_seqnum = htole64(r);
423
424         if (f->header->head_entry_seqnum == 0)
425                 f->header->head_entry_seqnum = htole64(r);
426
427         return r;
428 }
429
430 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
431         int r;
432         uint64_t p;
433         Object *tail, *o;
434         void *t;
435
436         assert(f);
437         assert(type > 0 && type < _OBJECT_TYPE_MAX);
438         assert(size >= sizeof(ObjectHeader));
439         assert(offset);
440         assert(ret);
441
442         p = le64toh(f->header->tail_object_offset);
443         if (p == 0)
444                 p = le64toh(f->header->header_size);
445         else {
446                 r = journal_file_move_to_object(f, -1, p, &tail);
447                 if (r < 0)
448                         return r;
449
450                 p += ALIGN64(le64toh(tail->object.size));
451         }
452
453         r = journal_file_allocate(f, p, size);
454         if (r < 0)
455                 return r;
456
457         r = journal_file_move_to(f, type, p, size, &t);
458         if (r < 0)
459                 return r;
460
461         o = (Object*) t;
462
463         zero(o->object);
464         o->object.type = type;
465         o->object.size = htole64(size);
466
467         f->header->tail_object_offset = htole64(p);
468         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
469
470         *ret = o;
471         *offset = p;
472
473         return 0;
474 }
475
476 static int journal_file_setup_data_hash_table(JournalFile *f) {
477         uint64_t s, p;
478         Object *o;
479         int r;
480
481         assert(f);
482
483         /* We estimate that we need 1 hash table entry per 768 of
484            journal file and we want to make sure we never get beyond
485            75% fill level. Calculate the hash table size for the
486            maximum file size based on these metrics. */
487
488         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
489         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
490                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
491
492         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
493
494         r = journal_file_append_object(f,
495                                        OBJECT_DATA_HASH_TABLE,
496                                        offsetof(Object, hash_table.items) + s,
497                                        &o, &p);
498         if (r < 0)
499                 return r;
500
501         memset(o->hash_table.items, 0, s);
502
503         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
504         f->header->data_hash_table_size = htole64(s);
505
506         return 0;
507 }
508
509 static int journal_file_setup_field_hash_table(JournalFile *f) {
510         uint64_t s, p;
511         Object *o;
512         int r;
513
514         assert(f);
515
516         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
517         r = journal_file_append_object(f,
518                                        OBJECT_FIELD_HASH_TABLE,
519                                        offsetof(Object, hash_table.items) + s,
520                                        &o, &p);
521         if (r < 0)
522                 return r;
523
524         memset(o->hash_table.items, 0, s);
525
526         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
527         f->header->field_hash_table_size = htole64(s);
528
529         return 0;
530 }
531
532 static int journal_file_map_data_hash_table(JournalFile *f) {
533         uint64_t s, p;
534         void *t;
535         int r;
536
537         assert(f);
538
539         p = le64toh(f->header->data_hash_table_offset);
540         s = le64toh(f->header->data_hash_table_size);
541
542         r = journal_file_move_to(f,
543                                  OBJECT_DATA_HASH_TABLE,
544                                  p, s,
545                                  &t);
546         if (r < 0)
547                 return r;
548
549         f->data_hash_table = t;
550         return 0;
551 }
552
553 static int journal_file_map_field_hash_table(JournalFile *f) {
554         uint64_t s, p;
555         void *t;
556         int r;
557
558         assert(f);
559
560         p = le64toh(f->header->field_hash_table_offset);
561         s = le64toh(f->header->field_hash_table_size);
562
563         r = journal_file_move_to(f,
564                                  OBJECT_FIELD_HASH_TABLE,
565                                  p, s,
566                                  &t);
567         if (r < 0)
568                 return r;
569
570         f->field_hash_table = t;
571         return 0;
572 }
573
574 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
575         uint64_t p, h;
576         int r;
577
578         assert(f);
579         assert(o);
580         assert(offset > 0);
581         assert(o->object.type == OBJECT_DATA);
582
583         /* This might alter the window we are looking at */
584
585         o->data.next_hash_offset = o->data.next_field_offset = 0;
586         o->data.entry_offset = o->data.entry_array_offset = 0;
587         o->data.n_entries = 0;
588
589         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
590         p = le64toh(f->data_hash_table[h].tail_hash_offset);
591         if (p == 0) {
592                 /* Only entry in the hash table is easy */
593                 f->data_hash_table[h].head_hash_offset = htole64(offset);
594         } else {
595                 /* Move back to the previous data object, to patch in
596                  * pointer */
597
598                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
599                 if (r < 0)
600                         return r;
601
602                 o->data.next_hash_offset = htole64(offset);
603         }
604
605         f->data_hash_table[h].tail_hash_offset = htole64(offset);
606
607         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
608                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
609
610         return 0;
611 }
612
613 int journal_file_find_data_object_with_hash(
614                 JournalFile *f,
615                 const void *data, uint64_t size, uint64_t hash,
616                 Object **ret, uint64_t *offset) {
617
618         uint64_t p, osize, h;
619         int r;
620
621         assert(f);
622         assert(data || size == 0);
623
624         osize = offsetof(Object, data.payload) + size;
625
626         if (f->header->data_hash_table_size == 0)
627                 return -EBADMSG;
628
629         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
630         p = le64toh(f->data_hash_table[h].head_hash_offset);
631
632         while (p > 0) {
633                 Object *o;
634
635                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
636                 if (r < 0)
637                         return r;
638
639                 if (le64toh(o->data.hash) != hash)
640                         goto next;
641
642                 if (o->object.flags & OBJECT_COMPRESSED) {
643 #ifdef HAVE_XZ
644                         uint64_t l, rsize;
645
646                         l = le64toh(o->object.size);
647                         if (l <= offsetof(Object, data.payload))
648                                 return -EBADMSG;
649
650                         l -= offsetof(Object, data.payload);
651
652                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
653                                 return -EBADMSG;
654
655                         if (rsize == size &&
656                             memcmp(f->compress_buffer, data, size) == 0) {
657
658                                 if (ret)
659                                         *ret = o;
660
661                                 if (offset)
662                                         *offset = p;
663
664                                 return 1;
665                         }
666 #else
667                         return -EPROTONOSUPPORT;
668 #endif
669
670                 } else if (le64toh(o->object.size) == osize &&
671                            memcmp(o->data.payload, data, size) == 0) {
672
673                         if (ret)
674                                 *ret = o;
675
676                         if (offset)
677                                 *offset = p;
678
679                         return 1;
680                 }
681
682         next:
683                 p = le64toh(o->data.next_hash_offset);
684         }
685
686         return 0;
687 }
688
689 int journal_file_find_data_object(
690                 JournalFile *f,
691                 const void *data, uint64_t size,
692                 Object **ret, uint64_t *offset) {
693
694         uint64_t hash;
695
696         assert(f);
697         assert(data || size == 0);
698
699         hash = hash64(data, size);
700
701         return journal_file_find_data_object_with_hash(f,
702                                                        data, size, hash,
703                                                        ret, offset);
704 }
705
706 static int journal_file_append_data(
707                 JournalFile *f,
708                 const void *data, uint64_t size,
709                 Object **ret, uint64_t *offset) {
710
711         uint64_t hash, p;
712         uint64_t osize;
713         Object *o;
714         int r;
715         bool compressed = false;
716
717         assert(f);
718         assert(data || size == 0);
719
720         hash = hash64(data, size);
721
722         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
723         if (r < 0)
724                 return r;
725         else if (r > 0) {
726
727                 if (ret)
728                         *ret = o;
729
730                 if (offset)
731                         *offset = p;
732
733                 return 0;
734         }
735
736         osize = offsetof(Object, data.payload) + size;
737         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
738         if (r < 0)
739                 return r;
740
741         o->data.hash = htole64(hash);
742
743 #ifdef HAVE_XZ
744         if (f->compress &&
745             size >= COMPRESSION_SIZE_THRESHOLD) {
746                 uint64_t rsize;
747
748                 compressed = compress_blob(data, size, o->data.payload, &rsize);
749
750                 if (compressed) {
751                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
752                         o->object.flags |= OBJECT_COMPRESSED;
753
754                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
755                 }
756         }
757 #endif
758
759         if (!compressed && size > 0)
760                 memcpy(o->data.payload, data, size);
761
762         r = journal_file_link_data(f, o, p, hash);
763         if (r < 0)
764                 return r;
765
766         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
767         if (r < 0)
768                 return r;
769
770         /* The linking might have altered the window, so let's
771          * refresh our pointer */
772         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
773         if (r < 0)
774                 return r;
775
776         if (ret)
777                 *ret = o;
778
779         if (offset)
780                 *offset = p;
781
782         return 0;
783 }
784
785 uint64_t journal_file_entry_n_items(Object *o) {
786         assert(o);
787         assert(o->object.type == OBJECT_ENTRY);
788
789         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
790 }
791
792 uint64_t journal_file_entry_array_n_items(Object *o) {
793         assert(o);
794         assert(o->object.type == OBJECT_ENTRY_ARRAY);
795
796         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
797 }
798
799 static int link_entry_into_array(JournalFile *f,
800                                  le64_t *first,
801                                  le64_t *idx,
802                                  uint64_t p) {
803         int r;
804         uint64_t n = 0, ap = 0, q, i, a, hidx;
805         Object *o;
806
807         assert(f);
808         assert(first);
809         assert(idx);
810         assert(p > 0);
811
812         a = le64toh(*first);
813         i = hidx = le64toh(*idx);
814         while (a > 0) {
815
816                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
817                 if (r < 0)
818                         return r;
819
820                 n = journal_file_entry_array_n_items(o);
821                 if (i < n) {
822                         o->entry_array.items[i] = htole64(p);
823                         *idx = htole64(hidx + 1);
824                         return 0;
825                 }
826
827                 i -= n;
828                 ap = a;
829                 a = le64toh(o->entry_array.next_entry_array_offset);
830         }
831
832         if (hidx > n)
833                 n = (hidx+1) * 2;
834         else
835                 n = n * 2;
836
837         if (n < 4)
838                 n = 4;
839
840         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
841                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
842                                        &o, &q);
843         if (r < 0)
844                 return r;
845
846         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
847         if (r < 0)
848                 return r;
849
850         o->entry_array.items[i] = htole64(p);
851
852         if (ap == 0)
853                 *first = htole64(q);
854         else {
855                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
856                 if (r < 0)
857                         return r;
858
859                 o->entry_array.next_entry_array_offset = htole64(q);
860         }
861
862         *idx = htole64(hidx + 1);
863
864         return 0;
865 }
866
867 static int link_entry_into_array_plus_one(JournalFile *f,
868                                           le64_t *extra,
869                                           le64_t *first,
870                                           le64_t *idx,
871                                           uint64_t p) {
872
873         int r;
874
875         assert(f);
876         assert(extra);
877         assert(first);
878         assert(idx);
879         assert(p > 0);
880
881         if (*idx == 0)
882                 *extra = htole64(p);
883         else {
884                 le64_t i;
885
886                 i = htole64(le64toh(*idx) - 1);
887                 r = link_entry_into_array(f, first, &i, p);
888                 if (r < 0)
889                         return r;
890         }
891
892         *idx = htole64(le64toh(*idx) + 1);
893         return 0;
894 }
895
896 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
897         uint64_t p;
898         int r;
899         assert(f);
900         assert(o);
901         assert(offset > 0);
902
903         p = le64toh(o->entry.items[i].object_offset);
904         if (p == 0)
905                 return -EINVAL;
906
907         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
908         if (r < 0)
909                 return r;
910
911         return link_entry_into_array_plus_one(f,
912                                               &o->data.entry_offset,
913                                               &o->data.entry_array_offset,
914                                               &o->data.n_entries,
915                                               offset);
916 }
917
918 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
919         uint64_t n, i;
920         int r;
921
922         assert(f);
923         assert(o);
924         assert(offset > 0);
925         assert(o->object.type == OBJECT_ENTRY);
926
927         __sync_synchronize();
928
929         /* Link up the entry itself */
930         r = link_entry_into_array(f,
931                                   &f->header->entry_array_offset,
932                                   &f->header->n_entries,
933                                   offset);
934         if (r < 0)
935                 return r;
936
937         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
938
939         if (f->header->head_entry_realtime == 0)
940                 f->header->head_entry_realtime = o->entry.realtime;
941
942         f->header->tail_entry_realtime = o->entry.realtime;
943         f->header->tail_entry_monotonic = o->entry.monotonic;
944
945         f->tail_entry_monotonic_valid = true;
946
947         /* Link up the items */
948         n = journal_file_entry_n_items(o);
949         for (i = 0; i < n; i++) {
950                 r = journal_file_link_entry_item(f, o, offset, i);
951                 if (r < 0)
952                         return r;
953         }
954
955         return 0;
956 }
957
958 static int journal_file_append_entry_internal(
959                 JournalFile *f,
960                 const dual_timestamp *ts,
961                 uint64_t xor_hash,
962                 const EntryItem items[], unsigned n_items,
963                 uint64_t *seqnum,
964                 Object **ret, uint64_t *offset) {
965         uint64_t np;
966         uint64_t osize;
967         Object *o;
968         int r;
969
970         assert(f);
971         assert(items || n_items == 0);
972         assert(ts);
973
974         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
975
976         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
977         if (r < 0)
978                 return r;
979
980         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
981         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
982         o->entry.realtime = htole64(ts->realtime);
983         o->entry.monotonic = htole64(ts->monotonic);
984         o->entry.xor_hash = htole64(xor_hash);
985         o->entry.boot_id = f->header->boot_id;
986
987         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
988         if (r < 0)
989                 return r;
990
991         r = journal_file_link_entry(f, o, np);
992         if (r < 0)
993                 return r;
994
995         if (ret)
996                 *ret = o;
997
998         if (offset)
999                 *offset = np;
1000
1001         return 0;
1002 }
1003
1004 void journal_file_post_change(JournalFile *f) {
1005         assert(f);
1006
1007         /* inotify() does not receive IN_MODIFY events from file
1008          * accesses done via mmap(). After each access we hence
1009          * trigger IN_MODIFY by truncating the journal file to its
1010          * current size which triggers IN_MODIFY. */
1011
1012         __sync_synchronize();
1013
1014         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1015                 log_error("Failed to to truncate file to its own size: %m");
1016 }
1017
1018 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1019         unsigned i;
1020         EntryItem *items;
1021         int r;
1022         uint64_t xor_hash = 0;
1023         struct dual_timestamp _ts;
1024
1025         assert(f);
1026         assert(iovec || n_iovec == 0);
1027
1028         if (!f->writable)
1029                 return -EPERM;
1030
1031         if (!ts) {
1032                 dual_timestamp_get(&_ts);
1033                 ts = &_ts;
1034         }
1035
1036         if (f->tail_entry_monotonic_valid &&
1037             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1038                 return -EINVAL;
1039
1040         r = journal_file_maybe_append_tag(f, ts->realtime);
1041         if (r < 0)
1042                 return r;
1043
1044         /* alloca() can't take 0, hence let's allocate at least one */
1045         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1046
1047         for (i = 0; i < n_iovec; i++) {
1048                 uint64_t p;
1049                 Object *o;
1050
1051                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1052                 if (r < 0)
1053                         return r;
1054
1055                 xor_hash ^= le64toh(o->data.hash);
1056                 items[i].object_offset = htole64(p);
1057                 items[i].hash = o->data.hash;
1058         }
1059
1060         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1061
1062         journal_file_post_change(f);
1063
1064         return r;
1065 }
1066
1067 static int generic_array_get(JournalFile *f,
1068                              uint64_t first,
1069                              uint64_t i,
1070                              Object **ret, uint64_t *offset) {
1071
1072         Object *o;
1073         uint64_t p = 0, a;
1074         int r;
1075
1076         assert(f);
1077
1078         a = first;
1079         while (a > 0) {
1080                 uint64_t n;
1081
1082                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1083                 if (r < 0)
1084                         return r;
1085
1086                 n = journal_file_entry_array_n_items(o);
1087                 if (i < n) {
1088                         p = le64toh(o->entry_array.items[i]);
1089                         break;
1090                 }
1091
1092                 i -= n;
1093                 a = le64toh(o->entry_array.next_entry_array_offset);
1094         }
1095
1096         if (a <= 0 || p <= 0)
1097                 return 0;
1098
1099         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1100         if (r < 0)
1101                 return r;
1102
1103         if (ret)
1104                 *ret = o;
1105
1106         if (offset)
1107                 *offset = p;
1108
1109         return 1;
1110 }
1111
1112 static int generic_array_get_plus_one(JournalFile *f,
1113                                       uint64_t extra,
1114                                       uint64_t first,
1115                                       uint64_t i,
1116                                       Object **ret, uint64_t *offset) {
1117
1118         Object *o;
1119
1120         assert(f);
1121
1122         if (i == 0) {
1123                 int r;
1124
1125                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1126                 if (r < 0)
1127                         return r;
1128
1129                 if (ret)
1130                         *ret = o;
1131
1132                 if (offset)
1133                         *offset = extra;
1134
1135                 return 1;
1136         }
1137
1138         return generic_array_get(f, first, i-1, ret, offset);
1139 }
1140
/* Results of the test_object() callbacks used by the bisection helpers
 * below; they describe how the tested object compares to the needle. */
enum {
        TEST_FOUND,     /* the tested object matches the needle */
        TEST_LEFT,      /* the tested object sorts before the needle */
        TEST_RIGHT      /* the tested object sorts after the needle */
};
1146
1147 static int generic_array_bisect(JournalFile *f,
1148                                 uint64_t first,
1149                                 uint64_t n,
1150                                 uint64_t needle,
1151                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1152                                 direction_t direction,
1153                                 Object **ret,
1154                                 uint64_t *offset,
1155                                 uint64_t *idx) {
1156
1157         uint64_t a, p, t = 0, i = 0, last_p = 0;
1158         bool subtract_one = false;
1159         Object *o, *array = NULL;
1160         int r;
1161
1162         assert(f);
1163         assert(test_object);
1164
1165         a = first;
1166         while (a > 0) {
1167                 uint64_t left, right, k, lp;
1168
1169                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1170                 if (r < 0)
1171                         return r;
1172
1173                 k = journal_file_entry_array_n_items(array);
1174                 right = MIN(k, n);
1175                 if (right <= 0)
1176                         return 0;
1177
1178                 i = right - 1;
1179                 lp = p = le64toh(array->entry_array.items[i]);
1180                 if (p <= 0)
1181                         return -EBADMSG;
1182
1183                 r = test_object(f, p, needle);
1184                 if (r < 0)
1185                         return r;
1186
1187                 if (r == TEST_FOUND)
1188                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1189
1190                 if (r == TEST_RIGHT) {
1191                         left = 0;
1192                         right -= 1;
1193                         for (;;) {
1194                                 if (left == right) {
1195                                         if (direction == DIRECTION_UP)
1196                                                 subtract_one = true;
1197
1198                                         i = left;
1199                                         goto found;
1200                                 }
1201
1202                                 assert(left < right);
1203
1204                                 i = (left + right) / 2;
1205                                 p = le64toh(array->entry_array.items[i]);
1206                                 if (p <= 0)
1207                                         return -EBADMSG;
1208
1209                                 r = test_object(f, p, needle);
1210                                 if (r < 0)
1211                                         return r;
1212
1213                                 if (r == TEST_FOUND)
1214                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1215
1216                                 if (r == TEST_RIGHT)
1217                                         right = i;
1218                                 else
1219                                         left = i + 1;
1220                         }
1221                 }
1222
1223                 if (k > n) {
1224                         if (direction == DIRECTION_UP) {
1225                                 i = n;
1226                                 subtract_one = true;
1227                                 goto found;
1228                         }
1229
1230                         return 0;
1231                 }
1232
1233                 last_p = lp;
1234
1235                 n -= k;
1236                 t += k;
1237                 a = le64toh(array->entry_array.next_entry_array_offset);
1238         }
1239
1240         return 0;
1241
1242 found:
1243         if (subtract_one && t == 0 && i == 0)
1244                 return 0;
1245
1246         if (subtract_one && i == 0)
1247                 p = last_p;
1248         else if (subtract_one)
1249                 p = le64toh(array->entry_array.items[i-1]);
1250         else
1251                 p = le64toh(array->entry_array.items[i]);
1252
1253         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1254         if (r < 0)
1255                 return r;
1256
1257         if (ret)
1258                 *ret = o;
1259
1260         if (offset)
1261                 *offset = p;
1262
1263         if (idx)
1264                 *idx = t + i + (subtract_one ? -1 : 0);
1265
1266         return 1;
1267 }
1268
1269 static int generic_array_bisect_plus_one(JournalFile *f,
1270                                          uint64_t extra,
1271                                          uint64_t first,
1272                                          uint64_t n,
1273                                          uint64_t needle,
1274                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1275                                          direction_t direction,
1276                                          Object **ret,
1277                                          uint64_t *offset,
1278                                          uint64_t *idx) {
1279
1280         int r;
1281         bool step_back = false;
1282         Object *o;
1283
1284         assert(f);
1285         assert(test_object);
1286
1287         if (n <= 0)
1288                 return 0;
1289
1290         /* This bisects the array in object 'first', but first checks
1291          * an extra  */
1292         r = test_object(f, extra, needle);
1293         if (r < 0)
1294                 return r;
1295
1296         if (r == TEST_FOUND)
1297                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1298
1299         /* if we are looking with DIRECTION_UP then we need to first
1300            see if in the actual array there is a matching entry, and
1301            return the last one of that. But if there isn't any we need
1302            to return this one. Hence remember this, and return it
1303            below. */
1304         if (r == TEST_LEFT)
1305                 step_back = direction == DIRECTION_UP;
1306
1307         if (r == TEST_RIGHT) {
1308                 if (direction == DIRECTION_DOWN)
1309                         goto found;
1310                 else
1311                         return 0;
1312         }
1313
1314         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1315
1316         if (r == 0 && step_back)
1317                 goto found;
1318
1319         if (r > 0 && idx)
1320                 (*idx) ++;
1321
1322         return r;
1323
1324 found:
1325         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1326         if (r < 0)
1327                 return r;
1328
1329         if (ret)
1330                 *ret = o;
1331
1332         if (offset)
1333                 *offset = extra;
1334
1335         if (idx)
1336                 *idx = 0;
1337
1338         return 1;
1339 }
1340
1341 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1342         assert(f);
1343         assert(p > 0);
1344
1345         if (p == needle)
1346                 return TEST_FOUND;
1347         else if (p < needle)
1348                 return TEST_LEFT;
1349         else
1350                 return TEST_RIGHT;
1351 }
1352
1353 int journal_file_move_to_entry_by_offset(
1354                 JournalFile *f,
1355                 uint64_t p,
1356                 direction_t direction,
1357                 Object **ret,
1358                 uint64_t *offset) {
1359
1360         return generic_array_bisect(f,
1361                                     le64toh(f->header->entry_array_offset),
1362                                     le64toh(f->header->n_entries),
1363                                     p,
1364                                     test_object_offset,
1365                                     direction,
1366                                     ret, offset, NULL);
1367 }
1368
1369
1370 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1371         Object *o;
1372         int r;
1373
1374         assert(f);
1375         assert(p > 0);
1376
1377         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1378         if (r < 0)
1379                 return r;
1380
1381         if (le64toh(o->entry.seqnum) == needle)
1382                 return TEST_FOUND;
1383         else if (le64toh(o->entry.seqnum) < needle)
1384                 return TEST_LEFT;
1385         else
1386                 return TEST_RIGHT;
1387 }
1388
1389 int journal_file_move_to_entry_by_seqnum(
1390                 JournalFile *f,
1391                 uint64_t seqnum,
1392                 direction_t direction,
1393                 Object **ret,
1394                 uint64_t *offset) {
1395
1396         return generic_array_bisect(f,
1397                                     le64toh(f->header->entry_array_offset),
1398                                     le64toh(f->header->n_entries),
1399                                     seqnum,
1400                                     test_object_seqnum,
1401                                     direction,
1402                                     ret, offset, NULL);
1403 }
1404
1405 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1406         Object *o;
1407         int r;
1408
1409         assert(f);
1410         assert(p > 0);
1411
1412         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1413         if (r < 0)
1414                 return r;
1415
1416         if (le64toh(o->entry.realtime) == needle)
1417                 return TEST_FOUND;
1418         else if (le64toh(o->entry.realtime) < needle)
1419                 return TEST_LEFT;
1420         else
1421                 return TEST_RIGHT;
1422 }
1423
1424 int journal_file_move_to_entry_by_realtime(
1425                 JournalFile *f,
1426                 uint64_t realtime,
1427                 direction_t direction,
1428                 Object **ret,
1429                 uint64_t *offset) {
1430
1431         return generic_array_bisect(f,
1432                                     le64toh(f->header->entry_array_offset),
1433                                     le64toh(f->header->n_entries),
1434                                     realtime,
1435                                     test_object_realtime,
1436                                     direction,
1437                                     ret, offset, NULL);
1438 }
1439
1440 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1441         Object *o;
1442         int r;
1443
1444         assert(f);
1445         assert(p > 0);
1446
1447         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1448         if (r < 0)
1449                 return r;
1450
1451         if (le64toh(o->entry.monotonic) == needle)
1452                 return TEST_FOUND;
1453         else if (le64toh(o->entry.monotonic) < needle)
1454                 return TEST_LEFT;
1455         else
1456                 return TEST_RIGHT;
1457 }
1458
1459 int journal_file_move_to_entry_by_monotonic(
1460                 JournalFile *f,
1461                 sd_id128_t boot_id,
1462                 uint64_t monotonic,
1463                 direction_t direction,
1464                 Object **ret,
1465                 uint64_t *offset) {
1466
1467         char t[9+32+1] = "_BOOT_ID=";
1468         Object *o;
1469         int r;
1470
1471         assert(f);
1472
1473         sd_id128_to_string(boot_id, t + 9);
1474         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1475         if (r < 0)
1476                 return r;
1477         if (r == 0)
1478                 return -ENOENT;
1479
1480         return generic_array_bisect_plus_one(f,
1481                                              le64toh(o->data.entry_offset),
1482                                              le64toh(o->data.entry_array_offset),
1483                                              le64toh(o->data.n_entries),
1484                                              monotonic,
1485                                              test_object_monotonic,
1486                                              direction,
1487                                              ret, offset, NULL);
1488 }
1489
1490 int journal_file_next_entry(
1491                 JournalFile *f,
1492                 Object *o, uint64_t p,
1493                 direction_t direction,
1494                 Object **ret, uint64_t *offset) {
1495
1496         uint64_t i, n;
1497         int r;
1498
1499         assert(f);
1500         assert(p > 0 || !o);
1501
1502         n = le64toh(f->header->n_entries);
1503         if (n <= 0)
1504                 return 0;
1505
1506         if (!o)
1507                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1508         else {
1509                 if (o->object.type != OBJECT_ENTRY)
1510                         return -EINVAL;
1511
1512                 r = generic_array_bisect(f,
1513                                          le64toh(f->header->entry_array_offset),
1514                                          le64toh(f->header->n_entries),
1515                                          p,
1516                                          test_object_offset,
1517                                          DIRECTION_DOWN,
1518                                          NULL, NULL,
1519                                          &i);
1520                 if (r <= 0)
1521                         return r;
1522
1523                 if (direction == DIRECTION_DOWN) {
1524                         if (i >= n - 1)
1525                                 return 0;
1526
1527                         i++;
1528                 } else {
1529                         if (i <= 0)
1530                                 return 0;
1531
1532                         i--;
1533                 }
1534         }
1535
1536         /* And jump to it */
1537         return generic_array_get(f,
1538                                  le64toh(f->header->entry_array_offset),
1539                                  i,
1540                                  ret, offset);
1541 }
1542
1543 int journal_file_skip_entry(
1544                 JournalFile *f,
1545                 Object *o, uint64_t p,
1546                 int64_t skip,
1547                 Object **ret, uint64_t *offset) {
1548
1549         uint64_t i, n;
1550         int r;
1551
1552         assert(f);
1553         assert(o);
1554         assert(p > 0);
1555
1556         if (o->object.type != OBJECT_ENTRY)
1557                 return -EINVAL;
1558
1559         r = generic_array_bisect(f,
1560                                  le64toh(f->header->entry_array_offset),
1561                                  le64toh(f->header->n_entries),
1562                                  p,
1563                                  test_object_offset,
1564                                  DIRECTION_DOWN,
1565                                  NULL, NULL,
1566                                  &i);
1567         if (r <= 0)
1568                 return r;
1569
1570         /* Calculate new index */
1571         if (skip < 0) {
1572                 if ((uint64_t) -skip >= i)
1573                         i = 0;
1574                 else
1575                         i = i - (uint64_t) -skip;
1576         } else
1577                 i  += (uint64_t) skip;
1578
1579         n = le64toh(f->header->n_entries);
1580         if (n <= 0)
1581                 return -EBADMSG;
1582
1583         if (i >= n)
1584                 i = n-1;
1585
1586         return generic_array_get(f,
1587                                  le64toh(f->header->entry_array_offset),
1588                                  i,
1589                                  ret, offset);
1590 }
1591
1592 int journal_file_next_entry_for_data(
1593                 JournalFile *f,
1594                 Object *o, uint64_t p,
1595                 uint64_t data_offset,
1596                 direction_t direction,
1597                 Object **ret, uint64_t *offset) {
1598
1599         uint64_t n, i;
1600         int r;
1601         Object *d;
1602
1603         assert(f);
1604         assert(p > 0 || !o);
1605
1606         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1607         if (r < 0)
1608                 return r;
1609
1610         n = le64toh(d->data.n_entries);
1611         if (n <= 0)
1612                 return n;
1613
1614         if (!o)
1615                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1616         else {
1617                 if (o->object.type != OBJECT_ENTRY)
1618                         return -EINVAL;
1619
1620                 r = generic_array_bisect_plus_one(f,
1621                                                   le64toh(d->data.entry_offset),
1622                                                   le64toh(d->data.entry_array_offset),
1623                                                   le64toh(d->data.n_entries),
1624                                                   p,
1625                                                   test_object_offset,
1626                                                   DIRECTION_DOWN,
1627                                                   NULL, NULL,
1628                                                   &i);
1629
1630                 if (r <= 0)
1631                         return r;
1632
1633                 if (direction == DIRECTION_DOWN) {
1634                         if (i >= n - 1)
1635                                 return 0;
1636
1637                         i++;
1638                 } else {
1639                         if (i <= 0)
1640                                 return 0;
1641
1642                         i--;
1643                 }
1644
1645         }
1646
1647         return generic_array_get_plus_one(f,
1648                                           le64toh(d->data.entry_offset),
1649                                           le64toh(d->data.entry_array_offset),
1650                                           i,
1651                                           ret, offset);
1652 }
1653
1654 int journal_file_move_to_entry_by_offset_for_data(
1655                 JournalFile *f,
1656                 uint64_t data_offset,
1657                 uint64_t p,
1658                 direction_t direction,
1659                 Object **ret, uint64_t *offset) {
1660
1661         int r;
1662         Object *d;
1663
1664         assert(f);
1665
1666         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1667         if (r < 0)
1668                 return r;
1669
1670         return generic_array_bisect_plus_one(f,
1671                                              le64toh(d->data.entry_offset),
1672                                              le64toh(d->data.entry_array_offset),
1673                                              le64toh(d->data.n_entries),
1674                                              p,
1675                                              test_object_offset,
1676                                              direction,
1677                                              ret, offset, NULL);
1678 }
1679
1680 int journal_file_move_to_entry_by_monotonic_for_data(
1681                 JournalFile *f,
1682                 uint64_t data_offset,
1683                 sd_id128_t boot_id,
1684                 uint64_t monotonic,
1685                 direction_t direction,
1686                 Object **ret, uint64_t *offset) {
1687
1688         char t[9+32+1] = "_BOOT_ID=";
1689         Object *o, *d;
1690         int r;
1691         uint64_t b, z;
1692
1693         assert(f);
1694
1695         /* First, seek by time */
1696         sd_id128_to_string(boot_id, t + 9);
1697         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1698         if (r < 0)
1699                 return r;
1700         if (r == 0)
1701                 return -ENOENT;
1702
1703         r = generic_array_bisect_plus_one(f,
1704                                           le64toh(o->data.entry_offset),
1705                                           le64toh(o->data.entry_array_offset),
1706                                           le64toh(o->data.n_entries),
1707                                           monotonic,
1708                                           test_object_monotonic,
1709                                           direction,
1710                                           NULL, &z, NULL);
1711         if (r <= 0)
1712                 return r;
1713
1714         /* And now, continue seeking until we find an entry that
1715          * exists in both bisection arrays */
1716
1717         for (;;) {
1718                 Object *qo;
1719                 uint64_t p, q;
1720
1721                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1722                 if (r < 0)
1723                         return r;
1724
1725                 r = generic_array_bisect_plus_one(f,
1726                                                   le64toh(d->data.entry_offset),
1727                                                   le64toh(d->data.entry_array_offset),
1728                                                   le64toh(d->data.n_entries),
1729                                                   z,
1730                                                   test_object_offset,
1731                                                   direction,
1732                                                   NULL, &p, NULL);
1733                 if (r <= 0)
1734                         return r;
1735
1736                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1737                 if (r < 0)
1738                         return r;
1739
1740                 r = generic_array_bisect_plus_one(f,
1741                                                   le64toh(o->data.entry_offset),
1742                                                   le64toh(o->data.entry_array_offset),
1743                                                   le64toh(o->data.n_entries),
1744                                                   p,
1745                                                   test_object_offset,
1746                                                   direction,
1747                                                   &qo, &q, NULL);
1748
1749                 if (r <= 0)
1750                         return r;
1751
1752                 if (p == q) {
1753                         if (ret)
1754                                 *ret = qo;
1755                         if (offset)
1756                                 *offset = q;
1757
1758                         return 1;
1759                 }
1760
1761                 z = q;
1762         }
1763
1764         return 0;
1765 }
1766
1767 int journal_file_move_to_entry_by_seqnum_for_data(
1768                 JournalFile *f,
1769                 uint64_t data_offset,
1770                 uint64_t seqnum,
1771                 direction_t direction,
1772                 Object **ret, uint64_t *offset) {
1773
1774         Object *d;
1775         int r;
1776
1777         assert(f);
1778
1779         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1780         if (r < 0)
1781                 return r;
1782
1783         return generic_array_bisect_plus_one(f,
1784                                              le64toh(d->data.entry_offset),
1785                                              le64toh(d->data.entry_array_offset),
1786                                              le64toh(d->data.n_entries),
1787                                              seqnum,
1788                                              test_object_seqnum,
1789                                              direction,
1790                                              ret, offset, NULL);
1791 }
1792
1793 int journal_file_move_to_entry_by_realtime_for_data(
1794                 JournalFile *f,
1795                 uint64_t data_offset,
1796                 uint64_t realtime,
1797                 direction_t direction,
1798                 Object **ret, uint64_t *offset) {
1799
1800         Object *d;
1801         int r;
1802
1803         assert(f);
1804
1805         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1806         if (r < 0)
1807                 return r;
1808
1809         return generic_array_bisect_plus_one(f,
1810                                              le64toh(d->data.entry_offset),
1811                                              le64toh(d->data.entry_array_offset),
1812                                              le64toh(d->data.n_entries),
1813                                              realtime,
1814                                              test_object_realtime,
1815                                              direction,
1816                                              ret, offset, NULL);
1817 }
1818
1819 void journal_file_dump(JournalFile *f) {
1820         Object *o;
1821         int r;
1822         uint64_t p;
1823
1824         assert(f);
1825
1826         journal_file_print_header(f);
1827
1828         p = le64toh(f->header->header_size);
1829         while (p != 0) {
1830                 r = journal_file_move_to_object(f, -1, p, &o);
1831                 if (r < 0)
1832                         goto fail;
1833
1834                 switch (o->object.type) {
1835
1836                 case OBJECT_UNUSED:
1837                         printf("Type: OBJECT_UNUSED\n");
1838                         break;
1839
1840                 case OBJECT_DATA:
1841                         printf("Type: OBJECT_DATA\n");
1842                         break;
1843
1844                 case OBJECT_ENTRY:
1845                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1846                                (unsigned long long) le64toh(o->entry.seqnum),
1847                                (unsigned long long) le64toh(o->entry.monotonic),
1848                                (unsigned long long) le64toh(o->entry.realtime));
1849                         break;
1850
1851                 case OBJECT_FIELD_HASH_TABLE:
1852                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1853                         break;
1854
1855                 case OBJECT_DATA_HASH_TABLE:
1856                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1857                         break;
1858
1859                 case OBJECT_ENTRY_ARRAY:
1860                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1861                         break;
1862
1863                 case OBJECT_TAG:
1864                         printf("Type: OBJECT_TAG %llu\n",
1865                                (unsigned long long) le64toh(o->tag.seqnum));
1866                         break;
1867                 }
1868
1869                 if (o->object.flags & OBJECT_COMPRESSED)
1870                         printf("Flags: COMPRESSED\n");
1871
1872                 if (p == le64toh(f->header->tail_object_offset))
1873                         p = 0;
1874                 else
1875                         p = p + ALIGN64(le64toh(o->object.size));
1876         }
1877
1878         return;
1879 fail:
1880         log_error("File corrupt");
1881 }
1882
1883 void journal_file_print_header(JournalFile *f) {
1884         char a[33], b[33], c[33];
1885         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1886
1887         assert(f);
1888
1889         printf("File Path: %s\n"
1890                "File ID: %s\n"
1891                "Machine ID: %s\n"
1892                "Boot ID: %s\n"
1893                "Sequential Number ID: %s\n"
1894                "State: %s\n"
1895                "Compatible Flags:%s%s\n"
1896                "Incompatible Flags:%s%s\n"
1897                "Header size: %llu\n"
1898                "Arena size: %llu\n"
1899                "Data Hash Table Size: %llu\n"
1900                "Field Hash Table Size: %llu\n"
1901                "Objects: %llu\n"
1902                "Entry Objects: %llu\n"
1903                "Rotate Suggested: %s\n"
1904                "Head Sequential Number: %llu\n"
1905                "Tail Sequential Number: %llu\n"
1906                "Head Realtime Timestamp: %s\n"
1907                "Tail Realtime Timestamp: %s\n",
1908                f->path,
1909                sd_id128_to_string(f->header->file_id, a),
1910                sd_id128_to_string(f->header->machine_id, b),
1911                sd_id128_to_string(f->header->boot_id, c),
1912                sd_id128_to_string(f->header->seqnum_id, c),
1913                f->header->state == STATE_OFFLINE ? "offline" :
1914                f->header->state == STATE_ONLINE ? "online" :
1915                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
1916                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
1917                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
1918                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1919                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1920                (unsigned long long) le64toh(f->header->header_size),
1921                (unsigned long long) le64toh(f->header->arena_size),
1922                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1923                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1924                (unsigned long long) le64toh(f->header->n_objects),
1925                (unsigned long long) le64toh(f->header->n_entries),
1926                yes_no(journal_file_rotate_suggested(f)),
1927                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1928                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1929                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1930                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
1931
1932         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1933                 printf("Data Objects: %llu\n"
1934                        "Data Hash Table Fill: %.1f%%\n",
1935                        (unsigned long long) le64toh(f->header->n_data),
1936                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1937
1938         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1939                 printf("Field Objects: %llu\n"
1940                        "Field Hash Table Fill: %.1f%%\n",
1941                        (unsigned long long) le64toh(f->header->n_fields),
1942                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1943 }
1944
1945 int journal_file_open(
1946                 const char *fname,
1947                 int flags,
1948                 mode_t mode,
1949                 bool compress,
1950                 bool authenticate,
1951                 JournalMetrics *metrics,
1952                 MMapCache *mmap_cache,
1953                 JournalFile *template,
1954                 JournalFile **ret) {
1955
1956         JournalFile *f;
1957         int r;
1958         bool newly_created = false;
1959
1960         assert(fname);
1961
1962         if ((flags & O_ACCMODE) != O_RDONLY &&
1963             (flags & O_ACCMODE) != O_RDWR)
1964                 return -EINVAL;
1965
1966         if (!endswith(fname, ".journal"))
1967                 return -EINVAL;
1968
1969         f = new0(JournalFile, 1);
1970         if (!f)
1971                 return -ENOMEM;
1972
1973         f->fd = -1;
1974         f->mode = mode;
1975
1976         f->flags = flags;
1977         f->prot = prot_from_flags(flags);
1978         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1979         f->compress = compress;
1980         f->authenticate = authenticate;
1981
1982         if (mmap_cache)
1983                 f->mmap = mmap_cache_ref(mmap_cache);
1984         else {
1985                 /* One context for each type, plus the zeroth catchall
1986                  * context. One fd for the file plus one for each type
1987                  * (which we need during verification */
1988                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
1989                 if (!f->mmap) {
1990                         r = -ENOMEM;
1991                         goto fail;
1992                 }
1993         }
1994
1995         f->path = strdup(fname);
1996         if (!f->path) {
1997                 r = -ENOMEM;
1998                 goto fail;
1999         }
2000
2001         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2002         if (f->fd < 0) {
2003                 r = -errno;
2004                 goto fail;
2005         }
2006
2007         if (fstat(f->fd, &f->last_stat) < 0) {
2008                 r = -errno;
2009                 goto fail;
2010         }
2011
2012         if (f->last_stat.st_size == 0 && f->writable) {
2013                 newly_created = true;
2014
2015                 /* Try to load the FSPRG state, and if we can't, then
2016                  * just don't do authentication */
2017                 r = journal_file_load_fsprg(f);
2018                 if (r < 0)
2019                         f->authenticate = false;
2020
2021                 r = journal_file_init_header(f, template);
2022                 if (r < 0)
2023                         goto fail;
2024
2025                 if (fstat(f->fd, &f->last_stat) < 0) {
2026                         r = -errno;
2027                         goto fail;
2028                 }
2029         }
2030
2031         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2032                 r = -EIO;
2033                 goto fail;
2034         }
2035
2036         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2037         if (f->header == MAP_FAILED) {
2038                 f->header = NULL;
2039                 r = -errno;
2040                 goto fail;
2041         }
2042
2043         if (!newly_created) {
2044                 r = journal_file_verify_header(f);
2045                 if (r < 0)
2046                         goto fail;
2047         }
2048
2049         if (!newly_created && f->writable) {
2050                 r = journal_file_load_fsprg(f);
2051                 if (r < 0)
2052                         goto fail;
2053         }
2054
2055         if (f->writable) {
2056                 if (metrics) {
2057                         journal_default_metrics(metrics, f->fd);
2058                         f->metrics = *metrics;
2059                 } else if (template)
2060                         f->metrics = template->metrics;
2061
2062                 r = journal_file_refresh_header(f);
2063                 if (r < 0)
2064                         goto fail;
2065
2066                 r = journal_file_setup_hmac(f);
2067                 if (r < 0)
2068                         goto fail;
2069         }
2070
2071         if (newly_created) {
2072                 r = journal_file_setup_field_hash_table(f);
2073                 if (r < 0)
2074                         goto fail;
2075
2076                 r = journal_file_setup_data_hash_table(f);
2077                 if (r < 0)
2078                         goto fail;
2079
2080                 r = journal_file_append_first_tag(f);
2081                 if (r < 0)
2082                         goto fail;
2083         }
2084
2085         r = journal_file_map_field_hash_table(f);
2086         if (r < 0)
2087                 goto fail;
2088
2089         r = journal_file_map_data_hash_table(f);
2090         if (r < 0)
2091                 goto fail;
2092
2093         if (ret)
2094                 *ret = f;
2095
2096         return 0;
2097
2098 fail:
2099         journal_file_close(f);
2100
2101         return r;
2102 }
2103
2104 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2105         char *p;
2106         size_t l;
2107         JournalFile *old_file, *new_file = NULL;
2108         int r;
2109
2110         assert(f);
2111         assert(*f);
2112
2113         old_file = *f;
2114
2115         if (!old_file->writable)
2116                 return -EINVAL;
2117
2118         if (!endswith(old_file->path, ".journal"))
2119                 return -EINVAL;
2120
2121         l = strlen(old_file->path);
2122
2123         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2124         if (!p)
2125                 return -ENOMEM;
2126
2127         memcpy(p, old_file->path, l - 8);
2128         p[l-8] = '@';
2129         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2130         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2131                  "-%016llx-%016llx.journal",
2132                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2133                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2134
2135         r = rename(old_file->path, p);
2136         free(p);
2137
2138         if (r < 0)
2139                 return -errno;
2140
2141         old_file->header->state = STATE_ARCHIVED;
2142
2143         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
2144         journal_file_close(old_file);
2145
2146         *f = new_file;
2147         return r;
2148 }
2149
2150 int journal_file_open_reliably(
2151                 const char *fname,
2152                 int flags,
2153                 mode_t mode,
2154                 bool compress,
2155                 bool authenticate,
2156                 JournalMetrics *metrics,
2157                 MMapCache *mmap,
2158                 JournalFile *template,
2159                 JournalFile **ret) {
2160
2161         int r;
2162         size_t l;
2163         char *p;
2164
2165         r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2166         if (r != -EBADMSG && /* corrupted */
2167             r != -ENODATA && /* truncated */
2168             r != -EHOSTDOWN && /* other machine */
2169             r != -EPROTONOSUPPORT && /* incompatible feature */
2170             r != -EBUSY && /* unclean shutdown */
2171             r != -ESHUTDOWN /* already archived */)
2172                 return r;
2173
2174         if ((flags & O_ACCMODE) == O_RDONLY)
2175                 return r;
2176
2177         if (!(flags & O_CREAT))
2178                 return r;
2179
2180         if (!endswith(fname, ".journal"))
2181                 return r;
2182
2183         /* The file is corrupted. Rotate it away and try it again (but only once) */
2184
2185         l = strlen(fname);
2186         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2187                      (int) (l-8), fname,
2188                      (unsigned long long) now(CLOCK_REALTIME),
2189                      random_ull()) < 0)
2190                 return -ENOMEM;
2191
2192         r = rename(fname, p);
2193         free(p);
2194         if (r < 0)
2195                 return -errno;
2196
2197         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2198
2199         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2200 }
2201
2202
2203 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2204         uint64_t i, n;
2205         uint64_t q, xor_hash = 0;
2206         int r;
2207         EntryItem *items;
2208         dual_timestamp ts;
2209
2210         assert(from);
2211         assert(to);
2212         assert(o);
2213         assert(p);
2214
2215         if (!to->writable)
2216                 return -EPERM;
2217
2218         ts.monotonic = le64toh(o->entry.monotonic);
2219         ts.realtime = le64toh(o->entry.realtime);
2220
2221         if (to->tail_entry_monotonic_valid &&
2222             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2223                 return -EINVAL;
2224
2225         n = journal_file_entry_n_items(o);
2226         items = alloca(sizeof(EntryItem) * n);
2227
2228         for (i = 0; i < n; i++) {
2229                 uint64_t l, h;
2230                 le64_t le_hash;
2231                 size_t t;
2232                 void *data;
2233                 Object *u;
2234
2235                 q = le64toh(o->entry.items[i].object_offset);
2236                 le_hash = o->entry.items[i].hash;
2237
2238                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2239                 if (r < 0)
2240                         return r;
2241
2242                 if (le_hash != o->data.hash)
2243                         return -EBADMSG;
2244
2245                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2246                 t = (size_t) l;
2247
2248                 /* We hit the limit on 32bit machines */
2249                 if ((uint64_t) t != l)
2250                         return -E2BIG;
2251
2252                 if (o->object.flags & OBJECT_COMPRESSED) {
2253 #ifdef HAVE_XZ
2254                         uint64_t rsize;
2255
2256                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2257                                 return -EBADMSG;
2258
2259                         data = from->compress_buffer;
2260                         l = rsize;
2261 #else
2262                         return -EPROTONOSUPPORT;
2263 #endif
2264                 } else
2265                         data = o->data.payload;
2266
2267                 r = journal_file_append_data(to, data, l, &u, &h);
2268                 if (r < 0)
2269                         return r;
2270
2271                 xor_hash ^= le64toh(u->data.hash);
2272                 items[i].object_offset = htole64(h);
2273                 items[i].hash = u->data.hash;
2274
2275                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2276                 if (r < 0)
2277                         return r;
2278         }
2279
2280         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2281 }
2282
2283 void journal_default_metrics(JournalMetrics *m, int fd) {
2284         uint64_t fs_size = 0;
2285         struct statvfs ss;
2286         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2287
2288         assert(m);
2289         assert(fd >= 0);
2290
2291         if (fstatvfs(fd, &ss) >= 0)
2292                 fs_size = ss.f_frsize * ss.f_blocks;
2293
2294         if (m->max_use == (uint64_t) -1) {
2295
2296                 if (fs_size > 0) {
2297                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2298
2299                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2300                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2301
2302                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2303                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2304                 } else
2305                         m->max_use = DEFAULT_MAX_USE_LOWER;
2306         } else {
2307                 m->max_use = PAGE_ALIGN(m->max_use);
2308
2309                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2310                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2311         }
2312
2313         if (m->max_size == (uint64_t) -1) {
2314                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2315
2316                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2317                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2318         } else
2319                 m->max_size = PAGE_ALIGN(m->max_size);
2320
2321         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2322                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2323
2324         if (m->max_size*2 > m->max_use)
2325                 m->max_use = m->max_size*2;
2326
2327         if (m->min_size == (uint64_t) -1)
2328                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2329         else {
2330                 m->min_size = PAGE_ALIGN(m->min_size);
2331
2332                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2333                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2334
2335                 if (m->min_size > m->max_size)
2336                         m->max_size = m->min_size;
2337         }
2338
2339         if (m->keep_free == (uint64_t) -1) {
2340
2341                 if (fs_size > 0) {
2342                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2343
2344                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2345                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2346
2347                 } else
2348                         m->keep_free = DEFAULT_KEEP_FREE;
2349         }
2350
2351         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2352                  format_bytes(a, sizeof(a), m->max_use),
2353                  format_bytes(b, sizeof(b), m->max_size),
2354                  format_bytes(c, sizeof(c), m->min_size),
2355                  format_bytes(d, sizeof(d), m->keep_free));
2356 }
2357
2358 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2359         assert(f);
2360         assert(from || to);
2361
2362         if (from) {
2363                 if (f->header->head_entry_realtime == 0)
2364                         return -ENOENT;
2365
2366                 *from = le64toh(f->header->head_entry_realtime);
2367         }
2368
2369         if (to) {
2370                 if (f->header->tail_entry_realtime == 0)
2371                         return -ENOENT;
2372
2373                 *to = le64toh(f->header->tail_entry_realtime);
2374         }
2375
2376         return 1;
2377 }
2378
2379 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2380         char t[9+32+1] = "_BOOT_ID=";
2381         Object *o;
2382         uint64_t p;
2383         int r;
2384
2385         assert(f);
2386         assert(from || to);
2387
2388         sd_id128_to_string(boot_id, t + 9);
2389
2390         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2391         if (r <= 0)
2392                 return r;
2393
2394         if (le64toh(o->data.n_entries) <= 0)
2395                 return 0;
2396
2397         if (from) {
2398                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2399                 if (r < 0)
2400                         return r;
2401
2402                 *from = le64toh(o->entry.monotonic);
2403         }
2404
2405         if (to) {
2406                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2407                 if (r < 0)
2408                         return r;
2409
2410                 r = generic_array_get_plus_one(f,
2411                                                le64toh(o->data.entry_offset),
2412                                                le64toh(o->data.entry_array_offset),
2413                                                le64toh(o->data.n_entries)-1,
2414                                                &o, NULL);
2415                 if (r <= 0)
2416                         return r;
2417
2418                 *to = le64toh(o->entry.monotonic);
2419         }
2420
2421         return 1;
2422 }
2423
2424 bool journal_file_rotate_suggested(JournalFile *f) {
2425         assert(f);
2426
2427         /* If we gained new header fields we gained new features,
2428          * hence suggest a rotation */
2429         if (le64toh(f->header->header_size) < sizeof(Header)) {
2430                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2431                 return true;
2432         }
2433
2434         /* Let's check if the hash tables grew over a certain fill
2435          * level (75%, borrowing this value from Java's hash table
2436          * implementation), and if so suggest a rotation. To calculate
2437          * the fill level we need the n_data field, which only exists
2438          * in newer versions. */
2439
2440         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2441                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2442                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2443                                   f->path,
2444                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2445                                   (unsigned long long) le64toh(f->header->n_data),
2446                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2447                                   (unsigned long long) (f->last_stat.st_size),
2448                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2449                         return true;
2450                 }
2451
2452         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2453                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2454                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2455                                   f->path,
2456                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2457                                   (unsigned long long) le64toh(f->header->n_fields),
2458                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2459                         return true;
2460                 }
2461
2462         return false;
2463 }