chiark / gitweb /
journal: rework terminology
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Default sizes, in bytes, of the data and field hash tables: the
 * number of buckets times the size of one HashItem */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads shorter than this many bytes are stored uncompressed
 * even if compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the file
 * system size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first field we added after the initial file format
 * design, hence a valid header must at least reach up to it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->seal)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fss_file)
100                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
101         else if (f->fsprg_state)
102                 free(f->fsprg_state);
103
104         free(f->fsprg_seed);
105
106         if (f->hmac)
107                 gcry_md_close(f->hmac);
108 #endif
109
110         free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114         Header h;
115         ssize_t k;
116         int r;
117
118         assert(f);
119
120         zero(h);
121         memcpy(h.signature, HEADER_SIGNATURE, 8);
122         h.header_size = htole64(ALIGN64(sizeof(h)));
123
124         h.incompatible_flags =
125                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127         h.compatible_flags =
128                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
129
130         r = sd_id128_randomize(&h.file_id);
131         if (r < 0)
132                 return r;
133
134         if (template) {
135                 h.seqnum_id = template->header->seqnum_id;
136                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
137         } else
138                 h.seqnum_id = h.file_id;
139
140         k = pwrite(f->fd, &h, sizeof(h), 0);
141         if (k < 0)
142                 return -errno;
143
144         if (k != sizeof(h))
145                 return -EIO;
146
147         return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151         int r;
152         sd_id128_t boot_id;
153
154         assert(f);
155
156         r = sd_id128_get_machine(&f->header->machine_id);
157         if (r < 0)
158                 return r;
159
160         r = sd_id128_get_boot(&boot_id);
161         if (r < 0)
162                 return r;
163
164         if (sd_id128_equal(boot_id, f->header->boot_id))
165                 f->tail_entry_monotonic_valid = true;
166
167         f->header->boot_id = boot_id;
168
169         f->header->state = STATE_ONLINE;
170
171         /* Sync the online state to disk */
172         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173         fdatasync(f->fd);
174
175         return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179         assert(f);
180
181         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182                 return -EBADMSG;
183
184         /* In both read and write mode we refuse to open files with
185          * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188                 return -EPROTONOSUPPORT;
189 #else
190         if (f->header->incompatible_flags != 0)
191                 return -EPROTONOSUPPORT;
192 #endif
193
194         /* When open for writing we refuse to open files with
195          * compatible flags, too */
196         if (f->writable) {
197 #ifdef HAVE_GCRYPT
198                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
199                         return -EPROTONOSUPPORT;
200 #else
201                 if (f->header->compatible_flags != 0)
202                         return -EPROTONOSUPPORT;
203 #endif
204         }
205
206         /* The first addition was n_data, so check that we are at least this large */
207         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
208                 return -EBADMSG;
209
210         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
211                 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
212                 return -EBADMSG;
213
214         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
215                 return -ENODATA;
216
217         if (f->writable) {
218                 uint8_t state;
219                 sd_id128_t machine_id;
220                 int r;
221
222                 r = sd_id128_get_machine(&machine_id);
223                 if (r < 0)
224                         return r;
225
226                 if (!sd_id128_equal(machine_id, f->header->machine_id))
227                         return -EHOSTDOWN;
228
229                 state = f->header->state;
230
231                 if (state == STATE_ONLINE) {
232                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
233                         return -EBUSY;
234                 } else if (state == STATE_ARCHIVED)
235                         return -ESHUTDOWN;
236                 else if (state != STATE_OFFLINE) {
237                         log_debug("Journal file %s has unknown state %u.", f->path, state);
238                         return -EBUSY;
239                 }
240         }
241
242         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
243         f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
244
245         return 0;
246 }
247
248 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
249         uint64_t old_size, new_size;
250         int r;
251
252         assert(f);
253
254         /* We assume that this file is not sparse, and we know that
255          * for sure, since we always call posix_fallocate()
256          * ourselves */
257
258         old_size =
259                 le64toh(f->header->header_size) +
260                 le64toh(f->header->arena_size);
261
262         new_size = PAGE_ALIGN(offset + size);
263         if (new_size < le64toh(f->header->header_size))
264                 new_size = le64toh(f->header->header_size);
265
266         if (new_size <= old_size)
267                 return 0;
268
269         if (f->metrics.max_size > 0 &&
270             new_size > f->metrics.max_size)
271                 return -E2BIG;
272
273         if (new_size > f->metrics.min_size &&
274             f->metrics.keep_free > 0) {
275                 struct statvfs svfs;
276
277                 if (fstatvfs(f->fd, &svfs) >= 0) {
278                         uint64_t available;
279
280                         available = svfs.f_bfree * svfs.f_bsize;
281
282                         if (available >= f->metrics.keep_free)
283                                 available -= f->metrics.keep_free;
284                         else
285                                 available = 0;
286
287                         if (new_size - old_size > available)
288                                 return -E2BIG;
289                 }
290         }
291
292         /* Note that the glibc fallocate() fallback is very
293            inefficient, hence we try to minimize the allocation area
294            as we can. */
295         r = posix_fallocate(f->fd, old_size, new_size - old_size);
296         if (r != 0)
297                 return -r;
298
299         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
300
301         if (fstat(f->fd, &f->last_stat) < 0)
302                 return -errno;
303
304         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
305
306         return 0;
307 }
308
309 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
310         assert(f);
311         assert(ret);
312
313         /* Avoid SIGBUS on invalid accesses */
314         if (offset + size > (uint64_t) f->last_stat.st_size) {
315                 /* Hmm, out of range? Let's refresh the fstat() data
316                  * first, before we trust that check. */
317
318                 if (fstat(f->fd, &f->last_stat) < 0 ||
319                     offset + size > (uint64_t) f->last_stat.st_size)
320                         return -EADDRNOTAVAIL;
321         }
322
323         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
324 }
325
326 static uint64_t minimum_header_size(Object *o) {
327
328         static uint64_t table[] = {
329                 [OBJECT_DATA] = sizeof(DataObject),
330                 [OBJECT_FIELD] = sizeof(FieldObject),
331                 [OBJECT_ENTRY] = sizeof(EntryObject),
332                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
333                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
334                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
335                 [OBJECT_TAG] = sizeof(TagObject),
336         };
337
338         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
339                 return sizeof(ObjectHeader);
340
341         return table[o->object.type];
342 }
343
344 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
345         int r;
346         void *t;
347         Object *o;
348         uint64_t s;
349         unsigned context;
350
351         assert(f);
352         assert(ret);
353
354         /* One context for each type, plus one catch-all for the rest */
355         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
356
357         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
358         if (r < 0)
359                 return r;
360
361         o = (Object*) t;
362         s = le64toh(o->object.size);
363
364         if (s < sizeof(ObjectHeader))
365                 return -EBADMSG;
366
367         if (o->object.type <= OBJECT_UNUSED)
368                 return -EBADMSG;
369
370         if (s < minimum_header_size(o))
371                 return -EBADMSG;
372
373         if (type >= 0 && o->object.type != type)
374                 return -EBADMSG;
375
376         if (s > sizeof(ObjectHeader)) {
377                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
378                 if (r < 0)
379                         return r;
380
381                 o = (Object*) t;
382         }
383
384         *ret = o;
385         return 0;
386 }
387
388 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
389         uint64_t r;
390
391         assert(f);
392
393         r = le64toh(f->header->tail_entry_seqnum) + 1;
394
395         if (seqnum) {
396                 /* If an external seqnum counter was passed, we update
397                  * both the local and the external one, and set it to
398                  * the maximum of both */
399
400                 if (*seqnum + 1 > r)
401                         r = *seqnum + 1;
402
403                 *seqnum = r;
404         }
405
406         f->header->tail_entry_seqnum = htole64(r);
407
408         if (f->header->head_entry_seqnum == 0)
409                 f->header->head_entry_seqnum = htole64(r);
410
411         return r;
412 }
413
414 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
415         int r;
416         uint64_t p;
417         Object *tail, *o;
418         void *t;
419
420         assert(f);
421         assert(type > 0 && type < _OBJECT_TYPE_MAX);
422         assert(size >= sizeof(ObjectHeader));
423         assert(offset);
424         assert(ret);
425
426         p = le64toh(f->header->tail_object_offset);
427         if (p == 0)
428                 p = le64toh(f->header->header_size);
429         else {
430                 r = journal_file_move_to_object(f, -1, p, &tail);
431                 if (r < 0)
432                         return r;
433
434                 p += ALIGN64(le64toh(tail->object.size));
435         }
436
437         r = journal_file_allocate(f, p, size);
438         if (r < 0)
439                 return r;
440
441         r = journal_file_move_to(f, type, p, size, &t);
442         if (r < 0)
443                 return r;
444
445         o = (Object*) t;
446
447         zero(o->object);
448         o->object.type = type;
449         o->object.size = htole64(size);
450
451         f->header->tail_object_offset = htole64(p);
452         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
453
454         *ret = o;
455         *offset = p;
456
457         return 0;
458 }
459
460 static int journal_file_setup_data_hash_table(JournalFile *f) {
461         uint64_t s, p;
462         Object *o;
463         int r;
464
465         assert(f);
466
467         /* We estimate that we need 1 hash table entry per 768 of
468            journal file and we want to make sure we never get beyond
469            75% fill level. Calculate the hash table size for the
470            maximum file size based on these metrics. */
471
472         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
473         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
474                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
475
476         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
477
478         r = journal_file_append_object(f,
479                                        OBJECT_DATA_HASH_TABLE,
480                                        offsetof(Object, hash_table.items) + s,
481                                        &o, &p);
482         if (r < 0)
483                 return r;
484
485         memset(o->hash_table.items, 0, s);
486
487         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
488         f->header->data_hash_table_size = htole64(s);
489
490         return 0;
491 }
492
493 static int journal_file_setup_field_hash_table(JournalFile *f) {
494         uint64_t s, p;
495         Object *o;
496         int r;
497
498         assert(f);
499
500         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
501         r = journal_file_append_object(f,
502                                        OBJECT_FIELD_HASH_TABLE,
503                                        offsetof(Object, hash_table.items) + s,
504                                        &o, &p);
505         if (r < 0)
506                 return r;
507
508         memset(o->hash_table.items, 0, s);
509
510         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
511         f->header->field_hash_table_size = htole64(s);
512
513         return 0;
514 }
515
516 static int journal_file_map_data_hash_table(JournalFile *f) {
517         uint64_t s, p;
518         void *t;
519         int r;
520
521         assert(f);
522
523         p = le64toh(f->header->data_hash_table_offset);
524         s = le64toh(f->header->data_hash_table_size);
525
526         r = journal_file_move_to(f,
527                                  OBJECT_DATA_HASH_TABLE,
528                                  p, s,
529                                  &t);
530         if (r < 0)
531                 return r;
532
533         f->data_hash_table = t;
534         return 0;
535 }
536
537 static int journal_file_map_field_hash_table(JournalFile *f) {
538         uint64_t s, p;
539         void *t;
540         int r;
541
542         assert(f);
543
544         p = le64toh(f->header->field_hash_table_offset);
545         s = le64toh(f->header->field_hash_table_size);
546
547         r = journal_file_move_to(f,
548                                  OBJECT_FIELD_HASH_TABLE,
549                                  p, s,
550                                  &t);
551         if (r < 0)
552                 return r;
553
554         f->field_hash_table = t;
555         return 0;
556 }
557
558 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
559         uint64_t p, h;
560         int r;
561
562         assert(f);
563         assert(o);
564         assert(offset > 0);
565         assert(o->object.type == OBJECT_DATA);
566
567         /* This might alter the window we are looking at */
568
569         o->data.next_hash_offset = o->data.next_field_offset = 0;
570         o->data.entry_offset = o->data.entry_array_offset = 0;
571         o->data.n_entries = 0;
572
573         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
574         p = le64toh(f->data_hash_table[h].tail_hash_offset);
575         if (p == 0) {
576                 /* Only entry in the hash table is easy */
577                 f->data_hash_table[h].head_hash_offset = htole64(offset);
578         } else {
579                 /* Move back to the previous data object, to patch in
580                  * pointer */
581
582                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
583                 if (r < 0)
584                         return r;
585
586                 o->data.next_hash_offset = htole64(offset);
587         }
588
589         f->data_hash_table[h].tail_hash_offset = htole64(offset);
590
591         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
592                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
593
594         return 0;
595 }
596
597 int journal_file_find_data_object_with_hash(
598                 JournalFile *f,
599                 const void *data, uint64_t size, uint64_t hash,
600                 Object **ret, uint64_t *offset) {
601
602         uint64_t p, osize, h;
603         int r;
604
605         assert(f);
606         assert(data || size == 0);
607
608         osize = offsetof(Object, data.payload) + size;
609
610         if (f->header->data_hash_table_size == 0)
611                 return -EBADMSG;
612
613         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
614         p = le64toh(f->data_hash_table[h].head_hash_offset);
615
616         while (p > 0) {
617                 Object *o;
618
619                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
620                 if (r < 0)
621                         return r;
622
623                 if (le64toh(o->data.hash) != hash)
624                         goto next;
625
626                 if (o->object.flags & OBJECT_COMPRESSED) {
627 #ifdef HAVE_XZ
628                         uint64_t l, rsize;
629
630                         l = le64toh(o->object.size);
631                         if (l <= offsetof(Object, data.payload))
632                                 return -EBADMSG;
633
634                         l -= offsetof(Object, data.payload);
635
636                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
637                                 return -EBADMSG;
638
639                         if (rsize == size &&
640                             memcmp(f->compress_buffer, data, size) == 0) {
641
642                                 if (ret)
643                                         *ret = o;
644
645                                 if (offset)
646                                         *offset = p;
647
648                                 return 1;
649                         }
650 #else
651                         return -EPROTONOSUPPORT;
652 #endif
653
654                 } else if (le64toh(o->object.size) == osize &&
655                            memcmp(o->data.payload, data, size) == 0) {
656
657                         if (ret)
658                                 *ret = o;
659
660                         if (offset)
661                                 *offset = p;
662
663                         return 1;
664                 }
665
666         next:
667                 p = le64toh(o->data.next_hash_offset);
668         }
669
670         return 0;
671 }
672
673 int journal_file_find_data_object(
674                 JournalFile *f,
675                 const void *data, uint64_t size,
676                 Object **ret, uint64_t *offset) {
677
678         uint64_t hash;
679
680         assert(f);
681         assert(data || size == 0);
682
683         hash = hash64(data, size);
684
685         return journal_file_find_data_object_with_hash(f,
686                                                        data, size, hash,
687                                                        ret, offset);
688 }
689
690 static int journal_file_append_data(
691                 JournalFile *f,
692                 const void *data, uint64_t size,
693                 Object **ret, uint64_t *offset) {
694
695         uint64_t hash, p;
696         uint64_t osize;
697         Object *o;
698         int r;
699         bool compressed = false;
700
701         assert(f);
702         assert(data || size == 0);
703
704         hash = hash64(data, size);
705
706         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
707         if (r < 0)
708                 return r;
709         else if (r > 0) {
710
711                 if (ret)
712                         *ret = o;
713
714                 if (offset)
715                         *offset = p;
716
717                 return 0;
718         }
719
720         osize = offsetof(Object, data.payload) + size;
721         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
722         if (r < 0)
723                 return r;
724
725         o->data.hash = htole64(hash);
726
727 #ifdef HAVE_XZ
728         if (f->compress &&
729             size >= COMPRESSION_SIZE_THRESHOLD) {
730                 uint64_t rsize;
731
732                 compressed = compress_blob(data, size, o->data.payload, &rsize);
733
734                 if (compressed) {
735                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
736                         o->object.flags |= OBJECT_COMPRESSED;
737
738                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
739                 }
740         }
741 #endif
742
743         if (!compressed && size > 0)
744                 memcpy(o->data.payload, data, size);
745
746         r = journal_file_link_data(f, o, p, hash);
747         if (r < 0)
748                 return r;
749
750         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
751         if (r < 0)
752                 return r;
753
754         /* The linking might have altered the window, so let's
755          * refresh our pointer */
756         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
757         if (r < 0)
758                 return r;
759
760         if (ret)
761                 *ret = o;
762
763         if (offset)
764                 *offset = p;
765
766         return 0;
767 }
768
769 uint64_t journal_file_entry_n_items(Object *o) {
770         assert(o);
771         assert(o->object.type == OBJECT_ENTRY);
772
773         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
774 }
775
776 uint64_t journal_file_entry_array_n_items(Object *o) {
777         assert(o);
778         assert(o->object.type == OBJECT_ENTRY_ARRAY);
779
780         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
781 }
782
783 static int link_entry_into_array(JournalFile *f,
784                                  le64_t *first,
785                                  le64_t *idx,
786                                  uint64_t p) {
787         int r;
788         uint64_t n = 0, ap = 0, q, i, a, hidx;
789         Object *o;
790
791         assert(f);
792         assert(first);
793         assert(idx);
794         assert(p > 0);
795
796         a = le64toh(*first);
797         i = hidx = le64toh(*idx);
798         while (a > 0) {
799
800                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
801                 if (r < 0)
802                         return r;
803
804                 n = journal_file_entry_array_n_items(o);
805                 if (i < n) {
806                         o->entry_array.items[i] = htole64(p);
807                         *idx = htole64(hidx + 1);
808                         return 0;
809                 }
810
811                 i -= n;
812                 ap = a;
813                 a = le64toh(o->entry_array.next_entry_array_offset);
814         }
815
816         if (hidx > n)
817                 n = (hidx+1) * 2;
818         else
819                 n = n * 2;
820
821         if (n < 4)
822                 n = 4;
823
824         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
825                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
826                                        &o, &q);
827         if (r < 0)
828                 return r;
829
830         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
831         if (r < 0)
832                 return r;
833
834         o->entry_array.items[i] = htole64(p);
835
836         if (ap == 0)
837                 *first = htole64(q);
838         else {
839                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
840                 if (r < 0)
841                         return r;
842
843                 o->entry_array.next_entry_array_offset = htole64(q);
844         }
845
846         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
847                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
848
849         *idx = htole64(hidx + 1);
850
851         return 0;
852 }
853
854 static int link_entry_into_array_plus_one(JournalFile *f,
855                                           le64_t *extra,
856                                           le64_t *first,
857                                           le64_t *idx,
858                                           uint64_t p) {
859
860         int r;
861
862         assert(f);
863         assert(extra);
864         assert(first);
865         assert(idx);
866         assert(p > 0);
867
868         if (*idx == 0)
869                 *extra = htole64(p);
870         else {
871                 le64_t i;
872
873                 i = htole64(le64toh(*idx) - 1);
874                 r = link_entry_into_array(f, first, &i, p);
875                 if (r < 0)
876                         return r;
877         }
878
879         *idx = htole64(le64toh(*idx) + 1);
880         return 0;
881 }
882
883 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
884         uint64_t p;
885         int r;
886         assert(f);
887         assert(o);
888         assert(offset > 0);
889
890         p = le64toh(o->entry.items[i].object_offset);
891         if (p == 0)
892                 return -EINVAL;
893
894         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
895         if (r < 0)
896                 return r;
897
898         return link_entry_into_array_plus_one(f,
899                                               &o->data.entry_offset,
900                                               &o->data.entry_array_offset,
901                                               &o->data.n_entries,
902                                               offset);
903 }
904
905 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
906         uint64_t n, i;
907         int r;
908
909         assert(f);
910         assert(o);
911         assert(offset > 0);
912         assert(o->object.type == OBJECT_ENTRY);
913
914         __sync_synchronize();
915
916         /* Link up the entry itself */
917         r = link_entry_into_array(f,
918                                   &f->header->entry_array_offset,
919                                   &f->header->n_entries,
920                                   offset);
921         if (r < 0)
922                 return r;
923
924         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
925
926         if (f->header->head_entry_realtime == 0)
927                 f->header->head_entry_realtime = o->entry.realtime;
928
929         f->header->tail_entry_realtime = o->entry.realtime;
930         f->header->tail_entry_monotonic = o->entry.monotonic;
931
932         f->tail_entry_monotonic_valid = true;
933
934         /* Link up the items */
935         n = journal_file_entry_n_items(o);
936         for (i = 0; i < n; i++) {
937                 r = journal_file_link_entry_item(f, o, offset, i);
938                 if (r < 0)
939                         return r;
940         }
941
942         return 0;
943 }
944
945 static int journal_file_append_entry_internal(
946                 JournalFile *f,
947                 const dual_timestamp *ts,
948                 uint64_t xor_hash,
949                 const EntryItem items[], unsigned n_items,
950                 uint64_t *seqnum,
951                 Object **ret, uint64_t *offset) {
952         uint64_t np;
953         uint64_t osize;
954         Object *o;
955         int r;
956
957         assert(f);
958         assert(items || n_items == 0);
959         assert(ts);
960
961         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
962
963         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
964         if (r < 0)
965                 return r;
966
967         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
968         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
969         o->entry.realtime = htole64(ts->realtime);
970         o->entry.monotonic = htole64(ts->monotonic);
971         o->entry.xor_hash = htole64(xor_hash);
972         o->entry.boot_id = f->header->boot_id;
973
974         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
975         if (r < 0)
976                 return r;
977
978         r = journal_file_link_entry(f, o, np);
979         if (r < 0)
980                 return r;
981
982         if (ret)
983                 *ret = o;
984
985         if (offset)
986                 *offset = np;
987
988         return 0;
989 }
990
991 void journal_file_post_change(JournalFile *f) {
992         assert(f);
993
994         /* inotify() does not receive IN_MODIFY events from file
995          * accesses done via mmap(). After each access we hence
996          * trigger IN_MODIFY by truncating the journal file to its
997          * current size which triggers IN_MODIFY. */
998
999         __sync_synchronize();
1000
1001         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1002                 log_error("Failed to to truncate file to its own size: %m");
1003 }
1004
1005 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1006         unsigned i;
1007         EntryItem *items;
1008         int r;
1009         uint64_t xor_hash = 0;
1010         struct dual_timestamp _ts;
1011
1012         assert(f);
1013         assert(iovec || n_iovec == 0);
1014
1015         if (!f->writable)
1016                 return -EPERM;
1017
1018         if (!ts) {
1019                 dual_timestamp_get(&_ts);
1020                 ts = &_ts;
1021         }
1022
1023         if (f->tail_entry_monotonic_valid &&
1024             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1025                 return -EINVAL;
1026
1027         r = journal_file_maybe_append_tag(f, ts->realtime);
1028         if (r < 0)
1029                 return r;
1030
1031         /* alloca() can't take 0, hence let's allocate at least one */
1032         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1033
1034         for (i = 0; i < n_iovec; i++) {
1035                 uint64_t p;
1036                 Object *o;
1037
1038                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1039                 if (r < 0)
1040                         return r;
1041
1042                 xor_hash ^= le64toh(o->data.hash);
1043                 items[i].object_offset = htole64(p);
1044                 items[i].hash = o->data.hash;
1045         }
1046
1047         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1048
1049         journal_file_post_change(f);
1050
1051         return r;
1052 }
1053
1054 static int generic_array_get(JournalFile *f,
1055                              uint64_t first,
1056                              uint64_t i,
1057                              Object **ret, uint64_t *offset) {
1058
1059         Object *o;
1060         uint64_t p = 0, a;
1061         int r;
1062
1063         assert(f);
1064
1065         a = first;
1066         while (a > 0) {
1067                 uint64_t n;
1068
1069                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1070                 if (r < 0)
1071                         return r;
1072
1073                 n = journal_file_entry_array_n_items(o);
1074                 if (i < n) {
1075                         p = le64toh(o->entry_array.items[i]);
1076                         break;
1077                 }
1078
1079                 i -= n;
1080                 a = le64toh(o->entry_array.next_entry_array_offset);
1081         }
1082
1083         if (a <= 0 || p <= 0)
1084                 return 0;
1085
1086         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1087         if (r < 0)
1088                 return r;
1089
1090         if (ret)
1091                 *ret = o;
1092
1093         if (offset)
1094                 *offset = p;
1095
1096         return 1;
1097 }
1098
1099 static int generic_array_get_plus_one(JournalFile *f,
1100                                       uint64_t extra,
1101                                       uint64_t first,
1102                                       uint64_t i,
1103                                       Object **ret, uint64_t *offset) {
1104
1105         Object *o;
1106
1107         assert(f);
1108
1109         if (i == 0) {
1110                 int r;
1111
1112                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1113                 if (r < 0)
1114                         return r;
1115
1116                 if (ret)
1117                         *ret = o;
1118
1119                 if (offset)
1120                         *offset = extra;
1121
1122                 return 1;
1123         }
1124
1125         return generic_array_get(f, first, i-1, ret, offset);
1126 }
1127
/* Results of the test_object callbacks used by the bisection code */
enum {
        TEST_FOUND = 0,  /* the tested object matches the needle */
        TEST_LEFT  = 1,  /* the tested object sorts before the needle */
        TEST_RIGHT = 2   /* the tested object sorts after the needle */
};
1133
1134 static int generic_array_bisect(JournalFile *f,
1135                                 uint64_t first,
1136                                 uint64_t n,
1137                                 uint64_t needle,
1138                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1139                                 direction_t direction,
1140                                 Object **ret,
1141                                 uint64_t *offset,
1142                                 uint64_t *idx) {
1143
1144         uint64_t a, p, t = 0, i = 0, last_p = 0;
1145         bool subtract_one = false;
1146         Object *o, *array = NULL;
1147         int r;
1148
1149         assert(f);
1150         assert(test_object);
1151
1152         a = first;
1153         while (a > 0) {
1154                 uint64_t left, right, k, lp;
1155
1156                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1157                 if (r < 0)
1158                         return r;
1159
1160                 k = journal_file_entry_array_n_items(array);
1161                 right = MIN(k, n);
1162                 if (right <= 0)
1163                         return 0;
1164
1165                 i = right - 1;
1166                 lp = p = le64toh(array->entry_array.items[i]);
1167                 if (p <= 0)
1168                         return -EBADMSG;
1169
1170                 r = test_object(f, p, needle);
1171                 if (r < 0)
1172                         return r;
1173
1174                 if (r == TEST_FOUND)
1175                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1176
1177                 if (r == TEST_RIGHT) {
1178                         left = 0;
1179                         right -= 1;
1180                         for (;;) {
1181                                 if (left == right) {
1182                                         if (direction == DIRECTION_UP)
1183                                                 subtract_one = true;
1184
1185                                         i = left;
1186                                         goto found;
1187                                 }
1188
1189                                 assert(left < right);
1190
1191                                 i = (left + right) / 2;
1192                                 p = le64toh(array->entry_array.items[i]);
1193                                 if (p <= 0)
1194                                         return -EBADMSG;
1195
1196                                 r = test_object(f, p, needle);
1197                                 if (r < 0)
1198                                         return r;
1199
1200                                 if (r == TEST_FOUND)
1201                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1202
1203                                 if (r == TEST_RIGHT)
1204                                         right = i;
1205                                 else
1206                                         left = i + 1;
1207                         }
1208                 }
1209
1210                 if (k > n) {
1211                         if (direction == DIRECTION_UP) {
1212                                 i = n;
1213                                 subtract_one = true;
1214                                 goto found;
1215                         }
1216
1217                         return 0;
1218                 }
1219
1220                 last_p = lp;
1221
1222                 n -= k;
1223                 t += k;
1224                 a = le64toh(array->entry_array.next_entry_array_offset);
1225         }
1226
1227         return 0;
1228
1229 found:
1230         if (subtract_one && t == 0 && i == 0)
1231                 return 0;
1232
1233         if (subtract_one && i == 0)
1234                 p = last_p;
1235         else if (subtract_one)
1236                 p = le64toh(array->entry_array.items[i-1]);
1237         else
1238                 p = le64toh(array->entry_array.items[i]);
1239
1240         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1241         if (r < 0)
1242                 return r;
1243
1244         if (ret)
1245                 *ret = o;
1246
1247         if (offset)
1248                 *offset = p;
1249
1250         if (idx)
1251                 *idx = t + i + (subtract_one ? -1 : 0);
1252
1253         return 1;
1254 }
1255
1256 static int generic_array_bisect_plus_one(JournalFile *f,
1257                                          uint64_t extra,
1258                                          uint64_t first,
1259                                          uint64_t n,
1260                                          uint64_t needle,
1261                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1262                                          direction_t direction,
1263                                          Object **ret,
1264                                          uint64_t *offset,
1265                                          uint64_t *idx) {
1266
1267         int r;
1268         bool step_back = false;
1269         Object *o;
1270
1271         assert(f);
1272         assert(test_object);
1273
1274         if (n <= 0)
1275                 return 0;
1276
1277         /* This bisects the array in object 'first', but first checks
1278          * an extra  */
1279         r = test_object(f, extra, needle);
1280         if (r < 0)
1281                 return r;
1282
1283         if (r == TEST_FOUND)
1284                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1285
1286         /* if we are looking with DIRECTION_UP then we need to first
1287            see if in the actual array there is a matching entry, and
1288            return the last one of that. But if there isn't any we need
1289            to return this one. Hence remember this, and return it
1290            below. */
1291         if (r == TEST_LEFT)
1292                 step_back = direction == DIRECTION_UP;
1293
1294         if (r == TEST_RIGHT) {
1295                 if (direction == DIRECTION_DOWN)
1296                         goto found;
1297                 else
1298                         return 0;
1299         }
1300
1301         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1302
1303         if (r == 0 && step_back)
1304                 goto found;
1305
1306         if (r > 0 && idx)
1307                 (*idx) ++;
1308
1309         return r;
1310
1311 found:
1312         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1313         if (r < 0)
1314                 return r;
1315
1316         if (ret)
1317                 *ret = o;
1318
1319         if (offset)
1320                 *offset = extra;
1321
1322         if (idx)
1323                 *idx = 0;
1324
1325         return 1;
1326 }
1327
1328 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1329         assert(f);
1330         assert(p > 0);
1331
1332         if (p == needle)
1333                 return TEST_FOUND;
1334         else if (p < needle)
1335                 return TEST_LEFT;
1336         else
1337                 return TEST_RIGHT;
1338 }
1339
1340 int journal_file_move_to_entry_by_offset(
1341                 JournalFile *f,
1342                 uint64_t p,
1343                 direction_t direction,
1344                 Object **ret,
1345                 uint64_t *offset) {
1346
1347         return generic_array_bisect(f,
1348                                     le64toh(f->header->entry_array_offset),
1349                                     le64toh(f->header->n_entries),
1350                                     p,
1351                                     test_object_offset,
1352                                     direction,
1353                                     ret, offset, NULL);
1354 }
1355
1356
1357 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1358         Object *o;
1359         int r;
1360
1361         assert(f);
1362         assert(p > 0);
1363
1364         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1365         if (r < 0)
1366                 return r;
1367
1368         if (le64toh(o->entry.seqnum) == needle)
1369                 return TEST_FOUND;
1370         else if (le64toh(o->entry.seqnum) < needle)
1371                 return TEST_LEFT;
1372         else
1373                 return TEST_RIGHT;
1374 }
1375
1376 int journal_file_move_to_entry_by_seqnum(
1377                 JournalFile *f,
1378                 uint64_t seqnum,
1379                 direction_t direction,
1380                 Object **ret,
1381                 uint64_t *offset) {
1382
1383         return generic_array_bisect(f,
1384                                     le64toh(f->header->entry_array_offset),
1385                                     le64toh(f->header->n_entries),
1386                                     seqnum,
1387                                     test_object_seqnum,
1388                                     direction,
1389                                     ret, offset, NULL);
1390 }
1391
1392 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1393         Object *o;
1394         int r;
1395
1396         assert(f);
1397         assert(p > 0);
1398
1399         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1400         if (r < 0)
1401                 return r;
1402
1403         if (le64toh(o->entry.realtime) == needle)
1404                 return TEST_FOUND;
1405         else if (le64toh(o->entry.realtime) < needle)
1406                 return TEST_LEFT;
1407         else
1408                 return TEST_RIGHT;
1409 }
1410
1411 int journal_file_move_to_entry_by_realtime(
1412                 JournalFile *f,
1413                 uint64_t realtime,
1414                 direction_t direction,
1415                 Object **ret,
1416                 uint64_t *offset) {
1417
1418         return generic_array_bisect(f,
1419                                     le64toh(f->header->entry_array_offset),
1420                                     le64toh(f->header->n_entries),
1421                                     realtime,
1422                                     test_object_realtime,
1423                                     direction,
1424                                     ret, offset, NULL);
1425 }
1426
1427 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1428         Object *o;
1429         int r;
1430
1431         assert(f);
1432         assert(p > 0);
1433
1434         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1435         if (r < 0)
1436                 return r;
1437
1438         if (le64toh(o->entry.monotonic) == needle)
1439                 return TEST_FOUND;
1440         else if (le64toh(o->entry.monotonic) < needle)
1441                 return TEST_LEFT;
1442         else
1443                 return TEST_RIGHT;
1444 }
1445
1446 int journal_file_move_to_entry_by_monotonic(
1447                 JournalFile *f,
1448                 sd_id128_t boot_id,
1449                 uint64_t monotonic,
1450                 direction_t direction,
1451                 Object **ret,
1452                 uint64_t *offset) {
1453
1454         char t[9+32+1] = "_BOOT_ID=";
1455         Object *o;
1456         int r;
1457
1458         assert(f);
1459
1460         sd_id128_to_string(boot_id, t + 9);
1461         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1462         if (r < 0)
1463                 return r;
1464         if (r == 0)
1465                 return -ENOENT;
1466
1467         return generic_array_bisect_plus_one(f,
1468                                              le64toh(o->data.entry_offset),
1469                                              le64toh(o->data.entry_array_offset),
1470                                              le64toh(o->data.n_entries),
1471                                              monotonic,
1472                                              test_object_monotonic,
1473                                              direction,
1474                                              ret, offset, NULL);
1475 }
1476
1477 int journal_file_next_entry(
1478                 JournalFile *f,
1479                 Object *o, uint64_t p,
1480                 direction_t direction,
1481                 Object **ret, uint64_t *offset) {
1482
1483         uint64_t i, n;
1484         int r;
1485
1486         assert(f);
1487         assert(p > 0 || !o);
1488
1489         n = le64toh(f->header->n_entries);
1490         if (n <= 0)
1491                 return 0;
1492
1493         if (!o)
1494                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1495         else {
1496                 if (o->object.type != OBJECT_ENTRY)
1497                         return -EINVAL;
1498
1499                 r = generic_array_bisect(f,
1500                                          le64toh(f->header->entry_array_offset),
1501                                          le64toh(f->header->n_entries),
1502                                          p,
1503                                          test_object_offset,
1504                                          DIRECTION_DOWN,
1505                                          NULL, NULL,
1506                                          &i);
1507                 if (r <= 0)
1508                         return r;
1509
1510                 if (direction == DIRECTION_DOWN) {
1511                         if (i >= n - 1)
1512                                 return 0;
1513
1514                         i++;
1515                 } else {
1516                         if (i <= 0)
1517                                 return 0;
1518
1519                         i--;
1520                 }
1521         }
1522
1523         /* And jump to it */
1524         return generic_array_get(f,
1525                                  le64toh(f->header->entry_array_offset),
1526                                  i,
1527                                  ret, offset);
1528 }
1529
1530 int journal_file_skip_entry(
1531                 JournalFile *f,
1532                 Object *o, uint64_t p,
1533                 int64_t skip,
1534                 Object **ret, uint64_t *offset) {
1535
1536         uint64_t i, n;
1537         int r;
1538
1539         assert(f);
1540         assert(o);
1541         assert(p > 0);
1542
1543         if (o->object.type != OBJECT_ENTRY)
1544                 return -EINVAL;
1545
1546         r = generic_array_bisect(f,
1547                                  le64toh(f->header->entry_array_offset),
1548                                  le64toh(f->header->n_entries),
1549                                  p,
1550                                  test_object_offset,
1551                                  DIRECTION_DOWN,
1552                                  NULL, NULL,
1553                                  &i);
1554         if (r <= 0)
1555                 return r;
1556
1557         /* Calculate new index */
1558         if (skip < 0) {
1559                 if ((uint64_t) -skip >= i)
1560                         i = 0;
1561                 else
1562                         i = i - (uint64_t) -skip;
1563         } else
1564                 i  += (uint64_t) skip;
1565
1566         n = le64toh(f->header->n_entries);
1567         if (n <= 0)
1568                 return -EBADMSG;
1569
1570         if (i >= n)
1571                 i = n-1;
1572
1573         return generic_array_get(f,
1574                                  le64toh(f->header->entry_array_offset),
1575                                  i,
1576                                  ret, offset);
1577 }
1578
1579 int journal_file_next_entry_for_data(
1580                 JournalFile *f,
1581                 Object *o, uint64_t p,
1582                 uint64_t data_offset,
1583                 direction_t direction,
1584                 Object **ret, uint64_t *offset) {
1585
1586         uint64_t n, i;
1587         int r;
1588         Object *d;
1589
1590         assert(f);
1591         assert(p > 0 || !o);
1592
1593         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1594         if (r < 0)
1595                 return r;
1596
1597         n = le64toh(d->data.n_entries);
1598         if (n <= 0)
1599                 return n;
1600
1601         if (!o)
1602                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1603         else {
1604                 if (o->object.type != OBJECT_ENTRY)
1605                         return -EINVAL;
1606
1607                 r = generic_array_bisect_plus_one(f,
1608                                                   le64toh(d->data.entry_offset),
1609                                                   le64toh(d->data.entry_array_offset),
1610                                                   le64toh(d->data.n_entries),
1611                                                   p,
1612                                                   test_object_offset,
1613                                                   DIRECTION_DOWN,
1614                                                   NULL, NULL,
1615                                                   &i);
1616
1617                 if (r <= 0)
1618                         return r;
1619
1620                 if (direction == DIRECTION_DOWN) {
1621                         if (i >= n - 1)
1622                                 return 0;
1623
1624                         i++;
1625                 } else {
1626                         if (i <= 0)
1627                                 return 0;
1628
1629                         i--;
1630                 }
1631
1632         }
1633
1634         return generic_array_get_plus_one(f,
1635                                           le64toh(d->data.entry_offset),
1636                                           le64toh(d->data.entry_array_offset),
1637                                           i,
1638                                           ret, offset);
1639 }
1640
1641 int journal_file_move_to_entry_by_offset_for_data(
1642                 JournalFile *f,
1643                 uint64_t data_offset,
1644                 uint64_t p,
1645                 direction_t direction,
1646                 Object **ret, uint64_t *offset) {
1647
1648         int r;
1649         Object *d;
1650
1651         assert(f);
1652
1653         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1654         if (r < 0)
1655                 return r;
1656
1657         return generic_array_bisect_plus_one(f,
1658                                              le64toh(d->data.entry_offset),
1659                                              le64toh(d->data.entry_array_offset),
1660                                              le64toh(d->data.n_entries),
1661                                              p,
1662                                              test_object_offset,
1663                                              direction,
1664                                              ret, offset, NULL);
1665 }
1666
1667 int journal_file_move_to_entry_by_monotonic_for_data(
1668                 JournalFile *f,
1669                 uint64_t data_offset,
1670                 sd_id128_t boot_id,
1671                 uint64_t monotonic,
1672                 direction_t direction,
1673                 Object **ret, uint64_t *offset) {
1674
1675         char t[9+32+1] = "_BOOT_ID=";
1676         Object *o, *d;
1677         int r;
1678         uint64_t b, z;
1679
1680         assert(f);
1681
1682         /* First, seek by time */
1683         sd_id128_to_string(boot_id, t + 9);
1684         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1685         if (r < 0)
1686                 return r;
1687         if (r == 0)
1688                 return -ENOENT;
1689
1690         r = generic_array_bisect_plus_one(f,
1691                                           le64toh(o->data.entry_offset),
1692                                           le64toh(o->data.entry_array_offset),
1693                                           le64toh(o->data.n_entries),
1694                                           monotonic,
1695                                           test_object_monotonic,
1696                                           direction,
1697                                           NULL, &z, NULL);
1698         if (r <= 0)
1699                 return r;
1700
1701         /* And now, continue seeking until we find an entry that
1702          * exists in both bisection arrays */
1703
1704         for (;;) {
1705                 Object *qo;
1706                 uint64_t p, q;
1707
1708                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1709                 if (r < 0)
1710                         return r;
1711
1712                 r = generic_array_bisect_plus_one(f,
1713                                                   le64toh(d->data.entry_offset),
1714                                                   le64toh(d->data.entry_array_offset),
1715                                                   le64toh(d->data.n_entries),
1716                                                   z,
1717                                                   test_object_offset,
1718                                                   direction,
1719                                                   NULL, &p, NULL);
1720                 if (r <= 0)
1721                         return r;
1722
1723                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1724                 if (r < 0)
1725                         return r;
1726
1727                 r = generic_array_bisect_plus_one(f,
1728                                                   le64toh(o->data.entry_offset),
1729                                                   le64toh(o->data.entry_array_offset),
1730                                                   le64toh(o->data.n_entries),
1731                                                   p,
1732                                                   test_object_offset,
1733                                                   direction,
1734                                                   &qo, &q, NULL);
1735
1736                 if (r <= 0)
1737                         return r;
1738
1739                 if (p == q) {
1740                         if (ret)
1741                                 *ret = qo;
1742                         if (offset)
1743                                 *offset = q;
1744
1745                         return 1;
1746                 }
1747
1748                 z = q;
1749         }
1750
1751         return 0;
1752 }
1753
1754 int journal_file_move_to_entry_by_seqnum_for_data(
1755                 JournalFile *f,
1756                 uint64_t data_offset,
1757                 uint64_t seqnum,
1758                 direction_t direction,
1759                 Object **ret, uint64_t *offset) {
1760
1761         Object *d;
1762         int r;
1763
1764         assert(f);
1765
1766         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1767         if (r < 0)
1768                 return r;
1769
1770         return generic_array_bisect_plus_one(f,
1771                                              le64toh(d->data.entry_offset),
1772                                              le64toh(d->data.entry_array_offset),
1773                                              le64toh(d->data.n_entries),
1774                                              seqnum,
1775                                              test_object_seqnum,
1776                                              direction,
1777                                              ret, offset, NULL);
1778 }
1779
1780 int journal_file_move_to_entry_by_realtime_for_data(
1781                 JournalFile *f,
1782                 uint64_t data_offset,
1783                 uint64_t realtime,
1784                 direction_t direction,
1785                 Object **ret, uint64_t *offset) {
1786
1787         Object *d;
1788         int r;
1789
1790         assert(f);
1791
1792         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1793         if (r < 0)
1794                 return r;
1795
1796         return generic_array_bisect_plus_one(f,
1797                                              le64toh(d->data.entry_offset),
1798                                              le64toh(d->data.entry_array_offset),
1799                                              le64toh(d->data.n_entries),
1800                                              realtime,
1801                                              test_object_realtime,
1802                                              direction,
1803                                              ret, offset, NULL);
1804 }
1805
1806 void journal_file_dump(JournalFile *f) {
1807         Object *o;
1808         int r;
1809         uint64_t p;
1810
1811         assert(f);
1812
1813         journal_file_print_header(f);
1814
1815         p = le64toh(f->header->header_size);
1816         while (p != 0) {
1817                 r = journal_file_move_to_object(f, -1, p, &o);
1818                 if (r < 0)
1819                         goto fail;
1820
1821                 switch (o->object.type) {
1822
1823                 case OBJECT_UNUSED:
1824                         printf("Type: OBJECT_UNUSED\n");
1825                         break;
1826
1827                 case OBJECT_DATA:
1828                         printf("Type: OBJECT_DATA\n");
1829                         break;
1830
1831                 case OBJECT_ENTRY:
1832                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1833                                (unsigned long long) le64toh(o->entry.seqnum),
1834                                (unsigned long long) le64toh(o->entry.monotonic),
1835                                (unsigned long long) le64toh(o->entry.realtime));
1836                         break;
1837
1838                 case OBJECT_FIELD_HASH_TABLE:
1839                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1840                         break;
1841
1842                 case OBJECT_DATA_HASH_TABLE:
1843                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1844                         break;
1845
1846                 case OBJECT_ENTRY_ARRAY:
1847                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1848                         break;
1849
1850                 case OBJECT_TAG:
1851                         printf("Type: OBJECT_TAG %llu\n",
1852                                (unsigned long long) le64toh(o->tag.seqnum));
1853                         break;
1854                 }
1855
1856                 if (o->object.flags & OBJECT_COMPRESSED)
1857                         printf("Flags: COMPRESSED\n");
1858
1859                 if (p == le64toh(f->header->tail_object_offset))
1860                         p = 0;
1861                 else
1862                         p = p + ALIGN64(le64toh(o->object.size));
1863         }
1864
1865         return;
1866 fail:
1867         log_error("File corrupt");
1868 }
1869
1870 void journal_file_print_header(JournalFile *f) {
1871         char a[33], b[33], c[33];
1872         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1873
1874         assert(f);
1875
1876         printf("File Path: %s\n"
1877                "File ID: %s\n"
1878                "Machine ID: %s\n"
1879                "Boot ID: %s\n"
1880                "Sequential Number ID: %s\n"
1881                "State: %s\n"
1882                "Compatible Flags:%s%s\n"
1883                "Incompatible Flags:%s%s\n"
1884                "Header size: %llu\n"
1885                "Arena size: %llu\n"
1886                "Data Hash Table Size: %llu\n"
1887                "Field Hash Table Size: %llu\n"
1888                "Objects: %llu\n"
1889                "Entry Objects: %llu\n"
1890                "Rotate Suggested: %s\n"
1891                "Head Sequential Number: %llu\n"
1892                "Tail Sequential Number: %llu\n"
1893                "Head Realtime Timestamp: %s\n"
1894                "Tail Realtime Timestamp: %s\n",
1895                f->path,
1896                sd_id128_to_string(f->header->file_id, a),
1897                sd_id128_to_string(f->header->machine_id, b),
1898                sd_id128_to_string(f->header->boot_id, c),
1899                sd_id128_to_string(f->header->seqnum_id, c),
1900                f->header->state == STATE_OFFLINE ? "offline" :
1901                f->header->state == STATE_ONLINE ? "online" :
1902                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
1903                (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1904                (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1905                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1906                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1907                (unsigned long long) le64toh(f->header->header_size),
1908                (unsigned long long) le64toh(f->header->arena_size),
1909                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1910                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1911                (unsigned long long) le64toh(f->header->n_objects),
1912                (unsigned long long) le64toh(f->header->n_entries),
1913                yes_no(journal_file_rotate_suggested(f)),
1914                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1915                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1916                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1917                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
1918
1919         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1920                 printf("Data Objects: %llu\n"
1921                        "Data Hash Table Fill: %.1f%%\n",
1922                        (unsigned long long) le64toh(f->header->n_data),
1923                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1924
1925         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1926                 printf("Field Objects: %llu\n"
1927                        "Field Hash Table Fill: %.1f%%\n",
1928                        (unsigned long long) le64toh(f->header->n_fields),
1929                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1930 }
1931
1932 int journal_file_open(
1933                 const char *fname,
1934                 int flags,
1935                 mode_t mode,
1936                 bool compress,
1937                 bool seal,
1938                 JournalMetrics *metrics,
1939                 MMapCache *mmap_cache,
1940                 JournalFile *template,
1941                 JournalFile **ret) {
1942
1943         JournalFile *f;
1944         int r;
1945         bool newly_created = false;
1946
1947         assert(fname);
1948
1949         if ((flags & O_ACCMODE) != O_RDONLY &&
1950             (flags & O_ACCMODE) != O_RDWR)
1951                 return -EINVAL;
1952
1953         if (!endswith(fname, ".journal"))
1954                 return -EINVAL;
1955
1956         f = new0(JournalFile, 1);
1957         if (!f)
1958                 return -ENOMEM;
1959
1960         f->fd = -1;
1961         f->mode = mode;
1962
1963         f->flags = flags;
1964         f->prot = prot_from_flags(flags);
1965         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1966         f->compress = compress;
1967         f->seal = seal;
1968
1969         if (mmap_cache)
1970                 f->mmap = mmap_cache_ref(mmap_cache);
1971         else {
1972                 /* One context for each type, plus the zeroth catchall
1973                  * context. One fd for the file plus one for each type
1974                  * (which we need during verification */
1975                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
1976                 if (!f->mmap) {
1977                         r = -ENOMEM;
1978                         goto fail;
1979                 }
1980         }
1981
1982         f->path = strdup(fname);
1983         if (!f->path) {
1984                 r = -ENOMEM;
1985                 goto fail;
1986         }
1987
1988         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1989         if (f->fd < 0) {
1990                 r = -errno;
1991                 goto fail;
1992         }
1993
1994         if (fstat(f->fd, &f->last_stat) < 0) {
1995                 r = -errno;
1996                 goto fail;
1997         }
1998
1999         if (f->last_stat.st_size == 0 && f->writable) {
2000                 newly_created = true;
2001
2002                 /* Try to load the FSPRG state, and if we can't, then
2003                  * just don't do sealing */
2004                 r = journal_file_fss_load(f);
2005                 if (r < 0)
2006                         f->seal = false;
2007
2008                 r = journal_file_init_header(f, template);
2009                 if (r < 0)
2010                         goto fail;
2011
2012                 if (fstat(f->fd, &f->last_stat) < 0) {
2013                         r = -errno;
2014                         goto fail;
2015                 }
2016         }
2017
2018         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2019                 r = -EIO;
2020                 goto fail;
2021         }
2022
2023         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2024         if (f->header == MAP_FAILED) {
2025                 f->header = NULL;
2026                 r = -errno;
2027                 goto fail;
2028         }
2029
2030         if (!newly_created) {
2031                 r = journal_file_verify_header(f);
2032                 if (r < 0)
2033                         goto fail;
2034         }
2035
2036         if (!newly_created && f->writable) {
2037                 r = journal_file_fss_load(f);
2038                 if (r < 0)
2039                         goto fail;
2040         }
2041
2042         if (f->writable) {
2043                 if (metrics) {
2044                         journal_default_metrics(metrics, f->fd);
2045                         f->metrics = *metrics;
2046                 } else if (template)
2047                         f->metrics = template->metrics;
2048
2049                 r = journal_file_refresh_header(f);
2050                 if (r < 0)
2051                         goto fail;
2052         }
2053
2054         r = journal_file_hmac_setup(f);
2055         if (r < 0)
2056                 goto fail;
2057
2058         if (newly_created) {
2059                 r = journal_file_setup_field_hash_table(f);
2060                 if (r < 0)
2061                         goto fail;
2062
2063                 r = journal_file_setup_data_hash_table(f);
2064                 if (r < 0)
2065                         goto fail;
2066
2067                 r = journal_file_append_first_tag(f);
2068                 if (r < 0)
2069                         goto fail;
2070         }
2071
2072         r = journal_file_map_field_hash_table(f);
2073         if (r < 0)
2074                 goto fail;
2075
2076         r = journal_file_map_data_hash_table(f);
2077         if (r < 0)
2078                 goto fail;
2079
2080         if (ret)
2081                 *ret = f;
2082
2083         return 0;
2084
2085 fail:
2086         journal_file_close(f);
2087
2088         return r;
2089 }
2090
2091 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2092         char *p;
2093         size_t l;
2094         JournalFile *old_file, *new_file = NULL;
2095         int r;
2096
2097         assert(f);
2098         assert(*f);
2099
2100         old_file = *f;
2101
2102         if (!old_file->writable)
2103                 return -EINVAL;
2104
2105         if (!endswith(old_file->path, ".journal"))
2106                 return -EINVAL;
2107
2108         l = strlen(old_file->path);
2109
2110         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2111         if (!p)
2112                 return -ENOMEM;
2113
2114         memcpy(p, old_file->path, l - 8);
2115         p[l-8] = '@';
2116         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2117         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2118                  "-%016llx-%016llx.journal",
2119                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2120                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2121
2122         r = rename(old_file->path, p);
2123         free(p);
2124
2125         if (r < 0)
2126                 return -errno;
2127
2128         old_file->header->state = STATE_ARCHIVED;
2129
2130         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2131         journal_file_close(old_file);
2132
2133         *f = new_file;
2134         return r;
2135 }
2136
2137 int journal_file_open_reliably(
2138                 const char *fname,
2139                 int flags,
2140                 mode_t mode,
2141                 bool compress,
2142                 bool seal,
2143                 JournalMetrics *metrics,
2144                 MMapCache *mmap_cache,
2145                 JournalFile *template,
2146                 JournalFile **ret) {
2147
2148         int r;
2149         size_t l;
2150         char *p;
2151
2152         r = journal_file_open(fname, flags, mode, compress, seal,
2153                               metrics, mmap_cache, template, ret);
2154         if (r != -EBADMSG && /* corrupted */
2155             r != -ENODATA && /* truncated */
2156             r != -EHOSTDOWN && /* other machine */
2157             r != -EPROTONOSUPPORT && /* incompatible feature */
2158             r != -EBUSY && /* unclean shutdown */
2159             r != -ESHUTDOWN /* already archived */)
2160                 return r;
2161
2162         if ((flags & O_ACCMODE) == O_RDONLY)
2163                 return r;
2164
2165         if (!(flags & O_CREAT))
2166                 return r;
2167
2168         if (!endswith(fname, ".journal"))
2169                 return r;
2170
2171         /* The file is corrupted. Rotate it away and try it again (but only once) */
2172
2173         l = strlen(fname);
2174         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2175                      (int) (l-8), fname,
2176                      (unsigned long long) now(CLOCK_REALTIME),
2177                      random_ull()) < 0)
2178                 return -ENOMEM;
2179
2180         r = rename(fname, p);
2181         free(p);
2182         if (r < 0)
2183                 return -errno;
2184
2185         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2186
2187         return journal_file_open(fname, flags, mode, compress, seal,
2188                                  metrics, mmap_cache, template, ret);
2189 }
2190
2191
2192 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2193         uint64_t i, n;
2194         uint64_t q, xor_hash = 0;
2195         int r;
2196         EntryItem *items;
2197         dual_timestamp ts;
2198
2199         assert(from);
2200         assert(to);
2201         assert(o);
2202         assert(p);
2203
2204         if (!to->writable)
2205                 return -EPERM;
2206
2207         ts.monotonic = le64toh(o->entry.monotonic);
2208         ts.realtime = le64toh(o->entry.realtime);
2209
2210         if (to->tail_entry_monotonic_valid &&
2211             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2212                 return -EINVAL;
2213
2214         n = journal_file_entry_n_items(o);
2215         items = alloca(sizeof(EntryItem) * n);
2216
2217         for (i = 0; i < n; i++) {
2218                 uint64_t l, h;
2219                 le64_t le_hash;
2220                 size_t t;
2221                 void *data;
2222                 Object *u;
2223
2224                 q = le64toh(o->entry.items[i].object_offset);
2225                 le_hash = o->entry.items[i].hash;
2226
2227                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2228                 if (r < 0)
2229                         return r;
2230
2231                 if (le_hash != o->data.hash)
2232                         return -EBADMSG;
2233
2234                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2235                 t = (size_t) l;
2236
2237                 /* We hit the limit on 32bit machines */
2238                 if ((uint64_t) t != l)
2239                         return -E2BIG;
2240
2241                 if (o->object.flags & OBJECT_COMPRESSED) {
2242 #ifdef HAVE_XZ
2243                         uint64_t rsize;
2244
2245                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2246                                 return -EBADMSG;
2247
2248                         data = from->compress_buffer;
2249                         l = rsize;
2250 #else
2251                         return -EPROTONOSUPPORT;
2252 #endif
2253                 } else
2254                         data = o->data.payload;
2255
2256                 r = journal_file_append_data(to, data, l, &u, &h);
2257                 if (r < 0)
2258                         return r;
2259
2260                 xor_hash ^= le64toh(u->data.hash);
2261                 items[i].object_offset = htole64(h);
2262                 items[i].hash = u->data.hash;
2263
2264                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2265                 if (r < 0)
2266                         return r;
2267         }
2268
2269         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2270 }
2271
2272 void journal_default_metrics(JournalMetrics *m, int fd) {
2273         uint64_t fs_size = 0;
2274         struct statvfs ss;
2275         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2276
2277         assert(m);
2278         assert(fd >= 0);
2279
2280         if (fstatvfs(fd, &ss) >= 0)
2281                 fs_size = ss.f_frsize * ss.f_blocks;
2282
2283         if (m->max_use == (uint64_t) -1) {
2284
2285                 if (fs_size > 0) {
2286                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2287
2288                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2289                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2290
2291                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2292                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2293                 } else
2294                         m->max_use = DEFAULT_MAX_USE_LOWER;
2295         } else {
2296                 m->max_use = PAGE_ALIGN(m->max_use);
2297
2298                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2299                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2300         }
2301
2302         if (m->max_size == (uint64_t) -1) {
2303                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2304
2305                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2306                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2307         } else
2308                 m->max_size = PAGE_ALIGN(m->max_size);
2309
2310         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2311                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2312
2313         if (m->max_size*2 > m->max_use)
2314                 m->max_use = m->max_size*2;
2315
2316         if (m->min_size == (uint64_t) -1)
2317                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2318         else {
2319                 m->min_size = PAGE_ALIGN(m->min_size);
2320
2321                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2322                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2323
2324                 if (m->min_size > m->max_size)
2325                         m->max_size = m->min_size;
2326         }
2327
2328         if (m->keep_free == (uint64_t) -1) {
2329
2330                 if (fs_size > 0) {
2331                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2332
2333                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2334                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2335
2336                 } else
2337                         m->keep_free = DEFAULT_KEEP_FREE;
2338         }
2339
2340         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2341                  format_bytes(a, sizeof(a), m->max_use),
2342                  format_bytes(b, sizeof(b), m->max_size),
2343                  format_bytes(c, sizeof(c), m->min_size),
2344                  format_bytes(d, sizeof(d), m->keep_free));
2345 }
2346
2347 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2348         assert(f);
2349         assert(from || to);
2350
2351         if (from) {
2352                 if (f->header->head_entry_realtime == 0)
2353                         return -ENOENT;
2354
2355                 *from = le64toh(f->header->head_entry_realtime);
2356         }
2357
2358         if (to) {
2359                 if (f->header->tail_entry_realtime == 0)
2360                         return -ENOENT;
2361
2362                 *to = le64toh(f->header->tail_entry_realtime);
2363         }
2364
2365         return 1;
2366 }
2367
2368 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2369         char t[9+32+1] = "_BOOT_ID=";
2370         Object *o;
2371         uint64_t p;
2372         int r;
2373
2374         assert(f);
2375         assert(from || to);
2376
2377         sd_id128_to_string(boot_id, t + 9);
2378
2379         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2380         if (r <= 0)
2381                 return r;
2382
2383         if (le64toh(o->data.n_entries) <= 0)
2384                 return 0;
2385
2386         if (from) {
2387                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2388                 if (r < 0)
2389                         return r;
2390
2391                 *from = le64toh(o->entry.monotonic);
2392         }
2393
2394         if (to) {
2395                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2396                 if (r < 0)
2397                         return r;
2398
2399                 r = generic_array_get_plus_one(f,
2400                                                le64toh(o->data.entry_offset),
2401                                                le64toh(o->data.entry_array_offset),
2402                                                le64toh(o->data.n_entries)-1,
2403                                                &o, NULL);
2404                 if (r <= 0)
2405                         return r;
2406
2407                 *to = le64toh(o->entry.monotonic);
2408         }
2409
2410         return 1;
2411 }
2412
2413 bool journal_file_rotate_suggested(JournalFile *f) {
2414         assert(f);
2415
2416         /* If we gained new header fields we gained new features,
2417          * hence suggest a rotation */
2418         if (le64toh(f->header->header_size) < sizeof(Header)) {
2419                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2420                 return true;
2421         }
2422
2423         /* Let's check if the hash tables grew over a certain fill
2424          * level (75%, borrowing this value from Java's hash table
2425          * implementation), and if so suggest a rotation. To calculate
2426          * the fill level we need the n_data field, which only exists
2427          * in newer versions. */
2428
2429         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2430                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2431                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2432                                   f->path,
2433                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2434                                   (unsigned long long) le64toh(f->header->n_data),
2435                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2436                                   (unsigned long long) (f->last_stat.st_size),
2437                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2438                         return true;
2439                 }
2440
2441         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2442                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2443                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2444                                   f->path,
2445                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2446                                   (unsigned long long) le64toh(f->header->n_fields),
2447                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2448                         return true;
2449                 }
2450
2451         return false;
2452 }