chiark / gitweb /
4d7a6ff17d4b1ab6ec8ac018985eb314b48507bd
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Minimum size in bytes of the data hash table (2047 buckets); a larger
 * table is used when the configured maximum file size calls for it */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
/* Size in bytes of the field hash table (333 buckets) */
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for
 * compression (if compression is enabled for the file) */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the file system
 * size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence a valid header is at least large enough to reach it */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->seal)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fss_file)
100                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
101         else if (f->fsprg_state)
102                 free(f->fsprg_state);
103
104         free(f->fsprg_seed);
105
106         if (f->hmac)
107                 gcry_md_close(f->hmac);
108 #endif
109
110         free(f);
111 }
112
113 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
114         Header h;
115         ssize_t k;
116         int r;
117
118         assert(f);
119
120         zero(h);
121         memcpy(h.signature, HEADER_SIGNATURE, 8);
122         h.header_size = htole64(ALIGN64(sizeof(h)));
123
124         h.incompatible_flags =
125                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
126
127         h.compatible_flags =
128                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
129
130         r = sd_id128_randomize(&h.file_id);
131         if (r < 0)
132                 return r;
133
134         if (template) {
135                 h.seqnum_id = template->header->seqnum_id;
136                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
137         } else
138                 h.seqnum_id = h.file_id;
139
140         k = pwrite(f->fd, &h, sizeof(h), 0);
141         if (k < 0)
142                 return -errno;
143
144         if (k != sizeof(h))
145                 return -EIO;
146
147         return 0;
148 }
149
150 static int journal_file_refresh_header(JournalFile *f) {
151         int r;
152         sd_id128_t boot_id;
153
154         assert(f);
155
156         r = sd_id128_get_machine(&f->header->machine_id);
157         if (r < 0)
158                 return r;
159
160         r = sd_id128_get_boot(&boot_id);
161         if (r < 0)
162                 return r;
163
164         if (sd_id128_equal(boot_id, f->header->boot_id))
165                 f->tail_entry_monotonic_valid = true;
166
167         f->header->boot_id = boot_id;
168
169         f->header->state = STATE_ONLINE;
170
171         /* Sync the online state to disk */
172         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
173         fdatasync(f->fd);
174
175         return 0;
176 }
177
178 static int journal_file_verify_header(JournalFile *f) {
179         assert(f);
180
181         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
182                 return -EBADMSG;
183
184         /* In both read and write mode we refuse to open files with
185          * incompatible flags we don't know */
186 #ifdef HAVE_XZ
187         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
188                 return -EPROTONOSUPPORT;
189 #else
190         if (f->header->incompatible_flags != 0)
191                 return -EPROTONOSUPPORT;
192 #endif
193
194         /* When open for writing we refuse to open files with
195          * compatible flags, too */
196         if (f->writable) {
197 #ifdef HAVE_GCRYPT
198                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
199                         return -EPROTONOSUPPORT;
200 #else
201                 if (f->header->compatible_flags != 0)
202                         return -EPROTONOSUPPORT;
203 #endif
204         }
205
206         /* The first addition was n_data, so check that we are at least this large */
207         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
208                 return -EBADMSG;
209
210         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED) &&
211                 !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
212                 return -EBADMSG;
213
214         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
215                 return -ENODATA;
216
217         if (f->writable) {
218                 uint8_t state;
219                 sd_id128_t machine_id;
220                 int r;
221
222                 r = sd_id128_get_machine(&machine_id);
223                 if (r < 0)
224                         return r;
225
226                 if (!sd_id128_equal(machine_id, f->header->machine_id))
227                         return -EHOSTDOWN;
228
229                 state = f->header->state;
230
231                 if (state == STATE_ONLINE) {
232                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
233                         return -EBUSY;
234                 } else if (state == STATE_ARCHIVED)
235                         return -ESHUTDOWN;
236                 else if (state != STATE_OFFLINE) {
237                         log_debug("Journal file %s has unknown state %u.", f->path, state);
238                         return -EBUSY;
239                 }
240         }
241
242         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
243         f->seal = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_SEALED);
244
245         return 0;
246 }
247
248 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
249         uint64_t old_size, new_size;
250         int r;
251
252         assert(f);
253
254         /* We assume that this file is not sparse, and we know that
255          * for sure, since we always call posix_fallocate()
256          * ourselves */
257
258         old_size =
259                 le64toh(f->header->header_size) +
260                 le64toh(f->header->arena_size);
261
262         new_size = PAGE_ALIGN(offset + size);
263         if (new_size < le64toh(f->header->header_size))
264                 new_size = le64toh(f->header->header_size);
265
266         if (new_size <= old_size)
267                 return 0;
268
269         if (f->metrics.max_size > 0 &&
270             new_size > f->metrics.max_size)
271                 return -E2BIG;
272
273         if (new_size > f->metrics.min_size &&
274             f->metrics.keep_free > 0) {
275                 struct statvfs svfs;
276
277                 if (fstatvfs(f->fd, &svfs) >= 0) {
278                         uint64_t available;
279
280                         available = svfs.f_bfree * svfs.f_bsize;
281
282                         if (available >= f->metrics.keep_free)
283                                 available -= f->metrics.keep_free;
284                         else
285                                 available = 0;
286
287                         if (new_size - old_size > available)
288                                 return -E2BIG;
289                 }
290         }
291
292         /* Note that the glibc fallocate() fallback is very
293            inefficient, hence we try to minimize the allocation area
294            as we can. */
295         r = posix_fallocate(f->fd, old_size, new_size - old_size);
296         if (r != 0)
297                 return -r;
298
299         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
300
301         if (fstat(f->fd, &f->last_stat) < 0)
302                 return -errno;
303
304         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
305
306         return 0;
307 }
308
309 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
310         assert(f);
311         assert(ret);
312
313         /* Avoid SIGBUS on invalid accesses */
314         if (offset + size > (uint64_t) f->last_stat.st_size) {
315                 /* Hmm, out of range? Let's refresh the fstat() data
316                  * first, before we trust that check. */
317
318                 if (fstat(f->fd, &f->last_stat) < 0 ||
319                     offset + size > (uint64_t) f->last_stat.st_size)
320                         return -EADDRNOTAVAIL;
321         }
322
323         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
324 }
325
326 static uint64_t minimum_header_size(Object *o) {
327
328         static uint64_t table[] = {
329                 [OBJECT_DATA] = sizeof(DataObject),
330                 [OBJECT_FIELD] = sizeof(FieldObject),
331                 [OBJECT_ENTRY] = sizeof(EntryObject),
332                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
333                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
334                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
335                 [OBJECT_TAG] = sizeof(TagObject),
336         };
337
338         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
339                 return sizeof(ObjectHeader);
340
341         return table[o->object.type];
342 }
343
344 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
345         int r;
346         void *t;
347         Object *o;
348         uint64_t s;
349         unsigned context;
350
351         assert(f);
352         assert(ret);
353
354         /* One context for each type, plus one catch-all for the rest */
355         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
356
357         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
358         if (r < 0)
359                 return r;
360
361         o = (Object*) t;
362         s = le64toh(o->object.size);
363
364         if (s < sizeof(ObjectHeader))
365                 return -EBADMSG;
366
367         if (o->object.type <= OBJECT_UNUSED)
368                 return -EBADMSG;
369
370         if (s < minimum_header_size(o))
371                 return -EBADMSG;
372
373         if (type >= 0 && o->object.type != type)
374                 return -EBADMSG;
375
376         if (s > sizeof(ObjectHeader)) {
377                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
378                 if (r < 0)
379                         return r;
380
381                 o = (Object*) t;
382         }
383
384         *ret = o;
385         return 0;
386 }
387
388 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
389         uint64_t r;
390
391         assert(f);
392
393         r = le64toh(f->header->tail_entry_seqnum) + 1;
394
395         if (seqnum) {
396                 /* If an external seqnum counter was passed, we update
397                  * both the local and the external one, and set it to
398                  * the maximum of both */
399
400                 if (*seqnum + 1 > r)
401                         r = *seqnum + 1;
402
403                 *seqnum = r;
404         }
405
406         f->header->tail_entry_seqnum = htole64(r);
407
408         if (f->header->head_entry_seqnum == 0)
409                 f->header->head_entry_seqnum = htole64(r);
410
411         return r;
412 }
413
414 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
415         int r;
416         uint64_t p;
417         Object *tail, *o;
418         void *t;
419
420         assert(f);
421         assert(type > 0 && type < _OBJECT_TYPE_MAX);
422         assert(size >= sizeof(ObjectHeader));
423         assert(offset);
424         assert(ret);
425
426         p = le64toh(f->header->tail_object_offset);
427         if (p == 0)
428                 p = le64toh(f->header->header_size);
429         else {
430                 r = journal_file_move_to_object(f, -1, p, &tail);
431                 if (r < 0)
432                         return r;
433
434                 p += ALIGN64(le64toh(tail->object.size));
435         }
436
437         r = journal_file_allocate(f, p, size);
438         if (r < 0)
439                 return r;
440
441         r = journal_file_move_to(f, type, p, size, &t);
442         if (r < 0)
443                 return r;
444
445         o = (Object*) t;
446
447         zero(o->object);
448         o->object.type = type;
449         o->object.size = htole64(size);
450
451         f->header->tail_object_offset = htole64(p);
452         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
453
454         *ret = o;
455         *offset = p;
456
457         return 0;
458 }
459
460 static int journal_file_setup_data_hash_table(JournalFile *f) {
461         uint64_t s, p;
462         Object *o;
463         int r;
464
465         assert(f);
466
467         /* We estimate that we need 1 hash table entry per 768 of
468            journal file and we want to make sure we never get beyond
469            75% fill level. Calculate the hash table size for the
470            maximum file size based on these metrics. */
471
472         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
473         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
474                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
475
476         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
477
478         r = journal_file_append_object(f,
479                                        OBJECT_DATA_HASH_TABLE,
480                                        offsetof(Object, hash_table.items) + s,
481                                        &o, &p);
482         if (r < 0)
483                 return r;
484
485         memset(o->hash_table.items, 0, s);
486
487         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
488         f->header->data_hash_table_size = htole64(s);
489
490         return 0;
491 }
492
493 static int journal_file_setup_field_hash_table(JournalFile *f) {
494         uint64_t s, p;
495         Object *o;
496         int r;
497
498         assert(f);
499
500         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
501         r = journal_file_append_object(f,
502                                        OBJECT_FIELD_HASH_TABLE,
503                                        offsetof(Object, hash_table.items) + s,
504                                        &o, &p);
505         if (r < 0)
506                 return r;
507
508         memset(o->hash_table.items, 0, s);
509
510         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
511         f->header->field_hash_table_size = htole64(s);
512
513         return 0;
514 }
515
516 static int journal_file_map_data_hash_table(JournalFile *f) {
517         uint64_t s, p;
518         void *t;
519         int r;
520
521         assert(f);
522
523         p = le64toh(f->header->data_hash_table_offset);
524         s = le64toh(f->header->data_hash_table_size);
525
526         r = journal_file_move_to(f,
527                                  OBJECT_DATA_HASH_TABLE,
528                                  p, s,
529                                  &t);
530         if (r < 0)
531                 return r;
532
533         f->data_hash_table = t;
534         return 0;
535 }
536
537 static int journal_file_map_field_hash_table(JournalFile *f) {
538         uint64_t s, p;
539         void *t;
540         int r;
541
542         assert(f);
543
544         p = le64toh(f->header->field_hash_table_offset);
545         s = le64toh(f->header->field_hash_table_size);
546
547         r = journal_file_move_to(f,
548                                  OBJECT_FIELD_HASH_TABLE,
549                                  p, s,
550                                  &t);
551         if (r < 0)
552                 return r;
553
554         f->field_hash_table = t;
555         return 0;
556 }
557
558 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
559         uint64_t p, h;
560         int r;
561
562         assert(f);
563         assert(o);
564         assert(offset > 0);
565         assert(o->object.type == OBJECT_DATA);
566
567         /* This might alter the window we are looking at */
568
569         o->data.next_hash_offset = o->data.next_field_offset = 0;
570         o->data.entry_offset = o->data.entry_array_offset = 0;
571         o->data.n_entries = 0;
572
573         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
574         p = le64toh(f->data_hash_table[h].tail_hash_offset);
575         if (p == 0) {
576                 /* Only entry in the hash table is easy */
577                 f->data_hash_table[h].head_hash_offset = htole64(offset);
578         } else {
579                 /* Move back to the previous data object, to patch in
580                  * pointer */
581
582                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
583                 if (r < 0)
584                         return r;
585
586                 o->data.next_hash_offset = htole64(offset);
587         }
588
589         f->data_hash_table[h].tail_hash_offset = htole64(offset);
590
591         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
592                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
593
594         return 0;
595 }
596
597 int journal_file_find_data_object_with_hash(
598                 JournalFile *f,
599                 const void *data, uint64_t size, uint64_t hash,
600                 Object **ret, uint64_t *offset) {
601
602         uint64_t p, osize, h;
603         int r;
604
605         assert(f);
606         assert(data || size == 0);
607
608         osize = offsetof(Object, data.payload) + size;
609
610         if (f->header->data_hash_table_size == 0)
611                 return -EBADMSG;
612
613         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
614         p = le64toh(f->data_hash_table[h].head_hash_offset);
615
616         while (p > 0) {
617                 Object *o;
618
619                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
620                 if (r < 0)
621                         return r;
622
623                 if (le64toh(o->data.hash) != hash)
624                         goto next;
625
626                 if (o->object.flags & OBJECT_COMPRESSED) {
627 #ifdef HAVE_XZ
628                         uint64_t l, rsize;
629
630                         l = le64toh(o->object.size);
631                         if (l <= offsetof(Object, data.payload))
632                                 return -EBADMSG;
633
634                         l -= offsetof(Object, data.payload);
635
636                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
637                                 return -EBADMSG;
638
639                         if (rsize == size &&
640                             memcmp(f->compress_buffer, data, size) == 0) {
641
642                                 if (ret)
643                                         *ret = o;
644
645                                 if (offset)
646                                         *offset = p;
647
648                                 return 1;
649                         }
650 #else
651                         return -EPROTONOSUPPORT;
652 #endif
653
654                 } else if (le64toh(o->object.size) == osize &&
655                            memcmp(o->data.payload, data, size) == 0) {
656
657                         if (ret)
658                                 *ret = o;
659
660                         if (offset)
661                                 *offset = p;
662
663                         return 1;
664                 }
665
666         next:
667                 p = le64toh(o->data.next_hash_offset);
668         }
669
670         return 0;
671 }
672
673 int journal_file_find_data_object(
674                 JournalFile *f,
675                 const void *data, uint64_t size,
676                 Object **ret, uint64_t *offset) {
677
678         uint64_t hash;
679
680         assert(f);
681         assert(data || size == 0);
682
683         hash = hash64(data, size);
684
685         return journal_file_find_data_object_with_hash(f,
686                                                        data, size, hash,
687                                                        ret, offset);
688 }
689
/* Makes sure a data object with the given payload exists in the file.
 *
 * If an object with identical payload is already present it is reused,
 * otherwise a new data object is appended (compressed if compression is
 * enabled, the payload is large enough and compression succeeds),
 * linked into the data hash table and fed into the HMAC.
 *
 * On success *ret and *offset (if non-NULL) receive the object and its
 * file offset. Returns 0 on success, negative errno-style value on
 * failure. */
static int journal_file_append_data(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        uint64_t hash, p;
        uint64_t osize;
        Object *o;
        int r;
        bool compressed = false;

        assert(f);
        assert(data || size == 0);

        hash = hash64(data, size);

        /* Deduplicate: reuse an existing object with the same payload */
        r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
        if (r < 0)
                return r;
        else if (r > 0) {

                if (ret)
                        *ret = o;

                if (offset)
                        *offset = p;

                return 0;
        }

        /* Space is always reserved for the uncompressed payload */
        osize = offsetof(Object, data.payload) + size;
        r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
        if (r < 0)
                return r;

        o->data.hash = htole64(hash);

#ifdef HAVE_XZ
        if (f->compress &&
            size >= COMPRESSION_SIZE_THRESHOLD) {
                uint64_t rsize;

                compressed = compress_blob(data, size, o->data.payload, &rsize);

                if (compressed) {
                        /* Shrink the recorded object size to what the
                         * compressed payload actually needs */
                        o->object.size = htole64(offsetof(Object, data.payload) + rsize);
                        o->object.flags |= OBJECT_COMPRESSED;

                        log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
                }
        }
#endif

        /* Store the payload verbatim if it was not compressed; data may
         * be NULL when size is 0 */
        if (!compressed && size > 0)
                memcpy(o->data.payload, data, size);

        r = journal_file_link_data(f, o, p, hash);
        if (r < 0)
                return r;

        r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
        if (r < 0)
                return r;

        /* The linking might have altered the window, so let's
         * refresh our pointer */
        r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = p;

        return 0;
}
768
769 uint64_t journal_file_entry_n_items(Object *o) {
770         assert(o);
771         assert(o->object.type == OBJECT_ENTRY);
772
773         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
774 }
775
776 uint64_t journal_file_entry_array_n_items(Object *o) {
777         assert(o);
778         assert(o->object.type == OBJECT_ENTRY_ARRAY);
779
780         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
781 }
782
/* Stores the entry offset p in slot number *idx of the chain of entry
 * array objects that starts at *first, then increments *idx.
 *
 * The chain is walked until the array containing the slot is found. If
 * the chain is too short, a new entry array object is appended to the
 * file and linked to the end of the chain (or installed as *first if
 * the chain was empty).
 *
 * first and idx are little-endian fields that are updated in place.
 * Returns 0 on success, negative errno-style value on failure. */
static int link_entry_into_array(JournalFile *f,
                                 le64_t *first,
                                 le64_t *idx,
                                 uint64_t p) {
        int r;
        uint64_t n = 0, ap = 0, q, i, a, hidx;
        Object *o;

        assert(f);
        assert(first);
        assert(idx);
        assert(p > 0);

        /* a: offset of the array currently looked at, ap: offset of the
         * previous one; i: slot index relative to the current array,
         * hidx: slot index relative to the whole chain */
        a = le64toh(*first);
        i = hidx = le64toh(*idx);
        while (a > 0) {

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
                if (r < 0)
                        return r;

                n = journal_file_entry_array_n_items(o);
                if (i < n) {
                        /* The slot lives in this array */
                        o->entry_array.items[i] = htole64(p);
                        *idx = htole64(hidx + 1);
                        return 0;
                }

                i -= n;
                ap = a;
                a = le64toh(o->entry_array.next_entry_array_offset);
        }

        /* No room left in the chain. Pick the size of the new array
         * based on the overall index or on the size of the last array
         * visited (n is 0 if the chain was empty), doubling either, but
         * never using fewer than 4 slots. */
        if (hidx > n)
                n = (hidx+1) * 2;
        else
                n = n * 2;

        if (n < 4)
                n = 4;

        r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
                                       offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
                                       &o, &q);
        if (r < 0)
                return r;

        r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
        if (r < 0)
                return r;

        /* i is what is left of the index after skipping all existing
         * arrays, i.e. the slot within the new array */
        o->entry_array.items[i] = htole64(p);

        if (ap == 0)
                /* The chain was empty, the new array becomes its head */
                *first = htole64(q);
        else {
                /* Hook the new array up behind the previous last one */
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
                if (r < 0)
                        return r;

                o->entry_array.next_entry_array_offset = htole64(q);
        }

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);

        *idx = htole64(hidx + 1);

        return 0;
}
853
854 static int link_entry_into_array_plus_one(JournalFile *f,
855                                           le64_t *extra,
856                                           le64_t *first,
857                                           le64_t *idx,
858                                           uint64_t p) {
859
860         int r;
861
862         assert(f);
863         assert(extra);
864         assert(first);
865         assert(idx);
866         assert(p > 0);
867
868         if (*idx == 0)
869                 *extra = htole64(p);
870         else {
871                 le64_t i;
872
873                 i = htole64(le64toh(*idx) - 1);
874                 r = link_entry_into_array(f, first, &i, p);
875                 if (r < 0)
876                         return r;
877         }
878
879         *idx = htole64(le64toh(*idx) + 1);
880         return 0;
881 }
882
883 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
884         uint64_t p;
885         int r;
886         assert(f);
887         assert(o);
888         assert(offset > 0);
889
890         p = le64toh(o->entry.items[i].object_offset);
891         if (p == 0)
892                 return -EINVAL;
893
894         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
895         if (r < 0)
896                 return r;
897
898         return link_entry_into_array_plus_one(f,
899                                               &o->data.entry_offset,
900                                               &o->data.entry_array_offset,
901                                               &o->data.n_entries,
902                                               offset);
903 }
904
905 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
906         uint64_t n, i;
907         int r;
908
909         assert(f);
910         assert(o);
911         assert(offset > 0);
912         assert(o->object.type == OBJECT_ENTRY);
913
914         __sync_synchronize();
915
916         /* Link up the entry itself */
917         r = link_entry_into_array(f,
918                                   &f->header->entry_array_offset,
919                                   &f->header->n_entries,
920                                   offset);
921         if (r < 0)
922                 return r;
923
924         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
925
926         if (f->header->head_entry_realtime == 0)
927                 f->header->head_entry_realtime = o->entry.realtime;
928
929         f->header->tail_entry_realtime = o->entry.realtime;
930         f->header->tail_entry_monotonic = o->entry.monotonic;
931
932         f->tail_entry_monotonic_valid = true;
933
934         /* Link up the items */
935         n = journal_file_entry_n_items(o);
936         for (i = 0; i < n; i++) {
937                 r = journal_file_link_entry_item(f, o, offset, i);
938                 if (r < 0)
939                         return r;
940         }
941
942         return 0;
943 }
944
945 static int journal_file_append_entry_internal(
946                 JournalFile *f,
947                 const dual_timestamp *ts,
948                 uint64_t xor_hash,
949                 const EntryItem items[], unsigned n_items,
950                 uint64_t *seqnum,
951                 Object **ret, uint64_t *offset) {
952         uint64_t np;
953         uint64_t osize;
954         Object *o;
955         int r;
956
957         assert(f);
958         assert(items || n_items == 0);
959         assert(ts);
960
961         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
962
963         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
964         if (r < 0)
965                 return r;
966
967         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
968         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
969         o->entry.realtime = htole64(ts->realtime);
970         o->entry.monotonic = htole64(ts->monotonic);
971         o->entry.xor_hash = htole64(xor_hash);
972         o->entry.boot_id = f->header->boot_id;
973
974         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
975         if (r < 0)
976                 return r;
977
978         r = journal_file_link_entry(f, o, np);
979         if (r < 0)
980                 return r;
981
982         if (ret)
983                 *ret = o;
984
985         if (offset)
986                 *offset = np;
987
988         return 0;
989 }
990
991 void journal_file_post_change(JournalFile *f) {
992         assert(f);
993
994         /* inotify() does not receive IN_MODIFY events from file
995          * accesses done via mmap(). After each access we hence
996          * trigger IN_MODIFY by truncating the journal file to its
997          * current size which triggers IN_MODIFY. */
998
999         __sync_synchronize();
1000
1001         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1002                 log_error("Failed to to truncate file to its own size: %m");
1003 }
1004
1005 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1006         unsigned i;
1007         EntryItem *items;
1008         int r;
1009         uint64_t xor_hash = 0;
1010         struct dual_timestamp _ts;
1011
1012         assert(f);
1013         assert(iovec || n_iovec == 0);
1014
1015         if (!f->writable)
1016                 return -EPERM;
1017
1018         if (!ts) {
1019                 dual_timestamp_get(&_ts);
1020                 ts = &_ts;
1021         }
1022
1023         if (f->tail_entry_monotonic_valid &&
1024             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1025                 return -EINVAL;
1026
1027         r = journal_file_maybe_append_tag(f, ts->realtime);
1028         if (r < 0)
1029                 return r;
1030
1031         /* alloca() can't take 0, hence let's allocate at least one */
1032         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1033
1034         for (i = 0; i < n_iovec; i++) {
1035                 uint64_t p;
1036                 Object *o;
1037
1038                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1039                 if (r < 0)
1040                         return r;
1041
1042                 xor_hash ^= le64toh(o->data.hash);
1043                 items[i].object_offset = htole64(p);
1044                 items[i].hash = o->data.hash;
1045         }
1046
1047         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1048
1049         journal_file_post_change(f);
1050
1051         return r;
1052 }
1053
1054 static int generic_array_get(JournalFile *f,
1055                              uint64_t first,
1056                              uint64_t i,
1057                              Object **ret, uint64_t *offset) {
1058
1059         Object *o;
1060         uint64_t p = 0, a;
1061         int r;
1062
1063         assert(f);
1064
1065         a = first;
1066         while (a > 0) {
1067                 uint64_t n;
1068
1069                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1070                 if (r < 0)
1071                         return r;
1072
1073                 n = journal_file_entry_array_n_items(o);
1074                 if (i < n) {
1075                         p = le64toh(o->entry_array.items[i]);
1076                         break;
1077                 }
1078
1079                 i -= n;
1080                 a = le64toh(o->entry_array.next_entry_array_offset);
1081         }
1082
1083         if (a <= 0 || p <= 0)
1084                 return 0;
1085
1086         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1087         if (r < 0)
1088                 return r;
1089
1090         if (ret)
1091                 *ret = o;
1092
1093         if (offset)
1094                 *offset = p;
1095
1096         return 1;
1097 }
1098
1099 static int generic_array_get_plus_one(JournalFile *f,
1100                                       uint64_t extra,
1101                                       uint64_t first,
1102                                       uint64_t i,
1103                                       Object **ret, uint64_t *offset) {
1104
1105         Object *o;
1106
1107         assert(f);
1108
1109         if (i == 0) {
1110                 int r;
1111
1112                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1113                 if (r < 0)
1114                         return r;
1115
1116                 if (ret)
1117                         *ret = o;
1118
1119                 if (offset)
1120                         *offset = extra;
1121
1122                 return 1;
1123         }
1124
1125         return generic_array_get(f, first, i-1, ret, offset);
1126 }
1127
/* Verdicts returned by the test_object() callbacks used for bisection,
 * describing how the tested entry relates to the needle. */
enum {
        TEST_FOUND = 0, /* the entry matches the needle */
        TEST_LEFT  = 1, /* the entry compares lower than the needle */
        TEST_RIGHT = 2  /* the entry compares higher than the needle */
};
1133
1134 static int generic_array_bisect(JournalFile *f,
1135                                 uint64_t first,
1136                                 uint64_t n,
1137                                 uint64_t needle,
1138                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1139                                 direction_t direction,
1140                                 Object **ret,
1141                                 uint64_t *offset,
1142                                 uint64_t *idx) {
1143
1144         uint64_t a, p, t = 0, i = 0, last_p = 0;
1145         bool subtract_one = false;
1146         Object *o, *array = NULL;
1147         int r;
1148
1149         assert(f);
1150         assert(test_object);
1151
1152         a = first;
1153         while (a > 0) {
1154                 uint64_t left, right, k, lp;
1155
1156                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1157                 if (r < 0)
1158                         return r;
1159
1160                 k = journal_file_entry_array_n_items(array);
1161                 right = MIN(k, n);
1162                 if (right <= 0)
1163                         return 0;
1164
1165                 i = right - 1;
1166                 lp = p = le64toh(array->entry_array.items[i]);
1167                 if (p <= 0)
1168                         return -EBADMSG;
1169
1170                 r = test_object(f, p, needle);
1171                 if (r < 0)
1172                         return r;
1173
1174                 if (r == TEST_FOUND)
1175                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1176
1177                 if (r == TEST_RIGHT) {
1178                         left = 0;
1179                         right -= 1;
1180                         for (;;) {
1181                                 if (left == right) {
1182                                         if (direction == DIRECTION_UP)
1183                                                 subtract_one = true;
1184
1185                                         i = left;
1186                                         goto found;
1187                                 }
1188
1189                                 assert(left < right);
1190
1191                                 i = (left + right) / 2;
1192                                 p = le64toh(array->entry_array.items[i]);
1193                                 if (p <= 0)
1194                                         return -EBADMSG;
1195
1196                                 r = test_object(f, p, needle);
1197                                 if (r < 0)
1198                                         return r;
1199
1200                                 if (r == TEST_FOUND)
1201                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1202
1203                                 if (r == TEST_RIGHT)
1204                                         right = i;
1205                                 else
1206                                         left = i + 1;
1207                         }
1208                 }
1209
1210                 if (k > n) {
1211                         if (direction == DIRECTION_UP) {
1212                                 i = n;
1213                                 subtract_one = true;
1214                                 goto found;
1215                         }
1216
1217                         return 0;
1218                 }
1219
1220                 last_p = lp;
1221
1222                 n -= k;
1223                 t += k;
1224                 a = le64toh(array->entry_array.next_entry_array_offset);
1225         }
1226
1227         return 0;
1228
1229 found:
1230         if (subtract_one && t == 0 && i == 0)
1231                 return 0;
1232
1233         if (subtract_one && i == 0)
1234                 p = last_p;
1235         else if (subtract_one)
1236                 p = le64toh(array->entry_array.items[i-1]);
1237         else
1238                 p = le64toh(array->entry_array.items[i]);
1239
1240         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1241         if (r < 0)
1242                 return r;
1243
1244         if (ret)
1245                 *ret = o;
1246
1247         if (offset)
1248                 *offset = p;
1249
1250         if (idx)
1251                 *idx = t + i + (subtract_one ? -1 : 0);
1252
1253         return 1;
1254 }
1255
1256 static int generic_array_bisect_plus_one(JournalFile *f,
1257                                          uint64_t extra,
1258                                          uint64_t first,
1259                                          uint64_t n,
1260                                          uint64_t needle,
1261                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1262                                          direction_t direction,
1263                                          Object **ret,
1264                                          uint64_t *offset,
1265                                          uint64_t *idx) {
1266
1267         int r;
1268         bool step_back = false;
1269         Object *o;
1270
1271         assert(f);
1272         assert(test_object);
1273
1274         if (n <= 0)
1275                 return 0;
1276
1277         /* This bisects the array in object 'first', but first checks
1278          * an extra  */
1279         r = test_object(f, extra, needle);
1280         if (r < 0)
1281                 return r;
1282
1283         if (r == TEST_FOUND)
1284                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1285
1286         /* if we are looking with DIRECTION_UP then we need to first
1287            see if in the actual array there is a matching entry, and
1288            return the last one of that. But if there isn't any we need
1289            to return this one. Hence remember this, and return it
1290            below. */
1291         if (r == TEST_LEFT)
1292                 step_back = direction == DIRECTION_UP;
1293
1294         if (r == TEST_RIGHT) {
1295                 if (direction == DIRECTION_DOWN)
1296                         goto found;
1297                 else
1298                         return 0;
1299         }
1300
1301         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1302
1303         if (r == 0 && step_back)
1304                 goto found;
1305
1306         if (r > 0 && idx)
1307                 (*idx) ++;
1308
1309         return r;
1310
1311 found:
1312         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1313         if (r < 0)
1314                 return r;
1315
1316         if (ret)
1317                 *ret = o;
1318
1319         if (offset)
1320                 *offset = extra;
1321
1322         if (idx)
1323                 *idx = 0;
1324
1325         return 1;
1326 }
1327
1328 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1329         assert(f);
1330         assert(p > 0);
1331
1332         if (p == needle)
1333                 return TEST_FOUND;
1334         else if (p < needle)
1335                 return TEST_LEFT;
1336         else
1337                 return TEST_RIGHT;
1338 }
1339
1340 int journal_file_move_to_entry_by_offset(
1341                 JournalFile *f,
1342                 uint64_t p,
1343                 direction_t direction,
1344                 Object **ret,
1345                 uint64_t *offset) {
1346
1347         return generic_array_bisect(f,
1348                                     le64toh(f->header->entry_array_offset),
1349                                     le64toh(f->header->n_entries),
1350                                     p,
1351                                     test_object_offset,
1352                                     direction,
1353                                     ret, offset, NULL);
1354 }
1355
1356
1357 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1358         Object *o;
1359         int r;
1360
1361         assert(f);
1362         assert(p > 0);
1363
1364         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1365         if (r < 0)
1366                 return r;
1367
1368         if (le64toh(o->entry.seqnum) == needle)
1369                 return TEST_FOUND;
1370         else if (le64toh(o->entry.seqnum) < needle)
1371                 return TEST_LEFT;
1372         else
1373                 return TEST_RIGHT;
1374 }
1375
1376 int journal_file_move_to_entry_by_seqnum(
1377                 JournalFile *f,
1378                 uint64_t seqnum,
1379                 direction_t direction,
1380                 Object **ret,
1381                 uint64_t *offset) {
1382
1383         return generic_array_bisect(f,
1384                                     le64toh(f->header->entry_array_offset),
1385                                     le64toh(f->header->n_entries),
1386                                     seqnum,
1387                                     test_object_seqnum,
1388                                     direction,
1389                                     ret, offset, NULL);
1390 }
1391
1392 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1393         Object *o;
1394         int r;
1395
1396         assert(f);
1397         assert(p > 0);
1398
1399         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1400         if (r < 0)
1401                 return r;
1402
1403         if (le64toh(o->entry.realtime) == needle)
1404                 return TEST_FOUND;
1405         else if (le64toh(o->entry.realtime) < needle)
1406                 return TEST_LEFT;
1407         else
1408                 return TEST_RIGHT;
1409 }
1410
1411 int journal_file_move_to_entry_by_realtime(
1412                 JournalFile *f,
1413                 uint64_t realtime,
1414                 direction_t direction,
1415                 Object **ret,
1416                 uint64_t *offset) {
1417
1418         return generic_array_bisect(f,
1419                                     le64toh(f->header->entry_array_offset),
1420                                     le64toh(f->header->n_entries),
1421                                     realtime,
1422                                     test_object_realtime,
1423                                     direction,
1424                                     ret, offset, NULL);
1425 }
1426
1427 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1428         Object *o;
1429         int r;
1430
1431         assert(f);
1432         assert(p > 0);
1433
1434         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1435         if (r < 0)
1436                 return r;
1437
1438         if (le64toh(o->entry.monotonic) == needle)
1439                 return TEST_FOUND;
1440         else if (le64toh(o->entry.monotonic) < needle)
1441                 return TEST_LEFT;
1442         else
1443                 return TEST_RIGHT;
1444 }
1445
1446 int journal_file_move_to_entry_by_monotonic(
1447                 JournalFile *f,
1448                 sd_id128_t boot_id,
1449                 uint64_t monotonic,
1450                 direction_t direction,
1451                 Object **ret,
1452                 uint64_t *offset) {
1453
1454         char t[9+32+1] = "_BOOT_ID=";
1455         Object *o;
1456         int r;
1457
1458         assert(f);
1459
1460         sd_id128_to_string(boot_id, t + 9);
1461         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1462         if (r < 0)
1463                 return r;
1464         if (r == 0)
1465                 return -ENOENT;
1466
1467         return generic_array_bisect_plus_one(f,
1468                                              le64toh(o->data.entry_offset),
1469                                              le64toh(o->data.entry_array_offset),
1470                                              le64toh(o->data.n_entries),
1471                                              monotonic,
1472                                              test_object_monotonic,
1473                                              direction,
1474                                              ret, offset, NULL);
1475 }
1476
1477 int journal_file_next_entry(
1478                 JournalFile *f,
1479                 Object *o, uint64_t p,
1480                 direction_t direction,
1481                 Object **ret, uint64_t *offset) {
1482
1483         uint64_t i, n;
1484         int r;
1485
1486         assert(f);
1487         assert(p > 0 || !o);
1488
1489         n = le64toh(f->header->n_entries);
1490         if (n <= 0)
1491                 return 0;
1492
1493         if (!o)
1494                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1495         else {
1496                 if (o->object.type != OBJECT_ENTRY)
1497                         return -EINVAL;
1498
1499                 r = generic_array_bisect(f,
1500                                          le64toh(f->header->entry_array_offset),
1501                                          le64toh(f->header->n_entries),
1502                                          p,
1503                                          test_object_offset,
1504                                          DIRECTION_DOWN,
1505                                          NULL, NULL,
1506                                          &i);
1507                 if (r <= 0)
1508                         return r;
1509
1510                 if (direction == DIRECTION_DOWN) {
1511                         if (i >= n - 1)
1512                                 return 0;
1513
1514                         i++;
1515                 } else {
1516                         if (i <= 0)
1517                                 return 0;
1518
1519                         i--;
1520                 }
1521         }
1522
1523         /* And jump to it */
1524         return generic_array_get(f,
1525                                  le64toh(f->header->entry_array_offset),
1526                                  i,
1527                                  ret, offset);
1528 }
1529
1530 int journal_file_skip_entry(
1531                 JournalFile *f,
1532                 Object *o, uint64_t p,
1533                 int64_t skip,
1534                 Object **ret, uint64_t *offset) {
1535
1536         uint64_t i, n;
1537         int r;
1538
1539         assert(f);
1540         assert(o);
1541         assert(p > 0);
1542
1543         if (o->object.type != OBJECT_ENTRY)
1544                 return -EINVAL;
1545
1546         r = generic_array_bisect(f,
1547                                  le64toh(f->header->entry_array_offset),
1548                                  le64toh(f->header->n_entries),
1549                                  p,
1550                                  test_object_offset,
1551                                  DIRECTION_DOWN,
1552                                  NULL, NULL,
1553                                  &i);
1554         if (r <= 0)
1555                 return r;
1556
1557         /* Calculate new index */
1558         if (skip < 0) {
1559                 if ((uint64_t) -skip >= i)
1560                         i = 0;
1561                 else
1562                         i = i - (uint64_t) -skip;
1563         } else
1564                 i  += (uint64_t) skip;
1565
1566         n = le64toh(f->header->n_entries);
1567         if (n <= 0)
1568                 return -EBADMSG;
1569
1570         if (i >= n)
1571                 i = n-1;
1572
1573         return generic_array_get(f,
1574                                  le64toh(f->header->entry_array_offset),
1575                                  i,
1576                                  ret, offset);
1577 }
1578
1579 int journal_file_next_entry_for_data(
1580                 JournalFile *f,
1581                 Object *o, uint64_t p,
1582                 uint64_t data_offset,
1583                 direction_t direction,
1584                 Object **ret, uint64_t *offset) {
1585
1586         uint64_t n, i;
1587         int r;
1588         Object *d;
1589
1590         assert(f);
1591         assert(p > 0 || !o);
1592
1593         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1594         if (r < 0)
1595                 return r;
1596
1597         n = le64toh(d->data.n_entries);
1598         if (n <= 0)
1599                 return n;
1600
1601         if (!o)
1602                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1603         else {
1604                 if (o->object.type != OBJECT_ENTRY)
1605                         return -EINVAL;
1606
1607                 r = generic_array_bisect_plus_one(f,
1608                                                   le64toh(d->data.entry_offset),
1609                                                   le64toh(d->data.entry_array_offset),
1610                                                   le64toh(d->data.n_entries),
1611                                                   p,
1612                                                   test_object_offset,
1613                                                   DIRECTION_DOWN,
1614                                                   NULL, NULL,
1615                                                   &i);
1616
1617                 if (r <= 0)
1618                         return r;
1619
1620                 if (direction == DIRECTION_DOWN) {
1621                         if (i >= n - 1)
1622                                 return 0;
1623
1624                         i++;
1625                 } else {
1626                         if (i <= 0)
1627                                 return 0;
1628
1629                         i--;
1630                 }
1631
1632         }
1633
1634         return generic_array_get_plus_one(f,
1635                                           le64toh(d->data.entry_offset),
1636                                           le64toh(d->data.entry_array_offset),
1637                                           i,
1638                                           ret, offset);
1639 }
1640
1641 int journal_file_move_to_entry_by_offset_for_data(
1642                 JournalFile *f,
1643                 uint64_t data_offset,
1644                 uint64_t p,
1645                 direction_t direction,
1646                 Object **ret, uint64_t *offset) {
1647
1648         int r;
1649         Object *d;
1650
1651         assert(f);
1652
1653         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1654         if (r < 0)
1655                 return r;
1656
1657         return generic_array_bisect_plus_one(f,
1658                                              le64toh(d->data.entry_offset),
1659                                              le64toh(d->data.entry_array_offset),
1660                                              le64toh(d->data.n_entries),
1661                                              p,
1662                                              test_object_offset,
1663                                              direction,
1664                                              ret, offset, NULL);
1665 }
1666
1667 int journal_file_move_to_entry_by_monotonic_for_data(
1668                 JournalFile *f,
1669                 uint64_t data_offset,
1670                 sd_id128_t boot_id,
1671                 uint64_t monotonic,
1672                 direction_t direction,
1673                 Object **ret, uint64_t *offset) {
1674
1675         char t[9+32+1] = "_BOOT_ID=";
1676         Object *o, *d;
1677         int r;
1678         uint64_t b, z;
1679
1680         assert(f);
1681
1682         /* First, seek by time */
1683         sd_id128_to_string(boot_id, t + 9);
1684         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1685         if (r < 0)
1686                 return r;
1687         if (r == 0)
1688                 return -ENOENT;
1689
1690         r = generic_array_bisect_plus_one(f,
1691                                           le64toh(o->data.entry_offset),
1692                                           le64toh(o->data.entry_array_offset),
1693                                           le64toh(o->data.n_entries),
1694                                           monotonic,
1695                                           test_object_monotonic,
1696                                           direction,
1697                                           NULL, &z, NULL);
1698         if (r <= 0)
1699                 return r;
1700
1701         /* And now, continue seeking until we find an entry that
1702          * exists in both bisection arrays */
1703
1704         for (;;) {
1705                 Object *qo;
1706                 uint64_t p, q;
1707
1708                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1709                 if (r < 0)
1710                         return r;
1711
1712                 r = generic_array_bisect_plus_one(f,
1713                                                   le64toh(d->data.entry_offset),
1714                                                   le64toh(d->data.entry_array_offset),
1715                                                   le64toh(d->data.n_entries),
1716                                                   z,
1717                                                   test_object_offset,
1718                                                   direction,
1719                                                   NULL, &p, NULL);
1720                 if (r <= 0)
1721                         return r;
1722
1723                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1724                 if (r < 0)
1725                         return r;
1726
1727                 r = generic_array_bisect_plus_one(f,
1728                                                   le64toh(o->data.entry_offset),
1729                                                   le64toh(o->data.entry_array_offset),
1730                                                   le64toh(o->data.n_entries),
1731                                                   p,
1732                                                   test_object_offset,
1733                                                   direction,
1734                                                   &qo, &q, NULL);
1735
1736                 if (r <= 0)
1737                         return r;
1738
1739                 if (p == q) {
1740                         if (ret)
1741                                 *ret = qo;
1742                         if (offset)
1743                                 *offset = q;
1744
1745                         return 1;
1746                 }
1747
1748                 z = q;
1749         }
1750
1751         return 0;
1752 }
1753
1754 int journal_file_move_to_entry_by_seqnum_for_data(
1755                 JournalFile *f,
1756                 uint64_t data_offset,
1757                 uint64_t seqnum,
1758                 direction_t direction,
1759                 Object **ret, uint64_t *offset) {
1760
1761         Object *d;
1762         int r;
1763
1764         assert(f);
1765
1766         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1767         if (r < 0)
1768                 return r;
1769
1770         return generic_array_bisect_plus_one(f,
1771                                              le64toh(d->data.entry_offset),
1772                                              le64toh(d->data.entry_array_offset),
1773                                              le64toh(d->data.n_entries),
1774                                              seqnum,
1775                                              test_object_seqnum,
1776                                              direction,
1777                                              ret, offset, NULL);
1778 }
1779
1780 int journal_file_move_to_entry_by_realtime_for_data(
1781                 JournalFile *f,
1782                 uint64_t data_offset,
1783                 uint64_t realtime,
1784                 direction_t direction,
1785                 Object **ret, uint64_t *offset) {
1786
1787         Object *d;
1788         int r;
1789
1790         assert(f);
1791
1792         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1793         if (r < 0)
1794                 return r;
1795
1796         return generic_array_bisect_plus_one(f,
1797                                              le64toh(d->data.entry_offset),
1798                                              le64toh(d->data.entry_array_offset),
1799                                              le64toh(d->data.n_entries),
1800                                              realtime,
1801                                              test_object_realtime,
1802                                              direction,
1803                                              ret, offset, NULL);
1804 }
1805
1806 void journal_file_dump(JournalFile *f) {
1807         Object *o;
1808         int r;
1809         uint64_t p;
1810
1811         assert(f);
1812
1813         journal_file_print_header(f);
1814
1815         p = le64toh(f->header->header_size);
1816         while (p != 0) {
1817                 r = journal_file_move_to_object(f, -1, p, &o);
1818                 if (r < 0)
1819                         goto fail;
1820
1821                 switch (o->object.type) {
1822
1823                 case OBJECT_UNUSED:
1824                         printf("Type: OBJECT_UNUSED\n");
1825                         break;
1826
1827                 case OBJECT_DATA:
1828                         printf("Type: OBJECT_DATA\n");
1829                         break;
1830
1831                 case OBJECT_ENTRY:
1832                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1833                                (unsigned long long) le64toh(o->entry.seqnum),
1834                                (unsigned long long) le64toh(o->entry.monotonic),
1835                                (unsigned long long) le64toh(o->entry.realtime));
1836                         break;
1837
1838                 case OBJECT_FIELD_HASH_TABLE:
1839                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1840                         break;
1841
1842                 case OBJECT_DATA_HASH_TABLE:
1843                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1844                         break;
1845
1846                 case OBJECT_ENTRY_ARRAY:
1847                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1848                         break;
1849
1850                 case OBJECT_TAG:
1851                         printf("Type: OBJECT_TAG %llu\n",
1852                                (unsigned long long) le64toh(o->tag.seqnum));
1853                         break;
1854                 }
1855
1856                 if (o->object.flags & OBJECT_COMPRESSED)
1857                         printf("Flags: COMPRESSED\n");
1858
1859                 if (p == le64toh(f->header->tail_object_offset))
1860                         p = 0;
1861                 else
1862                         p = p + ALIGN64(le64toh(o->object.size));
1863         }
1864
1865         return;
1866 fail:
1867         log_error("File corrupt");
1868 }
1869
1870 void journal_file_print_header(JournalFile *f) {
1871         char a[33], b[33], c[33];
1872         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1873
1874         assert(f);
1875
1876         printf("File Path: %s\n"
1877                "File ID: %s\n"
1878                "Machine ID: %s\n"
1879                "Boot ID: %s\n"
1880                "Sequential Number ID: %s\n"
1881                "State: %s\n"
1882                "Compatible Flags:%s%s\n"
1883                "Incompatible Flags:%s%s\n"
1884                "Header size: %llu\n"
1885                "Arena size: %llu\n"
1886                "Data Hash Table Size: %llu\n"
1887                "Field Hash Table Size: %llu\n"
1888                "Rotate Suggested: %s\n"
1889                "Head Sequential Number: %llu\n"
1890                "Tail Sequential Number: %llu\n"
1891                "Head Realtime Timestamp: %s\n"
1892                "Tail Realtime Timestamp: %s\n"
1893                "Objects: %llu\n"
1894                "Entry Objects: %llu\n",
1895                f->path,
1896                sd_id128_to_string(f->header->file_id, a),
1897                sd_id128_to_string(f->header->machine_id, b),
1898                sd_id128_to_string(f->header->boot_id, c),
1899                sd_id128_to_string(f->header->seqnum_id, c),
1900                f->header->state == STATE_OFFLINE ? "OFFLINE" :
1901                f->header->state == STATE_ONLINE ? "ONLINE" :
1902                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1903                (f->header->compatible_flags & HEADER_COMPATIBLE_SEALED) ? " SEALED" : "",
1904                (f->header->compatible_flags & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1905                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1906                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1907                (unsigned long long) le64toh(f->header->header_size),
1908                (unsigned long long) le64toh(f->header->arena_size),
1909                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1910                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1911                yes_no(journal_file_rotate_suggested(f)),
1912                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1913                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1914                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1915                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1916                (unsigned long long) le64toh(f->header->n_objects),
1917                (unsigned long long) le64toh(f->header->n_entries));
1918
1919         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1920                 printf("Data Objects: %llu\n"
1921                        "Data Hash Table Fill: %.1f%%\n",
1922                        (unsigned long long) le64toh(f->header->n_data),
1923                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1924
1925         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1926                 printf("Field Objects: %llu\n"
1927                        "Field Hash Table Fill: %.1f%%\n",
1928                        (unsigned long long) le64toh(f->header->n_fields),
1929                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1930
1931         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1932                 printf("Tag Objects: %llu\n",
1933                        (unsigned long long) le64toh(f->header->n_tags));
1934         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1935                 printf("Entry Array Objects: %llu\n",
1936                        (unsigned long long) le64toh(f->header->n_entry_arrays));
1937 }
1938
1939 int journal_file_open(
1940                 const char *fname,
1941                 int flags,
1942                 mode_t mode,
1943                 bool compress,
1944                 bool seal,
1945                 JournalMetrics *metrics,
1946                 MMapCache *mmap_cache,
1947                 JournalFile *template,
1948                 JournalFile **ret) {
1949
1950         JournalFile *f;
1951         int r;
1952         bool newly_created = false;
1953
1954         assert(fname);
1955
1956         if ((flags & O_ACCMODE) != O_RDONLY &&
1957             (flags & O_ACCMODE) != O_RDWR)
1958                 return -EINVAL;
1959
1960         if (!endswith(fname, ".journal"))
1961                 return -EINVAL;
1962
1963         f = new0(JournalFile, 1);
1964         if (!f)
1965                 return -ENOMEM;
1966
1967         f->fd = -1;
1968         f->mode = mode;
1969
1970         f->flags = flags;
1971         f->prot = prot_from_flags(flags);
1972         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1973         f->compress = compress;
1974         f->seal = seal;
1975
1976         if (mmap_cache)
1977                 f->mmap = mmap_cache_ref(mmap_cache);
1978         else {
1979                 /* One context for each type, plus the zeroth catchall
1980                  * context. One fd for the file plus one for each type
1981                  * (which we need during verification */
1982                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
1983                 if (!f->mmap) {
1984                         r = -ENOMEM;
1985                         goto fail;
1986                 }
1987         }
1988
1989         f->path = strdup(fname);
1990         if (!f->path) {
1991                 r = -ENOMEM;
1992                 goto fail;
1993         }
1994
1995         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1996         if (f->fd < 0) {
1997                 r = -errno;
1998                 goto fail;
1999         }
2000
2001         if (fstat(f->fd, &f->last_stat) < 0) {
2002                 r = -errno;
2003                 goto fail;
2004         }
2005
2006         if (f->last_stat.st_size == 0 && f->writable) {
2007                 newly_created = true;
2008
2009                 /* Try to load the FSPRG state, and if we can't, then
2010                  * just don't do sealing */
2011                 r = journal_file_fss_load(f);
2012                 if (r < 0)
2013                         f->seal = false;
2014
2015                 r = journal_file_init_header(f, template);
2016                 if (r < 0)
2017                         goto fail;
2018
2019                 if (fstat(f->fd, &f->last_stat) < 0) {
2020                         r = -errno;
2021                         goto fail;
2022                 }
2023         }
2024
2025         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2026                 r = -EIO;
2027                 goto fail;
2028         }
2029
2030         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2031         if (f->header == MAP_FAILED) {
2032                 f->header = NULL;
2033                 r = -errno;
2034                 goto fail;
2035         }
2036
2037         if (!newly_created) {
2038                 r = journal_file_verify_header(f);
2039                 if (r < 0)
2040                         goto fail;
2041         }
2042
2043         if (!newly_created && f->writable) {
2044                 r = journal_file_fss_load(f);
2045                 if (r < 0)
2046                         goto fail;
2047         }
2048
2049         if (f->writable) {
2050                 if (metrics) {
2051                         journal_default_metrics(metrics, f->fd);
2052                         f->metrics = *metrics;
2053                 } else if (template)
2054                         f->metrics = template->metrics;
2055
2056                 r = journal_file_refresh_header(f);
2057                 if (r < 0)
2058                         goto fail;
2059         }
2060
2061         r = journal_file_hmac_setup(f);
2062         if (r < 0)
2063                 goto fail;
2064
2065         if (newly_created) {
2066                 r = journal_file_setup_field_hash_table(f);
2067                 if (r < 0)
2068                         goto fail;
2069
2070                 r = journal_file_setup_data_hash_table(f);
2071                 if (r < 0)
2072                         goto fail;
2073
2074                 r = journal_file_append_first_tag(f);
2075                 if (r < 0)
2076                         goto fail;
2077         }
2078
2079         r = journal_file_map_field_hash_table(f);
2080         if (r < 0)
2081                 goto fail;
2082
2083         r = journal_file_map_data_hash_table(f);
2084         if (r < 0)
2085                 goto fail;
2086
2087         if (ret)
2088                 *ret = f;
2089
2090         return 0;
2091
2092 fail:
2093         journal_file_close(f);
2094
2095         return r;
2096 }
2097
2098 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2099         char *p;
2100         size_t l;
2101         JournalFile *old_file, *new_file = NULL;
2102         int r;
2103
2104         assert(f);
2105         assert(*f);
2106
2107         old_file = *f;
2108
2109         if (!old_file->writable)
2110                 return -EINVAL;
2111
2112         if (!endswith(old_file->path, ".journal"))
2113                 return -EINVAL;
2114
2115         l = strlen(old_file->path);
2116
2117         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2118         if (!p)
2119                 return -ENOMEM;
2120
2121         memcpy(p, old_file->path, l - 8);
2122         p[l-8] = '@';
2123         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2124         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2125                  "-%016llx-%016llx.journal",
2126                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2127                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2128
2129         r = rename(old_file->path, p);
2130         free(p);
2131
2132         if (r < 0)
2133                 return -errno;
2134
2135         old_file->header->state = STATE_ARCHIVED;
2136
2137         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2138         journal_file_close(old_file);
2139
2140         *f = new_file;
2141         return r;
2142 }
2143
2144 int journal_file_open_reliably(
2145                 const char *fname,
2146                 int flags,
2147                 mode_t mode,
2148                 bool compress,
2149                 bool seal,
2150                 JournalMetrics *metrics,
2151                 MMapCache *mmap_cache,
2152                 JournalFile *template,
2153                 JournalFile **ret) {
2154
2155         int r;
2156         size_t l;
2157         char *p;
2158
2159         r = journal_file_open(fname, flags, mode, compress, seal,
2160                               metrics, mmap_cache, template, ret);
2161         if (r != -EBADMSG && /* corrupted */
2162             r != -ENODATA && /* truncated */
2163             r != -EHOSTDOWN && /* other machine */
2164             r != -EPROTONOSUPPORT && /* incompatible feature */
2165             r != -EBUSY && /* unclean shutdown */
2166             r != -ESHUTDOWN /* already archived */)
2167                 return r;
2168
2169         if ((flags & O_ACCMODE) == O_RDONLY)
2170                 return r;
2171
2172         if (!(flags & O_CREAT))
2173                 return r;
2174
2175         if (!endswith(fname, ".journal"))
2176                 return r;
2177
2178         /* The file is corrupted. Rotate it away and try it again (but only once) */
2179
2180         l = strlen(fname);
2181         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2182                      (int) (l-8), fname,
2183                      (unsigned long long) now(CLOCK_REALTIME),
2184                      random_ull()) < 0)
2185                 return -ENOMEM;
2186
2187         r = rename(fname, p);
2188         free(p);
2189         if (r < 0)
2190                 return -errno;
2191
2192         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2193
2194         return journal_file_open(fname, flags, mode, compress, seal,
2195                                  metrics, mmap_cache, template, ret);
2196 }
2197
2198
2199 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2200         uint64_t i, n;
2201         uint64_t q, xor_hash = 0;
2202         int r;
2203         EntryItem *items;
2204         dual_timestamp ts;
2205
2206         assert(from);
2207         assert(to);
2208         assert(o);
2209         assert(p);
2210
2211         if (!to->writable)
2212                 return -EPERM;
2213
2214         ts.monotonic = le64toh(o->entry.monotonic);
2215         ts.realtime = le64toh(o->entry.realtime);
2216
2217         if (to->tail_entry_monotonic_valid &&
2218             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2219                 return -EINVAL;
2220
2221         n = journal_file_entry_n_items(o);
2222         items = alloca(sizeof(EntryItem) * n);
2223
2224         for (i = 0; i < n; i++) {
2225                 uint64_t l, h;
2226                 le64_t le_hash;
2227                 size_t t;
2228                 void *data;
2229                 Object *u;
2230
2231                 q = le64toh(o->entry.items[i].object_offset);
2232                 le_hash = o->entry.items[i].hash;
2233
2234                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2235                 if (r < 0)
2236                         return r;
2237
2238                 if (le_hash != o->data.hash)
2239                         return -EBADMSG;
2240
2241                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2242                 t = (size_t) l;
2243
2244                 /* We hit the limit on 32bit machines */
2245                 if ((uint64_t) t != l)
2246                         return -E2BIG;
2247
2248                 if (o->object.flags & OBJECT_COMPRESSED) {
2249 #ifdef HAVE_XZ
2250                         uint64_t rsize;
2251
2252                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2253                                 return -EBADMSG;
2254
2255                         data = from->compress_buffer;
2256                         l = rsize;
2257 #else
2258                         return -EPROTONOSUPPORT;
2259 #endif
2260                 } else
2261                         data = o->data.payload;
2262
2263                 r = journal_file_append_data(to, data, l, &u, &h);
2264                 if (r < 0)
2265                         return r;
2266
2267                 xor_hash ^= le64toh(u->data.hash);
2268                 items[i].object_offset = htole64(h);
2269                 items[i].hash = u->data.hash;
2270
2271                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2272                 if (r < 0)
2273                         return r;
2274         }
2275
2276         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2277 }
2278
2279 void journal_default_metrics(JournalMetrics *m, int fd) {
2280         uint64_t fs_size = 0;
2281         struct statvfs ss;
2282         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2283
2284         assert(m);
2285         assert(fd >= 0);
2286
2287         if (fstatvfs(fd, &ss) >= 0)
2288                 fs_size = ss.f_frsize * ss.f_blocks;
2289
2290         if (m->max_use == (uint64_t) -1) {
2291
2292                 if (fs_size > 0) {
2293                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2294
2295                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2296                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2297
2298                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2299                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2300                 } else
2301                         m->max_use = DEFAULT_MAX_USE_LOWER;
2302         } else {
2303                 m->max_use = PAGE_ALIGN(m->max_use);
2304
2305                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2306                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2307         }
2308
2309         if (m->max_size == (uint64_t) -1) {
2310                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2311
2312                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2313                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2314         } else
2315                 m->max_size = PAGE_ALIGN(m->max_size);
2316
2317         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2318                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2319
2320         if (m->max_size*2 > m->max_use)
2321                 m->max_use = m->max_size*2;
2322
2323         if (m->min_size == (uint64_t) -1)
2324                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2325         else {
2326                 m->min_size = PAGE_ALIGN(m->min_size);
2327
2328                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2329                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2330
2331                 if (m->min_size > m->max_size)
2332                         m->max_size = m->min_size;
2333         }
2334
2335         if (m->keep_free == (uint64_t) -1) {
2336
2337                 if (fs_size > 0) {
2338                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2339
2340                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2341                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2342
2343                 } else
2344                         m->keep_free = DEFAULT_KEEP_FREE;
2345         }
2346
2347         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2348                  format_bytes(a, sizeof(a), m->max_use),
2349                  format_bytes(b, sizeof(b), m->max_size),
2350                  format_bytes(c, sizeof(c), m->min_size),
2351                  format_bytes(d, sizeof(d), m->keep_free));
2352 }
2353
2354 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2355         assert(f);
2356         assert(from || to);
2357
2358         if (from) {
2359                 if (f->header->head_entry_realtime == 0)
2360                         return -ENOENT;
2361
2362                 *from = le64toh(f->header->head_entry_realtime);
2363         }
2364
2365         if (to) {
2366                 if (f->header->tail_entry_realtime == 0)
2367                         return -ENOENT;
2368
2369                 *to = le64toh(f->header->tail_entry_realtime);
2370         }
2371
2372         return 1;
2373 }
2374
2375 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2376         char t[9+32+1] = "_BOOT_ID=";
2377         Object *o;
2378         uint64_t p;
2379         int r;
2380
2381         assert(f);
2382         assert(from || to);
2383
2384         sd_id128_to_string(boot_id, t + 9);
2385
2386         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2387         if (r <= 0)
2388                 return r;
2389
2390         if (le64toh(o->data.n_entries) <= 0)
2391                 return 0;
2392
2393         if (from) {
2394                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2395                 if (r < 0)
2396                         return r;
2397
2398                 *from = le64toh(o->entry.monotonic);
2399         }
2400
2401         if (to) {
2402                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2403                 if (r < 0)
2404                         return r;
2405
2406                 r = generic_array_get_plus_one(f,
2407                                                le64toh(o->data.entry_offset),
2408                                                le64toh(o->data.entry_array_offset),
2409                                                le64toh(o->data.n_entries)-1,
2410                                                &o, NULL);
2411                 if (r <= 0)
2412                         return r;
2413
2414                 *to = le64toh(o->entry.monotonic);
2415         }
2416
2417         return 1;
2418 }
2419
2420 bool journal_file_rotate_suggested(JournalFile *f) {
2421         assert(f);
2422
2423         /* If we gained new header fields we gained new features,
2424          * hence suggest a rotation */
2425         if (le64toh(f->header->header_size) < sizeof(Header)) {
2426                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2427                 return true;
2428         }
2429
2430         /* Let's check if the hash tables grew over a certain fill
2431          * level (75%, borrowing this value from Java's hash table
2432          * implementation), and if so suggest a rotation. To calculate
2433          * the fill level we need the n_data field, which only exists
2434          * in newer versions. */
2435
2436         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2437                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2438                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2439                                   f->path,
2440                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2441                                   (unsigned long long) le64toh(f->header->n_data),
2442                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2443                                   (unsigned long long) (f->last_stat.st_size),
2444                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2445                         return true;
2446                 }
2447
2448         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2449                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2450                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2451                                   f->path,
2452                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2453                                   (unsigned long long) le64toh(f->header->n_fields),
2454                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2455                         return true;
2456                 }
2457
2458         return false;
2459 }