chiark / gitweb /
journal: fix uninitialized var
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
/* Sizes, in bytes, of the hash tables set up for new files: the data
 * hash table is never made smaller than its default, the field hash
 * table always uses its default. */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

/* Data payloads of at least this many bytes are candidates for
 * compression when compression is enabled for the file */
#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* This is the minimum journal file size */
#define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */

/* These are the lower and upper bounds if we deduce the max_use value
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */

/* This is the upper bound if we deduce max_size from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */

/* This is the upper bound if we deduce the keep_free value from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */

/* This is the keep_free value when we can't determine the file
 * system size */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MiB */

/* n_data was the first entry we added after the initial file format
 * design, hence a valid header extends at least up to that field */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67         /* Write the final tag */
68         if (f->authenticate)
69                 journal_file_append_tag(f);
70
71         /* Sync everything to disk, before we mark the file offline */
72         if (f->mmap && f->fd >= 0)
73                 mmap_cache_close_fd(f->mmap, f->fd);
74
75         if (f->writable && f->fd >= 0)
76                 fdatasync(f->fd);
77
78         if (f->header) {
79                 /* Mark the file offline. Don't override the archived state if it already is set */
80                 if (f->writable && f->header->state == STATE_ONLINE)
81                         f->header->state = STATE_OFFLINE;
82
83                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
84         }
85
86         if (f->fd >= 0)
87                 close_nointr_nofail(f->fd);
88
89         free(f->path);
90
91         if (f->mmap)
92                 mmap_cache_unref(f->mmap);
93
94 #ifdef HAVE_XZ
95         free(f->compress_buffer);
96 #endif
97
98 #ifdef HAVE_GCRYPT
99         if (f->fsprg_header)
100                 munmap(f->fsprg_header, PAGE_ALIGN(f->fsprg_size));
101
102         if (f->hmac)
103                 gcry_md_close(f->hmac);
104 #endif
105
106         free(f);
107 }
108
109 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
110         Header h;
111         ssize_t k;
112         int r;
113
114         assert(f);
115
116         zero(h);
117         memcpy(h.signature, HEADER_SIGNATURE, 8);
118         h.header_size = htole64(ALIGN64(sizeof(h)));
119
120         h.incompatible_flags =
121                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
122
123         h.compatible_flags =
124                 htole32(f->authenticate ? HEADER_COMPATIBLE_AUTHENTICATED : 0);
125
126         r = sd_id128_randomize(&h.file_id);
127         if (r < 0)
128                 return r;
129
130         if (template) {
131                 h.seqnum_id = template->header->seqnum_id;
132                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
133         } else
134                 h.seqnum_id = h.file_id;
135
136         k = pwrite(f->fd, &h, sizeof(h), 0);
137         if (k < 0)
138                 return -errno;
139
140         if (k != sizeof(h))
141                 return -EIO;
142
143         return 0;
144 }
145
146 static int journal_file_refresh_header(JournalFile *f) {
147         int r;
148         sd_id128_t boot_id;
149
150         assert(f);
151
152         r = sd_id128_get_machine(&f->header->machine_id);
153         if (r < 0)
154                 return r;
155
156         r = sd_id128_get_boot(&boot_id);
157         if (r < 0)
158                 return r;
159
160         if (sd_id128_equal(boot_id, f->header->boot_id))
161                 f->tail_entry_monotonic_valid = true;
162
163         f->header->boot_id = boot_id;
164
165         f->header->state = STATE_ONLINE;
166
167         /* Sync the online state to disk */
168         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
169         fdatasync(f->fd);
170
171         return 0;
172 }
173
174 static int journal_file_verify_header(JournalFile *f) {
175         assert(f);
176
177         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
178                 return -EBADMSG;
179
180         /* In both read and write mode we refuse to open files with
181          * incompatible flags we don't know */
182 #ifdef HAVE_XZ
183         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
184                 return -EPROTONOSUPPORT;
185 #else
186         if (f->header->incompatible_flags != 0)
187                 return -EPROTONOSUPPORT;
188 #endif
189
190         /* When open for writing we refuse to open files with
191          * compatible flags, too */
192         if (f->writable) {
193 #ifdef HAVE_GCRYPT
194                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_AUTHENTICATED) != 0)
195                         return -EPROTONOSUPPORT;
196 #else
197                 if (f->header->compatible_flags != 0)
198                         return -EPROTONOSUPPORT;
199 #endif
200         }
201
202         /* The first addition was n_data, so check that we are at least this large */
203         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
204                 return -EBADMSG;
205
206         if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
207                 !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
208                 return -EBADMSG;
209
210         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
211                 return -ENODATA;
212
213         if (f->writable) {
214                 uint8_t state;
215                 sd_id128_t machine_id;
216                 int r;
217
218                 r = sd_id128_get_machine(&machine_id);
219                 if (r < 0)
220                         return r;
221
222                 if (!sd_id128_equal(machine_id, f->header->machine_id))
223                         return -EHOSTDOWN;
224
225                 state = f->header->state;
226
227                 if (state == STATE_ONLINE) {
228                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
229                         return -EBUSY;
230                 } else if (state == STATE_ARCHIVED)
231                         return -ESHUTDOWN;
232                 else if (state != STATE_OFFLINE) {
233                         log_debug("Journal file %s has unknown state %u.", f->path, state);
234                         return -EBUSY;
235                 }
236         }
237
238         f->compress = !!(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED);
239         f->authenticate = !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
240
241         return 0;
242 }
243
244 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
245         uint64_t old_size, new_size;
246         int r;
247
248         assert(f);
249
250         /* We assume that this file is not sparse, and we know that
251          * for sure, since we always call posix_fallocate()
252          * ourselves */
253
254         old_size =
255                 le64toh(f->header->header_size) +
256                 le64toh(f->header->arena_size);
257
258         new_size = PAGE_ALIGN(offset + size);
259         if (new_size < le64toh(f->header->header_size))
260                 new_size = le64toh(f->header->header_size);
261
262         if (new_size <= old_size)
263                 return 0;
264
265         if (f->metrics.max_size > 0 &&
266             new_size > f->metrics.max_size)
267                 return -E2BIG;
268
269         if (new_size > f->metrics.min_size &&
270             f->metrics.keep_free > 0) {
271                 struct statvfs svfs;
272
273                 if (fstatvfs(f->fd, &svfs) >= 0) {
274                         uint64_t available;
275
276                         available = svfs.f_bfree * svfs.f_bsize;
277
278                         if (available >= f->metrics.keep_free)
279                                 available -= f->metrics.keep_free;
280                         else
281                                 available = 0;
282
283                         if (new_size - old_size > available)
284                                 return -E2BIG;
285                 }
286         }
287
288         /* Note that the glibc fallocate() fallback is very
289            inefficient, hence we try to minimize the allocation area
290            as we can. */
291         r = posix_fallocate(f->fd, old_size, new_size - old_size);
292         if (r != 0)
293                 return -r;
294
295         mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
296
297         if (fstat(f->fd, &f->last_stat) < 0)
298                 return -errno;
299
300         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
301
302         return 0;
303 }
304
305 static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
306         assert(f);
307         assert(ret);
308
309         /* Avoid SIGBUS on invalid accesses */
310         if (offset + size > (uint64_t) f->last_stat.st_size) {
311                 /* Hmm, out of range? Let's refresh the fstat() data
312                  * first, before we trust that check. */
313
314                 if (fstat(f->fd, &f->last_stat) < 0 ||
315                     offset + size > (uint64_t) f->last_stat.st_size)
316                         return -EADDRNOTAVAIL;
317         }
318
319         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
320 }
321
322 static uint64_t minimum_header_size(Object *o) {
323
324         static uint64_t table[] = {
325                 [OBJECT_DATA] = sizeof(DataObject),
326                 [OBJECT_FIELD] = sizeof(FieldObject),
327                 [OBJECT_ENTRY] = sizeof(EntryObject),
328                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
329                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
330                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
331                 [OBJECT_TAG] = sizeof(TagObject),
332         };
333
334         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
335                 return sizeof(ObjectHeader);
336
337         return table[o->object.type];
338 }
339
340 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
341         int r;
342         void *t;
343         Object *o;
344         uint64_t s;
345         unsigned context;
346
347         assert(f);
348         assert(ret);
349
350         /* One context for each type, plus one catch-all for the rest */
351         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
352
353         r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
354         if (r < 0)
355                 return r;
356
357         o = (Object*) t;
358         s = le64toh(o->object.size);
359
360         if (s < sizeof(ObjectHeader))
361                 return -EBADMSG;
362
363         if (o->object.type <= OBJECT_UNUSED)
364                 return -EBADMSG;
365
366         if (s < minimum_header_size(o))
367                 return -EBADMSG;
368
369         if (type >= 0 && o->object.type != type)
370                 return -EBADMSG;
371
372         if (s > sizeof(ObjectHeader)) {
373                 r = journal_file_move_to(f, o->object.type, offset, s, &t);
374                 if (r < 0)
375                         return r;
376
377                 o = (Object*) t;
378         }
379
380         *ret = o;
381         return 0;
382 }
383
384 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
385         uint64_t r;
386
387         assert(f);
388
389         r = le64toh(f->header->tail_entry_seqnum) + 1;
390
391         if (seqnum) {
392                 /* If an external seqnum counter was passed, we update
393                  * both the local and the external one, and set it to
394                  * the maximum of both */
395
396                 if (*seqnum + 1 > r)
397                         r = *seqnum + 1;
398
399                 *seqnum = r;
400         }
401
402         f->header->tail_entry_seqnum = htole64(r);
403
404         if (f->header->head_entry_seqnum == 0)
405                 f->header->head_entry_seqnum = htole64(r);
406
407         return r;
408 }
409
410 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
411         int r;
412         uint64_t p;
413         Object *tail, *o;
414         void *t;
415
416         assert(f);
417         assert(type > 0 && type < _OBJECT_TYPE_MAX);
418         assert(size >= sizeof(ObjectHeader));
419         assert(offset);
420         assert(ret);
421
422         p = le64toh(f->header->tail_object_offset);
423         if (p == 0)
424                 p = le64toh(f->header->header_size);
425         else {
426                 r = journal_file_move_to_object(f, -1, p, &tail);
427                 if (r < 0)
428                         return r;
429
430                 p += ALIGN64(le64toh(tail->object.size));
431         }
432
433         r = journal_file_allocate(f, p, size);
434         if (r < 0)
435                 return r;
436
437         r = journal_file_move_to(f, type, p, size, &t);
438         if (r < 0)
439                 return r;
440
441         o = (Object*) t;
442
443         zero(o->object);
444         o->object.type = type;
445         o->object.size = htole64(size);
446
447         f->header->tail_object_offset = htole64(p);
448         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
449
450         *ret = o;
451         *offset = p;
452
453         return 0;
454 }
455
456 static int journal_file_setup_data_hash_table(JournalFile *f) {
457         uint64_t s, p;
458         Object *o;
459         int r;
460
461         assert(f);
462
463         /* We estimate that we need 1 hash table entry per 768 of
464            journal file and we want to make sure we never get beyond
465            75% fill level. Calculate the hash table size for the
466            maximum file size based on these metrics. */
467
468         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
469         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
470                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
471
472         log_info("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
473
474         r = journal_file_append_object(f,
475                                        OBJECT_DATA_HASH_TABLE,
476                                        offsetof(Object, hash_table.items) + s,
477                                        &o, &p);
478         if (r < 0)
479                 return r;
480
481         memset(o->hash_table.items, 0, s);
482
483         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
484         f->header->data_hash_table_size = htole64(s);
485
486         return 0;
487 }
488
489 static int journal_file_setup_field_hash_table(JournalFile *f) {
490         uint64_t s, p;
491         Object *o;
492         int r;
493
494         assert(f);
495
496         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
497         r = journal_file_append_object(f,
498                                        OBJECT_FIELD_HASH_TABLE,
499                                        offsetof(Object, hash_table.items) + s,
500                                        &o, &p);
501         if (r < 0)
502                 return r;
503
504         memset(o->hash_table.items, 0, s);
505
506         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
507         f->header->field_hash_table_size = htole64(s);
508
509         return 0;
510 }
511
512 static int journal_file_map_data_hash_table(JournalFile *f) {
513         uint64_t s, p;
514         void *t;
515         int r;
516
517         assert(f);
518
519         p = le64toh(f->header->data_hash_table_offset);
520         s = le64toh(f->header->data_hash_table_size);
521
522         r = journal_file_move_to(f,
523                                  OBJECT_DATA_HASH_TABLE,
524                                  p, s,
525                                  &t);
526         if (r < 0)
527                 return r;
528
529         f->data_hash_table = t;
530         return 0;
531 }
532
533 static int journal_file_map_field_hash_table(JournalFile *f) {
534         uint64_t s, p;
535         void *t;
536         int r;
537
538         assert(f);
539
540         p = le64toh(f->header->field_hash_table_offset);
541         s = le64toh(f->header->field_hash_table_size);
542
543         r = journal_file_move_to(f,
544                                  OBJECT_FIELD_HASH_TABLE,
545                                  p, s,
546                                  &t);
547         if (r < 0)
548                 return r;
549
550         f->field_hash_table = t;
551         return 0;
552 }
553
554 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
555         uint64_t p, h;
556         int r;
557
558         assert(f);
559         assert(o);
560         assert(offset > 0);
561         assert(o->object.type == OBJECT_DATA);
562
563         /* This might alter the window we are looking at */
564
565         o->data.next_hash_offset = o->data.next_field_offset = 0;
566         o->data.entry_offset = o->data.entry_array_offset = 0;
567         o->data.n_entries = 0;
568
569         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
570         p = le64toh(f->data_hash_table[h].tail_hash_offset);
571         if (p == 0) {
572                 /* Only entry in the hash table is easy */
573                 f->data_hash_table[h].head_hash_offset = htole64(offset);
574         } else {
575                 /* Move back to the previous data object, to patch in
576                  * pointer */
577
578                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
579                 if (r < 0)
580                         return r;
581
582                 o->data.next_hash_offset = htole64(offset);
583         }
584
585         f->data_hash_table[h].tail_hash_offset = htole64(offset);
586
587         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
588                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
589
590         return 0;
591 }
592
593 int journal_file_find_data_object_with_hash(
594                 JournalFile *f,
595                 const void *data, uint64_t size, uint64_t hash,
596                 Object **ret, uint64_t *offset) {
597
598         uint64_t p, osize, h;
599         int r;
600
601         assert(f);
602         assert(data || size == 0);
603
604         osize = offsetof(Object, data.payload) + size;
605
606         if (f->header->data_hash_table_size == 0)
607                 return -EBADMSG;
608
609         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
610         p = le64toh(f->data_hash_table[h].head_hash_offset);
611
612         while (p > 0) {
613                 Object *o;
614
615                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
616                 if (r < 0)
617                         return r;
618
619                 if (le64toh(o->data.hash) != hash)
620                         goto next;
621
622                 if (o->object.flags & OBJECT_COMPRESSED) {
623 #ifdef HAVE_XZ
624                         uint64_t l, rsize;
625
626                         l = le64toh(o->object.size);
627                         if (l <= offsetof(Object, data.payload))
628                                 return -EBADMSG;
629
630                         l -= offsetof(Object, data.payload);
631
632                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
633                                 return -EBADMSG;
634
635                         if (rsize == size &&
636                             memcmp(f->compress_buffer, data, size) == 0) {
637
638                                 if (ret)
639                                         *ret = o;
640
641                                 if (offset)
642                                         *offset = p;
643
644                                 return 1;
645                         }
646 #else
647                         return -EPROTONOSUPPORT;
648 #endif
649
650                 } else if (le64toh(o->object.size) == osize &&
651                            memcmp(o->data.payload, data, size) == 0) {
652
653                         if (ret)
654                                 *ret = o;
655
656                         if (offset)
657                                 *offset = p;
658
659                         return 1;
660                 }
661
662         next:
663                 p = le64toh(o->data.next_hash_offset);
664         }
665
666         return 0;
667 }
668
669 int journal_file_find_data_object(
670                 JournalFile *f,
671                 const void *data, uint64_t size,
672                 Object **ret, uint64_t *offset) {
673
674         uint64_t hash;
675
676         assert(f);
677         assert(data || size == 0);
678
679         hash = hash64(data, size);
680
681         return journal_file_find_data_object_with_hash(f,
682                                                        data, size, hash,
683                                                        ret, offset);
684 }
685
686 static int journal_file_append_data(
687                 JournalFile *f,
688                 const void *data, uint64_t size,
689                 Object **ret, uint64_t *offset) {
690
691         uint64_t hash, p;
692         uint64_t osize;
693         Object *o;
694         int r;
695         bool compressed = false;
696
697         assert(f);
698         assert(data || size == 0);
699
700         hash = hash64(data, size);
701
702         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
703         if (r < 0)
704                 return r;
705         else if (r > 0) {
706
707                 if (ret)
708                         *ret = o;
709
710                 if (offset)
711                         *offset = p;
712
713                 return 0;
714         }
715
716         osize = offsetof(Object, data.payload) + size;
717         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
718         if (r < 0)
719                 return r;
720
721         o->data.hash = htole64(hash);
722
723 #ifdef HAVE_XZ
724         if (f->compress &&
725             size >= COMPRESSION_SIZE_THRESHOLD) {
726                 uint64_t rsize;
727
728                 compressed = compress_blob(data, size, o->data.payload, &rsize);
729
730                 if (compressed) {
731                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
732                         o->object.flags |= OBJECT_COMPRESSED;
733
734                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
735                 }
736         }
737 #endif
738
739         if (!compressed && size > 0)
740                 memcpy(o->data.payload, data, size);
741
742         r = journal_file_link_data(f, o, p, hash);
743         if (r < 0)
744                 return r;
745
746         r = journal_file_hmac_put_object(f, OBJECT_DATA, p);
747         if (r < 0)
748                 return r;
749
750         /* The linking might have altered the window, so let's
751          * refresh our pointer */
752         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
753         if (r < 0)
754                 return r;
755
756         if (ret)
757                 *ret = o;
758
759         if (offset)
760                 *offset = p;
761
762         return 0;
763 }
764
765 uint64_t journal_file_entry_n_items(Object *o) {
766         assert(o);
767         assert(o->object.type == OBJECT_ENTRY);
768
769         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
770 }
771
772 uint64_t journal_file_entry_array_n_items(Object *o) {
773         assert(o);
774         assert(o->object.type == OBJECT_ENTRY_ARRAY);
775
776         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
777 }
778
779 static int link_entry_into_array(JournalFile *f,
780                                  le64_t *first,
781                                  le64_t *idx,
782                                  uint64_t p) {
783         int r;
784         uint64_t n = 0, ap = 0, q, i, a, hidx;
785         Object *o;
786
787         assert(f);
788         assert(first);
789         assert(idx);
790         assert(p > 0);
791
792         a = le64toh(*first);
793         i = hidx = le64toh(*idx);
794         while (a > 0) {
795
796                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
797                 if (r < 0)
798                         return r;
799
800                 n = journal_file_entry_array_n_items(o);
801                 if (i < n) {
802                         o->entry_array.items[i] = htole64(p);
803                         *idx = htole64(hidx + 1);
804                         return 0;
805                 }
806
807                 i -= n;
808                 ap = a;
809                 a = le64toh(o->entry_array.next_entry_array_offset);
810         }
811
812         if (hidx > n)
813                 n = (hidx+1) * 2;
814         else
815                 n = n * 2;
816
817         if (n < 4)
818                 n = 4;
819
820         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
821                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
822                                        &o, &q);
823         if (r < 0)
824                 return r;
825
826         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, q);
827         if (r < 0)
828                 return r;
829
830         o->entry_array.items[i] = htole64(p);
831
832         if (ap == 0)
833                 *first = htole64(q);
834         else {
835                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
836                 if (r < 0)
837                         return r;
838
839                 o->entry_array.next_entry_array_offset = htole64(q);
840         }
841
842         *idx = htole64(hidx + 1);
843
844         return 0;
845 }
846
847 static int link_entry_into_array_plus_one(JournalFile *f,
848                                           le64_t *extra,
849                                           le64_t *first,
850                                           le64_t *idx,
851                                           uint64_t p) {
852
853         int r;
854
855         assert(f);
856         assert(extra);
857         assert(first);
858         assert(idx);
859         assert(p > 0);
860
861         if (*idx == 0)
862                 *extra = htole64(p);
863         else {
864                 le64_t i;
865
866                 i = htole64(le64toh(*idx) - 1);
867                 r = link_entry_into_array(f, first, &i, p);
868                 if (r < 0)
869                         return r;
870         }
871
872         *idx = htole64(le64toh(*idx) + 1);
873         return 0;
874 }
875
876 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
877         uint64_t p;
878         int r;
879         assert(f);
880         assert(o);
881         assert(offset > 0);
882
883         p = le64toh(o->entry.items[i].object_offset);
884         if (p == 0)
885                 return -EINVAL;
886
887         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
888         if (r < 0)
889                 return r;
890
891         return link_entry_into_array_plus_one(f,
892                                               &o->data.entry_offset,
893                                               &o->data.entry_array_offset,
894                                               &o->data.n_entries,
895                                               offset);
896 }
897
898 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
899         uint64_t n, i;
900         int r;
901
902         assert(f);
903         assert(o);
904         assert(offset > 0);
905         assert(o->object.type == OBJECT_ENTRY);
906
907         __sync_synchronize();
908
909         /* Link up the entry itself */
910         r = link_entry_into_array(f,
911                                   &f->header->entry_array_offset,
912                                   &f->header->n_entries,
913                                   offset);
914         if (r < 0)
915                 return r;
916
917         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
918
919         if (f->header->head_entry_realtime == 0)
920                 f->header->head_entry_realtime = o->entry.realtime;
921
922         f->header->tail_entry_realtime = o->entry.realtime;
923         f->header->tail_entry_monotonic = o->entry.monotonic;
924
925         f->tail_entry_monotonic_valid = true;
926
927         /* Link up the items */
928         n = journal_file_entry_n_items(o);
929         for (i = 0; i < n; i++) {
930                 r = journal_file_link_entry_item(f, o, offset, i);
931                 if (r < 0)
932                         return r;
933         }
934
935         return 0;
936 }
937
938 static int journal_file_append_entry_internal(
939                 JournalFile *f,
940                 const dual_timestamp *ts,
941                 uint64_t xor_hash,
942                 const EntryItem items[], unsigned n_items,
943                 uint64_t *seqnum,
944                 Object **ret, uint64_t *offset) {
945         uint64_t np;
946         uint64_t osize;
947         Object *o;
948         int r;
949
950         assert(f);
951         assert(items || n_items == 0);
952         assert(ts);
953
954         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
955
956         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
957         if (r < 0)
958                 return r;
959
960         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
961         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
962         o->entry.realtime = htole64(ts->realtime);
963         o->entry.monotonic = htole64(ts->monotonic);
964         o->entry.xor_hash = htole64(xor_hash);
965         o->entry.boot_id = f->header->boot_id;
966
967         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, np);
968         if (r < 0)
969                 return r;
970
971         r = journal_file_link_entry(f, o, np);
972         if (r < 0)
973                 return r;
974
975         if (ret)
976                 *ret = o;
977
978         if (offset)
979                 *offset = np;
980
981         return 0;
982 }
983
984 void journal_file_post_change(JournalFile *f) {
985         assert(f);
986
987         /* inotify() does not receive IN_MODIFY events from file
988          * accesses done via mmap(). After each access we hence
989          * trigger IN_MODIFY by truncating the journal file to its
990          * current size which triggers IN_MODIFY. */
991
992         __sync_synchronize();
993
994         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
995                 log_error("Failed to to truncate file to its own size: %m");
996 }
997
998 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
999         unsigned i;
1000         EntryItem *items;
1001         int r;
1002         uint64_t xor_hash = 0;
1003         struct dual_timestamp _ts;
1004
1005         assert(f);
1006         assert(iovec || n_iovec == 0);
1007
1008         if (!f->writable)
1009                 return -EPERM;
1010
1011         if (!ts) {
1012                 dual_timestamp_get(&_ts);
1013                 ts = &_ts;
1014         }
1015
1016         if (f->tail_entry_monotonic_valid &&
1017             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1018                 return -EINVAL;
1019
1020         r = journal_file_maybe_append_tag(f, ts->realtime);
1021         if (r < 0)
1022                 return r;
1023
1024         /* alloca() can't take 0, hence let's allocate at least one */
1025         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1026
1027         for (i = 0; i < n_iovec; i++) {
1028                 uint64_t p;
1029                 Object *o;
1030
1031                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1032                 if (r < 0)
1033                         return r;
1034
1035                 xor_hash ^= le64toh(o->data.hash);
1036                 items[i].object_offset = htole64(p);
1037                 items[i].hash = o->data.hash;
1038         }
1039
1040         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1041
1042         journal_file_post_change(f);
1043
1044         return r;
1045 }
1046
1047 static int generic_array_get(JournalFile *f,
1048                              uint64_t first,
1049                              uint64_t i,
1050                              Object **ret, uint64_t *offset) {
1051
1052         Object *o;
1053         uint64_t p = 0, a;
1054         int r;
1055
1056         assert(f);
1057
1058         a = first;
1059         while (a > 0) {
1060                 uint64_t n;
1061
1062                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1063                 if (r < 0)
1064                         return r;
1065
1066                 n = journal_file_entry_array_n_items(o);
1067                 if (i < n) {
1068                         p = le64toh(o->entry_array.items[i]);
1069                         break;
1070                 }
1071
1072                 i -= n;
1073                 a = le64toh(o->entry_array.next_entry_array_offset);
1074         }
1075
1076         if (a <= 0 || p <= 0)
1077                 return 0;
1078
1079         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1080         if (r < 0)
1081                 return r;
1082
1083         if (ret)
1084                 *ret = o;
1085
1086         if (offset)
1087                 *offset = p;
1088
1089         return 1;
1090 }
1091
1092 static int generic_array_get_plus_one(JournalFile *f,
1093                                       uint64_t extra,
1094                                       uint64_t first,
1095                                       uint64_t i,
1096                                       Object **ret, uint64_t *offset) {
1097
1098         Object *o;
1099
1100         assert(f);
1101
1102         if (i == 0) {
1103                 int r;
1104
1105                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1106                 if (r < 0)
1107                         return r;
1108
1109                 if (ret)
1110                         *ret = o;
1111
1112                 if (offset)
1113                         *offset = extra;
1114
1115                 return 1;
1116         }
1117
1118         return generic_array_get(f, first, i-1, ret, offset);
1119 }
1120
/* Results of the test_object() callbacks used by the bisection helpers */
enum {
        TEST_FOUND = 0,         /* the tested entry equals the needle */
        TEST_LEFT  = 1,         /* the tested entry is smaller than the needle */
        TEST_RIGHT = 2          /* the tested entry is larger than the needle */
};
1126
1127 static int generic_array_bisect(JournalFile *f,
1128                                 uint64_t first,
1129                                 uint64_t n,
1130                                 uint64_t needle,
1131                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1132                                 direction_t direction,
1133                                 Object **ret,
1134                                 uint64_t *offset,
1135                                 uint64_t *idx) {
1136
1137         uint64_t a, p, t = 0, i = 0, last_p = 0;
1138         bool subtract_one = false;
1139         Object *o, *array = NULL;
1140         int r;
1141
1142         assert(f);
1143         assert(test_object);
1144
1145         a = first;
1146         while (a > 0) {
1147                 uint64_t left, right, k, lp;
1148
1149                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1150                 if (r < 0)
1151                         return r;
1152
1153                 k = journal_file_entry_array_n_items(array);
1154                 right = MIN(k, n);
1155                 if (right <= 0)
1156                         return 0;
1157
1158                 i = right - 1;
1159                 lp = p = le64toh(array->entry_array.items[i]);
1160                 if (p <= 0)
1161                         return -EBADMSG;
1162
1163                 r = test_object(f, p, needle);
1164                 if (r < 0)
1165                         return r;
1166
1167                 if (r == TEST_FOUND)
1168                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1169
1170                 if (r == TEST_RIGHT) {
1171                         left = 0;
1172                         right -= 1;
1173                         for (;;) {
1174                                 if (left == right) {
1175                                         if (direction == DIRECTION_UP)
1176                                                 subtract_one = true;
1177
1178                                         i = left;
1179                                         goto found;
1180                                 }
1181
1182                                 assert(left < right);
1183
1184                                 i = (left + right) / 2;
1185                                 p = le64toh(array->entry_array.items[i]);
1186                                 if (p <= 0)
1187                                         return -EBADMSG;
1188
1189                                 r = test_object(f, p, needle);
1190                                 if (r < 0)
1191                                         return r;
1192
1193                                 if (r == TEST_FOUND)
1194                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1195
1196                                 if (r == TEST_RIGHT)
1197                                         right = i;
1198                                 else
1199                                         left = i + 1;
1200                         }
1201                 }
1202
1203                 if (k > n) {
1204                         if (direction == DIRECTION_UP) {
1205                                 i = n;
1206                                 subtract_one = true;
1207                                 goto found;
1208                         }
1209
1210                         return 0;
1211                 }
1212
1213                 last_p = lp;
1214
1215                 n -= k;
1216                 t += k;
1217                 a = le64toh(array->entry_array.next_entry_array_offset);
1218         }
1219
1220         return 0;
1221
1222 found:
1223         if (subtract_one && t == 0 && i == 0)
1224                 return 0;
1225
1226         if (subtract_one && i == 0)
1227                 p = last_p;
1228         else if (subtract_one)
1229                 p = le64toh(array->entry_array.items[i-1]);
1230         else
1231                 p = le64toh(array->entry_array.items[i]);
1232
1233         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1234         if (r < 0)
1235                 return r;
1236
1237         if (ret)
1238                 *ret = o;
1239
1240         if (offset)
1241                 *offset = p;
1242
1243         if (idx)
1244                 *idx = t + i + (subtract_one ? -1 : 0);
1245
1246         return 1;
1247 }
1248
1249 static int generic_array_bisect_plus_one(JournalFile *f,
1250                                          uint64_t extra,
1251                                          uint64_t first,
1252                                          uint64_t n,
1253                                          uint64_t needle,
1254                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1255                                          direction_t direction,
1256                                          Object **ret,
1257                                          uint64_t *offset,
1258                                          uint64_t *idx) {
1259
1260         int r;
1261         bool step_back = false;
1262         Object *o;
1263
1264         assert(f);
1265         assert(test_object);
1266
1267         if (n <= 0)
1268                 return 0;
1269
1270         /* This bisects the array in object 'first', but first checks
1271          * an extra  */
1272         r = test_object(f, extra, needle);
1273         if (r < 0)
1274                 return r;
1275
1276         if (r == TEST_FOUND)
1277                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1278
1279         /* if we are looking with DIRECTION_UP then we need to first
1280            see if in the actual array there is a matching entry, and
1281            return the last one of that. But if there isn't any we need
1282            to return this one. Hence remember this, and return it
1283            below. */
1284         if (r == TEST_LEFT)
1285                 step_back = direction == DIRECTION_UP;
1286
1287         if (r == TEST_RIGHT) {
1288                 if (direction == DIRECTION_DOWN)
1289                         goto found;
1290                 else
1291                         return 0;
1292         }
1293
1294         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1295
1296         if (r == 0 && step_back)
1297                 goto found;
1298
1299         if (r > 0 && idx)
1300                 (*idx) ++;
1301
1302         return r;
1303
1304 found:
1305         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1306         if (r < 0)
1307                 return r;
1308
1309         if (ret)
1310                 *ret = o;
1311
1312         if (offset)
1313                 *offset = extra;
1314
1315         if (idx)
1316                 *idx = 0;
1317
1318         return 1;
1319 }
1320
1321 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1322         assert(f);
1323         assert(p > 0);
1324
1325         if (p == needle)
1326                 return TEST_FOUND;
1327         else if (p < needle)
1328                 return TEST_LEFT;
1329         else
1330                 return TEST_RIGHT;
1331 }
1332
1333 int journal_file_move_to_entry_by_offset(
1334                 JournalFile *f,
1335                 uint64_t p,
1336                 direction_t direction,
1337                 Object **ret,
1338                 uint64_t *offset) {
1339
1340         return generic_array_bisect(f,
1341                                     le64toh(f->header->entry_array_offset),
1342                                     le64toh(f->header->n_entries),
1343                                     p,
1344                                     test_object_offset,
1345                                     direction,
1346                                     ret, offset, NULL);
1347 }
1348
1349
1350 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1351         Object *o;
1352         int r;
1353
1354         assert(f);
1355         assert(p > 0);
1356
1357         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1358         if (r < 0)
1359                 return r;
1360
1361         if (le64toh(o->entry.seqnum) == needle)
1362                 return TEST_FOUND;
1363         else if (le64toh(o->entry.seqnum) < needle)
1364                 return TEST_LEFT;
1365         else
1366                 return TEST_RIGHT;
1367 }
1368
1369 int journal_file_move_to_entry_by_seqnum(
1370                 JournalFile *f,
1371                 uint64_t seqnum,
1372                 direction_t direction,
1373                 Object **ret,
1374                 uint64_t *offset) {
1375
1376         return generic_array_bisect(f,
1377                                     le64toh(f->header->entry_array_offset),
1378                                     le64toh(f->header->n_entries),
1379                                     seqnum,
1380                                     test_object_seqnum,
1381                                     direction,
1382                                     ret, offset, NULL);
1383 }
1384
1385 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1386         Object *o;
1387         int r;
1388
1389         assert(f);
1390         assert(p > 0);
1391
1392         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1393         if (r < 0)
1394                 return r;
1395
1396         if (le64toh(o->entry.realtime) == needle)
1397                 return TEST_FOUND;
1398         else if (le64toh(o->entry.realtime) < needle)
1399                 return TEST_LEFT;
1400         else
1401                 return TEST_RIGHT;
1402 }
1403
1404 int journal_file_move_to_entry_by_realtime(
1405                 JournalFile *f,
1406                 uint64_t realtime,
1407                 direction_t direction,
1408                 Object **ret,
1409                 uint64_t *offset) {
1410
1411         return generic_array_bisect(f,
1412                                     le64toh(f->header->entry_array_offset),
1413                                     le64toh(f->header->n_entries),
1414                                     realtime,
1415                                     test_object_realtime,
1416                                     direction,
1417                                     ret, offset, NULL);
1418 }
1419
1420 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1421         Object *o;
1422         int r;
1423
1424         assert(f);
1425         assert(p > 0);
1426
1427         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1428         if (r < 0)
1429                 return r;
1430
1431         if (le64toh(o->entry.monotonic) == needle)
1432                 return TEST_FOUND;
1433         else if (le64toh(o->entry.monotonic) < needle)
1434                 return TEST_LEFT;
1435         else
1436                 return TEST_RIGHT;
1437 }
1438
1439 int journal_file_move_to_entry_by_monotonic(
1440                 JournalFile *f,
1441                 sd_id128_t boot_id,
1442                 uint64_t monotonic,
1443                 direction_t direction,
1444                 Object **ret,
1445                 uint64_t *offset) {
1446
1447         char t[9+32+1] = "_BOOT_ID=";
1448         Object *o;
1449         int r;
1450
1451         assert(f);
1452
1453         sd_id128_to_string(boot_id, t + 9);
1454         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1455         if (r < 0)
1456                 return r;
1457         if (r == 0)
1458                 return -ENOENT;
1459
1460         return generic_array_bisect_plus_one(f,
1461                                              le64toh(o->data.entry_offset),
1462                                              le64toh(o->data.entry_array_offset),
1463                                              le64toh(o->data.n_entries),
1464                                              monotonic,
1465                                              test_object_monotonic,
1466                                              direction,
1467                                              ret, offset, NULL);
1468 }
1469
1470 int journal_file_next_entry(
1471                 JournalFile *f,
1472                 Object *o, uint64_t p,
1473                 direction_t direction,
1474                 Object **ret, uint64_t *offset) {
1475
1476         uint64_t i, n;
1477         int r;
1478
1479         assert(f);
1480         assert(p > 0 || !o);
1481
1482         n = le64toh(f->header->n_entries);
1483         if (n <= 0)
1484                 return 0;
1485
1486         if (!o)
1487                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1488         else {
1489                 if (o->object.type != OBJECT_ENTRY)
1490                         return -EINVAL;
1491
1492                 r = generic_array_bisect(f,
1493                                          le64toh(f->header->entry_array_offset),
1494                                          le64toh(f->header->n_entries),
1495                                          p,
1496                                          test_object_offset,
1497                                          DIRECTION_DOWN,
1498                                          NULL, NULL,
1499                                          &i);
1500                 if (r <= 0)
1501                         return r;
1502
1503                 if (direction == DIRECTION_DOWN) {
1504                         if (i >= n - 1)
1505                                 return 0;
1506
1507                         i++;
1508                 } else {
1509                         if (i <= 0)
1510                                 return 0;
1511
1512                         i--;
1513                 }
1514         }
1515
1516         /* And jump to it */
1517         return generic_array_get(f,
1518                                  le64toh(f->header->entry_array_offset),
1519                                  i,
1520                                  ret, offset);
1521 }
1522
1523 int journal_file_skip_entry(
1524                 JournalFile *f,
1525                 Object *o, uint64_t p,
1526                 int64_t skip,
1527                 Object **ret, uint64_t *offset) {
1528
1529         uint64_t i, n;
1530         int r;
1531
1532         assert(f);
1533         assert(o);
1534         assert(p > 0);
1535
1536         if (o->object.type != OBJECT_ENTRY)
1537                 return -EINVAL;
1538
1539         r = generic_array_bisect(f,
1540                                  le64toh(f->header->entry_array_offset),
1541                                  le64toh(f->header->n_entries),
1542                                  p,
1543                                  test_object_offset,
1544                                  DIRECTION_DOWN,
1545                                  NULL, NULL,
1546                                  &i);
1547         if (r <= 0)
1548                 return r;
1549
1550         /* Calculate new index */
1551         if (skip < 0) {
1552                 if ((uint64_t) -skip >= i)
1553                         i = 0;
1554                 else
1555                         i = i - (uint64_t) -skip;
1556         } else
1557                 i  += (uint64_t) skip;
1558
1559         n = le64toh(f->header->n_entries);
1560         if (n <= 0)
1561                 return -EBADMSG;
1562
1563         if (i >= n)
1564                 i = n-1;
1565
1566         return generic_array_get(f,
1567                                  le64toh(f->header->entry_array_offset),
1568                                  i,
1569                                  ret, offset);
1570 }
1571
1572 int journal_file_next_entry_for_data(
1573                 JournalFile *f,
1574                 Object *o, uint64_t p,
1575                 uint64_t data_offset,
1576                 direction_t direction,
1577                 Object **ret, uint64_t *offset) {
1578
1579         uint64_t n, i;
1580         int r;
1581         Object *d;
1582
1583         assert(f);
1584         assert(p > 0 || !o);
1585
1586         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1587         if (r < 0)
1588                 return r;
1589
1590         n = le64toh(d->data.n_entries);
1591         if (n <= 0)
1592                 return n;
1593
1594         if (!o)
1595                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1596         else {
1597                 if (o->object.type != OBJECT_ENTRY)
1598                         return -EINVAL;
1599
1600                 r = generic_array_bisect_plus_one(f,
1601                                                   le64toh(d->data.entry_offset),
1602                                                   le64toh(d->data.entry_array_offset),
1603                                                   le64toh(d->data.n_entries),
1604                                                   p,
1605                                                   test_object_offset,
1606                                                   DIRECTION_DOWN,
1607                                                   NULL, NULL,
1608                                                   &i);
1609
1610                 if (r <= 0)
1611                         return r;
1612
1613                 if (direction == DIRECTION_DOWN) {
1614                         if (i >= n - 1)
1615                                 return 0;
1616
1617                         i++;
1618                 } else {
1619                         if (i <= 0)
1620                                 return 0;
1621
1622                         i--;
1623                 }
1624
1625         }
1626
1627         return generic_array_get_plus_one(f,
1628                                           le64toh(d->data.entry_offset),
1629                                           le64toh(d->data.entry_array_offset),
1630                                           i,
1631                                           ret, offset);
1632 }
1633
1634 int journal_file_move_to_entry_by_offset_for_data(
1635                 JournalFile *f,
1636                 uint64_t data_offset,
1637                 uint64_t p,
1638                 direction_t direction,
1639                 Object **ret, uint64_t *offset) {
1640
1641         int r;
1642         Object *d;
1643
1644         assert(f);
1645
1646         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1647         if (r < 0)
1648                 return r;
1649
1650         return generic_array_bisect_plus_one(f,
1651                                              le64toh(d->data.entry_offset),
1652                                              le64toh(d->data.entry_array_offset),
1653                                              le64toh(d->data.n_entries),
1654                                              p,
1655                                              test_object_offset,
1656                                              direction,
1657                                              ret, offset, NULL);
1658 }
1659
1660 int journal_file_move_to_entry_by_monotonic_for_data(
1661                 JournalFile *f,
1662                 uint64_t data_offset,
1663                 sd_id128_t boot_id,
1664                 uint64_t monotonic,
1665                 direction_t direction,
1666                 Object **ret, uint64_t *offset) {
1667
1668         char t[9+32+1] = "_BOOT_ID=";
1669         Object *o, *d;
1670         int r;
1671         uint64_t b, z;
1672
1673         assert(f);
1674
1675         /* First, seek by time */
1676         sd_id128_to_string(boot_id, t + 9);
1677         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1678         if (r < 0)
1679                 return r;
1680         if (r == 0)
1681                 return -ENOENT;
1682
1683         r = generic_array_bisect_plus_one(f,
1684                                           le64toh(o->data.entry_offset),
1685                                           le64toh(o->data.entry_array_offset),
1686                                           le64toh(o->data.n_entries),
1687                                           monotonic,
1688                                           test_object_monotonic,
1689                                           direction,
1690                                           NULL, &z, NULL);
1691         if (r <= 0)
1692                 return r;
1693
1694         /* And now, continue seeking until we find an entry that
1695          * exists in both bisection arrays */
1696
1697         for (;;) {
1698                 Object *qo;
1699                 uint64_t p, q;
1700
1701                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1702                 if (r < 0)
1703                         return r;
1704
1705                 r = generic_array_bisect_plus_one(f,
1706                                                   le64toh(d->data.entry_offset),
1707                                                   le64toh(d->data.entry_array_offset),
1708                                                   le64toh(d->data.n_entries),
1709                                                   z,
1710                                                   test_object_offset,
1711                                                   direction,
1712                                                   NULL, &p, NULL);
1713                 if (r <= 0)
1714                         return r;
1715
1716                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1717                 if (r < 0)
1718                         return r;
1719
1720                 r = generic_array_bisect_plus_one(f,
1721                                                   le64toh(o->data.entry_offset),
1722                                                   le64toh(o->data.entry_array_offset),
1723                                                   le64toh(o->data.n_entries),
1724                                                   p,
1725                                                   test_object_offset,
1726                                                   direction,
1727                                                   &qo, &q, NULL);
1728
1729                 if (r <= 0)
1730                         return r;
1731
1732                 if (p == q) {
1733                         if (ret)
1734                                 *ret = qo;
1735                         if (offset)
1736                                 *offset = q;
1737
1738                         return 1;
1739                 }
1740
1741                 z = q;
1742         }
1743
1744         return 0;
1745 }
1746
1747 int journal_file_move_to_entry_by_seqnum_for_data(
1748                 JournalFile *f,
1749                 uint64_t data_offset,
1750                 uint64_t seqnum,
1751                 direction_t direction,
1752                 Object **ret, uint64_t *offset) {
1753
1754         Object *d;
1755         int r;
1756
1757         assert(f);
1758
1759         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1760         if (r < 0)
1761                 return r;
1762
1763         return generic_array_bisect_plus_one(f,
1764                                              le64toh(d->data.entry_offset),
1765                                              le64toh(d->data.entry_array_offset),
1766                                              le64toh(d->data.n_entries),
1767                                              seqnum,
1768                                              test_object_seqnum,
1769                                              direction,
1770                                              ret, offset, NULL);
1771 }
1772
1773 int journal_file_move_to_entry_by_realtime_for_data(
1774                 JournalFile *f,
1775                 uint64_t data_offset,
1776                 uint64_t realtime,
1777                 direction_t direction,
1778                 Object **ret, uint64_t *offset) {
1779
1780         Object *d;
1781         int r;
1782
1783         assert(f);
1784
1785         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1786         if (r < 0)
1787                 return r;
1788
1789         return generic_array_bisect_plus_one(f,
1790                                              le64toh(d->data.entry_offset),
1791                                              le64toh(d->data.entry_array_offset),
1792                                              le64toh(d->data.n_entries),
1793                                              realtime,
1794                                              test_object_realtime,
1795                                              direction,
1796                                              ret, offset, NULL);
1797 }
1798
1799 void journal_file_dump(JournalFile *f) {
1800         Object *o;
1801         int r;
1802         uint64_t p;
1803
1804         assert(f);
1805
1806         journal_file_print_header(f);
1807
1808         p = le64toh(f->header->header_size);
1809         while (p != 0) {
1810                 r = journal_file_move_to_object(f, -1, p, &o);
1811                 if (r < 0)
1812                         goto fail;
1813
1814                 switch (o->object.type) {
1815
1816                 case OBJECT_UNUSED:
1817                         printf("Type: OBJECT_UNUSED\n");
1818                         break;
1819
1820                 case OBJECT_DATA:
1821                         printf("Type: OBJECT_DATA\n");
1822                         break;
1823
1824                 case OBJECT_ENTRY:
1825                         printf("Type: OBJECT_ENTRY %llu %llu %llu\n",
1826                                (unsigned long long) le64toh(o->entry.seqnum),
1827                                (unsigned long long) le64toh(o->entry.monotonic),
1828                                (unsigned long long) le64toh(o->entry.realtime));
1829                         break;
1830
1831                 case OBJECT_FIELD_HASH_TABLE:
1832                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1833                         break;
1834
1835                 case OBJECT_DATA_HASH_TABLE:
1836                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1837                         break;
1838
1839                 case OBJECT_ENTRY_ARRAY:
1840                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1841                         break;
1842
1843                 case OBJECT_TAG:
1844                         printf("Type: OBJECT_TAG %llu\n",
1845                                (unsigned long long) le64toh(o->tag.seqnum));
1846                         break;
1847                 }
1848
1849                 if (o->object.flags & OBJECT_COMPRESSED)
1850                         printf("Flags: COMPRESSED\n");
1851
1852                 if (p == le64toh(f->header->tail_object_offset))
1853                         p = 0;
1854                 else
1855                         p = p + ALIGN64(le64toh(o->object.size));
1856         }
1857
1858         return;
1859 fail:
1860         log_error("File corrupt");
1861 }
1862
1863 void journal_file_print_header(JournalFile *f) {
1864         char a[33], b[33], c[33];
1865         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1866
1867         assert(f);
1868
1869         printf("File Path: %s\n"
1870                "File ID: %s\n"
1871                "Machine ID: %s\n"
1872                "Boot ID: %s\n"
1873                "Sequential Number ID: %s\n"
1874                "State: %s\n"
1875                "Compatible Flags:%s%s\n"
1876                "Incompatible Flags:%s%s\n"
1877                "Header size: %llu\n"
1878                "Arena size: %llu\n"
1879                "Data Hash Table Size: %llu\n"
1880                "Field Hash Table Size: %llu\n"
1881                "Objects: %llu\n"
1882                "Entry Objects: %llu\n"
1883                "Rotate Suggested: %s\n"
1884                "Head Sequential Number: %llu\n"
1885                "Tail Sequential Number: %llu\n"
1886                "Head Realtime Timestamp: %s\n"
1887                "Tail Realtime Timestamp: %s\n",
1888                f->path,
1889                sd_id128_to_string(f->header->file_id, a),
1890                sd_id128_to_string(f->header->machine_id, b),
1891                sd_id128_to_string(f->header->boot_id, c),
1892                sd_id128_to_string(f->header->seqnum_id, c),
1893                f->header->state == STATE_OFFLINE ? "offline" :
1894                f->header->state == STATE_ONLINE ? "online" :
1895                f->header->state == STATE_ARCHIVED ? "archived" : "unknown",
1896                (f->header->compatible_flags & HEADER_COMPATIBLE_AUTHENTICATED) ? " AUTHENTICATED" : "",
1897                (f->header->compatible_flags & ~HEADER_COMPATIBLE_AUTHENTICATED) ? " ???" : "",
1898                (f->header->incompatible_flags & HEADER_INCOMPATIBLE_COMPRESSED) ? " COMPRESSED" : "",
1899                (f->header->incompatible_flags & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1900                (unsigned long long) le64toh(f->header->header_size),
1901                (unsigned long long) le64toh(f->header->arena_size),
1902                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1903                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1904                (unsigned long long) le64toh(f->header->n_objects),
1905                (unsigned long long) le64toh(f->header->n_entries),
1906                yes_no(journal_file_rotate_suggested(f)),
1907                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1908                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1909                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1910                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
1911
1912         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1913                 printf("Data Objects: %llu\n"
1914                        "Data Hash Table Fill: %.1f%%\n",
1915                        (unsigned long long) le64toh(f->header->n_data),
1916                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1917
1918         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1919                 printf("Field Objects: %llu\n"
1920                        "Field Hash Table Fill: %.1f%%\n",
1921                        (unsigned long long) le64toh(f->header->n_fields),
1922                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1923 }
1924
1925 int journal_file_open(
1926                 const char *fname,
1927                 int flags,
1928                 mode_t mode,
1929                 bool compress,
1930                 bool authenticate,
1931                 JournalMetrics *metrics,
1932                 MMapCache *mmap_cache,
1933                 JournalFile *template,
1934                 JournalFile **ret) {
1935
1936         JournalFile *f;
1937         int r;
1938         bool newly_created = false;
1939
1940         assert(fname);
1941
1942         if ((flags & O_ACCMODE) != O_RDONLY &&
1943             (flags & O_ACCMODE) != O_RDWR)
1944                 return -EINVAL;
1945
1946         if (!endswith(fname, ".journal"))
1947                 return -EINVAL;
1948
1949         f = new0(JournalFile, 1);
1950         if (!f)
1951                 return -ENOMEM;
1952
1953         f->fd = -1;
1954         f->mode = mode;
1955
1956         f->flags = flags;
1957         f->prot = prot_from_flags(flags);
1958         f->writable = (flags & O_ACCMODE) != O_RDONLY;
1959         f->compress = compress;
1960         f->authenticate = authenticate;
1961
1962         if (mmap_cache)
1963                 f->mmap = mmap_cache_ref(mmap_cache);
1964         else {
1965                 /* One context for each type, plus the zeroth catchall
1966                  * context. One fd for the file plus one for each type
1967                  * (which we need during verification */
1968                 f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
1969                 if (!f->mmap) {
1970                         r = -ENOMEM;
1971                         goto fail;
1972                 }
1973         }
1974
1975         f->path = strdup(fname);
1976         if (!f->path) {
1977                 r = -ENOMEM;
1978                 goto fail;
1979         }
1980
1981         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
1982         if (f->fd < 0) {
1983                 r = -errno;
1984                 goto fail;
1985         }
1986
1987         if (fstat(f->fd, &f->last_stat) < 0) {
1988                 r = -errno;
1989                 goto fail;
1990         }
1991
1992         if (f->last_stat.st_size == 0 && f->writable) {
1993                 newly_created = true;
1994
1995                 /* Try to load the FSPRG state, and if we can't, then
1996                  * just don't do authentication */
1997                 r = journal_file_load_fsprg(f);
1998                 if (r < 0)
1999                         f->authenticate = false;
2000
2001                 r = journal_file_init_header(f, template);
2002                 if (r < 0)
2003                         goto fail;
2004
2005                 if (fstat(f->fd, &f->last_stat) < 0) {
2006                         r = -errno;
2007                         goto fail;
2008                 }
2009         }
2010
2011         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2012                 r = -EIO;
2013                 goto fail;
2014         }
2015
2016         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2017         if (f->header == MAP_FAILED) {
2018                 f->header = NULL;
2019                 r = -errno;
2020                 goto fail;
2021         }
2022
2023         if (!newly_created) {
2024                 r = journal_file_verify_header(f);
2025                 if (r < 0)
2026                         goto fail;
2027         }
2028
2029         if (!newly_created && f->writable) {
2030                 r = journal_file_load_fsprg(f);
2031                 if (r < 0)
2032                         goto fail;
2033         }
2034
2035         if (f->writable) {
2036                 if (metrics) {
2037                         journal_default_metrics(metrics, f->fd);
2038                         f->metrics = *metrics;
2039                 } else if (template)
2040                         f->metrics = template->metrics;
2041
2042                 r = journal_file_refresh_header(f);
2043                 if (r < 0)
2044                         goto fail;
2045
2046                 r = journal_file_setup_hmac(f);
2047                 if (r < 0)
2048                         goto fail;
2049         }
2050
2051         if (newly_created) {
2052                 r = journal_file_setup_field_hash_table(f);
2053                 if (r < 0)
2054                         goto fail;
2055
2056                 r = journal_file_setup_data_hash_table(f);
2057                 if (r < 0)
2058                         goto fail;
2059
2060                 r = journal_file_append_first_tag(f);
2061                 if (r < 0)
2062                         goto fail;
2063         }
2064
2065         r = journal_file_map_field_hash_table(f);
2066         if (r < 0)
2067                 goto fail;
2068
2069         r = journal_file_map_data_hash_table(f);
2070         if (r < 0)
2071                 goto fail;
2072
2073         if (ret)
2074                 *ret = f;
2075
2076         return 0;
2077
2078 fail:
2079         journal_file_close(f);
2080
2081         return r;
2082 }
2083
2084 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
2085         char *p;
2086         size_t l;
2087         JournalFile *old_file, *new_file = NULL;
2088         int r;
2089
2090         assert(f);
2091         assert(*f);
2092
2093         old_file = *f;
2094
2095         if (!old_file->writable)
2096                 return -EINVAL;
2097
2098         if (!endswith(old_file->path, ".journal"))
2099                 return -EINVAL;
2100
2101         l = strlen(old_file->path);
2102
2103         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2104         if (!p)
2105                 return -ENOMEM;
2106
2107         memcpy(p, old_file->path, l - 8);
2108         p[l-8] = '@';
2109         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2110         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2111                  "-%016llx-%016llx.journal",
2112                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2113                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2114
2115         r = rename(old_file->path, p);
2116         free(p);
2117
2118         if (r < 0)
2119                 return -errno;
2120
2121         old_file->header->state = STATE_ARCHIVED;
2122
2123         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
2124         journal_file_close(old_file);
2125
2126         *f = new_file;
2127         return r;
2128 }
2129
2130 int journal_file_open_reliably(
2131                 const char *fname,
2132                 int flags,
2133                 mode_t mode,
2134                 bool compress,
2135                 bool authenticate,
2136                 JournalMetrics *metrics,
2137                 MMapCache *mmap,
2138                 JournalFile *template,
2139                 JournalFile **ret) {
2140
2141         int r;
2142         size_t l;
2143         char *p;
2144
2145         r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2146         if (r != -EBADMSG && /* corrupted */
2147             r != -ENODATA && /* truncated */
2148             r != -EHOSTDOWN && /* other machine */
2149             r != -EPROTONOSUPPORT && /* incompatible feature */
2150             r != -EBUSY && /* unclean shutdown */
2151             r != -ESHUTDOWN /* already archived */)
2152                 return r;
2153
2154         if ((flags & O_ACCMODE) == O_RDONLY)
2155                 return r;
2156
2157         if (!(flags & O_CREAT))
2158                 return r;
2159
2160         if (!endswith(fname, ".journal"))
2161                 return r;
2162
2163         /* The file is corrupted. Rotate it away and try it again (but only once) */
2164
2165         l = strlen(fname);
2166         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2167                      (int) (l-8), fname,
2168                      (unsigned long long) now(CLOCK_REALTIME),
2169                      random_ull()) < 0)
2170                 return -ENOMEM;
2171
2172         r = rename(fname, p);
2173         free(p);
2174         if (r < 0)
2175                 return -errno;
2176
2177         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2178
2179         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
2180 }
2181
2182
2183 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2184         uint64_t i, n;
2185         uint64_t q, xor_hash = 0;
2186         int r;
2187         EntryItem *items;
2188         dual_timestamp ts;
2189
2190         assert(from);
2191         assert(to);
2192         assert(o);
2193         assert(p);
2194
2195         if (!to->writable)
2196                 return -EPERM;
2197
2198         ts.monotonic = le64toh(o->entry.monotonic);
2199         ts.realtime = le64toh(o->entry.realtime);
2200
2201         if (to->tail_entry_monotonic_valid &&
2202             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2203                 return -EINVAL;
2204
2205         n = journal_file_entry_n_items(o);
2206         items = alloca(sizeof(EntryItem) * n);
2207
2208         for (i = 0; i < n; i++) {
2209                 uint64_t l, h;
2210                 le64_t le_hash;
2211                 size_t t;
2212                 void *data;
2213                 Object *u;
2214
2215                 q = le64toh(o->entry.items[i].object_offset);
2216                 le_hash = o->entry.items[i].hash;
2217
2218                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2219                 if (r < 0)
2220                         return r;
2221
2222                 if (le_hash != o->data.hash)
2223                         return -EBADMSG;
2224
2225                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2226                 t = (size_t) l;
2227
2228                 /* We hit the limit on 32bit machines */
2229                 if ((uint64_t) t != l)
2230                         return -E2BIG;
2231
2232                 if (o->object.flags & OBJECT_COMPRESSED) {
2233 #ifdef HAVE_XZ
2234                         uint64_t rsize;
2235
2236                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2237                                 return -EBADMSG;
2238
2239                         data = from->compress_buffer;
2240                         l = rsize;
2241 #else
2242                         return -EPROTONOSUPPORT;
2243 #endif
2244                 } else
2245                         data = o->data.payload;
2246
2247                 r = journal_file_append_data(to, data, l, &u, &h);
2248                 if (r < 0)
2249                         return r;
2250
2251                 xor_hash ^= le64toh(u->data.hash);
2252                 items[i].object_offset = htole64(h);
2253                 items[i].hash = u->data.hash;
2254
2255                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2256                 if (r < 0)
2257                         return r;
2258         }
2259
2260         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2261 }
2262
2263 void journal_default_metrics(JournalMetrics *m, int fd) {
2264         uint64_t fs_size = 0;
2265         struct statvfs ss;
2266         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2267
2268         assert(m);
2269         assert(fd >= 0);
2270
2271         if (fstatvfs(fd, &ss) >= 0)
2272                 fs_size = ss.f_frsize * ss.f_blocks;
2273
2274         if (m->max_use == (uint64_t) -1) {
2275
2276                 if (fs_size > 0) {
2277                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2278
2279                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2280                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2281
2282                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2283                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2284                 } else
2285                         m->max_use = DEFAULT_MAX_USE_LOWER;
2286         } else {
2287                 m->max_use = PAGE_ALIGN(m->max_use);
2288
2289                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2290                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2291         }
2292
2293         if (m->max_size == (uint64_t) -1) {
2294                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2295
2296                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2297                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2298         } else
2299                 m->max_size = PAGE_ALIGN(m->max_size);
2300
2301         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2302                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2303
2304         if (m->max_size*2 > m->max_use)
2305                 m->max_use = m->max_size*2;
2306
2307         if (m->min_size == (uint64_t) -1)
2308                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2309         else {
2310                 m->min_size = PAGE_ALIGN(m->min_size);
2311
2312                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2313                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2314
2315                 if (m->min_size > m->max_size)
2316                         m->max_size = m->min_size;
2317         }
2318
2319         if (m->keep_free == (uint64_t) -1) {
2320
2321                 if (fs_size > 0) {
2322                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2323
2324                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2325                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2326
2327                 } else
2328                         m->keep_free = DEFAULT_KEEP_FREE;
2329         }
2330
2331         log_info("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2332                  format_bytes(a, sizeof(a), m->max_use),
2333                  format_bytes(b, sizeof(b), m->max_size),
2334                  format_bytes(c, sizeof(c), m->min_size),
2335                  format_bytes(d, sizeof(d), m->keep_free));
2336 }
2337
2338 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2339         assert(f);
2340         assert(from || to);
2341
2342         if (from) {
2343                 if (f->header->head_entry_realtime == 0)
2344                         return -ENOENT;
2345
2346                 *from = le64toh(f->header->head_entry_realtime);
2347         }
2348
2349         if (to) {
2350                 if (f->header->tail_entry_realtime == 0)
2351                         return -ENOENT;
2352
2353                 *to = le64toh(f->header->tail_entry_realtime);
2354         }
2355
2356         return 1;
2357 }
2358
2359 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2360         char t[9+32+1] = "_BOOT_ID=";
2361         Object *o;
2362         uint64_t p;
2363         int r;
2364
2365         assert(f);
2366         assert(from || to);
2367
2368         sd_id128_to_string(boot_id, t + 9);
2369
2370         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2371         if (r <= 0)
2372                 return r;
2373
2374         if (le64toh(o->data.n_entries) <= 0)
2375                 return 0;
2376
2377         if (from) {
2378                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2379                 if (r < 0)
2380                         return r;
2381
2382                 *from = le64toh(o->entry.monotonic);
2383         }
2384
2385         if (to) {
2386                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2387                 if (r < 0)
2388                         return r;
2389
2390                 r = generic_array_get_plus_one(f,
2391                                                le64toh(o->data.entry_offset),
2392                                                le64toh(o->data.entry_array_offset),
2393                                                le64toh(o->data.n_entries)-1,
2394                                                &o, NULL);
2395                 if (r <= 0)
2396                         return r;
2397
2398                 *to = le64toh(o->entry.monotonic);
2399         }
2400
2401         return 1;
2402 }
2403
2404 bool journal_file_rotate_suggested(JournalFile *f) {
2405         assert(f);
2406
2407         /* If we gained new header fields we gained new features,
2408          * hence suggest a rotation */
2409         if (le64toh(f->header->header_size) < sizeof(Header)) {
2410                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2411                 return true;
2412         }
2413
2414         /* Let's check if the hash tables grew over a certain fill
2415          * level (75%, borrowing this value from Java's hash table
2416          * implementation), and if so suggest a rotation. To calculate
2417          * the fill level we need the n_data field, which only exists
2418          * in newer versions. */
2419
2420         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2421                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2422                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2423                                   f->path,
2424                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2425                                   (unsigned long long) le64toh(f->header->n_data),
2426                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2427                                   (unsigned long long) (f->last_stat.st_size),
2428                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2429                         return true;
2430                 }
2431
2432         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2433                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2434                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2435                                   f->path,
2436                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2437                                   (unsigned long long) le64toh(f->header->n_fields),
2438                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2439                         return true;
2440                 }
2441
2442         return false;
2443 }