chiark / gitweb /
man: fix a bunch of typos in docs
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
39
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
41
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */
44
45 /* These are the lower and upper bounds if we deduce the max_use value
46  * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
49
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
52
53 /* This is the upper bound if we deduce the keep_free value from the
54  * file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
/* This is the keep_free value when we can't determine the file
 * system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MB */
60
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67 #ifdef HAVE_GCRYPT
68         /* Write the final tag */
69         if (f->seal && f->writable)
70                 journal_file_append_tag(f);
71 #endif
72
73         /* Sync everything to disk, before we mark the file offline */
74         if (f->mmap && f->fd >= 0)
75                 mmap_cache_close_fd(f->mmap, f->fd);
76
77         if (f->writable && f->fd >= 0)
78                 fdatasync(f->fd);
79
80         if (f->header) {
81                 /* Mark the file offline. Don't override the archived state if it already is set */
82                 if (f->writable && f->header->state == STATE_ONLINE)
83                         f->header->state = STATE_OFFLINE;
84
85                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
86         }
87
88         if (f->fd >= 0)
89                 close_nointr_nofail(f->fd);
90
91         free(f->path);
92
93         if (f->mmap)
94                 mmap_cache_unref(f->mmap);
95
96 #ifdef HAVE_XZ
97         free(f->compress_buffer);
98 #endif
99
100 #ifdef HAVE_GCRYPT
101         if (f->fss_file)
102                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
103         else if (f->fsprg_state)
104                 free(f->fsprg_state);
105
106         free(f->fsprg_seed);
107
108         if (f->hmac)
109                 gcry_md_close(f->hmac);
110 #endif
111
112         free(f);
113 }
114
115 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
116         Header h;
117         ssize_t k;
118         int r;
119
120         assert(f);
121
122         zero(h);
123         memcpy(h.signature, HEADER_SIGNATURE, 8);
124         h.header_size = htole64(ALIGN64(sizeof(h)));
125
126         h.incompatible_flags =
127                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
128
129         h.compatible_flags =
130                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
131
132         r = sd_id128_randomize(&h.file_id);
133         if (r < 0)
134                 return r;
135
136         if (template) {
137                 h.seqnum_id = template->header->seqnum_id;
138                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
139         } else
140                 h.seqnum_id = h.file_id;
141
142         k = pwrite(f->fd, &h, sizeof(h), 0);
143         if (k < 0)
144                 return -errno;
145
146         if (k != sizeof(h))
147                 return -EIO;
148
149         return 0;
150 }
151
152 static int journal_file_refresh_header(JournalFile *f) {
153         int r;
154         sd_id128_t boot_id;
155
156         assert(f);
157
158         r = sd_id128_get_machine(&f->header->machine_id);
159         if (r < 0)
160                 return r;
161
162         r = sd_id128_get_boot(&boot_id);
163         if (r < 0)
164                 return r;
165
166         if (sd_id128_equal(boot_id, f->header->boot_id))
167                 f->tail_entry_monotonic_valid = true;
168
169         f->header->boot_id = boot_id;
170
171         f->header->state = STATE_ONLINE;
172
173         /* Sync the online state to disk */
174         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
175         fdatasync(f->fd);
176
177         return 0;
178 }
179
180 static int journal_file_verify_header(JournalFile *f) {
181         assert(f);
182
183         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
184                 return -EBADMSG;
185
186         /* In both read and write mode we refuse to open files with
187          * incompatible flags we don't know */
188 #ifdef HAVE_XZ
189         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
190                 return -EPROTONOSUPPORT;
191 #else
192         if (f->header->incompatible_flags != 0)
193                 return -EPROTONOSUPPORT;
194 #endif
195
196         /* When open for writing we refuse to open files with
197          * compatible flags, too */
198         if (f->writable) {
199 #ifdef HAVE_GCRYPT
200                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
201                         return -EPROTONOSUPPORT;
202 #else
203                 if (f->header->compatible_flags != 0)
204                         return -EPROTONOSUPPORT;
205 #endif
206         }
207
208         if (f->header->state >= _STATE_MAX)
209                 return -EBADMSG;
210
211         /* The first addition was n_data, so check that we are at least this large */
212         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
213                 return -EBADMSG;
214
215         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
216                 return -EBADMSG;
217
218         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
219                 return -ENODATA;
220
221         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222                 return -ENODATA;
223
224         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226             !VALID64(le64toh(f->header->tail_object_offset)) ||
227             !VALID64(le64toh(f->header->entry_array_offset)))
228                 return -ENODATA;
229
230         if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231             le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232             le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233             le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
234                 return -ENODATA;
235
236         if (f->writable) {
237                 uint8_t state;
238                 sd_id128_t machine_id;
239                 int r;
240
241                 r = sd_id128_get_machine(&machine_id);
242                 if (r < 0)
243                         return r;
244
245                 if (!sd_id128_equal(machine_id, f->header->machine_id))
246                         return -EHOSTDOWN;
247
248                 state = f->header->state;
249
250                 if (state == STATE_ONLINE) {
251                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
252                         return -EBUSY;
253                 } else if (state == STATE_ARCHIVED)
254                         return -ESHUTDOWN;
255                 else if (state != STATE_OFFLINE) {
256                         log_debug("Journal file %s has unknown state %u.", f->path, state);
257                         return -EBUSY;
258                 }
259         }
260
261         f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
262
263         if (f->writable)
264                 f->seal = JOURNAL_HEADER_SEALED(f->header);
265
266         return 0;
267 }
268
269 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
270         uint64_t old_size, new_size;
271         int r;
272
273         assert(f);
274
275         /* We assume that this file is not sparse, and we know that
276          * for sure, since we always call posix_fallocate()
277          * ourselves */
278
279         old_size =
280                 le64toh(f->header->header_size) +
281                 le64toh(f->header->arena_size);
282
283         new_size = PAGE_ALIGN(offset + size);
284         if (new_size < le64toh(f->header->header_size))
285                 new_size = le64toh(f->header->header_size);
286
287         if (new_size <= old_size)
288                 return 0;
289
290         if (f->metrics.max_size > 0 &&
291             new_size > f->metrics.max_size)
292                 return -E2BIG;
293
294         if (new_size > f->metrics.min_size &&
295             f->metrics.keep_free > 0) {
296                 struct statvfs svfs;
297
298                 if (fstatvfs(f->fd, &svfs) >= 0) {
299                         uint64_t available;
300
301                         available = svfs.f_bfree * svfs.f_bsize;
302
303                         if (available >= f->metrics.keep_free)
304                                 available -= f->metrics.keep_free;
305                         else
306                                 available = 0;
307
308                         if (new_size - old_size > available)
309                                 return -E2BIG;
310                 }
311         }
312
313         /* Note that the glibc fallocate() fallback is very
314            inefficient, hence we try to minimize the allocation area
315            as we can. */
316         r = posix_fallocate(f->fd, old_size, new_size - old_size);
317         if (r != 0)
318                 return -r;
319
320         if (fstat(f->fd, &f->last_stat) < 0)
321                 return -errno;
322
323         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
324
325         return 0;
326 }
327
328 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
329         assert(f);
330         assert(ret);
331
332         if (size <= 0)
333                 return -EINVAL;
334
335         /* Avoid SIGBUS on invalid accesses */
336         if (offset + size > (uint64_t) f->last_stat.st_size) {
337                 /* Hmm, out of range? Let's refresh the fstat() data
338                  * first, before we trust that check. */
339
340                 if (fstat(f->fd, &f->last_stat) < 0 ||
341                     offset + size > (uint64_t) f->last_stat.st_size)
342                         return -EADDRNOTAVAIL;
343         }
344
345         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
346 }
347
348 static uint64_t minimum_header_size(Object *o) {
349
350         static uint64_t table[] = {
351                 [OBJECT_DATA] = sizeof(DataObject),
352                 [OBJECT_FIELD] = sizeof(FieldObject),
353                 [OBJECT_ENTRY] = sizeof(EntryObject),
354                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
355                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
356                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
357                 [OBJECT_TAG] = sizeof(TagObject),
358         };
359
360         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
361                 return sizeof(ObjectHeader);
362
363         return table[o->object.type];
364 }
365
366 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
367         int r;
368         void *t;
369         Object *o;
370         uint64_t s;
371         unsigned context;
372
373         assert(f);
374         assert(ret);
375
376         /* Objects may only be located at multiple of 64 bit */
377         if (!VALID64(offset))
378                 return -EFAULT;
379
380         /* One context for each type, plus one catch-all for the rest */
381         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
382
383         r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
384         if (r < 0)
385                 return r;
386
387         o = (Object*) t;
388         s = le64toh(o->object.size);
389
390         if (s < sizeof(ObjectHeader))
391                 return -EBADMSG;
392
393         if (o->object.type <= OBJECT_UNUSED)
394                 return -EBADMSG;
395
396         if (s < minimum_header_size(o))
397                 return -EBADMSG;
398
399         if (type >= 0 && o->object.type != type)
400                 return -EBADMSG;
401
402         if (s > sizeof(ObjectHeader)) {
403                 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
404                 if (r < 0)
405                         return r;
406
407                 o = (Object*) t;
408         }
409
410         *ret = o;
411         return 0;
412 }
413
414 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
415         uint64_t r;
416
417         assert(f);
418
419         r = le64toh(f->header->tail_entry_seqnum) + 1;
420
421         if (seqnum) {
422                 /* If an external seqnum counter was passed, we update
423                  * both the local and the external one, and set it to
424                  * the maximum of both */
425
426                 if (*seqnum + 1 > r)
427                         r = *seqnum + 1;
428
429                 *seqnum = r;
430         }
431
432         f->header->tail_entry_seqnum = htole64(r);
433
434         if (f->header->head_entry_seqnum == 0)
435                 f->header->head_entry_seqnum = htole64(r);
436
437         return r;
438 }
439
440 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
441         int r;
442         uint64_t p;
443         Object *tail, *o;
444         void *t;
445
446         assert(f);
447         assert(type > 0 && type < _OBJECT_TYPE_MAX);
448         assert(size >= sizeof(ObjectHeader));
449         assert(offset);
450         assert(ret);
451
452         p = le64toh(f->header->tail_object_offset);
453         if (p == 0)
454                 p = le64toh(f->header->header_size);
455         else {
456                 r = journal_file_move_to_object(f, -1, p, &tail);
457                 if (r < 0)
458                         return r;
459
460                 p += ALIGN64(le64toh(tail->object.size));
461         }
462
463         r = journal_file_allocate(f, p, size);
464         if (r < 0)
465                 return r;
466
467         r = journal_file_move_to(f, type, false, p, size, &t);
468         if (r < 0)
469                 return r;
470
471         o = (Object*) t;
472
473         zero(o->object);
474         o->object.type = type;
475         o->object.size = htole64(size);
476
477         f->header->tail_object_offset = htole64(p);
478         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
479
480         *ret = o;
481         *offset = p;
482
483         return 0;
484 }
485
486 static int journal_file_setup_data_hash_table(JournalFile *f) {
487         uint64_t s, p;
488         Object *o;
489         int r;
490
491         assert(f);
492
493         /* We estimate that we need 1 hash table entry per 768 of
494            journal file and we want to make sure we never get beyond
495            75% fill level. Calculate the hash table size for the
496            maximum file size based on these metrics. */
497
498         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
499         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
500                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
501
502         log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
503
504         r = journal_file_append_object(f,
505                                        OBJECT_DATA_HASH_TABLE,
506                                        offsetof(Object, hash_table.items) + s,
507                                        &o, &p);
508         if (r < 0)
509                 return r;
510
511         memset(o->hash_table.items, 0, s);
512
513         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
514         f->header->data_hash_table_size = htole64(s);
515
516         return 0;
517 }
518
519 static int journal_file_setup_field_hash_table(JournalFile *f) {
520         uint64_t s, p;
521         Object *o;
522         int r;
523
524         assert(f);
525
526         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
527         r = journal_file_append_object(f,
528                                        OBJECT_FIELD_HASH_TABLE,
529                                        offsetof(Object, hash_table.items) + s,
530                                        &o, &p);
531         if (r < 0)
532                 return r;
533
534         memset(o->hash_table.items, 0, s);
535
536         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
537         f->header->field_hash_table_size = htole64(s);
538
539         return 0;
540 }
541
542 static int journal_file_map_data_hash_table(JournalFile *f) {
543         uint64_t s, p;
544         void *t;
545         int r;
546
547         assert(f);
548
549         p = le64toh(f->header->data_hash_table_offset);
550         s = le64toh(f->header->data_hash_table_size);
551
552         r = journal_file_move_to(f,
553                                  OBJECT_DATA_HASH_TABLE,
554                                  true,
555                                  p, s,
556                                  &t);
557         if (r < 0)
558                 return r;
559
560         f->data_hash_table = t;
561         return 0;
562 }
563
564 static int journal_file_map_field_hash_table(JournalFile *f) {
565         uint64_t s, p;
566         void *t;
567         int r;
568
569         assert(f);
570
571         p = le64toh(f->header->field_hash_table_offset);
572         s = le64toh(f->header->field_hash_table_size);
573
574         r = journal_file_move_to(f,
575                                  OBJECT_FIELD_HASH_TABLE,
576                                  true,
577                                  p, s,
578                                  &t);
579         if (r < 0)
580                 return r;
581
582         f->field_hash_table = t;
583         return 0;
584 }
585
586 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
587         uint64_t p, h;
588         int r;
589
590         assert(f);
591         assert(o);
592         assert(offset > 0);
593
594         if (o->object.type != OBJECT_DATA)
595                 return -EINVAL;
596
597         /* This might alter the window we are looking at */
598
599         o->data.next_hash_offset = o->data.next_field_offset = 0;
600         o->data.entry_offset = o->data.entry_array_offset = 0;
601         o->data.n_entries = 0;
602
603         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
604         p = le64toh(f->data_hash_table[h].tail_hash_offset);
605         if (p == 0) {
606                 /* Only entry in the hash table is easy */
607                 f->data_hash_table[h].head_hash_offset = htole64(offset);
608         } else {
609                 /* Move back to the previous data object, to patch in
610                  * pointer */
611
612                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
613                 if (r < 0)
614                         return r;
615
616                 o->data.next_hash_offset = htole64(offset);
617         }
618
619         f->data_hash_table[h].tail_hash_offset = htole64(offset);
620
621         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
622                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
623
624         return 0;
625 }
626
627 int journal_file_find_data_object_with_hash(
628                 JournalFile *f,
629                 const void *data, uint64_t size, uint64_t hash,
630                 Object **ret, uint64_t *offset) {
631
632         uint64_t p, osize, h;
633         int r;
634
635         assert(f);
636         assert(data || size == 0);
637
638         osize = offsetof(Object, data.payload) + size;
639
640         if (f->header->data_hash_table_size == 0)
641                 return -EBADMSG;
642
643         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
644         p = le64toh(f->data_hash_table[h].head_hash_offset);
645
646         while (p > 0) {
647                 Object *o;
648
649                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
650                 if (r < 0)
651                         return r;
652
653                 if (le64toh(o->data.hash) != hash)
654                         goto next;
655
656                 if (o->object.flags & OBJECT_COMPRESSED) {
657 #ifdef HAVE_XZ
658                         uint64_t l, rsize;
659
660                         l = le64toh(o->object.size);
661                         if (l <= offsetof(Object, data.payload))
662                                 return -EBADMSG;
663
664                         l -= offsetof(Object, data.payload);
665
666                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
667                                 return -EBADMSG;
668
669                         if (rsize == size &&
670                             memcmp(f->compress_buffer, data, size) == 0) {
671
672                                 if (ret)
673                                         *ret = o;
674
675                                 if (offset)
676                                         *offset = p;
677
678                                 return 1;
679                         }
680 #else
681                         return -EPROTONOSUPPORT;
682 #endif
683
684                 } else if (le64toh(o->object.size) == osize &&
685                            memcmp(o->data.payload, data, size) == 0) {
686
687                         if (ret)
688                                 *ret = o;
689
690                         if (offset)
691                                 *offset = p;
692
693                         return 1;
694                 }
695
696         next:
697                 p = le64toh(o->data.next_hash_offset);
698         }
699
700         return 0;
701 }
702
703 int journal_file_find_data_object(
704                 JournalFile *f,
705                 const void *data, uint64_t size,
706                 Object **ret, uint64_t *offset) {
707
708         uint64_t hash;
709
710         assert(f);
711         assert(data || size == 0);
712
713         hash = hash64(data, size);
714
715         return journal_file_find_data_object_with_hash(f,
716                                                        data, size, hash,
717                                                        ret, offset);
718 }
719
720 static int journal_file_append_data(
721                 JournalFile *f,
722                 const void *data, uint64_t size,
723                 Object **ret, uint64_t *offset) {
724
725         uint64_t hash, p;
726         uint64_t osize;
727         Object *o;
728         int r;
729         bool compressed = false;
730
731         assert(f);
732         assert(data || size == 0);
733
734         hash = hash64(data, size);
735
736         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
737         if (r < 0)
738                 return r;
739         else if (r > 0) {
740
741                 if (ret)
742                         *ret = o;
743
744                 if (offset)
745                         *offset = p;
746
747                 return 0;
748         }
749
750         osize = offsetof(Object, data.payload) + size;
751         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
752         if (r < 0)
753                 return r;
754
755         o->data.hash = htole64(hash);
756
757 #ifdef HAVE_XZ
758         if (f->compress &&
759             size >= COMPRESSION_SIZE_THRESHOLD) {
760                 uint64_t rsize;
761
762                 compressed = compress_blob(data, size, o->data.payload, &rsize);
763
764                 if (compressed) {
765                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
766                         o->object.flags |= OBJECT_COMPRESSED;
767
768                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
769                 }
770         }
771 #endif
772
773         if (!compressed && size > 0)
774                 memcpy(o->data.payload, data, size);
775
776         r = journal_file_link_data(f, o, p, hash);
777         if (r < 0)
778                 return r;
779
780         /* The linking might have altered the window, so let's
781          * refresh our pointer */
782         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
783         if (r < 0)
784                 return r;
785
786 #ifdef HAVE_GCRYPT
787         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
788         if (r < 0)
789                 return r;
790 #endif
791
792         if (ret)
793                 *ret = o;
794
795         if (offset)
796                 *offset = p;
797
798         return 0;
799 }
800
801 uint64_t journal_file_entry_n_items(Object *o) {
802         assert(o);
803
804         if (o->object.type != OBJECT_ENTRY)
805                 return 0;
806
807         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
808 }
809
810 uint64_t journal_file_entry_array_n_items(Object *o) {
811         assert(o);
812
813         if (o->object.type != OBJECT_ENTRY_ARRAY)
814                 return 0;
815
816         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
817 }
818
819 uint64_t journal_file_hash_table_n_items(Object *o) {
820         assert(o);
821
822         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
823             o->object.type != OBJECT_FIELD_HASH_TABLE)
824                 return 0;
825
826         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
827 }
828
829 static int link_entry_into_array(JournalFile *f,
830                                  le64_t *first,
831                                  le64_t *idx,
832                                  uint64_t p) {
833         int r;
834         uint64_t n = 0, ap = 0, q, i, a, hidx;
835         Object *o;
836
837         assert(f);
838         assert(first);
839         assert(idx);
840         assert(p > 0);
841
842         a = le64toh(*first);
843         i = hidx = le64toh(*idx);
844         while (a > 0) {
845
846                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
847                 if (r < 0)
848                         return r;
849
850                 n = journal_file_entry_array_n_items(o);
851                 if (i < n) {
852                         o->entry_array.items[i] = htole64(p);
853                         *idx = htole64(hidx + 1);
854                         return 0;
855                 }
856
857                 i -= n;
858                 ap = a;
859                 a = le64toh(o->entry_array.next_entry_array_offset);
860         }
861
862         if (hidx > n)
863                 n = (hidx+1) * 2;
864         else
865                 n = n * 2;
866
867         if (n < 4)
868                 n = 4;
869
870         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
871                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
872                                        &o, &q);
873         if (r < 0)
874                 return r;
875
876 #ifdef HAVE_GCRYPT
877         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
878         if (r < 0)
879                 return r;
880 #endif
881
882         o->entry_array.items[i] = htole64(p);
883
884         if (ap == 0)
885                 *first = htole64(q);
886         else {
887                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
888                 if (r < 0)
889                         return r;
890
891                 o->entry_array.next_entry_array_offset = htole64(q);
892         }
893
894         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
895                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
896
897         *idx = htole64(hidx + 1);
898
899         return 0;
900 }
901
902 static int link_entry_into_array_plus_one(JournalFile *f,
903                                           le64_t *extra,
904                                           le64_t *first,
905                                           le64_t *idx,
906                                           uint64_t p) {
907
908         int r;
909
910         assert(f);
911         assert(extra);
912         assert(first);
913         assert(idx);
914         assert(p > 0);
915
916         if (*idx == 0)
917                 *extra = htole64(p);
918         else {
919                 le64_t i;
920
921                 i = htole64(le64toh(*idx) - 1);
922                 r = link_entry_into_array(f, first, &i, p);
923                 if (r < 0)
924                         return r;
925         }
926
927         *idx = htole64(le64toh(*idx) + 1);
928         return 0;
929 }
930
931 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
932         uint64_t p;
933         int r;
934         assert(f);
935         assert(o);
936         assert(offset > 0);
937
938         p = le64toh(o->entry.items[i].object_offset);
939         if (p == 0)
940                 return -EINVAL;
941
942         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
943         if (r < 0)
944                 return r;
945
946         return link_entry_into_array_plus_one(f,
947                                               &o->data.entry_offset,
948                                               &o->data.entry_array_offset,
949                                               &o->data.n_entries,
950                                               offset);
951 }
952
953 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
954         uint64_t n, i;
955         int r;
956
957         assert(f);
958         assert(o);
959         assert(offset > 0);
960
961         if (o->object.type != OBJECT_ENTRY)
962                 return -EINVAL;
963
964         __sync_synchronize();
965
966         /* Link up the entry itself */
967         r = link_entry_into_array(f,
968                                   &f->header->entry_array_offset,
969                                   &f->header->n_entries,
970                                   offset);
971         if (r < 0)
972                 return r;
973
974         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
975
976         if (f->header->head_entry_realtime == 0)
977                 f->header->head_entry_realtime = o->entry.realtime;
978
979         f->header->tail_entry_realtime = o->entry.realtime;
980         f->header->tail_entry_monotonic = o->entry.monotonic;
981
982         f->tail_entry_monotonic_valid = true;
983
984         /* Link up the items */
985         n = journal_file_entry_n_items(o);
986         for (i = 0; i < n; i++) {
987                 r = journal_file_link_entry_item(f, o, offset, i);
988                 if (r < 0)
989                         return r;
990         }
991
992         return 0;
993 }
994
995 static int journal_file_append_entry_internal(
996                 JournalFile *f,
997                 const dual_timestamp *ts,
998                 uint64_t xor_hash,
999                 const EntryItem items[], unsigned n_items,
1000                 uint64_t *seqnum,
1001                 Object **ret, uint64_t *offset) {
1002         uint64_t np;
1003         uint64_t osize;
1004         Object *o;
1005         int r;
1006
1007         assert(f);
1008         assert(items || n_items == 0);
1009         assert(ts);
1010
1011         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1012
1013         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1014         if (r < 0)
1015                 return r;
1016
1017         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1018         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1019         o->entry.realtime = htole64(ts->realtime);
1020         o->entry.monotonic = htole64(ts->monotonic);
1021         o->entry.xor_hash = htole64(xor_hash);
1022         o->entry.boot_id = f->header->boot_id;
1023
1024 #ifdef HAVE_GCRYPT
1025         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1026         if (r < 0)
1027                 return r;
1028 #endif
1029
1030         r = journal_file_link_entry(f, o, np);
1031         if (r < 0)
1032                 return r;
1033
1034         if (ret)
1035                 *ret = o;
1036
1037         if (offset)
1038                 *offset = np;
1039
1040         return 0;
1041 }
1042
1043 void journal_file_post_change(JournalFile *f) {
1044         assert(f);
1045
1046         /* inotify() does not receive IN_MODIFY events from file
1047          * accesses done via mmap(). After each access we hence
1048          * trigger IN_MODIFY by truncating the journal file to its
1049          * current size which triggers IN_MODIFY. */
1050
1051         __sync_synchronize();
1052
1053         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1054                 log_error("Failed to truncate file to its own size: %m");
1055 }
1056
1057 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1058         unsigned i;
1059         EntryItem *items;
1060         int r;
1061         uint64_t xor_hash = 0;
1062         struct dual_timestamp _ts;
1063
1064         assert(f);
1065         assert(iovec || n_iovec == 0);
1066
1067         if (!f->writable)
1068                 return -EPERM;
1069
1070         if (!ts) {
1071                 dual_timestamp_get(&_ts);
1072                 ts = &_ts;
1073         }
1074
1075         if (f->tail_entry_monotonic_valid &&
1076             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1077                 return -EINVAL;
1078
1079 #ifdef HAVE_GCRYPT
1080         r = journal_file_maybe_append_tag(f, ts->realtime);
1081         if (r < 0)
1082                 return r;
1083 #endif
1084
1085         /* alloca() can't take 0, hence let's allocate at least one */
1086         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1087
1088         for (i = 0; i < n_iovec; i++) {
1089                 uint64_t p;
1090                 Object *o;
1091
1092                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1093                 if (r < 0)
1094                         return r;
1095
1096                 xor_hash ^= le64toh(o->data.hash);
1097                 items[i].object_offset = htole64(p);
1098                 items[i].hash = o->data.hash;
1099         }
1100
1101         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1102
1103         journal_file_post_change(f);
1104
1105         return r;
1106 }
1107
1108 static int generic_array_get(JournalFile *f,
1109                              uint64_t first,
1110                              uint64_t i,
1111                              Object **ret, uint64_t *offset) {
1112
1113         Object *o;
1114         uint64_t p = 0, a;
1115         int r;
1116
1117         assert(f);
1118
1119         a = first;
1120         while (a > 0) {
1121                 uint64_t n;
1122
1123                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1124                 if (r < 0)
1125                         return r;
1126
1127                 n = journal_file_entry_array_n_items(o);
1128                 if (i < n) {
1129                         p = le64toh(o->entry_array.items[i]);
1130                         break;
1131                 }
1132
1133                 i -= n;
1134                 a = le64toh(o->entry_array.next_entry_array_offset);
1135         }
1136
1137         if (a <= 0 || p <= 0)
1138                 return 0;
1139
1140         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1141         if (r < 0)
1142                 return r;
1143
1144         if (ret)
1145                 *ret = o;
1146
1147         if (offset)
1148                 *offset = p;
1149
1150         return 1;
1151 }
1152
1153 static int generic_array_get_plus_one(JournalFile *f,
1154                                       uint64_t extra,
1155                                       uint64_t first,
1156                                       uint64_t i,
1157                                       Object **ret, uint64_t *offset) {
1158
1159         Object *o;
1160
1161         assert(f);
1162
1163         if (i == 0) {
1164                 int r;
1165
1166                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1167                 if (r < 0)
1168                         return r;
1169
1170                 if (ret)
1171                         *ret = o;
1172
1173                 if (offset)
1174                         *offset = extra;
1175
1176                 return 1;
1177         }
1178
1179         return generic_array_get(f, first, i-1, ret, offset);
1180 }
1181
/* Results of the test_object() callbacks used by the bisection code:
 * how the tested object relates to the needle. */
enum {
        TEST_FOUND = 0,  /* the object matches the needle */
        TEST_LEFT  = 1,  /* the object's value is smaller than the needle */
        TEST_RIGHT = 2   /* the object's value is larger than the needle */
};
1187
1188 static int generic_array_bisect(JournalFile *f,
1189                                 uint64_t first,
1190                                 uint64_t n,
1191                                 uint64_t needle,
1192                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1193                                 direction_t direction,
1194                                 Object **ret,
1195                                 uint64_t *offset,
1196                                 uint64_t *idx) {
1197
1198         uint64_t a, p, t = 0, i = 0, last_p = 0;
1199         bool subtract_one = false;
1200         Object *o, *array = NULL;
1201         int r;
1202
1203         assert(f);
1204         assert(test_object);
1205
1206         a = first;
1207         while (a > 0) {
1208                 uint64_t left, right, k, lp;
1209
1210                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1211                 if (r < 0)
1212                         return r;
1213
1214                 k = journal_file_entry_array_n_items(array);
1215                 right = MIN(k, n);
1216                 if (right <= 0)
1217                         return 0;
1218
1219                 i = right - 1;
1220                 lp = p = le64toh(array->entry_array.items[i]);
1221                 if (p <= 0)
1222                         return -EBADMSG;
1223
1224                 r = test_object(f, p, needle);
1225                 if (r < 0)
1226                         return r;
1227
1228                 if (r == TEST_FOUND)
1229                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1230
1231                 if (r == TEST_RIGHT) {
1232                         left = 0;
1233                         right -= 1;
1234                         for (;;) {
1235                                 if (left == right) {
1236                                         if (direction == DIRECTION_UP)
1237                                                 subtract_one = true;
1238
1239                                         i = left;
1240                                         goto found;
1241                                 }
1242
1243                                 assert(left < right);
1244
1245                                 i = (left + right) / 2;
1246                                 p = le64toh(array->entry_array.items[i]);
1247                                 if (p <= 0)
1248                                         return -EBADMSG;
1249
1250                                 r = test_object(f, p, needle);
1251                                 if (r < 0)
1252                                         return r;
1253
1254                                 if (r == TEST_FOUND)
1255                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1256
1257                                 if (r == TEST_RIGHT)
1258                                         right = i;
1259                                 else
1260                                         left = i + 1;
1261                         }
1262                 }
1263
1264                 if (k > n) {
1265                         if (direction == DIRECTION_UP) {
1266                                 i = n;
1267                                 subtract_one = true;
1268                                 goto found;
1269                         }
1270
1271                         return 0;
1272                 }
1273
1274                 last_p = lp;
1275
1276                 n -= k;
1277                 t += k;
1278                 a = le64toh(array->entry_array.next_entry_array_offset);
1279         }
1280
1281         return 0;
1282
1283 found:
1284         if (subtract_one && t == 0 && i == 0)
1285                 return 0;
1286
1287         if (subtract_one && i == 0)
1288                 p = last_p;
1289         else if (subtract_one)
1290                 p = le64toh(array->entry_array.items[i-1]);
1291         else
1292                 p = le64toh(array->entry_array.items[i]);
1293
1294         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1295         if (r < 0)
1296                 return r;
1297
1298         if (ret)
1299                 *ret = o;
1300
1301         if (offset)
1302                 *offset = p;
1303
1304         if (idx)
1305                 *idx = t + i + (subtract_one ? -1 : 0);
1306
1307         return 1;
1308 }
1309
1310 static int generic_array_bisect_plus_one(JournalFile *f,
1311                                          uint64_t extra,
1312                                          uint64_t first,
1313                                          uint64_t n,
1314                                          uint64_t needle,
1315                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1316                                          direction_t direction,
1317                                          Object **ret,
1318                                          uint64_t *offset,
1319                                          uint64_t *idx) {
1320
1321         int r;
1322         bool step_back = false;
1323         Object *o;
1324
1325         assert(f);
1326         assert(test_object);
1327
1328         if (n <= 0)
1329                 return 0;
1330
1331         /* This bisects the array in object 'first', but first checks
1332          * an extra  */
1333         r = test_object(f, extra, needle);
1334         if (r < 0)
1335                 return r;
1336
1337         if (r == TEST_FOUND)
1338                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1339
1340         /* if we are looking with DIRECTION_UP then we need to first
1341            see if in the actual array there is a matching entry, and
1342            return the last one of that. But if there isn't any we need
1343            to return this one. Hence remember this, and return it
1344            below. */
1345         if (r == TEST_LEFT)
1346                 step_back = direction == DIRECTION_UP;
1347
1348         if (r == TEST_RIGHT) {
1349                 if (direction == DIRECTION_DOWN)
1350                         goto found;
1351                 else
1352                         return 0;
1353         }
1354
1355         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1356
1357         if (r == 0 && step_back)
1358                 goto found;
1359
1360         if (r > 0 && idx)
1361                 (*idx) ++;
1362
1363         return r;
1364
1365 found:
1366         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1367         if (r < 0)
1368                 return r;
1369
1370         if (ret)
1371                 *ret = o;
1372
1373         if (offset)
1374                 *offset = extra;
1375
1376         if (idx)
1377                 *idx = 0;
1378
1379         return 1;
1380 }
1381
1382 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1383         assert(f);
1384         assert(p > 0);
1385
1386         if (p == needle)
1387                 return TEST_FOUND;
1388         else if (p < needle)
1389                 return TEST_LEFT;
1390         else
1391                 return TEST_RIGHT;
1392 }
1393
1394 int journal_file_move_to_entry_by_offset(
1395                 JournalFile *f,
1396                 uint64_t p,
1397                 direction_t direction,
1398                 Object **ret,
1399                 uint64_t *offset) {
1400
1401         return generic_array_bisect(f,
1402                                     le64toh(f->header->entry_array_offset),
1403                                     le64toh(f->header->n_entries),
1404                                     p,
1405                                     test_object_offset,
1406                                     direction,
1407                                     ret, offset, NULL);
1408 }
1409
1410
1411 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1412         Object *o;
1413         int r;
1414
1415         assert(f);
1416         assert(p > 0);
1417
1418         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1419         if (r < 0)
1420                 return r;
1421
1422         if (le64toh(o->entry.seqnum) == needle)
1423                 return TEST_FOUND;
1424         else if (le64toh(o->entry.seqnum) < needle)
1425                 return TEST_LEFT;
1426         else
1427                 return TEST_RIGHT;
1428 }
1429
1430 int journal_file_move_to_entry_by_seqnum(
1431                 JournalFile *f,
1432                 uint64_t seqnum,
1433                 direction_t direction,
1434                 Object **ret,
1435                 uint64_t *offset) {
1436
1437         return generic_array_bisect(f,
1438                                     le64toh(f->header->entry_array_offset),
1439                                     le64toh(f->header->n_entries),
1440                                     seqnum,
1441                                     test_object_seqnum,
1442                                     direction,
1443                                     ret, offset, NULL);
1444 }
1445
1446 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1447         Object *o;
1448         int r;
1449
1450         assert(f);
1451         assert(p > 0);
1452
1453         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1454         if (r < 0)
1455                 return r;
1456
1457         if (le64toh(o->entry.realtime) == needle)
1458                 return TEST_FOUND;
1459         else if (le64toh(o->entry.realtime) < needle)
1460                 return TEST_LEFT;
1461         else
1462                 return TEST_RIGHT;
1463 }
1464
1465 int journal_file_move_to_entry_by_realtime(
1466                 JournalFile *f,
1467                 uint64_t realtime,
1468                 direction_t direction,
1469                 Object **ret,
1470                 uint64_t *offset) {
1471
1472         return generic_array_bisect(f,
1473                                     le64toh(f->header->entry_array_offset),
1474                                     le64toh(f->header->n_entries),
1475                                     realtime,
1476                                     test_object_realtime,
1477                                     direction,
1478                                     ret, offset, NULL);
1479 }
1480
1481 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1482         Object *o;
1483         int r;
1484
1485         assert(f);
1486         assert(p > 0);
1487
1488         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1489         if (r < 0)
1490                 return r;
1491
1492         if (le64toh(o->entry.monotonic) == needle)
1493                 return TEST_FOUND;
1494         else if (le64toh(o->entry.monotonic) < needle)
1495                 return TEST_LEFT;
1496         else
1497                 return TEST_RIGHT;
1498 }
1499
1500 int journal_file_move_to_entry_by_monotonic(
1501                 JournalFile *f,
1502                 sd_id128_t boot_id,
1503                 uint64_t monotonic,
1504                 direction_t direction,
1505                 Object **ret,
1506                 uint64_t *offset) {
1507
1508         char t[9+32+1] = "_BOOT_ID=";
1509         Object *o;
1510         int r;
1511
1512         assert(f);
1513
1514         sd_id128_to_string(boot_id, t + 9);
1515         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1516         if (r < 0)
1517                 return r;
1518         if (r == 0)
1519                 return -ENOENT;
1520
1521         return generic_array_bisect_plus_one(f,
1522                                              le64toh(o->data.entry_offset),
1523                                              le64toh(o->data.entry_array_offset),
1524                                              le64toh(o->data.n_entries),
1525                                              monotonic,
1526                                              test_object_monotonic,
1527                                              direction,
1528                                              ret, offset, NULL);
1529 }
1530
1531 int journal_file_next_entry(
1532                 JournalFile *f,
1533                 Object *o, uint64_t p,
1534                 direction_t direction,
1535                 Object **ret, uint64_t *offset) {
1536
1537         uint64_t i, n;
1538         int r;
1539
1540         assert(f);
1541         assert(p > 0 || !o);
1542
1543         n = le64toh(f->header->n_entries);
1544         if (n <= 0)
1545                 return 0;
1546
1547         if (!o)
1548                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1549         else {
1550                 if (o->object.type != OBJECT_ENTRY)
1551                         return -EINVAL;
1552
1553                 r = generic_array_bisect(f,
1554                                          le64toh(f->header->entry_array_offset),
1555                                          le64toh(f->header->n_entries),
1556                                          p,
1557                                          test_object_offset,
1558                                          DIRECTION_DOWN,
1559                                          NULL, NULL,
1560                                          &i);
1561                 if (r <= 0)
1562                         return r;
1563
1564                 if (direction == DIRECTION_DOWN) {
1565                         if (i >= n - 1)
1566                                 return 0;
1567
1568                         i++;
1569                 } else {
1570                         if (i <= 0)
1571                                 return 0;
1572
1573                         i--;
1574                 }
1575         }
1576
1577         /* And jump to it */
1578         return generic_array_get(f,
1579                                  le64toh(f->header->entry_array_offset),
1580                                  i,
1581                                  ret, offset);
1582 }
1583
1584 int journal_file_skip_entry(
1585                 JournalFile *f,
1586                 Object *o, uint64_t p,
1587                 int64_t skip,
1588                 Object **ret, uint64_t *offset) {
1589
1590         uint64_t i, n;
1591         int r;
1592
1593         assert(f);
1594         assert(o);
1595         assert(p > 0);
1596
1597         if (o->object.type != OBJECT_ENTRY)
1598                 return -EINVAL;
1599
1600         r = generic_array_bisect(f,
1601                                  le64toh(f->header->entry_array_offset),
1602                                  le64toh(f->header->n_entries),
1603                                  p,
1604                                  test_object_offset,
1605                                  DIRECTION_DOWN,
1606                                  NULL, NULL,
1607                                  &i);
1608         if (r <= 0)
1609                 return r;
1610
1611         /* Calculate new index */
1612         if (skip < 0) {
1613                 if ((uint64_t) -skip >= i)
1614                         i = 0;
1615                 else
1616                         i = i - (uint64_t) -skip;
1617         } else
1618                 i  += (uint64_t) skip;
1619
1620         n = le64toh(f->header->n_entries);
1621         if (n <= 0)
1622                 return -EBADMSG;
1623
1624         if (i >= n)
1625                 i = n-1;
1626
1627         return generic_array_get(f,
1628                                  le64toh(f->header->entry_array_offset),
1629                                  i,
1630                                  ret, offset);
1631 }
1632
1633 int journal_file_next_entry_for_data(
1634                 JournalFile *f,
1635                 Object *o, uint64_t p,
1636                 uint64_t data_offset,
1637                 direction_t direction,
1638                 Object **ret, uint64_t *offset) {
1639
1640         uint64_t n, i;
1641         int r;
1642         Object *d;
1643
1644         assert(f);
1645         assert(p > 0 || !o);
1646
1647         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1648         if (r < 0)
1649                 return r;
1650
1651         n = le64toh(d->data.n_entries);
1652         if (n <= 0)
1653                 return n;
1654
1655         if (!o)
1656                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1657         else {
1658                 if (o->object.type != OBJECT_ENTRY)
1659                         return -EINVAL;
1660
1661                 r = generic_array_bisect_plus_one(f,
1662                                                   le64toh(d->data.entry_offset),
1663                                                   le64toh(d->data.entry_array_offset),
1664                                                   le64toh(d->data.n_entries),
1665                                                   p,
1666                                                   test_object_offset,
1667                                                   DIRECTION_DOWN,
1668                                                   NULL, NULL,
1669                                                   &i);
1670
1671                 if (r <= 0)
1672                         return r;
1673
1674                 if (direction == DIRECTION_DOWN) {
1675                         if (i >= n - 1)
1676                                 return 0;
1677
1678                         i++;
1679                 } else {
1680                         if (i <= 0)
1681                                 return 0;
1682
1683                         i--;
1684                 }
1685
1686         }
1687
1688         return generic_array_get_plus_one(f,
1689                                           le64toh(d->data.entry_offset),
1690                                           le64toh(d->data.entry_array_offset),
1691                                           i,
1692                                           ret, offset);
1693 }
1694
1695 int journal_file_move_to_entry_by_offset_for_data(
1696                 JournalFile *f,
1697                 uint64_t data_offset,
1698                 uint64_t p,
1699                 direction_t direction,
1700                 Object **ret, uint64_t *offset) {
1701
1702         int r;
1703         Object *d;
1704
1705         assert(f);
1706
1707         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1708         if (r < 0)
1709                 return r;
1710
1711         return generic_array_bisect_plus_one(f,
1712                                              le64toh(d->data.entry_offset),
1713                                              le64toh(d->data.entry_array_offset),
1714                                              le64toh(d->data.n_entries),
1715                                              p,
1716                                              test_object_offset,
1717                                              direction,
1718                                              ret, offset, NULL);
1719 }
1720
1721 int journal_file_move_to_entry_by_monotonic_for_data(
1722                 JournalFile *f,
1723                 uint64_t data_offset,
1724                 sd_id128_t boot_id,
1725                 uint64_t monotonic,
1726                 direction_t direction,
1727                 Object **ret, uint64_t *offset) {
1728
1729         char t[9+32+1] = "_BOOT_ID=";
1730         Object *o, *d;
1731         int r;
1732         uint64_t b, z;
1733
1734         assert(f);
1735
1736         /* First, seek by time */
1737         sd_id128_to_string(boot_id, t + 9);
1738         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1739         if (r < 0)
1740                 return r;
1741         if (r == 0)
1742                 return -ENOENT;
1743
1744         r = generic_array_bisect_plus_one(f,
1745                                           le64toh(o->data.entry_offset),
1746                                           le64toh(o->data.entry_array_offset),
1747                                           le64toh(o->data.n_entries),
1748                                           monotonic,
1749                                           test_object_monotonic,
1750                                           direction,
1751                                           NULL, &z, NULL);
1752         if (r <= 0)
1753                 return r;
1754
1755         /* And now, continue seeking until we find an entry that
1756          * exists in both bisection arrays */
1757
1758         for (;;) {
1759                 Object *qo;
1760                 uint64_t p, q;
1761
1762                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1763                 if (r < 0)
1764                         return r;
1765
1766                 r = generic_array_bisect_plus_one(f,
1767                                                   le64toh(d->data.entry_offset),
1768                                                   le64toh(d->data.entry_array_offset),
1769                                                   le64toh(d->data.n_entries),
1770                                                   z,
1771                                                   test_object_offset,
1772                                                   direction,
1773                                                   NULL, &p, NULL);
1774                 if (r <= 0)
1775                         return r;
1776
1777                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1778                 if (r < 0)
1779                         return r;
1780
1781                 r = generic_array_bisect_plus_one(f,
1782                                                   le64toh(o->data.entry_offset),
1783                                                   le64toh(o->data.entry_array_offset),
1784                                                   le64toh(o->data.n_entries),
1785                                                   p,
1786                                                   test_object_offset,
1787                                                   direction,
1788                                                   &qo, &q, NULL);
1789
1790                 if (r <= 0)
1791                         return r;
1792
1793                 if (p == q) {
1794                         if (ret)
1795                                 *ret = qo;
1796                         if (offset)
1797                                 *offset = q;
1798
1799                         return 1;
1800                 }
1801
1802                 z = q;
1803         }
1804
1805         return 0;
1806 }
1807
1808 int journal_file_move_to_entry_by_seqnum_for_data(
1809                 JournalFile *f,
1810                 uint64_t data_offset,
1811                 uint64_t seqnum,
1812                 direction_t direction,
1813                 Object **ret, uint64_t *offset) {
1814
1815         Object *d;
1816         int r;
1817
1818         assert(f);
1819
1820         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1821         if (r < 0)
1822                 return r;
1823
1824         return generic_array_bisect_plus_one(f,
1825                                              le64toh(d->data.entry_offset),
1826                                              le64toh(d->data.entry_array_offset),
1827                                              le64toh(d->data.n_entries),
1828                                              seqnum,
1829                                              test_object_seqnum,
1830                                              direction,
1831                                              ret, offset, NULL);
1832 }
1833
1834 int journal_file_move_to_entry_by_realtime_for_data(
1835                 JournalFile *f,
1836                 uint64_t data_offset,
1837                 uint64_t realtime,
1838                 direction_t direction,
1839                 Object **ret, uint64_t *offset) {
1840
1841         Object *d;
1842         int r;
1843
1844         assert(f);
1845
1846         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1847         if (r < 0)
1848                 return r;
1849
1850         return generic_array_bisect_plus_one(f,
1851                                              le64toh(d->data.entry_offset),
1852                                              le64toh(d->data.entry_array_offset),
1853                                              le64toh(d->data.n_entries),
1854                                              realtime,
1855                                              test_object_realtime,
1856                                              direction,
1857                                              ret, offset, NULL);
1858 }
1859
1860 void journal_file_dump(JournalFile *f) {
1861         Object *o;
1862         int r;
1863         uint64_t p;
1864
1865         assert(f);
1866
1867         journal_file_print_header(f);
1868
1869         p = le64toh(f->header->header_size);
1870         while (p != 0) {
1871                 r = journal_file_move_to_object(f, -1, p, &o);
1872                 if (r < 0)
1873                         goto fail;
1874
1875                 switch (o->object.type) {
1876
1877                 case OBJECT_UNUSED:
1878                         printf("Type: OBJECT_UNUSED\n");
1879                         break;
1880
1881                 case OBJECT_DATA:
1882                         printf("Type: OBJECT_DATA\n");
1883                         break;
1884
1885                 case OBJECT_ENTRY:
1886                         printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1887                                (unsigned long long) le64toh(o->entry.seqnum),
1888                                (unsigned long long) le64toh(o->entry.monotonic),
1889                                (unsigned long long) le64toh(o->entry.realtime));
1890                         break;
1891
1892                 case OBJECT_FIELD_HASH_TABLE:
1893                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1894                         break;
1895
1896                 case OBJECT_DATA_HASH_TABLE:
1897                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1898                         break;
1899
1900                 case OBJECT_ENTRY_ARRAY:
1901                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1902                         break;
1903
1904                 case OBJECT_TAG:
1905                         printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1906                                (unsigned long long) le64toh(o->tag.seqnum),
1907                                (unsigned long long) le64toh(o->tag.epoch));
1908                         break;
1909                 }
1910
1911                 if (o->object.flags & OBJECT_COMPRESSED)
1912                         printf("Flags: COMPRESSED\n");
1913
1914                 if (p == le64toh(f->header->tail_object_offset))
1915                         p = 0;
1916                 else
1917                         p = p + ALIGN64(le64toh(o->object.size));
1918         }
1919
1920         return;
1921 fail:
1922         log_error("File corrupt");
1923 }
1924
1925 void journal_file_print_header(JournalFile *f) {
1926         char a[33], b[33], c[33];
1927         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1928         struct stat st;
1929         char bytes[FORMAT_BYTES_MAX];
1930
1931         assert(f);
1932
1933         printf("File Path: %s\n"
1934                "File ID: %s\n"
1935                "Machine ID: %s\n"
1936                "Boot ID: %s\n"
1937                "Sequential Number ID: %s\n"
1938                "State: %s\n"
1939                "Compatible Flags:%s%s\n"
1940                "Incompatible Flags:%s%s\n"
1941                "Header size: %llu\n"
1942                "Arena size: %llu\n"
1943                "Data Hash Table Size: %llu\n"
1944                "Field Hash Table Size: %llu\n"
1945                "Rotate Suggested: %s\n"
1946                "Head Sequential Number: %llu\n"
1947                "Tail Sequential Number: %llu\n"
1948                "Head Realtime Timestamp: %s\n"
1949                "Tail Realtime Timestamp: %s\n"
1950                "Objects: %llu\n"
1951                "Entry Objects: %llu\n",
1952                f->path,
1953                sd_id128_to_string(f->header->file_id, a),
1954                sd_id128_to_string(f->header->machine_id, b),
1955                sd_id128_to_string(f->header->boot_id, c),
1956                sd_id128_to_string(f->header->seqnum_id, c),
1957                f->header->state == STATE_OFFLINE ? "OFFLINE" :
1958                f->header->state == STATE_ONLINE ? "ONLINE" :
1959                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1960                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1961                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1962                JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1963                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1964                (unsigned long long) le64toh(f->header->header_size),
1965                (unsigned long long) le64toh(f->header->arena_size),
1966                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1967                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1968                yes_no(journal_file_rotate_suggested(f)),
1969                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1970                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1971                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1972                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1973                (unsigned long long) le64toh(f->header->n_objects),
1974                (unsigned long long) le64toh(f->header->n_entries));
1975
1976         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1977                 printf("Data Objects: %llu\n"
1978                        "Data Hash Table Fill: %.1f%%\n",
1979                        (unsigned long long) le64toh(f->header->n_data),
1980                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1981
1982         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1983                 printf("Field Objects: %llu\n"
1984                        "Field Hash Table Fill: %.1f%%\n",
1985                        (unsigned long long) le64toh(f->header->n_fields),
1986                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1987
1988         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1989                 printf("Tag Objects: %llu\n",
1990                        (unsigned long long) le64toh(f->header->n_tags));
1991         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1992                 printf("Entry Array Objects: %llu\n",
1993                        (unsigned long long) le64toh(f->header->n_entry_arrays));
1994
1995         if (fstat(f->fd, &st) >= 0)
1996                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
1997 }
1998
1999 int journal_file_open(
2000                 const char *fname,
2001                 int flags,
2002                 mode_t mode,
2003                 bool compress,
2004                 bool seal,
2005                 JournalMetrics *metrics,
2006                 MMapCache *mmap_cache,
2007                 JournalFile *template,
2008                 JournalFile **ret) {
2009
2010         JournalFile *f;
2011         int r;
2012         bool newly_created = false;
2013
2014         assert(fname);
2015
2016         if ((flags & O_ACCMODE) != O_RDONLY &&
2017             (flags & O_ACCMODE) != O_RDWR)
2018                 return -EINVAL;
2019
2020         if (!endswith(fname, ".journal") &&
2021             !endswith(fname, ".journal~"))
2022                 return -EINVAL;
2023
2024         f = new0(JournalFile, 1);
2025         if (!f)
2026                 return -ENOMEM;
2027
2028         f->fd = -1;
2029         f->mode = mode;
2030
2031         f->flags = flags;
2032         f->prot = prot_from_flags(flags);
2033         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2034 #ifdef HAVE_XZ
2035         f->compress = compress;
2036 #endif
2037 #ifdef HAVE_GCRYPT
2038         f->seal = seal;
2039 #endif
2040
2041         if (mmap_cache)
2042                 f->mmap = mmap_cache_ref(mmap_cache);
2043         else {
2044                 f->mmap = mmap_cache_new();
2045                 if (!f->mmap) {
2046                         r = -ENOMEM;
2047                         goto fail;
2048                 }
2049         }
2050
2051         f->path = strdup(fname);
2052         if (!f->path) {
2053                 r = -ENOMEM;
2054                 goto fail;
2055         }
2056
2057         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2058         if (f->fd < 0) {
2059                 r = -errno;
2060                 goto fail;
2061         }
2062
2063         if (fstat(f->fd, &f->last_stat) < 0) {
2064                 r = -errno;
2065                 goto fail;
2066         }
2067
2068         if (f->last_stat.st_size == 0 && f->writable) {
2069                 newly_created = true;
2070
2071 #ifdef HAVE_GCRYPT
2072                 /* Try to load the FSPRG state, and if we can't, then
2073                  * just don't do sealing */
2074                 if (f->seal) {
2075                         r = journal_file_fss_load(f);
2076                         if (r < 0)
2077                                 f->seal = false;
2078                 }
2079 #endif
2080
2081                 r = journal_file_init_header(f, template);
2082                 if (r < 0)
2083                         goto fail;
2084
2085                 if (fstat(f->fd, &f->last_stat) < 0) {
2086                         r = -errno;
2087                         goto fail;
2088                 }
2089         }
2090
2091         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2092                 r = -EIO;
2093                 goto fail;
2094         }
2095
2096         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2097         if (f->header == MAP_FAILED) {
2098                 f->header = NULL;
2099                 r = -errno;
2100                 goto fail;
2101         }
2102
2103         if (!newly_created) {
2104                 r = journal_file_verify_header(f);
2105                 if (r < 0)
2106                         goto fail;
2107         }
2108
2109 #ifdef HAVE_GCRYPT
2110         if (!newly_created && f->writable) {
2111                 r = journal_file_fss_load(f);
2112                 if (r < 0)
2113                         goto fail;
2114         }
2115 #endif
2116
2117         if (f->writable) {
2118                 if (metrics) {
2119                         journal_default_metrics(metrics, f->fd);
2120                         f->metrics = *metrics;
2121                 } else if (template)
2122                         f->metrics = template->metrics;
2123
2124                 r = journal_file_refresh_header(f);
2125                 if (r < 0)
2126                         goto fail;
2127         }
2128
2129 #ifdef HAVE_GCRYPT
2130         r = journal_file_hmac_setup(f);
2131         if (r < 0)
2132                 goto fail;
2133 #endif
2134
2135         if (newly_created) {
2136                 r = journal_file_setup_field_hash_table(f);
2137                 if (r < 0)
2138                         goto fail;
2139
2140                 r = journal_file_setup_data_hash_table(f);
2141                 if (r < 0)
2142                         goto fail;
2143
2144 #ifdef HAVE_GCRYPT
2145                 r = journal_file_append_first_tag(f);
2146                 if (r < 0)
2147                         goto fail;
2148 #endif
2149         }
2150
2151         r = journal_file_map_field_hash_table(f);
2152         if (r < 0)
2153                 goto fail;
2154
2155         r = journal_file_map_data_hash_table(f);
2156         if (r < 0)
2157                 goto fail;
2158
2159         if (ret)
2160                 *ret = f;
2161
2162         return 0;
2163
2164 fail:
2165         journal_file_close(f);
2166
2167         return r;
2168 }
2169
2170 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2171         char *p;
2172         size_t l;
2173         JournalFile *old_file, *new_file = NULL;
2174         int r;
2175
2176         assert(f);
2177         assert(*f);
2178
2179         old_file = *f;
2180
2181         if (!old_file->writable)
2182                 return -EINVAL;
2183
2184         if (!endswith(old_file->path, ".journal"))
2185                 return -EINVAL;
2186
2187         l = strlen(old_file->path);
2188
2189         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2190         if (!p)
2191                 return -ENOMEM;
2192
2193         memcpy(p, old_file->path, l - 8);
2194         p[l-8] = '@';
2195         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2196         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2197                  "-%016llx-%016llx.journal",
2198                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2199                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2200
2201         r = rename(old_file->path, p);
2202         free(p);
2203
2204         if (r < 0)
2205                 return -errno;
2206
2207         old_file->header->state = STATE_ARCHIVED;
2208
2209         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2210         journal_file_close(old_file);
2211
2212         *f = new_file;
2213         return r;
2214 }
2215
2216 int journal_file_open_reliably(
2217                 const char *fname,
2218                 int flags,
2219                 mode_t mode,
2220                 bool compress,
2221                 bool seal,
2222                 JournalMetrics *metrics,
2223                 MMapCache *mmap_cache,
2224                 JournalFile *template,
2225                 JournalFile **ret) {
2226
2227         int r;
2228         size_t l;
2229         char *p;
2230
2231         r = journal_file_open(fname, flags, mode, compress, seal,
2232                               metrics, mmap_cache, template, ret);
2233         if (r != -EBADMSG && /* corrupted */
2234             r != -ENODATA && /* truncated */
2235             r != -EHOSTDOWN && /* other machine */
2236             r != -EPROTONOSUPPORT && /* incompatible feature */
2237             r != -EBUSY && /* unclean shutdown */
2238             r != -ESHUTDOWN /* already archived */)
2239                 return r;
2240
2241         if ((flags & O_ACCMODE) == O_RDONLY)
2242                 return r;
2243
2244         if (!(flags & O_CREAT))
2245                 return r;
2246
2247         if (!endswith(fname, ".journal"))
2248                 return r;
2249
2250         /* The file is corrupted. Rotate it away and try it again (but only once) */
2251
2252         l = strlen(fname);
2253         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2254                      (int) (l-8), fname,
2255                      (unsigned long long) now(CLOCK_REALTIME),
2256                      random_ull()) < 0)
2257                 return -ENOMEM;
2258
2259         r = rename(fname, p);
2260         free(p);
2261         if (r < 0)
2262                 return -errno;
2263
2264         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2265
2266         return journal_file_open(fname, flags, mode, compress, seal,
2267                                  metrics, mmap_cache, template, ret);
2268 }
2269
2270
2271 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2272         uint64_t i, n;
2273         uint64_t q, xor_hash = 0;
2274         int r;
2275         EntryItem *items;
2276         dual_timestamp ts;
2277
2278         assert(from);
2279         assert(to);
2280         assert(o);
2281         assert(p);
2282
2283         if (!to->writable)
2284                 return -EPERM;
2285
2286         ts.monotonic = le64toh(o->entry.monotonic);
2287         ts.realtime = le64toh(o->entry.realtime);
2288
2289         if (to->tail_entry_monotonic_valid &&
2290             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2291                 return -EINVAL;
2292
2293         n = journal_file_entry_n_items(o);
2294         items = alloca(sizeof(EntryItem) * n);
2295
2296         for (i = 0; i < n; i++) {
2297                 uint64_t l, h;
2298                 le64_t le_hash;
2299                 size_t t;
2300                 void *data;
2301                 Object *u;
2302
2303                 q = le64toh(o->entry.items[i].object_offset);
2304                 le_hash = o->entry.items[i].hash;
2305
2306                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2307                 if (r < 0)
2308                         return r;
2309
2310                 if (le_hash != o->data.hash)
2311                         return -EBADMSG;
2312
2313                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2314                 t = (size_t) l;
2315
2316                 /* We hit the limit on 32bit machines */
2317                 if ((uint64_t) t != l)
2318                         return -E2BIG;
2319
2320                 if (o->object.flags & OBJECT_COMPRESSED) {
2321 #ifdef HAVE_XZ
2322                         uint64_t rsize;
2323
2324                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2325                                 return -EBADMSG;
2326
2327                         data = from->compress_buffer;
2328                         l = rsize;
2329 #else
2330                         return -EPROTONOSUPPORT;
2331 #endif
2332                 } else
2333                         data = o->data.payload;
2334
2335                 r = journal_file_append_data(to, data, l, &u, &h);
2336                 if (r < 0)
2337                         return r;
2338
2339                 xor_hash ^= le64toh(u->data.hash);
2340                 items[i].object_offset = htole64(h);
2341                 items[i].hash = u->data.hash;
2342
2343                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2344                 if (r < 0)
2345                         return r;
2346         }
2347
2348         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2349 }
2350
2351 void journal_default_metrics(JournalMetrics *m, int fd) {
2352         uint64_t fs_size = 0;
2353         struct statvfs ss;
2354         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2355
2356         assert(m);
2357         assert(fd >= 0);
2358
2359         if (fstatvfs(fd, &ss) >= 0)
2360                 fs_size = ss.f_frsize * ss.f_blocks;
2361
2362         if (m->max_use == (uint64_t) -1) {
2363
2364                 if (fs_size > 0) {
2365                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2366
2367                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2368                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2369
2370                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2371                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2372                 } else
2373                         m->max_use = DEFAULT_MAX_USE_LOWER;
2374         } else {
2375                 m->max_use = PAGE_ALIGN(m->max_use);
2376
2377                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2378                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2379         }
2380
2381         if (m->max_size == (uint64_t) -1) {
2382                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2383
2384                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2385                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2386         } else
2387                 m->max_size = PAGE_ALIGN(m->max_size);
2388
2389         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2390                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2391
2392         if (m->max_size*2 > m->max_use)
2393                 m->max_use = m->max_size*2;
2394
2395         if (m->min_size == (uint64_t) -1)
2396                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2397         else {
2398                 m->min_size = PAGE_ALIGN(m->min_size);
2399
2400                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2401                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2402
2403                 if (m->min_size > m->max_size)
2404                         m->max_size = m->min_size;
2405         }
2406
2407         if (m->keep_free == (uint64_t) -1) {
2408
2409                 if (fs_size > 0) {
2410                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2411
2412                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2413                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2414
2415                 } else
2416                         m->keep_free = DEFAULT_KEEP_FREE;
2417         }
2418
2419         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2420                   format_bytes(a, sizeof(a), m->max_use),
2421                   format_bytes(b, sizeof(b), m->max_size),
2422                   format_bytes(c, sizeof(c), m->min_size),
2423                   format_bytes(d, sizeof(d), m->keep_free));
2424 }
2425
2426 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2427         assert(f);
2428         assert(from || to);
2429
2430         if (from) {
2431                 if (f->header->head_entry_realtime == 0)
2432                         return -ENOENT;
2433
2434                 *from = le64toh(f->header->head_entry_realtime);
2435         }
2436
2437         if (to) {
2438                 if (f->header->tail_entry_realtime == 0)
2439                         return -ENOENT;
2440
2441                 *to = le64toh(f->header->tail_entry_realtime);
2442         }
2443
2444         return 1;
2445 }
2446
2447 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2448         char t[9+32+1] = "_BOOT_ID=";
2449         Object *o;
2450         uint64_t p;
2451         int r;
2452
2453         assert(f);
2454         assert(from || to);
2455
2456         sd_id128_to_string(boot_id, t + 9);
2457
2458         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2459         if (r <= 0)
2460                 return r;
2461
2462         if (le64toh(o->data.n_entries) <= 0)
2463                 return 0;
2464
2465         if (from) {
2466                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2467                 if (r < 0)
2468                         return r;
2469
2470                 *from = le64toh(o->entry.monotonic);
2471         }
2472
2473         if (to) {
2474                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2475                 if (r < 0)
2476                         return r;
2477
2478                 r = generic_array_get_plus_one(f,
2479                                                le64toh(o->data.entry_offset),
2480                                                le64toh(o->data.entry_array_offset),
2481                                                le64toh(o->data.n_entries)-1,
2482                                                &o, NULL);
2483                 if (r <= 0)
2484                         return r;
2485
2486                 *to = le64toh(o->entry.monotonic);
2487         }
2488
2489         return 1;
2490 }
2491
2492 bool journal_file_rotate_suggested(JournalFile *f) {
2493         assert(f);
2494
2495         /* If we gained new header fields we gained new features,
2496          * hence suggest a rotation */
2497         if (le64toh(f->header->header_size) < sizeof(Header)) {
2498                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2499                 return true;
2500         }
2501
2502         /* Let's check if the hash tables grew over a certain fill
2503          * level (75%, borrowing this value from Java's hash table
2504          * implementation), and if so suggest a rotation. To calculate
2505          * the fill level we need the n_data field, which only exists
2506          * in newer versions. */
2507
2508         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2509                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2510                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2511                                   f->path,
2512                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2513                                   (unsigned long long) le64toh(f->header->n_data),
2514                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2515                                   (unsigned long long) (f->last_stat.st_size),
2516                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2517                         return true;
2518                 }
2519
2520         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2521                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2522                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2523                                   f->path,
2524                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2525                                   (unsigned long long) le64toh(f->header->n_fields),
2526                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2527                         return true;
2528                 }
2529
2530         return false;
2531 }