chiark / gitweb /
sd-journal: properly parse cursor strings
[elogind.git] / src / journal / journal-file.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4   This file is part of systemd.
5
6   Copyright 2011 Lennart Poettering
7
8   systemd is free software; you can redistribute it and/or modify it
9   under the terms of the GNU Lesser General Public License as published by
10   the Free Software Foundation; either version 2.1 of the License, or
11   (at your option) any later version.
12
13   systemd is distributed in the hope that it will be useful, but
14   WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17
18   You should have received a copy of the GNU Lesser General Public License
19   along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mman.h>
23 #include <errno.h>
24 #include <sys/uio.h>
25 #include <unistd.h>
26 #include <sys/statvfs.h>
27 #include <fcntl.h>
28 #include <stddef.h>
29
30 #include "journal-def.h"
31 #include "journal-file.h"
32 #include "journal-authenticate.h"
33 #include "lookup3.h"
34 #include "compress.h"
35 #include "fsprg.h"
36
37 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
38 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
39
40 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
41
42 /* This is the minimum journal file size */
43 #define JOURNAL_FILE_SIZE_MIN (64ULL*1024ULL)                  /* 64 KiB */
44
45 /* These are the lower and upper bounds if we deduce the max_use value
46  * from the file system size */
47 #define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)           /* 1 MiB */
48 #define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)   /* 4 GiB */
49
50 /* This is the upper bound if we deduce max_size from max_use */
51 #define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)        /* 128 MiB */
52
53 /* This is the upper bound if we deduce the keep_free value from the
54  * file system size */
55 #define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL) /* 4 GiB */
56
57 /* This is the keep_free value when we can't determine the file
58  * system size */
59 #define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                    /* 1 MB */
60
61 /* n_data was the first entry we added after the initial file format design */
62 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
63
64 void journal_file_close(JournalFile *f) {
65         assert(f);
66
67 #ifdef HAVE_GCRYPT
68         /* Write the final tag */
69         if (f->seal && f->writable)
70                 journal_file_append_tag(f);
71 #endif
72
73         /* Sync everything to disk, before we mark the file offline */
74         if (f->mmap && f->fd >= 0)
75                 mmap_cache_close_fd(f->mmap, f->fd);
76
77         if (f->writable && f->fd >= 0)
78                 fdatasync(f->fd);
79
80         if (f->header) {
81                 /* Mark the file offline. Don't override the archived state if it already is set */
82                 if (f->writable && f->header->state == STATE_ONLINE)
83                         f->header->state = STATE_OFFLINE;
84
85                 munmap(f->header, PAGE_ALIGN(sizeof(Header)));
86         }
87
88         if (f->fd >= 0)
89                 close_nointr_nofail(f->fd);
90
91         free(f->path);
92
93         if (f->mmap)
94                 mmap_cache_unref(f->mmap);
95
96 #ifdef HAVE_XZ
97         free(f->compress_buffer);
98 #endif
99
100 #ifdef HAVE_GCRYPT
101         if (f->fss_file)
102                 munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
103         else if (f->fsprg_state)
104                 free(f->fsprg_state);
105
106         free(f->fsprg_seed);
107
108         if (f->hmac)
109                 gcry_md_close(f->hmac);
110 #endif
111
112         free(f);
113 }
114
115 static int journal_file_init_header(JournalFile *f, JournalFile *template) {
116         Header h;
117         ssize_t k;
118         int r;
119
120         assert(f);
121
122         zero(h);
123         memcpy(h.signature, HEADER_SIGNATURE, 8);
124         h.header_size = htole64(ALIGN64(sizeof(h)));
125
126         h.incompatible_flags =
127                 htole32(f->compress ? HEADER_INCOMPATIBLE_COMPRESSED : 0);
128
129         h.compatible_flags =
130                 htole32(f->seal ? HEADER_COMPATIBLE_SEALED : 0);
131
132         r = sd_id128_randomize(&h.file_id);
133         if (r < 0)
134                 return r;
135
136         if (template) {
137                 h.seqnum_id = template->header->seqnum_id;
138                 h.tail_entry_seqnum = template->header->tail_entry_seqnum;
139         } else
140                 h.seqnum_id = h.file_id;
141
142         k = pwrite(f->fd, &h, sizeof(h), 0);
143         if (k < 0)
144                 return -errno;
145
146         if (k != sizeof(h))
147                 return -EIO;
148
149         return 0;
150 }
151
152 static int journal_file_refresh_header(JournalFile *f) {
153         int r;
154         sd_id128_t boot_id;
155
156         assert(f);
157
158         r = sd_id128_get_machine(&f->header->machine_id);
159         if (r < 0)
160                 return r;
161
162         r = sd_id128_get_boot(&boot_id);
163         if (r < 0)
164                 return r;
165
166         if (sd_id128_equal(boot_id, f->header->boot_id))
167                 f->tail_entry_monotonic_valid = true;
168
169         f->header->boot_id = boot_id;
170
171         f->header->state = STATE_ONLINE;
172
173         /* Sync the online state to disk */
174         msync(f->header, PAGE_ALIGN(sizeof(Header)), MS_SYNC);
175         fdatasync(f->fd);
176
177         return 0;
178 }
179
180 static int journal_file_verify_header(JournalFile *f) {
181         assert(f);
182
183         if (memcmp(f->header->signature, HEADER_SIGNATURE, 8))
184                 return -EBADMSG;
185
186         /* In both read and write mode we refuse to open files with
187          * incompatible flags we don't know */
188 #ifdef HAVE_XZ
189         if ((le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) != 0)
190                 return -EPROTONOSUPPORT;
191 #else
192         if (f->header->incompatible_flags != 0)
193                 return -EPROTONOSUPPORT;
194 #endif
195
196         /* When open for writing we refuse to open files with
197          * compatible flags, too */
198         if (f->writable) {
199 #ifdef HAVE_GCRYPT
200                 if ((le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) != 0)
201                         return -EPROTONOSUPPORT;
202 #else
203                 if (f->header->compatible_flags != 0)
204                         return -EPROTONOSUPPORT;
205 #endif
206         }
207
208         if (f->header->state >= _STATE_MAX)
209                 return -EBADMSG;
210
211         /* The first addition was n_data, so check that we are at least this large */
212         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
213                 return -EBADMSG;
214
215         if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
216                 return -EBADMSG;
217
218         if ((le64toh(f->header->header_size) + le64toh(f->header->arena_size)) > (uint64_t) f->last_stat.st_size)
219                 return -ENODATA;
220
221         if (le64toh(f->header->tail_object_offset) > (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
222                 return -ENODATA;
223
224         if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
225             !VALID64(le64toh(f->header->field_hash_table_offset)) ||
226             !VALID64(le64toh(f->header->tail_object_offset)) ||
227             !VALID64(le64toh(f->header->entry_array_offset)))
228                 return -ENODATA;
229
230         if (le64toh(f->header->data_hash_table_offset) < le64toh(f->header->header_size) ||
231             le64toh(f->header->field_hash_table_offset) < le64toh(f->header->header_size) ||
232             le64toh(f->header->tail_object_offset) < le64toh(f->header->header_size) ||
233             le64toh(f->header->entry_array_offset) < le64toh(f->header->header_size))
234                 return -ENODATA;
235
236         if (f->writable) {
237                 uint8_t state;
238                 sd_id128_t machine_id;
239                 int r;
240
241                 r = sd_id128_get_machine(&machine_id);
242                 if (r < 0)
243                         return r;
244
245                 if (!sd_id128_equal(machine_id, f->header->machine_id))
246                         return -EHOSTDOWN;
247
248                 state = f->header->state;
249
250                 if (state == STATE_ONLINE) {
251                         log_debug("Journal file %s is already online. Assuming unclean closing.", f->path);
252                         return -EBUSY;
253                 } else if (state == STATE_ARCHIVED)
254                         return -ESHUTDOWN;
255                 else if (state != STATE_OFFLINE) {
256                         log_debug("Journal file %s has unknown state %u.", f->path, state);
257                         return -EBUSY;
258                 }
259         }
260
261         f->compress = JOURNAL_HEADER_COMPRESSED(f->header);
262
263         f->seal = JOURNAL_HEADER_SEALED(f->header);
264
265         return 0;
266 }
267
268 static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
269         uint64_t old_size, new_size;
270         int r;
271
272         assert(f);
273
274         /* We assume that this file is not sparse, and we know that
275          * for sure, since we always call posix_fallocate()
276          * ourselves */
277
278         old_size =
279                 le64toh(f->header->header_size) +
280                 le64toh(f->header->arena_size);
281
282         new_size = PAGE_ALIGN(offset + size);
283         if (new_size < le64toh(f->header->header_size))
284                 new_size = le64toh(f->header->header_size);
285
286         if (new_size <= old_size)
287                 return 0;
288
289         if (f->metrics.max_size > 0 &&
290             new_size > f->metrics.max_size)
291                 return -E2BIG;
292
293         if (new_size > f->metrics.min_size &&
294             f->metrics.keep_free > 0) {
295                 struct statvfs svfs;
296
297                 if (fstatvfs(f->fd, &svfs) >= 0) {
298                         uint64_t available;
299
300                         available = svfs.f_bfree * svfs.f_bsize;
301
302                         if (available >= f->metrics.keep_free)
303                                 available -= f->metrics.keep_free;
304                         else
305                                 available = 0;
306
307                         if (new_size - old_size > available)
308                                 return -E2BIG;
309                 }
310         }
311
312         /* Note that the glibc fallocate() fallback is very
313            inefficient, hence we try to minimize the allocation area
314            as we can. */
315         r = posix_fallocate(f->fd, old_size, new_size - old_size);
316         if (r != 0)
317                 return -r;
318
319         if (fstat(f->fd, &f->last_stat) < 0)
320                 return -errno;
321
322         f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
323
324         return 0;
325 }
326
327 static int journal_file_move_to(JournalFile *f, int context, bool keep_always, uint64_t offset, uint64_t size, void **ret) {
328         assert(f);
329         assert(ret);
330
331         if (size <= 0)
332                 return -EINVAL;
333
334         /* Avoid SIGBUS on invalid accesses */
335         if (offset + size > (uint64_t) f->last_stat.st_size) {
336                 /* Hmm, out of range? Let's refresh the fstat() data
337                  * first, before we trust that check. */
338
339                 if (fstat(f->fd, &f->last_stat) < 0 ||
340                     offset + size > (uint64_t) f->last_stat.st_size)
341                         return -EADDRNOTAVAIL;
342         }
343
344         return mmap_cache_get(f->mmap, f->fd, f->prot, context, keep_always, offset, size, &f->last_stat, ret);
345 }
346
347 static uint64_t minimum_header_size(Object *o) {
348
349         static uint64_t table[] = {
350                 [OBJECT_DATA] = sizeof(DataObject),
351                 [OBJECT_FIELD] = sizeof(FieldObject),
352                 [OBJECT_ENTRY] = sizeof(EntryObject),
353                 [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
354                 [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
355                 [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
356                 [OBJECT_TAG] = sizeof(TagObject),
357         };
358
359         if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
360                 return sizeof(ObjectHeader);
361
362         return table[o->object.type];
363 }
364
365 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
366         int r;
367         void *t;
368         Object *o;
369         uint64_t s;
370         unsigned context;
371
372         assert(f);
373         assert(ret);
374
375         /* Objects may only be located at multiple of 64 bit */
376         if (!VALID64(offset))
377                 return -EFAULT;
378
379         /* One context for each type, plus one catch-all for the rest */
380         context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
381
382         r = journal_file_move_to(f, context, false, offset, sizeof(ObjectHeader), &t);
383         if (r < 0)
384                 return r;
385
386         o = (Object*) t;
387         s = le64toh(o->object.size);
388
389         if (s < sizeof(ObjectHeader))
390                 return -EBADMSG;
391
392         if (o->object.type <= OBJECT_UNUSED)
393                 return -EBADMSG;
394
395         if (s < minimum_header_size(o))
396                 return -EBADMSG;
397
398         if (type >= 0 && o->object.type != type)
399                 return -EBADMSG;
400
401         if (s > sizeof(ObjectHeader)) {
402                 r = journal_file_move_to(f, o->object.type, false, offset, s, &t);
403                 if (r < 0)
404                         return r;
405
406                 o = (Object*) t;
407         }
408
409         *ret = o;
410         return 0;
411 }
412
413 static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
414         uint64_t r;
415
416         assert(f);
417
418         r = le64toh(f->header->tail_entry_seqnum) + 1;
419
420         if (seqnum) {
421                 /* If an external seqnum counter was passed, we update
422                  * both the local and the external one, and set it to
423                  * the maximum of both */
424
425                 if (*seqnum + 1 > r)
426                         r = *seqnum + 1;
427
428                 *seqnum = r;
429         }
430
431         f->header->tail_entry_seqnum = htole64(r);
432
433         if (f->header->head_entry_seqnum == 0)
434                 f->header->head_entry_seqnum = htole64(r);
435
436         return r;
437 }
438
439 int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
440         int r;
441         uint64_t p;
442         Object *tail, *o;
443         void *t;
444
445         assert(f);
446         assert(type > 0 && type < _OBJECT_TYPE_MAX);
447         assert(size >= sizeof(ObjectHeader));
448         assert(offset);
449         assert(ret);
450
451         p = le64toh(f->header->tail_object_offset);
452         if (p == 0)
453                 p = le64toh(f->header->header_size);
454         else {
455                 r = journal_file_move_to_object(f, -1, p, &tail);
456                 if (r < 0)
457                         return r;
458
459                 p += ALIGN64(le64toh(tail->object.size));
460         }
461
462         r = journal_file_allocate(f, p, size);
463         if (r < 0)
464                 return r;
465
466         r = journal_file_move_to(f, type, false, p, size, &t);
467         if (r < 0)
468                 return r;
469
470         o = (Object*) t;
471
472         zero(o->object);
473         o->object.type = type;
474         o->object.size = htole64(size);
475
476         f->header->tail_object_offset = htole64(p);
477         f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
478
479         *ret = o;
480         *offset = p;
481
482         return 0;
483 }
484
485 static int journal_file_setup_data_hash_table(JournalFile *f) {
486         uint64_t s, p;
487         Object *o;
488         int r;
489
490         assert(f);
491
492         /* We estimate that we need 1 hash table entry per 768 of
493            journal file and we want to make sure we never get beyond
494            75% fill level. Calculate the hash table size for the
495            maximum file size based on these metrics. */
496
497         s = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
498         if (s < DEFAULT_DATA_HASH_TABLE_SIZE)
499                 s = DEFAULT_DATA_HASH_TABLE_SIZE;
500
501         log_debug("Reserving %llu entries in hash table.", (unsigned long long) (s / sizeof(HashItem)));
502
503         r = journal_file_append_object(f,
504                                        OBJECT_DATA_HASH_TABLE,
505                                        offsetof(Object, hash_table.items) + s,
506                                        &o, &p);
507         if (r < 0)
508                 return r;
509
510         memset(o->hash_table.items, 0, s);
511
512         f->header->data_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
513         f->header->data_hash_table_size = htole64(s);
514
515         return 0;
516 }
517
518 static int journal_file_setup_field_hash_table(JournalFile *f) {
519         uint64_t s, p;
520         Object *o;
521         int r;
522
523         assert(f);
524
525         s = DEFAULT_FIELD_HASH_TABLE_SIZE;
526         r = journal_file_append_object(f,
527                                        OBJECT_FIELD_HASH_TABLE,
528                                        offsetof(Object, hash_table.items) + s,
529                                        &o, &p);
530         if (r < 0)
531                 return r;
532
533         memset(o->hash_table.items, 0, s);
534
535         f->header->field_hash_table_offset = htole64(p + offsetof(Object, hash_table.items));
536         f->header->field_hash_table_size = htole64(s);
537
538         return 0;
539 }
540
541 static int journal_file_map_data_hash_table(JournalFile *f) {
542         uint64_t s, p;
543         void *t;
544         int r;
545
546         assert(f);
547
548         p = le64toh(f->header->data_hash_table_offset);
549         s = le64toh(f->header->data_hash_table_size);
550
551         r = journal_file_move_to(f,
552                                  OBJECT_DATA_HASH_TABLE,
553                                  true,
554                                  p, s,
555                                  &t);
556         if (r < 0)
557                 return r;
558
559         f->data_hash_table = t;
560         return 0;
561 }
562
563 static int journal_file_map_field_hash_table(JournalFile *f) {
564         uint64_t s, p;
565         void *t;
566         int r;
567
568         assert(f);
569
570         p = le64toh(f->header->field_hash_table_offset);
571         s = le64toh(f->header->field_hash_table_size);
572
573         r = journal_file_move_to(f,
574                                  OBJECT_FIELD_HASH_TABLE,
575                                  true,
576                                  p, s,
577                                  &t);
578         if (r < 0)
579                 return r;
580
581         f->field_hash_table = t;
582         return 0;
583 }
584
585 static int journal_file_link_data(JournalFile *f, Object *o, uint64_t offset, uint64_t hash) {
586         uint64_t p, h;
587         int r;
588
589         assert(f);
590         assert(o);
591         assert(offset > 0);
592
593         if (o->object.type != OBJECT_DATA)
594                 return -EINVAL;
595
596         /* This might alter the window we are looking at */
597
598         o->data.next_hash_offset = o->data.next_field_offset = 0;
599         o->data.entry_offset = o->data.entry_array_offset = 0;
600         o->data.n_entries = 0;
601
602         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
603         p = le64toh(f->data_hash_table[h].tail_hash_offset);
604         if (p == 0) {
605                 /* Only entry in the hash table is easy */
606                 f->data_hash_table[h].head_hash_offset = htole64(offset);
607         } else {
608                 /* Move back to the previous data object, to patch in
609                  * pointer */
610
611                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
612                 if (r < 0)
613                         return r;
614
615                 o->data.next_hash_offset = htole64(offset);
616         }
617
618         f->data_hash_table[h].tail_hash_offset = htole64(offset);
619
620         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
621                 f->header->n_data = htole64(le64toh(f->header->n_data) + 1);
622
623         return 0;
624 }
625
626 int journal_file_find_data_object_with_hash(
627                 JournalFile *f,
628                 const void *data, uint64_t size, uint64_t hash,
629                 Object **ret, uint64_t *offset) {
630
631         uint64_t p, osize, h;
632         int r;
633
634         assert(f);
635         assert(data || size == 0);
636
637         osize = offsetof(Object, data.payload) + size;
638
639         if (f->header->data_hash_table_size == 0)
640                 return -EBADMSG;
641
642         h = hash % (le64toh(f->header->data_hash_table_size) / sizeof(HashItem));
643         p = le64toh(f->data_hash_table[h].head_hash_offset);
644
645         while (p > 0) {
646                 Object *o;
647
648                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
649                 if (r < 0)
650                         return r;
651
652                 if (le64toh(o->data.hash) != hash)
653                         goto next;
654
655                 if (o->object.flags & OBJECT_COMPRESSED) {
656 #ifdef HAVE_XZ
657                         uint64_t l, rsize;
658
659                         l = le64toh(o->object.size);
660                         if (l <= offsetof(Object, data.payload))
661                                 return -EBADMSG;
662
663                         l -= offsetof(Object, data.payload);
664
665                         if (!uncompress_blob(o->data.payload, l, &f->compress_buffer, &f->compress_buffer_size, &rsize))
666                                 return -EBADMSG;
667
668                         if (rsize == size &&
669                             memcmp(f->compress_buffer, data, size) == 0) {
670
671                                 if (ret)
672                                         *ret = o;
673
674                                 if (offset)
675                                         *offset = p;
676
677                                 return 1;
678                         }
679 #else
680                         return -EPROTONOSUPPORT;
681 #endif
682
683                 } else if (le64toh(o->object.size) == osize &&
684                            memcmp(o->data.payload, data, size) == 0) {
685
686                         if (ret)
687                                 *ret = o;
688
689                         if (offset)
690                                 *offset = p;
691
692                         return 1;
693                 }
694
695         next:
696                 p = le64toh(o->data.next_hash_offset);
697         }
698
699         return 0;
700 }
701
702 int journal_file_find_data_object(
703                 JournalFile *f,
704                 const void *data, uint64_t size,
705                 Object **ret, uint64_t *offset) {
706
707         uint64_t hash;
708
709         assert(f);
710         assert(data || size == 0);
711
712         hash = hash64(data, size);
713
714         return journal_file_find_data_object_with_hash(f,
715                                                        data, size, hash,
716                                                        ret, offset);
717 }
718
719 static int journal_file_append_data(
720                 JournalFile *f,
721                 const void *data, uint64_t size,
722                 Object **ret, uint64_t *offset) {
723
724         uint64_t hash, p;
725         uint64_t osize;
726         Object *o;
727         int r;
728         bool compressed = false;
729
730         assert(f);
731         assert(data || size == 0);
732
733         hash = hash64(data, size);
734
735         r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
736         if (r < 0)
737                 return r;
738         else if (r > 0) {
739
740                 if (ret)
741                         *ret = o;
742
743                 if (offset)
744                         *offset = p;
745
746                 return 0;
747         }
748
749         osize = offsetof(Object, data.payload) + size;
750         r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
751         if (r < 0)
752                 return r;
753
754         o->data.hash = htole64(hash);
755
756 #ifdef HAVE_XZ
757         if (f->compress &&
758             size >= COMPRESSION_SIZE_THRESHOLD) {
759                 uint64_t rsize;
760
761                 compressed = compress_blob(data, size, o->data.payload, &rsize);
762
763                 if (compressed) {
764                         o->object.size = htole64(offsetof(Object, data.payload) + rsize);
765                         o->object.flags |= OBJECT_COMPRESSED;
766
767                         log_debug("Compressed data object %lu -> %lu", (unsigned long) size, (unsigned long) rsize);
768                 }
769         }
770 #endif
771
772         if (!compressed && size > 0)
773                 memcpy(o->data.payload, data, size);
774
775         r = journal_file_link_data(f, o, p, hash);
776         if (r < 0)
777                 return r;
778
779         /* The linking might have altered the window, so let's
780          * refresh our pointer */
781         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
782         if (r < 0)
783                 return r;
784
785 #ifdef HAVE_GCRYPT
786         r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
787         if (r < 0)
788                 return r;
789 #endif
790
791         if (ret)
792                 *ret = o;
793
794         if (offset)
795                 *offset = p;
796
797         return 0;
798 }
799
800 uint64_t journal_file_entry_n_items(Object *o) {
801         assert(o);
802
803         if (o->object.type != OBJECT_ENTRY)
804                 return 0;
805
806         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
807 }
808
809 uint64_t journal_file_entry_array_n_items(Object *o) {
810         assert(o);
811
812         if (o->object.type != OBJECT_ENTRY_ARRAY)
813                 return 0;
814
815         return (le64toh(o->object.size) - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
816 }
817
818 uint64_t journal_file_hash_table_n_items(Object *o) {
819         assert(o);
820
821         if (o->object.type != OBJECT_DATA_HASH_TABLE &&
822             o->object.type != OBJECT_FIELD_HASH_TABLE)
823                 return 0;
824
825         return (le64toh(o->object.size) - offsetof(Object, hash_table.items)) / sizeof(HashItem);
826 }
827
828 static int link_entry_into_array(JournalFile *f,
829                                  le64_t *first,
830                                  le64_t *idx,
831                                  uint64_t p) {
832         int r;
833         uint64_t n = 0, ap = 0, q, i, a, hidx;
834         Object *o;
835
836         assert(f);
837         assert(first);
838         assert(idx);
839         assert(p > 0);
840
841         a = le64toh(*first);
842         i = hidx = le64toh(*idx);
843         while (a > 0) {
844
845                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
846                 if (r < 0)
847                         return r;
848
849                 n = journal_file_entry_array_n_items(o);
850                 if (i < n) {
851                         o->entry_array.items[i] = htole64(p);
852                         *idx = htole64(hidx + 1);
853                         return 0;
854                 }
855
856                 i -= n;
857                 ap = a;
858                 a = le64toh(o->entry_array.next_entry_array_offset);
859         }
860
861         if (hidx > n)
862                 n = (hidx+1) * 2;
863         else
864                 n = n * 2;
865
866         if (n < 4)
867                 n = 4;
868
869         r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
870                                        offsetof(Object, entry_array.items) + n * sizeof(uint64_t),
871                                        &o, &q);
872         if (r < 0)
873                 return r;
874
875 #ifdef HAVE_GCRYPT
876         r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, q);
877         if (r < 0)
878                 return r;
879 #endif
880
881         o->entry_array.items[i] = htole64(p);
882
883         if (ap == 0)
884                 *first = htole64(q);
885         else {
886                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, ap, &o);
887                 if (r < 0)
888                         return r;
889
890                 o->entry_array.next_entry_array_offset = htole64(q);
891         }
892
893         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
894                 f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);
895
896         *idx = htole64(hidx + 1);
897
898         return 0;
899 }
900
901 static int link_entry_into_array_plus_one(JournalFile *f,
902                                           le64_t *extra,
903                                           le64_t *first,
904                                           le64_t *idx,
905                                           uint64_t p) {
906
907         int r;
908
909         assert(f);
910         assert(extra);
911         assert(first);
912         assert(idx);
913         assert(p > 0);
914
915         if (*idx == 0)
916                 *extra = htole64(p);
917         else {
918                 le64_t i;
919
920                 i = htole64(le64toh(*idx) - 1);
921                 r = link_entry_into_array(f, first, &i, p);
922                 if (r < 0)
923                         return r;
924         }
925
926         *idx = htole64(le64toh(*idx) + 1);
927         return 0;
928 }
929
930 static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
931         uint64_t p;
932         int r;
933         assert(f);
934         assert(o);
935         assert(offset > 0);
936
937         p = le64toh(o->entry.items[i].object_offset);
938         if (p == 0)
939                 return -EINVAL;
940
941         r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
942         if (r < 0)
943                 return r;
944
945         return link_entry_into_array_plus_one(f,
946                                               &o->data.entry_offset,
947                                               &o->data.entry_array_offset,
948                                               &o->data.n_entries,
949                                               offset);
950 }
951
952 static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
953         uint64_t n, i;
954         int r;
955
956         assert(f);
957         assert(o);
958         assert(offset > 0);
959
960         if (o->object.type != OBJECT_ENTRY)
961                 return -EINVAL;
962
963         __sync_synchronize();
964
965         /* Link up the entry itself */
966         r = link_entry_into_array(f,
967                                   &f->header->entry_array_offset,
968                                   &f->header->n_entries,
969                                   offset);
970         if (r < 0)
971                 return r;
972
973         /* log_debug("=> %s seqnr=%lu n_entries=%lu", f->path, (unsigned long) o->entry.seqnum, (unsigned long) f->header->n_entries); */
974
975         if (f->header->head_entry_realtime == 0)
976                 f->header->head_entry_realtime = o->entry.realtime;
977
978         f->header->tail_entry_realtime = o->entry.realtime;
979         f->header->tail_entry_monotonic = o->entry.monotonic;
980
981         f->tail_entry_monotonic_valid = true;
982
983         /* Link up the items */
984         n = journal_file_entry_n_items(o);
985         for (i = 0; i < n; i++) {
986                 r = journal_file_link_entry_item(f, o, offset, i);
987                 if (r < 0)
988                         return r;
989         }
990
991         return 0;
992 }
993
994 static int journal_file_append_entry_internal(
995                 JournalFile *f,
996                 const dual_timestamp *ts,
997                 uint64_t xor_hash,
998                 const EntryItem items[], unsigned n_items,
999                 uint64_t *seqnum,
1000                 Object **ret, uint64_t *offset) {
1001         uint64_t np;
1002         uint64_t osize;
1003         Object *o;
1004         int r;
1005
1006         assert(f);
1007         assert(items || n_items == 0);
1008         assert(ts);
1009
1010         osize = offsetof(Object, entry.items) + (n_items * sizeof(EntryItem));
1011
1012         r = journal_file_append_object(f, OBJECT_ENTRY, osize, &o, &np);
1013         if (r < 0)
1014                 return r;
1015
1016         o->entry.seqnum = htole64(journal_file_entry_seqnum(f, seqnum));
1017         memcpy(o->entry.items, items, n_items * sizeof(EntryItem));
1018         o->entry.realtime = htole64(ts->realtime);
1019         o->entry.monotonic = htole64(ts->monotonic);
1020         o->entry.xor_hash = htole64(xor_hash);
1021         o->entry.boot_id = f->header->boot_id;
1022
1023 #ifdef HAVE_GCRYPT
1024         r = journal_file_hmac_put_object(f, OBJECT_ENTRY, o, np);
1025         if (r < 0)
1026                 return r;
1027 #endif
1028
1029         r = journal_file_link_entry(f, o, np);
1030         if (r < 0)
1031                 return r;
1032
1033         if (ret)
1034                 *ret = o;
1035
1036         if (offset)
1037                 *offset = np;
1038
1039         return 0;
1040 }
1041
1042 void journal_file_post_change(JournalFile *f) {
1043         assert(f);
1044
1045         /* inotify() does not receive IN_MODIFY events from file
1046          * accesses done via mmap(). After each access we hence
1047          * trigger IN_MODIFY by truncating the journal file to its
1048          * current size which triggers IN_MODIFY. */
1049
1050         __sync_synchronize();
1051
1052         if (ftruncate(f->fd, f->last_stat.st_size) < 0)
1053                 log_error("Failed to truncate file to its own size: %m");
1054 }
1055
1056 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
1057         unsigned i;
1058         EntryItem *items;
1059         int r;
1060         uint64_t xor_hash = 0;
1061         struct dual_timestamp _ts;
1062
1063         assert(f);
1064         assert(iovec || n_iovec == 0);
1065
1066         if (!f->writable)
1067                 return -EPERM;
1068
1069         if (!ts) {
1070                 dual_timestamp_get(&_ts);
1071                 ts = &_ts;
1072         }
1073
1074         if (f->tail_entry_monotonic_valid &&
1075             ts->monotonic < le64toh(f->header->tail_entry_monotonic))
1076                 return -EINVAL;
1077
1078 #ifdef HAVE_GCRYPT
1079         r = journal_file_maybe_append_tag(f, ts->realtime);
1080         if (r < 0)
1081                 return r;
1082 #endif
1083
1084         /* alloca() can't take 0, hence let's allocate at least one */
1085         items = alloca(sizeof(EntryItem) * MAX(1, n_iovec));
1086
1087         for (i = 0; i < n_iovec; i++) {
1088                 uint64_t p;
1089                 Object *o;
1090
1091                 r = journal_file_append_data(f, iovec[i].iov_base, iovec[i].iov_len, &o, &p);
1092                 if (r < 0)
1093                         return r;
1094
1095                 xor_hash ^= le64toh(o->data.hash);
1096                 items[i].object_offset = htole64(p);
1097                 items[i].hash = o->data.hash;
1098         }
1099
1100         r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);
1101
1102         journal_file_post_change(f);
1103
1104         return r;
1105 }
1106
1107 static int generic_array_get(JournalFile *f,
1108                              uint64_t first,
1109                              uint64_t i,
1110                              Object **ret, uint64_t *offset) {
1111
1112         Object *o;
1113         uint64_t p = 0, a;
1114         int r;
1115
1116         assert(f);
1117
1118         a = first;
1119         while (a > 0) {
1120                 uint64_t n;
1121
1122                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
1123                 if (r < 0)
1124                         return r;
1125
1126                 n = journal_file_entry_array_n_items(o);
1127                 if (i < n) {
1128                         p = le64toh(o->entry_array.items[i]);
1129                         break;
1130                 }
1131
1132                 i -= n;
1133                 a = le64toh(o->entry_array.next_entry_array_offset);
1134         }
1135
1136         if (a <= 0 || p <= 0)
1137                 return 0;
1138
1139         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1140         if (r < 0)
1141                 return r;
1142
1143         if (ret)
1144                 *ret = o;
1145
1146         if (offset)
1147                 *offset = p;
1148
1149         return 1;
1150 }
1151
1152 static int generic_array_get_plus_one(JournalFile *f,
1153                                       uint64_t extra,
1154                                       uint64_t first,
1155                                       uint64_t i,
1156                                       Object **ret, uint64_t *offset) {
1157
1158         Object *o;
1159
1160         assert(f);
1161
1162         if (i == 0) {
1163                 int r;
1164
1165                 r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1166                 if (r < 0)
1167                         return r;
1168
1169                 if (ret)
1170                         *ret = o;
1171
1172                 if (offset)
1173                         *offset = extra;
1174
1175                 return 1;
1176         }
1177
1178         return generic_array_get(f, first, i-1, ret, offset);
1179 }
1180
/* Results of the test_object() callbacks used by the bisection code */
enum {
        TEST_FOUND = 0, /* the tested item matches the needle */
        TEST_LEFT  = 1, /* the tested item is smaller than the needle */
        TEST_RIGHT = 2  /* the tested item is larger than the needle */
};
1186
1187 static int generic_array_bisect(JournalFile *f,
1188                                 uint64_t first,
1189                                 uint64_t n,
1190                                 uint64_t needle,
1191                                 int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1192                                 direction_t direction,
1193                                 Object **ret,
1194                                 uint64_t *offset,
1195                                 uint64_t *idx) {
1196
1197         uint64_t a, p, t = 0, i = 0, last_p = 0;
1198         bool subtract_one = false;
1199         Object *o, *array = NULL;
1200         int r;
1201
1202         assert(f);
1203         assert(test_object);
1204
1205         a = first;
1206         while (a > 0) {
1207                 uint64_t left, right, k, lp;
1208
1209                 r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
1210                 if (r < 0)
1211                         return r;
1212
1213                 k = journal_file_entry_array_n_items(array);
1214                 right = MIN(k, n);
1215                 if (right <= 0)
1216                         return 0;
1217
1218                 i = right - 1;
1219                 lp = p = le64toh(array->entry_array.items[i]);
1220                 if (p <= 0)
1221                         return -EBADMSG;
1222
1223                 r = test_object(f, p, needle);
1224                 if (r < 0)
1225                         return r;
1226
1227                 if (r == TEST_FOUND)
1228                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1229
1230                 if (r == TEST_RIGHT) {
1231                         left = 0;
1232                         right -= 1;
1233                         for (;;) {
1234                                 if (left == right) {
1235                                         if (direction == DIRECTION_UP)
1236                                                 subtract_one = true;
1237
1238                                         i = left;
1239                                         goto found;
1240                                 }
1241
1242                                 assert(left < right);
1243
1244                                 i = (left + right) / 2;
1245                                 p = le64toh(array->entry_array.items[i]);
1246                                 if (p <= 0)
1247                                         return -EBADMSG;
1248
1249                                 r = test_object(f, p, needle);
1250                                 if (r < 0)
1251                                         return r;
1252
1253                                 if (r == TEST_FOUND)
1254                                         r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1255
1256                                 if (r == TEST_RIGHT)
1257                                         right = i;
1258                                 else
1259                                         left = i + 1;
1260                         }
1261                 }
1262
1263                 if (k > n) {
1264                         if (direction == DIRECTION_UP) {
1265                                 i = n;
1266                                 subtract_one = true;
1267                                 goto found;
1268                         }
1269
1270                         return 0;
1271                 }
1272
1273                 last_p = lp;
1274
1275                 n -= k;
1276                 t += k;
1277                 a = le64toh(array->entry_array.next_entry_array_offset);
1278         }
1279
1280         return 0;
1281
1282 found:
1283         if (subtract_one && t == 0 && i == 0)
1284                 return 0;
1285
1286         if (subtract_one && i == 0)
1287                 p = last_p;
1288         else if (subtract_one)
1289                 p = le64toh(array->entry_array.items[i-1]);
1290         else
1291                 p = le64toh(array->entry_array.items[i]);
1292
1293         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1294         if (r < 0)
1295                 return r;
1296
1297         if (ret)
1298                 *ret = o;
1299
1300         if (offset)
1301                 *offset = p;
1302
1303         if (idx)
1304                 *idx = t + i + (subtract_one ? -1 : 0);
1305
1306         return 1;
1307 }
1308
1309 static int generic_array_bisect_plus_one(JournalFile *f,
1310                                          uint64_t extra,
1311                                          uint64_t first,
1312                                          uint64_t n,
1313                                          uint64_t needle,
1314                                          int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
1315                                          direction_t direction,
1316                                          Object **ret,
1317                                          uint64_t *offset,
1318                                          uint64_t *idx) {
1319
1320         int r;
1321         bool step_back = false;
1322         Object *o;
1323
1324         assert(f);
1325         assert(test_object);
1326
1327         if (n <= 0)
1328                 return 0;
1329
1330         /* This bisects the array in object 'first', but first checks
1331          * an extra  */
1332         r = test_object(f, extra, needle);
1333         if (r < 0)
1334                 return r;
1335
1336         if (r == TEST_FOUND)
1337                 r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;
1338
1339         /* if we are looking with DIRECTION_UP then we need to first
1340            see if in the actual array there is a matching entry, and
1341            return the last one of that. But if there isn't any we need
1342            to return this one. Hence remember this, and return it
1343            below. */
1344         if (r == TEST_LEFT)
1345                 step_back = direction == DIRECTION_UP;
1346
1347         if (r == TEST_RIGHT) {
1348                 if (direction == DIRECTION_DOWN)
1349                         goto found;
1350                 else
1351                         return 0;
1352         }
1353
1354         r = generic_array_bisect(f, first, n-1, needle, test_object, direction, ret, offset, idx);
1355
1356         if (r == 0 && step_back)
1357                 goto found;
1358
1359         if (r > 0 && idx)
1360                 (*idx) ++;
1361
1362         return r;
1363
1364 found:
1365         r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
1366         if (r < 0)
1367                 return r;
1368
1369         if (ret)
1370                 *ret = o;
1371
1372         if (offset)
1373                 *offset = extra;
1374
1375         if (idx)
1376                 *idx = 0;
1377
1378         return 1;
1379 }
1380
1381 static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
1382         assert(f);
1383         assert(p > 0);
1384
1385         if (p == needle)
1386                 return TEST_FOUND;
1387         else if (p < needle)
1388                 return TEST_LEFT;
1389         else
1390                 return TEST_RIGHT;
1391 }
1392
1393 int journal_file_move_to_entry_by_offset(
1394                 JournalFile *f,
1395                 uint64_t p,
1396                 direction_t direction,
1397                 Object **ret,
1398                 uint64_t *offset) {
1399
1400         return generic_array_bisect(f,
1401                                     le64toh(f->header->entry_array_offset),
1402                                     le64toh(f->header->n_entries),
1403                                     p,
1404                                     test_object_offset,
1405                                     direction,
1406                                     ret, offset, NULL);
1407 }
1408
1409
1410 static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
1411         Object *o;
1412         int r;
1413
1414         assert(f);
1415         assert(p > 0);
1416
1417         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1418         if (r < 0)
1419                 return r;
1420
1421         if (le64toh(o->entry.seqnum) == needle)
1422                 return TEST_FOUND;
1423         else if (le64toh(o->entry.seqnum) < needle)
1424                 return TEST_LEFT;
1425         else
1426                 return TEST_RIGHT;
1427 }
1428
1429 int journal_file_move_to_entry_by_seqnum(
1430                 JournalFile *f,
1431                 uint64_t seqnum,
1432                 direction_t direction,
1433                 Object **ret,
1434                 uint64_t *offset) {
1435
1436         return generic_array_bisect(f,
1437                                     le64toh(f->header->entry_array_offset),
1438                                     le64toh(f->header->n_entries),
1439                                     seqnum,
1440                                     test_object_seqnum,
1441                                     direction,
1442                                     ret, offset, NULL);
1443 }
1444
1445 static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
1446         Object *o;
1447         int r;
1448
1449         assert(f);
1450         assert(p > 0);
1451
1452         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1453         if (r < 0)
1454                 return r;
1455
1456         if (le64toh(o->entry.realtime) == needle)
1457                 return TEST_FOUND;
1458         else if (le64toh(o->entry.realtime) < needle)
1459                 return TEST_LEFT;
1460         else
1461                 return TEST_RIGHT;
1462 }
1463
1464 int journal_file_move_to_entry_by_realtime(
1465                 JournalFile *f,
1466                 uint64_t realtime,
1467                 direction_t direction,
1468                 Object **ret,
1469                 uint64_t *offset) {
1470
1471         return generic_array_bisect(f,
1472                                     le64toh(f->header->entry_array_offset),
1473                                     le64toh(f->header->n_entries),
1474                                     realtime,
1475                                     test_object_realtime,
1476                                     direction,
1477                                     ret, offset, NULL);
1478 }
1479
1480 static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
1481         Object *o;
1482         int r;
1483
1484         assert(f);
1485         assert(p > 0);
1486
1487         r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
1488         if (r < 0)
1489                 return r;
1490
1491         if (le64toh(o->entry.monotonic) == needle)
1492                 return TEST_FOUND;
1493         else if (le64toh(o->entry.monotonic) < needle)
1494                 return TEST_LEFT;
1495         else
1496                 return TEST_RIGHT;
1497 }
1498
1499 int journal_file_move_to_entry_by_monotonic(
1500                 JournalFile *f,
1501                 sd_id128_t boot_id,
1502                 uint64_t monotonic,
1503                 direction_t direction,
1504                 Object **ret,
1505                 uint64_t *offset) {
1506
1507         char t[9+32+1] = "_BOOT_ID=";
1508         Object *o;
1509         int r;
1510
1511         assert(f);
1512
1513         sd_id128_to_string(boot_id, t + 9);
1514         r = journal_file_find_data_object(f, t, strlen(t), &o, NULL);
1515         if (r < 0)
1516                 return r;
1517         if (r == 0)
1518                 return -ENOENT;
1519
1520         return generic_array_bisect_plus_one(f,
1521                                              le64toh(o->data.entry_offset),
1522                                              le64toh(o->data.entry_array_offset),
1523                                              le64toh(o->data.n_entries),
1524                                              monotonic,
1525                                              test_object_monotonic,
1526                                              direction,
1527                                              ret, offset, NULL);
1528 }
1529
1530 int journal_file_next_entry(
1531                 JournalFile *f,
1532                 Object *o, uint64_t p,
1533                 direction_t direction,
1534                 Object **ret, uint64_t *offset) {
1535
1536         uint64_t i, n;
1537         int r;
1538
1539         assert(f);
1540         assert(p > 0 || !o);
1541
1542         n = le64toh(f->header->n_entries);
1543         if (n <= 0)
1544                 return 0;
1545
1546         if (!o)
1547                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1548         else {
1549                 if (o->object.type != OBJECT_ENTRY)
1550                         return -EINVAL;
1551
1552                 r = generic_array_bisect(f,
1553                                          le64toh(f->header->entry_array_offset),
1554                                          le64toh(f->header->n_entries),
1555                                          p,
1556                                          test_object_offset,
1557                                          DIRECTION_DOWN,
1558                                          NULL, NULL,
1559                                          &i);
1560                 if (r <= 0)
1561                         return r;
1562
1563                 if (direction == DIRECTION_DOWN) {
1564                         if (i >= n - 1)
1565                                 return 0;
1566
1567                         i++;
1568                 } else {
1569                         if (i <= 0)
1570                                 return 0;
1571
1572                         i--;
1573                 }
1574         }
1575
1576         /* And jump to it */
1577         return generic_array_get(f,
1578                                  le64toh(f->header->entry_array_offset),
1579                                  i,
1580                                  ret, offset);
1581 }
1582
1583 int journal_file_skip_entry(
1584                 JournalFile *f,
1585                 Object *o, uint64_t p,
1586                 int64_t skip,
1587                 Object **ret, uint64_t *offset) {
1588
1589         uint64_t i, n;
1590         int r;
1591
1592         assert(f);
1593         assert(o);
1594         assert(p > 0);
1595
1596         if (o->object.type != OBJECT_ENTRY)
1597                 return -EINVAL;
1598
1599         r = generic_array_bisect(f,
1600                                  le64toh(f->header->entry_array_offset),
1601                                  le64toh(f->header->n_entries),
1602                                  p,
1603                                  test_object_offset,
1604                                  DIRECTION_DOWN,
1605                                  NULL, NULL,
1606                                  &i);
1607         if (r <= 0)
1608                 return r;
1609
1610         /* Calculate new index */
1611         if (skip < 0) {
1612                 if ((uint64_t) -skip >= i)
1613                         i = 0;
1614                 else
1615                         i = i - (uint64_t) -skip;
1616         } else
1617                 i  += (uint64_t) skip;
1618
1619         n = le64toh(f->header->n_entries);
1620         if (n <= 0)
1621                 return -EBADMSG;
1622
1623         if (i >= n)
1624                 i = n-1;
1625
1626         return generic_array_get(f,
1627                                  le64toh(f->header->entry_array_offset),
1628                                  i,
1629                                  ret, offset);
1630 }
1631
1632 int journal_file_next_entry_for_data(
1633                 JournalFile *f,
1634                 Object *o, uint64_t p,
1635                 uint64_t data_offset,
1636                 direction_t direction,
1637                 Object **ret, uint64_t *offset) {
1638
1639         uint64_t n, i;
1640         int r;
1641         Object *d;
1642
1643         assert(f);
1644         assert(p > 0 || !o);
1645
1646         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1647         if (r < 0)
1648                 return r;
1649
1650         n = le64toh(d->data.n_entries);
1651         if (n <= 0)
1652                 return n;
1653
1654         if (!o)
1655                 i = direction == DIRECTION_DOWN ? 0 : n - 1;
1656         else {
1657                 if (o->object.type != OBJECT_ENTRY)
1658                         return -EINVAL;
1659
1660                 r = generic_array_bisect_plus_one(f,
1661                                                   le64toh(d->data.entry_offset),
1662                                                   le64toh(d->data.entry_array_offset),
1663                                                   le64toh(d->data.n_entries),
1664                                                   p,
1665                                                   test_object_offset,
1666                                                   DIRECTION_DOWN,
1667                                                   NULL, NULL,
1668                                                   &i);
1669
1670                 if (r <= 0)
1671                         return r;
1672
1673                 if (direction == DIRECTION_DOWN) {
1674                         if (i >= n - 1)
1675                                 return 0;
1676
1677                         i++;
1678                 } else {
1679                         if (i <= 0)
1680                                 return 0;
1681
1682                         i--;
1683                 }
1684
1685         }
1686
1687         return generic_array_get_plus_one(f,
1688                                           le64toh(d->data.entry_offset),
1689                                           le64toh(d->data.entry_array_offset),
1690                                           i,
1691                                           ret, offset);
1692 }
1693
1694 int journal_file_move_to_entry_by_offset_for_data(
1695                 JournalFile *f,
1696                 uint64_t data_offset,
1697                 uint64_t p,
1698                 direction_t direction,
1699                 Object **ret, uint64_t *offset) {
1700
1701         int r;
1702         Object *d;
1703
1704         assert(f);
1705
1706         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1707         if (r < 0)
1708                 return r;
1709
1710         return generic_array_bisect_plus_one(f,
1711                                              le64toh(d->data.entry_offset),
1712                                              le64toh(d->data.entry_array_offset),
1713                                              le64toh(d->data.n_entries),
1714                                              p,
1715                                              test_object_offset,
1716                                              direction,
1717                                              ret, offset, NULL);
1718 }
1719
1720 int journal_file_move_to_entry_by_monotonic_for_data(
1721                 JournalFile *f,
1722                 uint64_t data_offset,
1723                 sd_id128_t boot_id,
1724                 uint64_t monotonic,
1725                 direction_t direction,
1726                 Object **ret, uint64_t *offset) {
1727
1728         char t[9+32+1] = "_BOOT_ID=";
1729         Object *o, *d;
1730         int r;
1731         uint64_t b, z;
1732
1733         assert(f);
1734
1735         /* First, seek by time */
1736         sd_id128_to_string(boot_id, t + 9);
1737         r = journal_file_find_data_object(f, t, strlen(t), &o, &b);
1738         if (r < 0)
1739                 return r;
1740         if (r == 0)
1741                 return -ENOENT;
1742
1743         r = generic_array_bisect_plus_one(f,
1744                                           le64toh(o->data.entry_offset),
1745                                           le64toh(o->data.entry_array_offset),
1746                                           le64toh(o->data.n_entries),
1747                                           monotonic,
1748                                           test_object_monotonic,
1749                                           direction,
1750                                           NULL, &z, NULL);
1751         if (r <= 0)
1752                 return r;
1753
1754         /* And now, continue seeking until we find an entry that
1755          * exists in both bisection arrays */
1756
1757         for (;;) {
1758                 Object *qo;
1759                 uint64_t p, q;
1760
1761                 r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1762                 if (r < 0)
1763                         return r;
1764
1765                 r = generic_array_bisect_plus_one(f,
1766                                                   le64toh(d->data.entry_offset),
1767                                                   le64toh(d->data.entry_array_offset),
1768                                                   le64toh(d->data.n_entries),
1769                                                   z,
1770                                                   test_object_offset,
1771                                                   direction,
1772                                                   NULL, &p, NULL);
1773                 if (r <= 0)
1774                         return r;
1775
1776                 r = journal_file_move_to_object(f, OBJECT_DATA, b, &o);
1777                 if (r < 0)
1778                         return r;
1779
1780                 r = generic_array_bisect_plus_one(f,
1781                                                   le64toh(o->data.entry_offset),
1782                                                   le64toh(o->data.entry_array_offset),
1783                                                   le64toh(o->data.n_entries),
1784                                                   p,
1785                                                   test_object_offset,
1786                                                   direction,
1787                                                   &qo, &q, NULL);
1788
1789                 if (r <= 0)
1790                         return r;
1791
1792                 if (p == q) {
1793                         if (ret)
1794                                 *ret = qo;
1795                         if (offset)
1796                                 *offset = q;
1797
1798                         return 1;
1799                 }
1800
1801                 z = q;
1802         }
1803
1804         return 0;
1805 }
1806
1807 int journal_file_move_to_entry_by_seqnum_for_data(
1808                 JournalFile *f,
1809                 uint64_t data_offset,
1810                 uint64_t seqnum,
1811                 direction_t direction,
1812                 Object **ret, uint64_t *offset) {
1813
1814         Object *d;
1815         int r;
1816
1817         assert(f);
1818
1819         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1820         if (r < 0)
1821                 return r;
1822
1823         return generic_array_bisect_plus_one(f,
1824                                              le64toh(d->data.entry_offset),
1825                                              le64toh(d->data.entry_array_offset),
1826                                              le64toh(d->data.n_entries),
1827                                              seqnum,
1828                                              test_object_seqnum,
1829                                              direction,
1830                                              ret, offset, NULL);
1831 }
1832
1833 int journal_file_move_to_entry_by_realtime_for_data(
1834                 JournalFile *f,
1835                 uint64_t data_offset,
1836                 uint64_t realtime,
1837                 direction_t direction,
1838                 Object **ret, uint64_t *offset) {
1839
1840         Object *d;
1841         int r;
1842
1843         assert(f);
1844
1845         r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
1846         if (r < 0)
1847                 return r;
1848
1849         return generic_array_bisect_plus_one(f,
1850                                              le64toh(d->data.entry_offset),
1851                                              le64toh(d->data.entry_array_offset),
1852                                              le64toh(d->data.n_entries),
1853                                              realtime,
1854                                              test_object_realtime,
1855                                              direction,
1856                                              ret, offset, NULL);
1857 }
1858
1859 void journal_file_dump(JournalFile *f) {
1860         Object *o;
1861         int r;
1862         uint64_t p;
1863
1864         assert(f);
1865
1866         journal_file_print_header(f);
1867
1868         p = le64toh(f->header->header_size);
1869         while (p != 0) {
1870                 r = journal_file_move_to_object(f, -1, p, &o);
1871                 if (r < 0)
1872                         goto fail;
1873
1874                 switch (o->object.type) {
1875
1876                 case OBJECT_UNUSED:
1877                         printf("Type: OBJECT_UNUSED\n");
1878                         break;
1879
1880                 case OBJECT_DATA:
1881                         printf("Type: OBJECT_DATA\n");
1882                         break;
1883
1884                 case OBJECT_ENTRY:
1885                         printf("Type: OBJECT_ENTRY seqnum=%llu monotonic=%llu realtime=%llu\n",
1886                                (unsigned long long) le64toh(o->entry.seqnum),
1887                                (unsigned long long) le64toh(o->entry.monotonic),
1888                                (unsigned long long) le64toh(o->entry.realtime));
1889                         break;
1890
1891                 case OBJECT_FIELD_HASH_TABLE:
1892                         printf("Type: OBJECT_FIELD_HASH_TABLE\n");
1893                         break;
1894
1895                 case OBJECT_DATA_HASH_TABLE:
1896                         printf("Type: OBJECT_DATA_HASH_TABLE\n");
1897                         break;
1898
1899                 case OBJECT_ENTRY_ARRAY:
1900                         printf("Type: OBJECT_ENTRY_ARRAY\n");
1901                         break;
1902
1903                 case OBJECT_TAG:
1904                         printf("Type: OBJECT_TAG seqnum=%llu epoch=%llu\n",
1905                                (unsigned long long) le64toh(o->tag.seqnum),
1906                                (unsigned long long) le64toh(o->tag.epoch));
1907                         break;
1908                 }
1909
1910                 if (o->object.flags & OBJECT_COMPRESSED)
1911                         printf("Flags: COMPRESSED\n");
1912
1913                 if (p == le64toh(f->header->tail_object_offset))
1914                         p = 0;
1915                 else
1916                         p = p + ALIGN64(le64toh(o->object.size));
1917         }
1918
1919         return;
1920 fail:
1921         log_error("File corrupt");
1922 }
1923
1924 void journal_file_print_header(JournalFile *f) {
1925         char a[33], b[33], c[33];
1926         char x[FORMAT_TIMESTAMP_MAX], y[FORMAT_TIMESTAMP_MAX];
1927         struct stat st;
1928         char bytes[FORMAT_BYTES_MAX];
1929
1930         assert(f);
1931
1932         printf("File Path: %s\n"
1933                "File ID: %s\n"
1934                "Machine ID: %s\n"
1935                "Boot ID: %s\n"
1936                "Sequential Number ID: %s\n"
1937                "State: %s\n"
1938                "Compatible Flags:%s%s\n"
1939                "Incompatible Flags:%s%s\n"
1940                "Header size: %llu\n"
1941                "Arena size: %llu\n"
1942                "Data Hash Table Size: %llu\n"
1943                "Field Hash Table Size: %llu\n"
1944                "Rotate Suggested: %s\n"
1945                "Head Sequential Number: %llu\n"
1946                "Tail Sequential Number: %llu\n"
1947                "Head Realtime Timestamp: %s\n"
1948                "Tail Realtime Timestamp: %s\n"
1949                "Objects: %llu\n"
1950                "Entry Objects: %llu\n",
1951                f->path,
1952                sd_id128_to_string(f->header->file_id, a),
1953                sd_id128_to_string(f->header->machine_id, b),
1954                sd_id128_to_string(f->header->boot_id, c),
1955                sd_id128_to_string(f->header->seqnum_id, c),
1956                f->header->state == STATE_OFFLINE ? "OFFLINE" :
1957                f->header->state == STATE_ONLINE ? "ONLINE" :
1958                f->header->state == STATE_ARCHIVED ? "ARCHIVED" : "UNKNOWN",
1959                JOURNAL_HEADER_SEALED(f->header) ? " SEALED" : "",
1960                (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_SEALED) ? " ???" : "",
1961                JOURNAL_HEADER_COMPRESSED(f->header) ? " COMPRESSED" : "",
1962                (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_COMPRESSED) ? " ???" : "",
1963                (unsigned long long) le64toh(f->header->header_size),
1964                (unsigned long long) le64toh(f->header->arena_size),
1965                (unsigned long long) le64toh(f->header->data_hash_table_size) / sizeof(HashItem),
1966                (unsigned long long) le64toh(f->header->field_hash_table_size) / sizeof(HashItem),
1967                yes_no(journal_file_rotate_suggested(f)),
1968                (unsigned long long) le64toh(f->header->head_entry_seqnum),
1969                (unsigned long long) le64toh(f->header->tail_entry_seqnum),
1970                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
1971                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)),
1972                (unsigned long long) le64toh(f->header->n_objects),
1973                (unsigned long long) le64toh(f->header->n_entries));
1974
1975         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
1976                 printf("Data Objects: %llu\n"
1977                        "Data Hash Table Fill: %.1f%%\n",
1978                        (unsigned long long) le64toh(f->header->n_data),
1979                        100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))));
1980
1981         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
1982                 printf("Field Objects: %llu\n"
1983                        "Field Hash Table Fill: %.1f%%\n",
1984                        (unsigned long long) le64toh(f->header->n_fields),
1985                        100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))));
1986
1987         if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
1988                 printf("Tag Objects: %llu\n",
1989                        (unsigned long long) le64toh(f->header->n_tags));
1990         if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
1991                 printf("Entry Array Objects: %llu\n",
1992                        (unsigned long long) le64toh(f->header->n_entry_arrays));
1993
1994         if (fstat(f->fd, &st) >= 0)
1995                 printf("Disk usage: %s\n", format_bytes(bytes, sizeof(bytes), (off_t) st.st_blocks * 512ULL));
1996 }
1997
/* Opens the journal file at fname and returns a newly allocated JournalFile
 * object for it in *ret.
 *
 * flags:      open() flags; only the O_RDONLY and O_RDWR access modes are
 *             accepted, anything else yields -EINVAL.
 * mode:       file mode passed on to open().
 * compress:   stored in the object only when built with HAVE_XZ.
 * seal:       stored in the object only when built with HAVE_GCRYPT.
 * metrics:    if non-NULL and the file is writable, run through
 *             journal_default_metrics() (which may modify the caller's
 *             structure) and then copied into the object.
 * mmap_cache: if non-NULL a reference to it is taken, otherwise a new cache
 *             is allocated for this file.
 * template:   passed to journal_file_init_header() for newly created files,
 *             and used as source of the metrics if metrics is NULL.
 *
 * The file name must end in ".journal" or ".journal~", otherwise -EINVAL is
 * returned. A file that is empty and opened writable is treated as newly
 * created and gets a header and hash tables set up; any other file has its
 * header verified instead.
 *
 * Returns 0 on success. On failure a negative errno-style error is returned,
 * the half-initialized object is released via journal_file_close() and *ret
 * is left untouched. */
1998 int journal_file_open(
1999                 const char *fname,
2000                 int flags,
2001                 mode_t mode,
2002                 bool compress,
2003                 bool seal,
2004                 JournalMetrics *metrics,
2005                 MMapCache *mmap_cache,
2006                 JournalFile *template,
2007                 JournalFile **ret) {
2008
2009         JournalFile *f;
2010         int r;
2011         bool newly_created = false;
2012
2013         assert(fname);
2014         assert(ret);
2015
             /* Write-only access is not supported */
2016         if ((flags & O_ACCMODE) != O_RDONLY &&
2017             (flags & O_ACCMODE) != O_RDWR)
2018                 return -EINVAL;
2019
2020         if (!endswith(fname, ".journal") &&
2021             !endswith(fname, ".journal~"))
2022                 return -EINVAL;
2023
2024         f = new0(JournalFile, 1);
2025         if (!f)
2026                 return -ENOMEM;
2027
             /* No fd yet; presumably this is what lets journal_file_close()
              * on the fail path tell that nothing was opened -- TODO confirm */
2028         f->fd = -1;
2029         f->mode = mode;
2030
2031         f->flags = flags;
2032         f->prot = prot_from_flags(flags);
2033         f->writable = (flags & O_ACCMODE) != O_RDONLY;
2034 #ifdef HAVE_XZ
2035         f->compress = compress;
2036 #endif
2037 #ifdef HAVE_GCRYPT
2038         f->seal = seal;
2039 #endif
2040
             /* Share the caller's mmap cache if there is one, otherwise create our own */
2041         if (mmap_cache)
2042                 f->mmap = mmap_cache_ref(mmap_cache);
2043         else {
2044                 f->mmap = mmap_cache_new();
2045                 if (!f->mmap) {
2046                         r = -ENOMEM;
2047                         goto fail;
2048                 }
2049         }
2050
2051         f->path = strdup(fname);
2052         if (!f->path) {
2053                 r = -ENOMEM;
2054                 goto fail;
2055         }
2056
2057         f->fd = open(f->path, f->flags|O_CLOEXEC, f->mode);
2058         if (f->fd < 0) {
2059                 r = -errno;
2060                 goto fail;
2061         }
2062
2063         if (fstat(f->fd, &f->last_stat) < 0) {
2064                 r = -errno;
2065                 goto fail;
2066         }
2067
             /* An empty file opened for writing is considered newly created
              * and gets a fresh header */
2068         if (f->last_stat.st_size == 0 && f->writable) {
2069                 newly_created = true;
2070
2071 #ifdef HAVE_GCRYPT
2072                 /* Try to load the FSPRG state, and if we can't, then
2073                  * just don't do sealing */
2074                 if (f->seal) {
2075                         r = journal_file_fss_load(f);
2076                         if (r < 0)
2077                                 f->seal = false;
2078                 }
2079 #endif
2080
2081                 r = journal_file_init_header(f, template);
2082                 if (r < 0)
2083                         goto fail;
2084
                     /* Refresh the cached stat data, the size check below relies on it */
2085                 if (fstat(f->fd, &f->last_stat) < 0) {
2086                         r = -errno;
2087                         goto fail;
2088                 }
2089         }
2090
             /* Refuse files that are too small to hold even a minimal header */
2091         if (f->last_stat.st_size < (off_t) HEADER_SIZE_MIN) {
2092                 r = -EIO;
2093                 goto fail;
2094         }
2095
             /* Map the header directly, with the protection derived from the open flags */
2096         f->header = mmap(NULL, PAGE_ALIGN(sizeof(Header)), prot_from_flags(flags), MAP_SHARED, f->fd, 0);
2097         if (f->header == MAP_FAILED) {
                     /* Don't leave MAP_FAILED behind in the object that is
                      * about to be handed to journal_file_close() */
2098                 f->header = NULL;
2099                 r = -errno;
2100                 goto fail;
2101         }
2102
             /* Only pre-existing files need their header verified */
2103         if (!newly_created) {
2104                 r = journal_file_verify_header(f);
2105                 if (r < 0)
2106                         goto fail;
2107         }
2108
2109 #ifdef HAVE_GCRYPT
             /* For pre-existing writable files a failure to load the FSS
              * state is fatal, unlike in the newly created case above where
              * sealing is merely turned off */
2110         if (!newly_created && f->writable) {
2111                 r = journal_file_fss_load(f);
2112                 if (r < 0)
2113                         goto fail;
2114         }
2115 #endif
2116
2117         if (f->writable) {
                     /* Explicit metrics take precedence over the template's;
                      * with neither, f->metrics stays as zero-initialized by new0() */
2118                 if (metrics) {
2119                         journal_default_metrics(metrics, f->fd);
2120                         f->metrics = *metrics;
2121                 } else if (template)
2122                         f->metrics = template->metrics;
2123
2124                 r = journal_file_refresh_header(f);
2125                 if (r < 0)
2126                         goto fail;
2127         }
2128
2129 #ifdef HAVE_GCRYPT
2130         r = journal_file_hmac_setup(f);
2131         if (r < 0)
2132                 goto fail;
2133 #endif
2134
             /* New files additionally need their hash tables (and, with
              * gcrypt, the first tag) written out before they can be mapped below */
2135         if (newly_created) {
2136                 r = journal_file_setup_field_hash_table(f);
2137                 if (r < 0)
2138                         goto fail;
2139
2140                 r = journal_file_setup_data_hash_table(f);
2141                 if (r < 0)
2142                         goto fail;
2143
2144 #ifdef HAVE_GCRYPT
2145                 r = journal_file_append_first_tag(f);
2146                 if (r < 0)
2147                         goto fail;
2148 #endif
2149         }
2150
2151         r = journal_file_map_field_hash_table(f);
2152         if (r < 0)
2153                 goto fail;
2154
2155         r = journal_file_map_data_hash_table(f);
2156         if (r < 0)
2157                 goto fail;
2158
2159         *ret = f;
2160         return 0;
2161
2162 fail:
             /* Single cleanup path for every failure after the object was allocated */
2163         journal_file_close(f);
2164
2165         return r;
2166 }
2167
2168 int journal_file_rotate(JournalFile **f, bool compress, bool seal) {
2169         char *p;
2170         size_t l;
2171         JournalFile *old_file, *new_file = NULL;
2172         int r;
2173
2174         assert(f);
2175         assert(*f);
2176
2177         old_file = *f;
2178
2179         if (!old_file->writable)
2180                 return -EINVAL;
2181
2182         if (!endswith(old_file->path, ".journal"))
2183                 return -EINVAL;
2184
2185         l = strlen(old_file->path);
2186
2187         p = new(char, l + 1 + 32 + 1 + 16 + 1 + 16 + 1);
2188         if (!p)
2189                 return -ENOMEM;
2190
2191         memcpy(p, old_file->path, l - 8);
2192         p[l-8] = '@';
2193         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
2194         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
2195                  "-%016llx-%016llx.journal",
2196                  (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
2197                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
2198
2199         r = rename(old_file->path, p);
2200         free(p);
2201
2202         if (r < 0)
2203                 return -errno;
2204
2205         old_file->header->state = STATE_ARCHIVED;
2206
2207         r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, old_file, &new_file);
2208         journal_file_close(old_file);
2209
2210         *f = new_file;
2211         return r;
2212 }
2213
2214 int journal_file_open_reliably(
2215                 const char *fname,
2216                 int flags,
2217                 mode_t mode,
2218                 bool compress,
2219                 bool seal,
2220                 JournalMetrics *metrics,
2221                 MMapCache *mmap_cache,
2222                 JournalFile *template,
2223                 JournalFile **ret) {
2224
2225         int r;
2226         size_t l;
2227         char *p;
2228
2229         r = journal_file_open(fname, flags, mode, compress, seal,
2230                               metrics, mmap_cache, template, ret);
2231         if (r != -EBADMSG && /* corrupted */
2232             r != -ENODATA && /* truncated */
2233             r != -EHOSTDOWN && /* other machine */
2234             r != -EPROTONOSUPPORT && /* incompatible feature */
2235             r != -EBUSY && /* unclean shutdown */
2236             r != -ESHUTDOWN /* already archived */)
2237                 return r;
2238
2239         if ((flags & O_ACCMODE) == O_RDONLY)
2240                 return r;
2241
2242         if (!(flags & O_CREAT))
2243                 return r;
2244
2245         if (!endswith(fname, ".journal"))
2246                 return r;
2247
2248         /* The file is corrupted. Rotate it away and try it again (but only once) */
2249
2250         l = strlen(fname);
2251         if (asprintf(&p, "%.*s@%016llx-%016llx.journal~",
2252                      (int) (l-8), fname,
2253                      (unsigned long long) now(CLOCK_REALTIME),
2254                      random_ull()) < 0)
2255                 return -ENOMEM;
2256
2257         r = rename(fname, p);
2258         free(p);
2259         if (r < 0)
2260                 return -errno;
2261
2262         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
2263
2264         return journal_file_open(fname, flags, mode, compress, seal,
2265                                  metrics, mmap_cache, template, ret);
2266 }
2267
2268
2269 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
2270         uint64_t i, n;
2271         uint64_t q, xor_hash = 0;
2272         int r;
2273         EntryItem *items;
2274         dual_timestamp ts;
2275
2276         assert(from);
2277         assert(to);
2278         assert(o);
2279         assert(p);
2280
2281         if (!to->writable)
2282                 return -EPERM;
2283
2284         ts.monotonic = le64toh(o->entry.monotonic);
2285         ts.realtime = le64toh(o->entry.realtime);
2286
2287         if (to->tail_entry_monotonic_valid &&
2288             ts.monotonic < le64toh(to->header->tail_entry_monotonic))
2289                 return -EINVAL;
2290
2291         n = journal_file_entry_n_items(o);
2292         items = alloca(sizeof(EntryItem) * n);
2293
2294         for (i = 0; i < n; i++) {
2295                 uint64_t l, h;
2296                 le64_t le_hash;
2297                 size_t t;
2298                 void *data;
2299                 Object *u;
2300
2301                 q = le64toh(o->entry.items[i].object_offset);
2302                 le_hash = o->entry.items[i].hash;
2303
2304                 r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
2305                 if (r < 0)
2306                         return r;
2307
2308                 if (le_hash != o->data.hash)
2309                         return -EBADMSG;
2310
2311                 l = le64toh(o->object.size) - offsetof(Object, data.payload);
2312                 t = (size_t) l;
2313
2314                 /* We hit the limit on 32bit machines */
2315                 if ((uint64_t) t != l)
2316                         return -E2BIG;
2317
2318                 if (o->object.flags & OBJECT_COMPRESSED) {
2319 #ifdef HAVE_XZ
2320                         uint64_t rsize;
2321
2322                         if (!uncompress_blob(o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize))
2323                                 return -EBADMSG;
2324
2325                         data = from->compress_buffer;
2326                         l = rsize;
2327 #else
2328                         return -EPROTONOSUPPORT;
2329 #endif
2330                 } else
2331                         data = o->data.payload;
2332
2333                 r = journal_file_append_data(to, data, l, &u, &h);
2334                 if (r < 0)
2335                         return r;
2336
2337                 xor_hash ^= le64toh(u->data.hash);
2338                 items[i].object_offset = htole64(h);
2339                 items[i].hash = u->data.hash;
2340
2341                 r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
2342                 if (r < 0)
2343                         return r;
2344         }
2345
2346         return journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);
2347 }
2348
2349 void journal_default_metrics(JournalMetrics *m, int fd) {
2350         uint64_t fs_size = 0;
2351         struct statvfs ss;
2352         char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX];
2353
2354         assert(m);
2355         assert(fd >= 0);
2356
2357         if (fstatvfs(fd, &ss) >= 0)
2358                 fs_size = ss.f_frsize * ss.f_blocks;
2359
2360         if (m->max_use == (uint64_t) -1) {
2361
2362                 if (fs_size > 0) {
2363                         m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */
2364
2365                         if (m->max_use > DEFAULT_MAX_USE_UPPER)
2366                                 m->max_use = DEFAULT_MAX_USE_UPPER;
2367
2368                         if (m->max_use < DEFAULT_MAX_USE_LOWER)
2369                                 m->max_use = DEFAULT_MAX_USE_LOWER;
2370                 } else
2371                         m->max_use = DEFAULT_MAX_USE_LOWER;
2372         } else {
2373                 m->max_use = PAGE_ALIGN(m->max_use);
2374
2375                 if (m->max_use < JOURNAL_FILE_SIZE_MIN*2)
2376                         m->max_use = JOURNAL_FILE_SIZE_MIN*2;
2377         }
2378
2379         if (m->max_size == (uint64_t) -1) {
2380                 m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */
2381
2382                 if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
2383                         m->max_size = DEFAULT_MAX_SIZE_UPPER;
2384         } else
2385                 m->max_size = PAGE_ALIGN(m->max_size);
2386
2387         if (m->max_size < JOURNAL_FILE_SIZE_MIN)
2388                 m->max_size = JOURNAL_FILE_SIZE_MIN;
2389
2390         if (m->max_size*2 > m->max_use)
2391                 m->max_use = m->max_size*2;
2392
2393         if (m->min_size == (uint64_t) -1)
2394                 m->min_size = JOURNAL_FILE_SIZE_MIN;
2395         else {
2396                 m->min_size = PAGE_ALIGN(m->min_size);
2397
2398                 if (m->min_size < JOURNAL_FILE_SIZE_MIN)
2399                         m->min_size = JOURNAL_FILE_SIZE_MIN;
2400
2401                 if (m->min_size > m->max_size)
2402                         m->max_size = m->min_size;
2403         }
2404
2405         if (m->keep_free == (uint64_t) -1) {
2406
2407                 if (fs_size > 0) {
2408                         m->keep_free = PAGE_ALIGN(fs_size / 20); /* 5% of file system size */
2409
2410                         if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
2411                                 m->keep_free = DEFAULT_KEEP_FREE_UPPER;
2412
2413                 } else
2414                         m->keep_free = DEFAULT_KEEP_FREE;
2415         }
2416
2417         log_debug("Fixed max_use=%s max_size=%s min_size=%s keep_free=%s",
2418                   format_bytes(a, sizeof(a), m->max_use),
2419                   format_bytes(b, sizeof(b), m->max_size),
2420                   format_bytes(c, sizeof(c), m->min_size),
2421                   format_bytes(d, sizeof(d), m->keep_free));
2422 }
2423
2424 int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
2425         assert(f);
2426         assert(from || to);
2427
2428         if (from) {
2429                 if (f->header->head_entry_realtime == 0)
2430                         return -ENOENT;
2431
2432                 *from = le64toh(f->header->head_entry_realtime);
2433         }
2434
2435         if (to) {
2436                 if (f->header->tail_entry_realtime == 0)
2437                         return -ENOENT;
2438
2439                 *to = le64toh(f->header->tail_entry_realtime);
2440         }
2441
2442         return 1;
2443 }
2444
2445 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
2446         char t[9+32+1] = "_BOOT_ID=";
2447         Object *o;
2448         uint64_t p;
2449         int r;
2450
2451         assert(f);
2452         assert(from || to);
2453
2454         sd_id128_to_string(boot_id, t + 9);
2455
2456         r = journal_file_find_data_object(f, t, strlen(t), &o, &p);
2457         if (r <= 0)
2458                 return r;
2459
2460         if (le64toh(o->data.n_entries) <= 0)
2461                 return 0;
2462
2463         if (from) {
2464                 r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(o->data.entry_offset), &o);
2465                 if (r < 0)
2466                         return r;
2467
2468                 *from = le64toh(o->entry.monotonic);
2469         }
2470
2471         if (to) {
2472                 r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
2473                 if (r < 0)
2474                         return r;
2475
2476                 r = generic_array_get_plus_one(f,
2477                                                le64toh(o->data.entry_offset),
2478                                                le64toh(o->data.entry_array_offset),
2479                                                le64toh(o->data.n_entries)-1,
2480                                                &o, NULL);
2481                 if (r <= 0)
2482                         return r;
2483
2484                 *to = le64toh(o->entry.monotonic);
2485         }
2486
2487         return 1;
2488 }
2489
2490 bool journal_file_rotate_suggested(JournalFile *f) {
2491         assert(f);
2492
2493         /* If we gained new header fields we gained new features,
2494          * hence suggest a rotation */
2495         if (le64toh(f->header->header_size) < sizeof(Header)) {
2496                 log_debug("%s uses an outdated header, suggesting rotation.", f->path);
2497                 return true;
2498         }
2499
2500         /* Let's check if the hash tables grew over a certain fill
2501          * level (75%, borrowing this value from Java's hash table
2502          * implementation), and if so suggest a rotation. To calculate
2503          * the fill level we need the n_data field, which only exists
2504          * in newer versions. */
2505
2506         if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
2507                 if (le64toh(f->header->n_data) * 4ULL > (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2508                         log_debug("Data hash table of %s has a fill level at %.1f (%llu of %llu items, %llu file size, %llu bytes per hash table item), suggesting rotation.",
2509                                   f->path,
2510                                   100.0 * (double) le64toh(f->header->n_data) / ((double) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem))),
2511                                   (unsigned long long) le64toh(f->header->n_data),
2512                                   (unsigned long long) (le64toh(f->header->data_hash_table_size) / sizeof(HashItem)),
2513                                   (unsigned long long) (f->last_stat.st_size),
2514                                   (unsigned long long) (f->last_stat.st_size / le64toh(f->header->n_data)));
2515                         return true;
2516                 }
2517
2518         if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
2519                 if (le64toh(f->header->n_fields) * 4ULL > (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)) * 3ULL) {
2520                         log_debug("Field hash table of %s has a fill level at %.1f (%llu of %llu items), suggesting rotation.",
2521                                   f->path,
2522                                   100.0 * (double) le64toh(f->header->n_fields) / ((double) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem))),
2523                                   (unsigned long long) le64toh(f->header->n_fields),
2524                                   (unsigned long long) (le64toh(f->header->field_hash_table_size) / sizeof(HashItem)));
2525                         return true;
2526                 }
2527
2528         return false;
2529 }